55#include <lal/definitions.hpp>
56#include <lal/io/report_correctness.hpp>
57#include <lal/graphs/directed_graph.hpp>
58#include <lal/internal/graphs/cycles.hpp>
59#include <lal/internal/macros.hpp>
65graphs::directed_graph head_vector_to_directed_graph(
const head_vector& hv)
68 const uint32_t n =
static_cast<uint32_t
>(hv.size());
69 graphs::directed_graph t(n);
70 for (uint32_t i = 0; i < n; ++i) {
75 t.add_edge_bulk(i, hv[i] - 1);
78 t.finish_bulk_add(
true);
// -----------------------------------------------------------------------------
// Error-message builders.
//
// Every macro expands to an expression that concatenates string literals with
// std::string values, so at least one operand next to each literal must be a
// std::string (std::to_string results, or a std::string argument).
// Arguments and whole expansions are parenthesised so that they compose
// safely with surrounding expressions.

// Message for a treebank file that does not exist. F: file name (std::string).
#define file_does_not_exist(F) \
	("Error: Treebank '" + (F) + "' does not exist.")

// Message for a treebank file that exists but could not be opened.
// F: file name (std::string).
#define file_could_not_be_opened(F) \
	("Error: Treebank '" + (F) + "' could not be opened.")

// Message for a token that is not a valid non-negative integer.
// i: position of the token; chunk: the offending token (std::string).
#define invalid_integer(i, chunk) \
	("Error: Value at position '" + std::to_string(i) + "' (value: '" + \
	(chunk) + "') is not a valid non-negative integer number.")

// Message for a head value that is out of bounds.
// i: position of the value.
// NOTE: this macro reads a variable named 'hv' from the scope where it is
// expanded; it only works where such an indexable variable exists.
#define number_out_of_bounds(i) \
	("Error: Number at position '" + std::to_string(i) + "' (value: " + \
	std::to_string(hv[(i)]) + ") is out of bounds.")

// Message for a wrong number of roots. r: the number of roots found.
#define wrong_num_roots(r) \
	("Error: Wrong number of roots: " + std::to_string(r) + ".")

// Message for a wrong number of edges.
// n: number of vertices; m: number of edges. The expected amount is n - 1.
#define wrong_num_edges(n, m) \
	("Error: Wrong number of edges. Number of vertices is '" + \
	std::to_string(n) + "'. Number of edges is '" + std::to_string(m) + \
	"'; " + "should be '" + std::to_string((n) - 1) + "'.")

// Message for a graph that contains cycles.
#define graph_has_cycles \
	"Error: The graph described is not a tree, i.e., it has cycles."

// Message for an isolated vertex. u: index of the vertex.
#define isolated_vertex(u) \
	("Error: Vertex '" + std::to_string(u) + "' is isolated.")

// Message for a self-loop. pos: position at which the self-loop was found.
#define self_loop(pos) \
	("Error: found a self-loop at position '" + std::to_string(pos) + "'.")
// Looks for errors in a single line of a treebank file (called below as
// find_errors<decide>(current_line, line)).
//
// Parameters:
//   current_line  text of the line being checked.
//   line          number attached to every error reported for this line.
//
// Returns: when 'decide' is true, a bool -- true as soon as one error is found,
// false if none is found. Otherwise, the list of all errors found, as
// io::report_treebank_file objects.
//
// NOTE(review): this listing is missing lines (numeric prefixes are residue of
// the original line numbers): the template header, the function name, several
// braces/else branches and the declarations of 'hv', 'chunk', 'value' and 'i'
// are not visible here. Comments below describe only what is visible.
114std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
116(
const std::string& current_line,
const size_t line)
119 std::vector<io::report_treebank_file> treebank_err_list;
// the list is never filled when only a yes/no answer is wanted
121 if constexpr (decide) {
122 UNUSED(treebank_err_list);
126 bool non_numeric_characters =
false;
// split the line into whitespace-separated chunks and parse each as a number
132 std::stringstream ss(current_line);
134 while (ss >> chunk) {
137 const auto result = std::from_chars
138 (&chunk[0], (&chunk[chunk.size() - 1]) + 1, value);
// NOTE(review): only 'invalid_argument' is tested in the visible code; an
// out-of-range value or trailing non-digit characters after a valid prefix
// do not appear to be detected here -- confirm.
140 if (result.ec == std::errc::invalid_argument) {
141 if constexpr (decide) {
return true; }
143 treebank_err_list.emplace_back(line, invalid_integer(i, chunk));
144 non_numeric_characters =
true;
// stop here when some chunk was not a number: the remaining checks need
// the numeric values
158 if (non_numeric_characters) {
159 if constexpr (decide) {
return true; }
160 else {
return treebank_err_list; }
// amount of values read
164 const uint32_t n =
static_cast<uint32_t
>(hv.size());
166 uint32_t n_roots = 0;
// set to false when the values cannot be turned into a graph
167 bool can_make_graph =
true;
170 for (
size_t i = 0; i < hv.size(); ++i) {
// a value larger than the amount of values is out of bounds
177 if (hv[i] > hv.size()) {
178 if constexpr (decide) {
return true; }
180 treebank_err_list.emplace_back(line, number_out_of_bounds(i));
181 can_make_graph =
false;
// the value at 0-based position i equals i + 1: reported as a self-loop
185 else if (hv[i] == i + 1) {
186 if constexpr (decide) {
return true; }
188 treebank_err_list.emplace_back(line, self_loop(i + 1));
189 can_make_graph =
false;
// report a wrong number of roots (the condition guarding this report is not
// visible in this listing)
196 if constexpr (decide) {
return true; }
198 treebank_err_list.emplace_back(line, wrong_num_roots(n_roots));
// structural checks, only when no value prevents building the graph
202 if (can_make_graph) {
// early exit without errors (its guarding condition is not visible here)
205 if constexpr (decide) {
return false; }
206 else {
return treebank_err_list; }
// build the graph described by the values and look for undirected cycles
210 const auto dgraph = head_vector_to_directed_graph(hv);
211 const bool has_cycles = internal::has_undirected_cycles(dgraph);
213 if constexpr (decide) {
return true; }
215 treebank_err_list.emplace_back(line, graph_has_cycles);
// vertices of degree 0 are reported as isolated
220 for (
node u = 0; u < dgraph.get_num_nodes(); ++u) {
221 if (dgraph.get_degree(u) == 0) {
222 if constexpr (decide) {
return true; }
224 treebank_err_list.emplace_back(line, isolated_vertex(u));
// the graph must have exactly (number of nodes - 1) edges
230 if (dgraph.get_num_edges() != dgraph.get_num_nodes() - 1) {
231 if constexpr (decide) {
return true; }
233 treebank_err_list.emplace_back
234 (line, wrong_num_edges(dgraph.get_num_nodes(), dgraph.get_num_edges()));
// end of the checks: 'false' (no errors) or the list of errors collected
239 if constexpr (decide) {
return false; }
240 else {
return treebank_err_list;}
// Checks the correctness of a whole treebank file, line by line.
//
// Parameters:
//   treebank_filename  path of the treebank file to check.
//
// Returns: when 'decide' is true, a bool -- true if the file does not exist,
// cannot be opened, or some line has errors; false otherwise. When 'decide'
// is false, the list of errors as io::report_treebank_file objects; a missing
// or unopenable file yields a single report with line number 0.
//
// NOTE(review): this listing is missing lines (the template header, braces,
// else branches, and the declaration/update of 'line' are not visible).
244std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
245check_correctness_treebank(
const std::string& treebank_filename)
// the file must exist ...
248 if (not std::filesystem::exists(treebank_filename)) {
249 if constexpr (decide) {
return true; }
250 else {
return {{0, file_does_not_exist(treebank_filename)}}; }
// ... and must be readable
253 std::ifstream fin(treebank_filename);
254 if (not fin.is_open()) {
255 if constexpr (decide) {
return true; }
256 else {
return {{0, file_could_not_be_opened(treebank_filename)}}; }
259 std::vector<io::report_treebank_file> treebank_err_list;
260 std::string current_line;
// process the file one line at a time
263 while (getline(fin, current_line)) {
// empty lines are tested for explicitly (the body of this branch is not
// visible in this listing)
264 if (current_line ==
"") {
// 'r' is a bool when 'decide' is true, a list of reports otherwise
268 const auto r = find_errors<decide>(current_line, line);
269 if constexpr (decide) {
// one erroneous line is enough to answer
271 if (r) {
return true; }
// append the errors of this line to the errors of the file
275 treebank_err_list.insert(
276 treebank_err_list.end(), r.begin(), r.end()
// all lines processed
284 if constexpr (decide) {
return false; }
285 else {
return treebank_err_list; }
// Checks the correctness of a treebank collection, described by a main file
// made of (id, treebank file name) pairs.
//
// Parameters:
//   main_file_name  path of the main file of the collection.
//   n_threads       value given to the 'num_threads' clause of the OpenMP
//                   parallel region.
//
// Returns: when 'decide' is true, a bool -- true if the main file does not
// exist or cannot be opened, otherwise whether 'errors_found' is non-zero.
// When 'decide' is false, the list of errors as
// io::report_treebank_collection objects.
//
// NOTE(review): this listing is missing lines (the template header, braces,
// the structure inside the OpenMP region, the declaration of 'r' and every
// write to 'errors_found' are not visible). In particular, how accesses to
// 'errors_found' and 'dataset_err_list' are synchronised cannot be told from
// here -- verify against the complete file.
290std::conditional_t<decide, bool, std::vector<io::report_treebank_collection>>
291check_correctness_treebank_collection
292(
const std::string& main_file_name,
size_t n_threads)
// the main file must exist ...
295 if (not std::filesystem::exists(main_file_name)) {
296 if constexpr (decide) {
return true; }
297 else {
return {{
"-", 0, 0, file_does_not_exist(main_file_name)}}; }
// ... and must be readable
299 std::ifstream fin_main_file(main_file_name);
300 if (not fin_main_file.is_open()) {
301 if constexpr (decide) {
return true; }
302 else {
return {{
"-", 0, 0, file_could_not_be_opened(main_file_name)}}; }
305 std::vector<io::report_treebank_collection> dataset_err_list;
// flag shared among threads; 0 means that no error is known so far
306 char errors_found = 0;
308 #pragma omp parallel num_threads(n_threads) shared(errors_found)
311 const int tid = omp_get_thread_num();
314 size_t main_file_line = 1;
315 std::string id, treebankname;
// read (id, treebank name) pairs while no error has been flagged
317 while (fin_main_file >>
id >> treebankname and errors_found == 0) {
// the treebank file is looked for next to the main file: the file name
// component of the main file's path is replaced by the treebank's name
319 std::filesystem::path treebank_full_path(main_file_name);
320 treebank_full_path.replace_filename(treebankname);
321 const std::string full_path_as_string = treebank_full_path.string();
// the flag is tested again right before checking the treebank
326 if (errors_found == 0) {
330 check_correctness_treebank<decide>(full_path_as_string);
332 if constexpr (decide) {
// turn every report of the treebank file into a report of the collection
342 for (
const auto& report_treebank : r) {
// a positive line number is forwarded together with its message
343 if (report_treebank.get_line_number() > 0) {
344 dataset_err_list.emplace_back(
347 report_treebank.get_line_number(),
348 report_treebank.get_error_message()
// otherwise the message is rewritten; the kind of error is recognised by
// searching for the words "exist" and "opened", which occur in the texts
// of file_does_not_exist and file_could_not_be_opened respectively
352 const auto& err_msg = report_treebank.get_error_message();
353 std::string new_err_msg;
354 if (err_msg.find(
"exist") != std::string::npos) {
355 new_err_msg =
"Treebank file does not exist";
357 else if (err_msg.find(
"opened") != std::string::npos) {
358 new_err_msg =
"Treebank file could not be opened";
361 dataset_err_list.emplace_back(
364 report_treebank.get_line_number(),
// final answer: whether an error was flagged, or the list of errors
379 if constexpr (decide) {
380 return (errors_found == 0 ?
false : true);
382 else {
return dataset_err_list; }
Main namespace of the library.
Definition definitions.hpp:48
uint32_t node
Node type.
Definition definitions.hpp:51
std::vector< uint32_t > head_vector
A head vector representation of a (usually) rooted tree.
Definition definitions.hpp:114