54#include <lal/basic_types.hpp>
55#include <lal/io/report_correctness.hpp>
56#include <lal/graphs/directed_graph.hpp>
57#include <lal/detail/graphs/cycles.hpp>
// Error-message builder: the treebank file F does not exist.
// F is concatenated to string literals with operator+; the callers in this
// file pass a std::string.
62#define file_does_not_exist(F) \
63"Error: Treebank '" + F + "' does not exist."
// Error-message builder: the treebank file F could not be opened.
// Same requirements on F as file_does_not_exist.
65#define file_could_not_be_opened(F) \
66"Error: Treebank '" + F + "' could not be opened."
// Error-message builder: the token 'chunk' read at position 'i' of a line is
// not a valid non-negative integer. 'i' is formatted with std::to_string and
// 'chunk' is concatenated as a string.
68#define invalid_integer(i, chunk) \
69"Error: Value at position '" + std::to_string(i) + "' (value: '" + chunk + "') \
70is not a valid non-negative integer number."
// Error-message builder: the value stored at position 'i' is out of bounds.
// NOTE: the expansion reads a variable named 'hv' from the scope in which the
// macro is used; 'hv' is not a macro parameter.
72#define number_out_of_bounds(i) \
73"Error: Number at position '" + std::to_string(i) + "' (value: \
74" + std::to_string(hv[i]) + ") is out of bounds."
// Error-message builder: the head vector describes 'r' roots, which is not an
// accepted amount. The count printed is the macro argument 'r' (formatted
// with std::to_string), so the macro no longer depends on a variable named
// 'n_roots' existing at the point of use.
#define wrong_num_roots(r) \
"Error: Wrong number of roots: " + std::to_string(r) + "."
// Error-message builder: a graph with 'n' vertices has 'm' edges, while the
// message states that n-1 edges are expected.
// 'n' is expanded twice (once as n, once inside n-1).
79#define wrong_num_edges(n, m) \
80"Error: Wrong number of edges. Number of vertices is '" + std::to_string(n) + \
81 "'. Number of edges is '" + std::to_string(m) + "'; " + \
82 "should be '" + std::to_string(n-1) + "'."
// Error message: the graph described has cycles. Takes no arguments.
84#define graph_has_cycles \
85"Error: The graph described is not a tree, i.e., it has cycles."
// Error-message builder: vertex 'u' is isolated.
87#define isolated_vertex(u) \
88"Error: Vertex '" + std::to_string(u) + "' is isolated."
// Error-message builder: a self-loop was found at position 'pos'.
90#define self_loop(pos) \
91"Error: found a self-loop at position '" + std::to_string(pos) + "'."
// Fragment of a function body whose signature is not visible in this excerpt
// (presumably head_vector_to_directed_graph -- TODO confirm).
// n is the number of entries of the head vector hv.
97 const uint64_t n = hv.size();
// Visits every index i of hv, from 0 up to n - 1.
99 for (uint64_t i = 0; i < n; ++i) {
// Looks for errors in a head vector 'hv' read at line number 'line'.
// When 'decide' is true the result type is bool and every error site below
// returns true at once; when it is false the result is the vector of reports
// accumulated in 'treebank_err_list'.
120template <
bool dec
ide>
121std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
// Reports gathered when decide == false.
125 std::vector<io::report_treebank_file> treebank_err_list;
128 const uint64_t n = hv.size();
// Root counter; its value is passed to wrong_num_roots further down.
130 uint64_t n_roots = 0;
// Set to false by the out-of-bounds and self-loop checks; it gates the
// 'if (can_make_graph)' section below.
131 bool can_make_graph =
true;
// Per-entry checks over the head vector.
134 for (std::size_t i = 0; i < hv.size(); ++i) {
// A value greater than the number of entries is reported as out of bounds.
141 if (hv[i] > hv.size()) {
142 if constexpr (decide) {
return true; }
144 treebank_err_list.emplace_back(line, number_out_of_bounds(i));
145 can_make_graph =
false;
// Entry i holding the value i + 1 is reported as a self-loop; the message
// uses the position i + 1.
149 else if (hv[i] == i + 1) {
150 if constexpr (decide) {
return true; }
152 treebank_err_list.emplace_back(line, self_loop(i + 1));
153 can_make_graph =
false;
// Wrong number of roots. The condition guarding this report is not visible in
// this excerpt.
160 if constexpr (decide) {
return true; }
162 treebank_err_list.emplace_back(line, wrong_num_roots(n_roots));
// The checks below are skipped when an out-of-bounds value or a self-loop
// was found above.
166 if (can_make_graph) {
// Early exit returning the current result; the condition guarding it is not
// visible in this excerpt.
169 if constexpr (decide) {
return false; }
170 else {
return treebank_err_list; }
// Cycle report. The test that triggers it is not visible in this excerpt.
177 if constexpr (decide) {
return true; }
179 treebank_err_list.emplace_back(line, graph_has_cycles);
// Every node of 'dgraph' with degree 0 is reported as isolated.
184 for (
node u = 0; u < dgraph.get_num_nodes(); ++u) {
185 if (dgraph.get_degree(u) == 0) {
186 if constexpr (decide) {
return true; }
188 treebank_err_list.emplace_back(line, isolated_vertex(u));
// The number of edges is compared against the number of nodes minus one.
194 if (dgraph.get_num_edges() != dgraph.get_num_nodes() - 1) {
195 if constexpr (decide) {
return true; }
197 treebank_err_list.emplace_back
198 (line, wrong_num_edges(dgraph.get_num_nodes(), dgraph.get_num_edges()));
// End of the function: false in decision mode (no error site returned
// earlier), otherwise the accumulated list of reports.
203 if constexpr (decide) {
return false; }
204 else {
return treebank_err_list;}
// Looks for errors in one line of text, 'current_line', located at line number
// 'line'. The line is split into whitespace-separated tokens, each token is
// parsed as an integer, and the resulting values are then checked by the
// find_errors overload that takes 'hv'.
// When 'decide' is true the result type is bool; otherwise it is the vector
// of reports.
214template <
bool dec
ide>
215std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
216find_errors(
const std::string& current_line,
const std::size_t line)
// Reports gathered when decide == false.
219 std::vector<io::report_treebank_file> treebank_err_list;
// Becomes true as soon as one token fails to parse.
221 bool non_numeric_characters =
false;
// operator>> splits the line on whitespace, one token per iteration.
227 std::stringstream ss(current_line);
229 while (ss >> chunk) {
// Parses the whole character range of the token. 'chunk' is not empty here
// because the extraction above succeeded, so chunk.size() - 1 is a valid index.
232 const auto result = std::from_chars
233 (&chunk[0], (&chunk[chunk.size() - 1]) + 1, value);
// NOTE(review): only std::errc::invalid_argument is tested here.
// std::from_chars reports overflow as std::errc::result_out_of_range, and it
// stops at the first non-digit without an error when a digit prefix was
// parsed (e.g. "12abc"). Unless lines not shown in this excerpt handle those
// cases, such tokens are not reported -- confirm.
235 if (result.ec == std::errc::invalid_argument) {
236 if constexpr (decide) {
return true; }
238 treebank_err_list.emplace_back(line, invalid_integer(i, chunk));
239 non_numeric_characters =
true;
// If any token failed to parse, stop here: the head-vector checks are not run.
253 if (non_numeric_characters) {
254 if constexpr (decide) {
return true; }
255 else {
return treebank_err_list; }
// Decision mode: forward the verdict of the head-vector checks.
258 if constexpr (decide) {
259 return find_errors<decide>(hv, line);
// Report mode: move every report produced by the head-vector checks to the
// end of this function's list.
262 auto errors = find_errors<decide>(hv, line);
263 for (std::size_t i = 0; i < errors.size(); ++i) {
264 treebank_err_list.emplace_back( std::move(errors[i]) );
266 return treebank_err_list;
// Looks for errors in the treebank file 'treebank_filename', reading it line
// by line.
// When 'decide' is true the result type is bool and true is returned at the
// first error; otherwise the result is the vector of all reports.
276template <
bool dec
ide>
277std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
// A missing file yields a single report with line number 0.
281 if (not std::filesystem::exists(treebank_filename)) {
282 if constexpr (decide) {
return true; }
283 else {
return {{0, file_does_not_exist(treebank_filename)}}; }
// A file that cannot be opened also yields a single report with line number 0.
286 std::ifstream fin(treebank_filename);
287 if (not fin.is_open()) {
288 if constexpr (decide) {
return true; }
289 else {
return {{0, file_could_not_be_opened(treebank_filename)}}; }
292 std::vector<io::report_treebank_file> treebank_err_list;
293 std::string current_line;
// Line counter starting at 1; it is passed to find_errors for each line.
295 std::size_t line = 1;
296 while (getline(fin, current_line)) {
// Empty lines get dedicated handling; the body of this branch is not visible
// in this excerpt.
297 if (current_line ==
"") {
// Check the current line.
301 const auto r = find_errors<decide>(current_line, line);
302 if constexpr (decide) {
// In decision mode 'r' is a bool: stop at the first line with errors.
304 if (r) {
return true; }
// In report mode 'r' is a vector: append its contents to the list.
308 treebank_err_list.insert(
309 treebank_err_list.end(), r.begin(), r.end()
// End of the function: false in decision mode, otherwise the list of reports.
317 if constexpr (decide) {
return false; }
318 else {
return treebank_err_list; }
// Looks for errors in a treebank collection. 'main_file_name' is a file that
// lists pairs (id, treebank file name); every listed treebank is checked with
// check_correctness_treebank. 'n_threads' is passed to the num_threads clause
// of the OpenMP parallel region below.
// When 'decide' is true the result type is bool; otherwise it is the vector
// of reports.
328template <
bool dec
ide>
329std::conditional_t<decide, bool, std::vector<io::report_treebank_collection>>
331(
const std::string& main_file_name, std::size_t n_threads)
// A missing main file yields a single report built with "-", 0, 0 and the
// error message.
334 if (not std::filesystem::exists(main_file_name)) {
335 if constexpr (decide) {
return true; }
336 else {
return {{
"-", 0, 0, file_does_not_exist(main_file_name)}}; }
// Same kind of report when the main file cannot be opened.
338 std::ifstream fin_main_file(main_file_name);
339 if (not fin_main_file.is_open()) {
340 if constexpr (decide) {
return true; }
341 else {
return {{
"-", 0, 0, file_could_not_be_opened(main_file_name)}}; }
// Reports gathered when decide == false.
344 std::vector<io::report_treebank_collection> dataset_err_list;
// Flag compared against 0 below; a non-zero value stops the reading loop and
// makes the decision-mode result true.
345 char errors_found = 0;
// NOTE(review): 'errors_found' is shared among the threads and is read in the
// loop condition and in the 'if' below without any synchronisation visible
// in this excerpt. The lines not shown may contain further OpenMP directives
// (single/task/critical/atomic) -- confirm.
347 #pragma omp parallel num_threads(n_threads) shared(errors_found)
350 const int tid = omp_get_thread_num();
// Line counter of the main file, starting at 1.
353 std::size_t main_file_line = 1;
354 std::string id, treebankname;
// Reads one (id, treebank file name) pair per iteration and stops once an
// error has been flagged.
356 while (fin_main_file >>
id >> treebankname and errors_found == 0) {
// The treebank path is the main file's path with its file name replaced by
// the name read from the main file.
358 std::filesystem::path treebank_full_path(main_file_name);
359 treebank_full_path.replace_filename(treebankname);
360 const std::string full_path_as_string = treebank_full_path.string();
365 if (errors_found == 0) {
369 check_correctness_treebank<decide>(full_path_as_string);
371 if constexpr (decide) {
// Report mode: convert every treebank-level report into a collection-level one.
381 for (
const auto& report_treebank : r) {
// check_correctness_treebank uses line number 0 for its "file does not
// exist" / "file could not be opened" reports, so a positive line number
// denotes an error located at a line of the treebank file.
382 if (report_treebank.get_line_number() > 0) {
383 dataset_err_list.emplace_back(
386 report_treebank.get_line_number(),
387 report_treebank.get_error_message()
// File-level error: the message is classified by substring. "exist" and
// "opened" match the texts produced by the file_does_not_exist and
// file_could_not_be_opened macros, and each is replaced by a shorter message.
391 const auto& err_msg = report_treebank.get_error_message();
392 std::string new_err_msg;
393 if (err_msg.find(
"exist") != std::string::npos) {
394 new_err_msg =
"Treebank file does not exist";
396 else if (err_msg.find(
"opened") != std::string::npos) {
397 new_err_msg =
"Treebank file could not be opened";
400 dataset_err_list.emplace_back(
403 report_treebank.get_line_number(),
// End of the function: in decision mode the result is true exactly when the
// flag is non-zero; otherwise the list of reports is returned.
418 if constexpr (decide) {
419 return (errors_found == 0 ?
false :
true);
421 else {
return dataset_err_list; }
Directed graph class.
Definition: directed_graph.hpp:68
directed_graph & add_edge_bulk(node s, node t) noexcept
Adds an edge to the graph.
void finish_bulk_add(bool norm=true, bool check=true) noexcept
Completes the inner structure of the graph after adding a bulk of edges.
std::conditional_t< decide, bool, std::vector< io::report_treebank_file > > find_errors(const head_vector &hv, const std::size_t line) noexcept
Find errors in a head vector.
Definition: check_correctness.hpp:122
bool has_undirected_cycles(const graph_t &g, BFS< graph_t > &bfs) noexcept
Returns true if, and only if, the graph has UNDIRECTED cycles.
Definition: cycles.hpp:135
std::conditional_t< decide, bool, std::vector< io::report_treebank_collection > > check_correctness_treebank_collection(const std::string &main_file_name, std::size_t n_threads) noexcept
Find errors in a treebank collection.
Definition: check_correctness.hpp:331
graphs::directed_graph head_vector_to_directed_graph(const head_vector &hv) noexcept
Transforms a head vector into a directed graph.
Definition: check_correctness.hpp:94
std::conditional_t< decide, bool, std::vector< io::report_treebank_file > > check_correctness_treebank(const std::string &treebank_filename) noexcept
Find errors in a treebank file.
Definition: check_correctness.hpp:278
Main namespace of the library.
Definition: basic_types.hpp:50
std::vector< uint64_t > head_vector
See Head vector page for further details.
Definition: basic_types.hpp:64
uint64_t node
Node type. See Node / Vertex page for further details.
Definition: basic_types.hpp:53