56#include <lal/basic_types.hpp>
57#include <lal/io/head_vector_error.hpp>
58#include <lal/io/treebank_file_error.hpp>
59#include <lal/io/treebank_file_report.hpp>
60#include <lal/io/treebank_collection_report.hpp>
61#include <lal/graphs/directed_graph.hpp>
62#include <lal/detail/graphs/cycles.hpp>
63#include <lal/detail/graphs/conversions.hpp>
// Builders for the human-readable error messages used by the checks in this
// file. Every function-like macro below expands to a single parenthesised
// expression of type std::string; every macro argument is parenthesised
// inside the expansion so that any expression can be passed safely.

// Message: the treebank file F does not exist.
// F must be a std::string (it is concatenated with string literals).
#define file_does_not_exist(F) \
    ("Error: Treebank '" + (F) + "' does not exist.")

// Message: the treebank file F exists but could not be opened.
// F must be a std::string.
#define file_could_not_be_opened(F) \
    ("Error: Treebank '" + (F) + "' could not be opened.")

// Message: the token `chunk` (a std::string) found at position `i` is not a
// valid non-negative integer.
#define invalid_integer(i, chunk) \
    ("Error: Value at position '" + std::to_string(i) + "' (value: '" + \
     (chunk) + "') is not a valid " \
     "non-negative integer number.")

// Message: the head stored at position `i` is out of bounds.
// NOTE: the expansion reads a variable named `hv` from the scope where the
// macro is used; this is part of the macro's existing contract and is kept
// so that current call sites continue to work.
#define head_out_of_bounds(i) \
    ("Error: Head index at position '" + std::to_string(i) + "' (value: " + \
     std::to_string(hv[(i)]) + ") is out of bounds.")

// Message: the head vector has `r` roots.
// The expansion uses the argument `r` itself rather than a variable that
// happens to be called `n_roots` at the call site.
#define wrong_num_roots(r) \
    ("Error: Wrong number of roots: " + std::to_string(r) + ".")

// Message: a graph with `n` vertices has `m` edges instead of n - 1.
// `(n) - 1` keeps the subtraction outside of whatever expression is passed
// as `n` (e.g. `a >> b` would otherwise become `a >> (b - 1)`).
#define wrong_num_edges(n, m) \
    ("Error: Wrong number of edges. Number of vertices is '" + \
     std::to_string(n) + "'. Number of edges is '" + std::to_string(m) + \
     "'; " + "should be '" + std::to_string((n) - 1) + "'.")

// Message: the graph is not a tree because it has cycles.
// This one is a plain string literal (not a std::string expression).
#define graph_has_cycles_msg \
    "Error: The graph described is not a tree, i.e., it has cycles."

// Message: vertex `u` is isolated.
#define isolated_vertex(u) \
    ("Error: Vertex '" + std::to_string(u) + "' is isolated.")

// Message: a self-loop was found at position `pos`.
#define self_loop(pos) \
    ("Error: found a self-loop at position '" + std::to_string(pos) + "'.")
// Checks a head vector `hv` for errors.
//
// The template parameter `decide` selects the result type (see the
// std::conditional_t below): when it is true the result is a bool and the
// function returns true as soon as any check fails (false if none does);
// when it is false every failed check appends an io::head_vector_error to a
// list and that list is returned.
// NOTE(review): this listing is missing lines (the function name, its
// parameter list and several statements), so only the visible checks are
// described here.
107template <
bool dec
ide>
108[[nodiscard]] std::conditional_t<decide, bool, std::vector<io::head_vector_error>>
// Errors found so far; this is the value returned when decide is false.
112 std::vector<io::head_vector_error> error_list;
115 const uint64_t n = hv.size();
117 uint64_t n_roots = 0;
// Set to false by the per-position checks below when they find a problem;
// tested later at `if (can_make_graph)`.
118 bool can_make_graph =
true;
121 for (std::size_t i = 0; i < hv.size(); ++i) {
// A head value larger than the length of the vector is out of bounds.
128 if (hv[i] > hv.size()) {
129 if constexpr (decide) {
return true; }
131 error_list.emplace_back(
132 head_out_of_bounds(i),
135 can_make_graph =
false;
// The entry at index i holds the value i + 1.
// NOTE(review): presumably this is the self-loop case (a 1-based head that
// points to its own position) -- the arguments of the report are not
// visible in this listing; confirm.
139 else if (hv[i] == i + 1) {
140 if constexpr (decide) {
return true; }
142 error_list.emplace_back(
146 can_make_graph =
false;
// Wrong number of roots (the condition guarding this report is not visible
// in this listing).
153 if constexpr (decide) {
return true; }
155 error_list.emplace_back(
156 wrong_num_roots(n_roots),
162 if (can_make_graph) {
165 if constexpr (decide) {
return false; }
166 else {
return error_list; }
// The graph has cycles (the condition guarding this report is not visible
// in this listing).
174 if constexpr (decide) {
return true; }
176 error_list.emplace_back(
177 graph_has_cycles_msg,
// A vertex of degree 0 in `dgraph` counts as an error.
185 for (
node u = 0; u < dgraph.get_num_nodes(); ++u) {
186 if (dgraph.get_degree(u) == 0) {
187 if constexpr (decide) {
return true; }
189 error_list.emplace_back(
// The number of edges must be exactly the number of nodes minus one.
198 if (dgraph.get_num_edges() != dgraph.get_num_nodes() - 1) {
199 if constexpr (decide) {
return true; }
201 error_list.emplace_back(
202 wrong_num_edges(dgraph.get_num_nodes(), dgraph.get_num_edges()),
// Final result: in decide mode no check returned early, so the answer is
// false; otherwise the accumulated list is returned.
209 if constexpr (decide) {
return false; }
210 else {
return error_list;}
// Checks the textual form of a head vector: the whitespace-separated tokens
// of `current_line` are parsed as numbers with std::from_chars and the
// tokens that cannot be parsed are reported.
//
// As with the other checks in this file, `decide` selects the result type:
// a bool (true as soon as an error is found) or the list of errors.
// NOTE(review): this listing is missing lines (the function name, its
// parameter list, several declarations and the end of the body), so only
// the visible part is described here.
219template <
bool dec
ide>
220[[nodiscard]] std::conditional_t<decide, bool, std::vector<io::head_vector_error>>
224 std::vector<io::head_vector_error> error_list;
// Becomes true when at least one token failed to parse.
226 bool non_numeric_characters =
false;
232 std::stringstream ss(current_line);
// operator>> yields one whitespace-delimited token per iteration.
234 while (ss >> chunk) {
// Parse the whole token: the range is [first character, one past the last).
237 const auto result = std::from_chars
238 (&chunk[0], (&chunk[chunk.size() - 1]) + 1, value);
// NOTE(review): only std::errc::invalid_argument is tested here. A token
// with a numeric prefix followed by other characters, or a value that is
// out of range, is not caught by this visible test -- confirm whether the
// lines missing from this listing handle those cases.
240 if (result.ec == std::errc::invalid_argument) {
241 if constexpr (decide) {
return true; }
243 error_list.emplace_back(
244 invalid_integer(i, chunk),
247 non_numeric_characters =
true;
// Parse errors end the check here.
260 if (non_numeric_characters) {
261 if constexpr (decide) {
return true; }
262 else {
return error_list; }
// Past the early return above, no parse error has been recorded.
267 assert(error_list.size() == 0);
// Finds errors in the treebank file named `treebank_filename`.
//
// `decide` selects the result type: a bool (true as soon as an error is
// found, false otherwise) or an io::treebank_file_report with the errors.
// NOTE(review): this listing is missing lines (the function name, its
// parameter list and several statements), so only the visible part is
// described here.
278template <
bool dec
ide>
279[[nodiscard]] std::conditional_t<decide, bool, io::treebank_file_report>
// First failure mode: the file is not present in the file system.
283 if (not std::filesystem::exists(treebank_filename)) {
284 if constexpr (decide) {
return true; }
287 file_does_not_exist(treebank_filename),
// Second failure mode: the file is present but the stream cannot be opened.
293 std::ifstream fin(treebank_filename);
294 if (not fin.is_open()) {
295 if constexpr (decide) {
return true; }
298 file_could_not_be_opened(treebank_filename),
307 std::string current_line;
// Line counter, starting at 1.
309 std::size_t line = 1;
// The file is read one line at a time; empty lines get their own branch.
310 while (getline(fin, current_line)) {
311 if (current_line ==
"") {
// In decide mode a true result `r` for a line ends the check immediately.
316 if constexpr (decide) {
318 if (r) {
return true; }
// Final result: false (no error) in decide mode, the report otherwise.
331 if constexpr (decide) {
return false; }
332 else {
return report; }
// Finds errors in a treebank collection described by a main file.
//
// Parameters:
//   main_file_name  path of the main file; it is read as a sequence of
//                   (id, treebank file name) pairs.
//   n_threads       number of threads requested for the OpenMP parallel
//                   region.
// `decide` selects the result type: a bool (true if an error was found,
// false otherwise) or an io::treebank_collection_report.
// NOTE(review): this listing is missing lines (the function name and
// several statements), so only the visible part is described here.
342template <
bool dec
ide>
343[[nodiscard]] std::conditional_t<decide, bool, io::treebank_collection_report>
345(
const std::string& main_file_name,
const std::size_t n_threads)
// First failure mode: the main file is not present in the file system.
348 if (not std::filesystem::exists(main_file_name)) {
349 if constexpr (decide) {
return true; }
352 file_does_not_exist(main_file_name),
// Second failure mode: the main file cannot be opened.
357 std::ifstream fin_main_file(main_file_name);
358 if (not fin_main_file.is_open()) {
359 if constexpr (decide) {
return true; }
362 file_could_not_be_opened(main_file_name),
// Flag (0 = no errors) tested by the loop below and by the final return.
// NOTE(review): it is declared shared in the parallel region and read in
// the loop condition; how writes to it are synchronised is not visible in
// this listing -- confirm that they are protected (atomic/critical).
370 char errors_found = 0;
372 #pragma omp parallel num_threads(n_threads) shared(errors_found)
375 const int tid = omp_get_thread_num();
378 std::size_t main_file_line = 1;
379 std::string id, treebankname;
// Reads (id, treebank file name) pairs until the input is exhausted or an
// error has been flagged.
381 while (fin_main_file >>
id >> treebankname and errors_found == 0) {
// The treebank path is obtained by replacing the filename component of the
// main file's path with the treebank's file name.
383 std::filesystem::path treebank_full_path(main_file_name);
384 treebank_full_path.replace_filename(treebankname);
385 const std::string full_path_as_string = treebank_full_path.string();
390 if (errors_found == 0) {
395 if constexpr (decide) {
404 if (r.get_num_errors() > 0) {
409 std::move(treebankname),
// Final result: in decide mode, true exactly when an error was flagged;
// otherwise the report.
424 if constexpr (decide) {
425 return (errors_found == 0 ?
false :
true);
427 else {
return report; }
Head vector error report class.
Definition head_vector_error.hpp:64
Report on a treebank collection.
Definition treebank_collection_report.hpp:67
void set_treebank_error(const treebank_file_error &err) noexcept
Sets the error concerning the main file of the collection.
Definition treebank_collection_report.hpp:136
void add_report(const uint64_t line_number, const std::string &treebank_file_name, const std::string &treebank_id, const treebank_file_report &err) noexcept
Adds a report on a treebank file.
Definition treebank_collection_report.hpp:103
Report on a treebank file.
Definition treebank_file_report.hpp:69
void add_error(const uint64_t line_number, const head_vector_error &err) noexcept
Adds an error to the list of errors.
Definition treebank_file_report.hpp:125
void set_treebank_error(const treebank_file_error &err) noexcept
Sets the treebank error m_treebank_error.
Definition treebank_file_report.hpp:138
std::conditional_t< decide, bool, io::treebank_collection_report > check_correctness_treebank_collection(const std::string &main_file_name, const std::size_t n_threads) noexcept
Find errors in a treebank collection.
Definition check_correctness.hpp:345
std::conditional_t< decide, bool, std::vector< io::head_vector_error > > find_errors(const head_vector &hv) noexcept
Find errors in a head vector.
Definition check_correctness.hpp:109
graph_t from_head_vector_to_graph(const head_vector &hv, const bool normalize, const bool check) noexcept
Transforms a head vector in a directed graph.
Definition conversions.hpp:191
bool has_undirected_cycles(const graph_t &g, BFS< graph_t > &bfs) noexcept
Returns true if, and only if, the graph has UNDIRECTED cycles.
Definition cycles.hpp:138
std::conditional_t< decide, bool, io::treebank_file_report > check_correctness_treebank(const std::string &treebank_filename) noexcept
Find errors in a treebank file.
Definition check_correctness.hpp:280
@ self_loop
The current head index points to itself.
@ wrong_number_of_edges
The graph does not contain enough edges to be a tree.
@ head_out_bounds
The current head index is a valid non-negative integer value, but points outside the head vector.
@ isolated_vertex
There are isolated vertices in the graph.
@ graph_has_cycles
The graph contains an undirected cycle, that is, the graph is not a tree.
@ invalid_integer
The current head index is not a valid non-negative integer number. It could be a letter,...
@ wrong_number_of_roots
The head vector contains too many roots.
@ main_file_could_not_be_opened
Main file could not be opened.
@ no_error
No error occurred.
@ treebank_file_does_not_exist
A treebank was not found on disk.
@ treebank_result_file_could_not_be_opened
The resulting file of processing a treebank could not be opened.
@ main_file_does_not_exist
Main file does not exist.
Main namespace of the library.
Definition basic_types.hpp:48
std::vector< uint64_t > head_vector
See Head vector page for further details.
Definition basic_types.hpp:58
uint64_t node
Node type. See Node / Vertex page for further details.
Definition basic_types.hpp:51