LAL: Linear Arrangement Library 23.01.00
A library focused on algorithms on linear arrangements of graphs.
Loading...
Searching...
No Matches
check_correctness.hpp
1/*********************************************************************
2 *
3 * Linear Arrangement Library - A library that implements a collection
4 * algorithms for linear arrangments of graphs.
5 *
6 * Copyright (C) 2019 - 2023
7 *
8 * This file is part of Linear Arrangement Library. The full code is available
9 * at:
10 * https://github.com/LAL-project/linear-arrangement-library.git
11 *
12 * Linear Arrangement Library is free software: you can redistribute it
13 * and/or modify it under the terms of the GNU Affero General Public License
14 * as published by the Free Software Foundation, either version 3 of the
15 * License, or (at your option) any later version.
16 *
17 * Linear Arrangement Library is distributed in the hope that it will be
18 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Affero General Public License for more details.
21 *
22 * You should have received a copy of the GNU Affero General Public License
23 * along with Linear Arrangement Library. If not, see <http://www.gnu.org/licenses/>.
24 *
25 * Contact:
26 *
27 * LluĂ­s Alemany Puig (lalemany@cs.upc.edu)
28 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
29 * CQL (Complexity and Quantitative Linguistics Lab)
30 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
31 * Webpage: https://cqllab.upc.edu/people/lalemany/
32 *
33 * Ramon Ferrer i Cancho (rferrericancho@cs.upc.edu)
34 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
35 * CQL (Complexity and Quantitative Linguistics Lab)
36 * Office S124, Omega building
37 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
38 * Webpage: https://cqllab.upc.edu/people/rferrericancho/
39 *
40 ********************************************************************/
41
42// C includes
43#include <omp.h>
44
45// C++ includes
46#include <filesystem>
47#include <charconv>
48#include <fstream>
49#include <sstream>
50#include <string>
51#include <iostream>
52
53// lal includes
54#include <lal/basic_types.hpp>
55#include <lal/io/report_correctness.hpp>
56#include <lal/graphs/directed_graph.hpp>
57#include <lal/detail/graphs/cycles.hpp>
58
59namespace lal {
60namespace detail {
61
62#define file_does_not_exist(F) \
63"Error: Treebank '" + F + "' does not exist."
64
65#define file_could_not_be_opened(F) \
66"Error: Treebank '" + F + "' could not be opened."
67
68#define invalid_integer(i, chunk) \
69"Error: Value at position '" + std::to_string(i) + "' (value: '" + chunk + "') \
70is not a valid non-negative integer number."
71
72#define number_out_of_bounds(i) \
73"Error: Number at position '" + std::to_string(i) + "' (value: \
74" + std::to_string(hv[i]) + ") is out of bounds."
75
76#define wrong_num_roots(r) \
77"Error: Wrong number of roots: " + std::to_string(n_roots) + "."
78
79#define wrong_num_edges(n, m) \
80"Error: Wrong number of edges. Number of vertices is '" + std::to_string(n) + \
81 "'. Number of edges is '" + std::to_string(m) + "'; " + \
82 "should be '" + std::to_string(n-1) + "'."
83
84#define graph_has_cycles \
85"Error: The graph described is not a tree, i.e., it has cycles."
86
87#define isolated_vertex(u) \
88"Error: Vertex '" + std::to_string(u) + "' is isolated."
89
90#define self_loop(pos) \
91"Error: found a self-loop at position '" + std::to_string(pos) + "'."
92
95noexcept
96{
97 const uint64_t n = hv.size();
99 for (uint64_t i = 0; i < n; ++i) {
100 if (hv[i] != 0) {
101 // add the edge:
102 // * i ranges in [0,n-1]
103 // * L[i] ranges in [1,n]
104 t.add_edge_bulk(i, hv[i] - 1);
105 }
106 }
107 t.finish_bulk_add(true);
108 return t;
109}
110
120template <bool decide>
121std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
122find_errors(const head_vector& hv, const std::size_t line)
123noexcept
124{
125 std::vector<io::report_treebank_file> treebank_err_list;
126
127 // number of nodes of the graph
128 const uint64_t n = hv.size();
129
130 uint64_t n_roots = 0;
131 bool can_make_graph = true;
132
133 // inspect the head vector
134 for (std::size_t i = 0; i < hv.size(); ++i) {
135 if (hv[i] == 0) {
136 ++n_roots;
137 continue;
138 }
139
140 // ensure that the number is within bounds
141 if (hv[i] > hv.size()) {
142 if constexpr (decide) { return true; }
143 else {
144 treebank_err_list.emplace_back(line, number_out_of_bounds(i));
145 can_make_graph = false;
146 }
147 }
148 // self loop
149 else if (hv[i] == i + 1) {
150 if constexpr (decide) { return true; }
151 else {
152 treebank_err_list.emplace_back(line, self_loop(i + 1));
153 can_make_graph = false;
154 }
155 }
156 }
157
158 // check there is exactly one root
159 if (n_roots != 1) {
160 if constexpr (decide) { return true; }
161 else {
162 treebank_err_list.emplace_back(line, wrong_num_roots(n_roots));
163 }
164 }
165
166 if (can_make_graph) {
167 // ignore singleton graphs
168 if (n == 1) {
169 if constexpr (decide) { return false; }
170 else { return treebank_err_list; }
171 }
172
173 // make a directed graph with the values
174 const auto dgraph = head_vector_to_directed_graph(hv);
175 const bool has_cycles = detail::has_undirected_cycles(dgraph);
176 if (has_cycles) {
177 if constexpr (decide) { return true; }
178 else {
179 treebank_err_list.emplace_back(line, graph_has_cycles);
180 }
181 }
182
183 // find isolated vertices
184 for (node u = 0; u < dgraph.get_num_nodes(); ++u) {
185 if (dgraph.get_degree(u) == 0) {
186 if constexpr (decide) { return true; }
187 else {
188 treebank_err_list.emplace_back(line, isolated_vertex(u));
189 }
190 }
191 }
192
193 // check the number of edges is correct
194 if (dgraph.get_num_edges() != dgraph.get_num_nodes() - 1) {
195 if constexpr (decide) { return true; }
196 else {
197 treebank_err_list.emplace_back
198 (line, wrong_num_edges(dgraph.get_num_nodes(), dgraph.get_num_edges()));
199 }
200 }
201 }
202
203 if constexpr (decide) { return false; }
204 else { return treebank_err_list;}
205}
206
214template <bool decide>
215std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
216find_errors(const std::string& current_line, const std::size_t line)
217noexcept
218{
219 std::vector<io::report_treebank_file> treebank_err_list;
220
221 bool non_numeric_characters = false;
222 head_vector hv;
223
224 // ensure there are only numeric characters
225 {
226 std::size_t i = 1;
227 std::stringstream ss(current_line);
228 std::string chunk;
229 while (ss >> chunk) {
230
231 uint64_t value;
232 const auto result = std::from_chars
233 (&chunk[0], (&chunk[chunk.size() - 1]) + 1, value);
234
235 if (result.ec == std::errc::invalid_argument) {
236 if constexpr (decide) { return true; }
237 else {
238 treebank_err_list.emplace_back(line, invalid_integer(i, chunk));
239 non_numeric_characters = true;
240 }
241 }
242 else {
243 hv.push_back(value);
244 }
245
246 ++i;
247 }
248 }
249
250 // if the current line contains non-numeric characters then the
251 // appropriate error messages have been stored, and so we can skip
252 // to the next line
253 if (non_numeric_characters) {
254 if constexpr (decide) { return true; }
255 else { return treebank_err_list; }
256 }
257
258 if constexpr (decide) {
259 return find_errors<decide>(hv, line);
260 }
261 else {
262 auto errors = find_errors<decide>(hv, line);
263 for (std::size_t i = 0; i < errors.size(); ++i) {
264 treebank_err_list.emplace_back( std::move(errors[i]) );
265 }
266 return treebank_err_list;
267 }
268}
269
276template <bool decide>
277std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
278check_correctness_treebank(const std::string& treebank_filename)
279noexcept
280{
281 if (not std::filesystem::exists(treebank_filename)) {
282 if constexpr (decide) { return true; }
283 else { return {{0, file_does_not_exist(treebank_filename)}}; }
284 }
285
286 std::ifstream fin(treebank_filename);
287 if (not fin.is_open()) {
288 if constexpr (decide) { return true; }
289 else { return {{0, file_could_not_be_opened(treebank_filename)}}; }
290 }
291
292 std::vector<io::report_treebank_file> treebank_err_list;
293 std::string current_line;
294
295 std::size_t line = 1;
296 while (getline(fin, current_line)) {
297 if (current_line == "") {
298 // do nothing
299 }
300 else {
301 const auto r = find_errors<decide>(current_line, line);
302 if constexpr (decide) {
303 // if there are errors, return
304 if (r) { return true; }
305 }
306 else {
307 // append errors to the list
308 treebank_err_list.insert(
309 treebank_err_list.end(), r.begin(), r.end()
310 );
311 }
312 }
313
314 ++line;
315 }
316
317 if constexpr (decide) { return false; }
318 else { return treebank_err_list; }
319}
320
328template <bool decide>
329std::conditional_t<decide, bool, std::vector<io::report_treebank_collection>>
331(const std::string& main_file_name, std::size_t n_threads)
332noexcept
333{
334 if (not std::filesystem::exists(main_file_name)) {
335 if constexpr (decide) { return true; }
336 else { return {{"-", 0, 0, file_does_not_exist(main_file_name)}}; }
337 }
338 std::ifstream fin_main_file(main_file_name);
339 if (not fin_main_file.is_open()) {
340 if constexpr (decide) { return true; }
341 else { return {{"-", 0, 0, file_could_not_be_opened(main_file_name)}}; }
342 }
343
344 std::vector<io::report_treebank_collection> dataset_err_list;
345 char errors_found = 0;
346
347 #pragma omp parallel num_threads(n_threads) shared(errors_found)
348 {
349
350 const int tid = omp_get_thread_num();
351 if (tid == 0) {
352
353 std::size_t main_file_line = 1;
354 std::string id, treebankname;
355
356 while (fin_main_file >> id >> treebankname and errors_found == 0) {
357 // make full path to the treebank
358 std::filesystem::path treebank_full_path(main_file_name);
359 treebank_full_path.replace_filename(treebankname);
360 const std::string full_path_as_string = treebank_full_path.string();
361
362 #pragma omp task
363 {
364
365 if (errors_found == 0) {
366
367 // check correctess of treebank
368 const auto r =
369 check_correctness_treebank<decide>(full_path_as_string);
370
371 if constexpr (decide) {
372 if (r) {
373 errors_found = 1;
374 }
375 }
376 else {
377 // append errors found in the treebank to
378 // the list of errors of this dataset
379 #pragma omp critical
380 {
381 for (const auto& report_treebank : r) {
382 if (report_treebank.get_line_number() > 0) {
383 dataset_err_list.emplace_back(
384 full_path_as_string,
385 main_file_line,
386 report_treebank.get_line_number(),
387 report_treebank.get_error_message()
388 );
389 }
390 else {
391 const auto& err_msg = report_treebank.get_error_message();
392 std::string new_err_msg;
393 if (err_msg.find("exist") != std::string::npos) {
394 new_err_msg = "Treebank file does not exist";
395 }
396 else if (err_msg.find("opened") != std::string::npos) {
397 new_err_msg = "Treebank file could not be opened";
398 }
399
400 dataset_err_list.emplace_back(
401 full_path_as_string,
402 main_file_line,
403 report_treebank.get_line_number(),
404 new_err_msg
405 );
406 }
407 }
408 }
409 }
410 }
411 }
412 ++main_file_line;
413 }
414
415 }
416 }
417
418 if constexpr (decide) {
419 return (errors_found == 0 ? false : true);
420 }
421 else { return dataset_err_list; }
422}
423
424} // -- namespace detail
425} // -- namespace lal
Directed graph class.
Definition: directed_graph.hpp:68
directed_graph & add_edge_bulk(node s, node t) noexcept
Adds an edge to the graph.
void finish_bulk_add(bool norm=true, bool check=true) noexcept
Completes the inner structure of the graph after adding a bulk of edges.
std::conditional_t< decide, bool, std::vector< io::report_treebank_file > > find_errors(const head_vector &hv, const std::size_t line) noexcept
Find errors in a head vector.
Definition: check_correctness.hpp:122
bool has_undirected_cycles(const graph_t &g, BFS< graph_t > &bfs) noexcept
Returns true if, and only if, the graph has UNDIRECTED cycles.
Definition: cycles.hpp:135
std::conditional_t< decide, bool, std::vector< io::report_treebank_collection > > check_correctness_treebank_collection(const std::string &main_file_name, std::size_t n_threads) noexcept
Find errors in a treebank collection.
Definition: check_correctness.hpp:331
graphs::directed_graph head_vector_to_directed_graph(const head_vector &hv) noexcept
Transforms a head vector in a directed graph.
Definition: check_correctness.hpp:94
std::conditional_t< decide, bool, std::vector< io::report_treebank_file > > check_correctness_treebank(const std::string &treebank_filename) noexcept
Find errors in a treebank file.
Definition: check_correctness.hpp:278
Main namespace of the library.
Definition: basic_types.hpp:50
std::vector< uint64_t > head_vector
See Head vector page for further details.
Definition: basic_types.hpp:64
uint64_t node
Node type. See Node / Vertex page for further details.
Definition: basic_types.hpp:53