LAL: Linear Arrangement Library 21.07.01
A library focused on algorithms on linear arrangements of graphs.
Loading...
Searching...
No Matches
check_correctness.hpp
1/*********************************************************************
2 *
3 * Linear Arrangement Library - A library that implements a collection
4 * algorithms for linear arrangments of graphs.
5 *
6 * Copyright (C) 2019 - 2021
7 *
8 * This file is part of Linear Arrangement Library. To see the full code
9 * visit the webpage:
10 * https://github.com/lluisalemanypuig/linear-arrangement-library.git
11 *
12 * Linear Arrangement Library is free software: you can redistribute it
13 * and/or modify it under the terms of the GNU Affero General Public License
14 * as published by the Free Software Foundation, either version 3 of the
15 * License, or (at your option) any later version.
16 *
17 * Linear Arrangement Library is distributed in the hope that it will be
18 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Affero General Public License for more details.
21 *
22 * You should have received a copy of the GNU Affero General Public License
23 * along with Linear Arrangement Library. If not, see <http://www.gnu.org/licenses/>.
24 *
25 * Contact:
26 *
27 * LluĂ­s Alemany Puig (lalemany@cs.upc.edu)
28 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
29 * CQL (Complexity and Quantitative Linguistics Lab)
30 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
31 * Webpage: https://cqllab.upc.edu/people/lalemany/
32 *
33 * Ramon Ferrer i Cancho (rferrericancho@cs.upc.edu)
34 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
35 * CQL (Complexity and Quantitative Linguistics Lab)
36 * Office S124, Omega building
37 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
38 * Webpage: https://cqllab.upc.edu/people/rferrericancho/
39 *
40 ********************************************************************/
41
42// C includes
43#include <omp.h>
44
45// C++ includes
46#include <filesystem>
47#include <charconv>
48#include <fstream>
49#include <sstream>
50#include <string>
51
52#include <iostream>
53
54// lal includes
55#include <lal/definitions.hpp>
56#include <lal/io/report_correctness.hpp>
57#include <lal/graphs/directed_graph.hpp>
58#include <lal/internal/graphs/cycles.hpp>
59#include <lal/internal/macros.hpp>
60
61namespace lal {
62namespace internal {
63
64inline
65graphs::directed_graph head_vector_to_directed_graph(const head_vector& hv)
66noexcept
67{
68 const uint32_t n = static_cast<uint32_t>(hv.size());
69 graphs::directed_graph t(n);
70 for (uint32_t i = 0; i < n; ++i) {
71 if (hv[i] != 0) {
72 // add the edge:
73 // * i ranges in [0,n-1]
74 // * L[i] ranges in [1,n]
75 t.add_edge_bulk(i, hv[i] - 1);
76 }
77 }
78 t.finish_bulk_add(true);
79 return t;
80}
81
82#define file_does_not_exist(F) \
83"Error: Treebank '" + F + "' does not exist."
84
85#define file_could_not_be_opened(F) \
86"Error: Treebank '" + F + "' could not be opened."
87
88#define invalid_integer(i, chunk) \
89"Error: Value at position '" + std::to_string(i) + "' (value: '" + chunk + "') \
90is not a valid non-negative integer number."
91
92#define number_out_of_bounds(i) \
93"Error: Number at position '" + std::to_string(i) + "' (value: \
94" + std::to_string(hv[i]) + ") is out of bounds."
95
96#define wrong_num_roots(r) \
97"Error: Wrong number of roots: " + std::to_string(n_roots) + "."
98
99#define wrong_num_edges(n, m) \
100"Error: Wrong number of edges. Number of vertices is '" + std::to_string(n) + \
101 "'. Number of edges is '" + std::to_string(m) + "'; " + \
102 "should be '" + std::to_string(n-1) + "'."
103
104#define graph_has_cycles \
105"Error: The graph described is not a tree, i.e., it has cycles."
106
107#define isolated_vertex(u) \
108"Error: Vertex '" + std::to_string(u) + "' is isolated."
109
110#define self_loop(pos) \
111"Error: found a self-loop at position '" + std::to_string(pos) + "'."
112
113template<bool decide>
114std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
115find_errors
116(const std::string& current_line, const size_t line)
117noexcept
118{
119 std::vector<io::report_treebank_file> treebank_err_list;
120
121 if constexpr (decide) {
122 UNUSED(treebank_err_list);
123 UNUSED(line);
124 }
125
126 bool non_numeric_characters = false;
127 head_vector hv;
128
129 // ensure there are only numeric characters
130 {
131 size_t i = 1;
132 std::stringstream ss(current_line);
133 std::string chunk;
134 while (ss >> chunk) {
135
136 uint32_t value;
137 const auto result = std::from_chars
138 (&chunk[0], (&chunk[chunk.size() - 1]) + 1, value);
139
140 if (result.ec == std::errc::invalid_argument) {
141 if constexpr (decide) { return true; }
142 else {
143 treebank_err_list.emplace_back(line, invalid_integer(i, chunk));
144 non_numeric_characters = true;
145 }
146 }
147 else {
148 hv.push_back(value);
149 }
150
151 ++i;
152 }
153 }
154
155 // if the current line contains non-numeric characters then the
156 // appropriate error messages have been stored, and so we can skip
157 // to the next line
158 if (non_numeric_characters) {
159 if constexpr (decide) { return true; }
160 else { return treebank_err_list; }
161 }
162
163 // number of nodes of the graph
164 const uint32_t n = static_cast<uint32_t>(hv.size());
165
166 uint32_t n_roots = 0;
167 bool can_make_graph = true;
168
169 // inspect the head vector
170 for (size_t i = 0; i < hv.size(); ++i) {
171 if (hv[i] == 0) {
172 ++n_roots;
173 continue;
174 }
175
176 // ensure that the number is within bounds
177 if (hv[i] > hv.size()) {
178 if constexpr (decide) { return true; }
179 else {
180 treebank_err_list.emplace_back(line, number_out_of_bounds(i));
181 can_make_graph = false;
182 }
183 }
184 // self loop
185 else if (hv[i] == i + 1) {
186 if constexpr (decide) { return true; }
187 else {
188 treebank_err_list.emplace_back(line, self_loop(i + 1));
189 can_make_graph = false;
190 }
191 }
192 }
193
194 // check there is exactly one root
195 if (n_roots != 1) {
196 if constexpr (decide) { return true; }
197 else {
198 treebank_err_list.emplace_back(line, wrong_num_roots(n_roots));
199 }
200 }
201
202 if (can_make_graph) {
203 // ignore singleton graphs
204 if (n == 1) {
205 if constexpr (decide) { return false; }
206 else { return treebank_err_list; }
207 }
208
209 // make a directed graph with the values
210 const auto dgraph = head_vector_to_directed_graph(hv);
211 const bool has_cycles = internal::has_undirected_cycles(dgraph);
212 if (has_cycles) {
213 if constexpr (decide) { return true; }
214 else {
215 treebank_err_list.emplace_back(line, graph_has_cycles);
216 }
217 }
218
219 // find isolated vertices
220 for (node u = 0; u < dgraph.get_num_nodes(); ++u) {
221 if (dgraph.get_degree(u) == 0) {
222 if constexpr (decide) { return true; }
223 else {
224 treebank_err_list.emplace_back(line, isolated_vertex(u));
225 }
226 }
227 }
228
229 // check the number of edges is correct
230 if (dgraph.get_num_edges() != dgraph.get_num_nodes() - 1) {
231 if constexpr (decide) { return true; }
232 else {
233 treebank_err_list.emplace_back
234 (line, wrong_num_edges(dgraph.get_num_nodes(), dgraph.get_num_edges()));
235 }
236 }
237 }
238
239 if constexpr (decide) { return false; }
240 else { return treebank_err_list;}
241}
242
243template<bool decide>
244std::conditional_t<decide, bool, std::vector<io::report_treebank_file>>
245check_correctness_treebank(const std::string& treebank_filename)
246noexcept
247{
248 if (not std::filesystem::exists(treebank_filename)) {
249 if constexpr (decide) { return true; }
250 else { return {{0, file_does_not_exist(treebank_filename)}}; }
251 }
252
253 std::ifstream fin(treebank_filename);
254 if (not fin.is_open()) {
255 if constexpr (decide) { return true; }
256 else { return {{0, file_could_not_be_opened(treebank_filename)}}; }
257 }
258
259 std::vector<io::report_treebank_file> treebank_err_list;
260 std::string current_line;
261
262 size_t line = 1;
263 while (getline(fin, current_line)) {
264 if (current_line == "") {
265 // do nothing
266 }
267 else {
268 const auto r = find_errors<decide>(current_line, line);
269 if constexpr (decide) {
270 // if there are errors, return
271 if (r) { return true; }
272 }
273 else {
274 // append errors to the list
275 treebank_err_list.insert(
276 treebank_err_list.end(), r.begin(), r.end()
277 );
278 }
279 }
280
281 ++line;
282 }
283
284 if constexpr (decide) { return false; }
285 else { return treebank_err_list; }
286}
287
288// file, line, what
289template<bool decide>
290std::conditional_t<decide, bool, std::vector<io::report_treebank_collection>>
291check_correctness_treebank_collection
292(const std::string& main_file_name, size_t n_threads)
293noexcept
294{
295 if (not std::filesystem::exists(main_file_name)) {
296 if constexpr (decide) { return true; }
297 else { return {{"-", 0, 0, file_does_not_exist(main_file_name)}}; }
298 }
299 std::ifstream fin_main_file(main_file_name);
300 if (not fin_main_file.is_open()) {
301 if constexpr (decide) { return true; }
302 else { return {{"-", 0, 0, file_could_not_be_opened(main_file_name)}}; }
303 }
304
305 std::vector<io::report_treebank_collection> dataset_err_list;
306 char errors_found = 0;
307
308 #pragma omp parallel num_threads(n_threads) shared(errors_found)
309 {
310
311 const int tid = omp_get_thread_num();
312 if (tid == 0) {
313
314 size_t main_file_line = 1;
315 std::string id, treebankname;
316
317 while (fin_main_file >> id >> treebankname and errors_found == 0) {
318 // make full path to the treebank
319 std::filesystem::path treebank_full_path(main_file_name);
320 treebank_full_path.replace_filename(treebankname);
321 const std::string full_path_as_string = treebank_full_path.string();
322
323 #pragma omp task
324 {
325
326 if (errors_found == 0) {
327
328 // check correctess of treebank
329 const auto r =
330 check_correctness_treebank<decide>(full_path_as_string);
331
332 if constexpr (decide) {
333 if (r) {
334 errors_found = 1;
335 }
336 }
337 else {
338 // append errors found in the treebank to
339 // the list of errors of this dataset
340 #pragma omp critical
341 {
342 for (const auto& report_treebank : r) {
343 if (report_treebank.get_line_number() > 0) {
344 dataset_err_list.emplace_back(
345 full_path_as_string,
346 main_file_line,
347 report_treebank.get_line_number(),
348 report_treebank.get_error_message()
349 );
350 }
351 else {
352 const auto& err_msg = report_treebank.get_error_message();
353 std::string new_err_msg;
354 if (err_msg.find("exist") != std::string::npos) {
355 new_err_msg = "Treebank file does not exist";
356 }
357 else if (err_msg.find("opened") != std::string::npos) {
358 new_err_msg = "Treebank file could not be opened";
359 }
360
361 dataset_err_list.emplace_back(
362 full_path_as_string,
363 main_file_line,
364 report_treebank.get_line_number(),
365 new_err_msg
366 );
367 }
368 }
369 }
370 }
371 }
372 }
373 ++main_file_line;
374 }
375
376 }
377 }
378
379 if constexpr (decide) {
380 return (errors_found == 0 ? false : true);
381 }
382 else { return dataset_err_list; }
383}
384
385} // -- namespace internal
386} // -- namespace lal
Main namespace of the library.
Definition definitions.hpp:48
uint32_t node
Node type.
Definition definitions.hpp:51
std::vector< uint32_t > head_vector
A head vector representation of a (usually) rooted tree.
Definition definitions.hpp:114