LAL: Linear Arrangement Library 23.01.00
A library focused on algorithms on linear arrangements of graphs.
Loading...
Searching...
No Matches
treebank_collection_processor.hpp
1/*********************************************************************
2 *
3 * Linear Arrangement Library - A library that implements a collection of
4 * algorithms for linear arrangements of graphs.
5 *
6 * Copyright (C) 2019 - 2023
7 *
8 * This file is part of Linear Arrangement Library. The full code is available
9 * at:
10 * https://github.com/LAL-project/linear-arrangement-library.git
11 *
12 * Linear Arrangement Library is free software: you can redistribute it
13 * and/or modify it under the terms of the GNU Affero General Public License
14 * as published by the Free Software Foundation, either version 3 of the
15 * License, or (at your option) any later version.
16 *
17 * Linear Arrangement Library is distributed in the hope that it will be
18 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Affero General Public License for more details.
21 *
22 * You should have received a copy of the GNU Affero General Public License
23 * along with Linear Arrangement Library. If not, see <http://www.gnu.org/licenses/>.
24 *
25 * Contact:
26 *
27 * Lluís Alemany Puig (lalemany@cs.upc.edu)
28 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
29 * CQL (Complexity and Quantitative Linguistics Lab)
30 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
31 * Webpage: https://cqllab.upc.edu/people/lalemany/
32 *
33 * Ramon Ferrer i Cancho (rferrericancho@cs.upc.edu)
34 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
35 * CQL (Complexity and Quantitative Linguistics Lab)
36 * Office S124, Omega building
37 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
38 * Webpage: https://cqllab.upc.edu/people/rferrericancho/
39 *
40 ********************************************************************/
41
42#pragma once
43
44// C++ includes
45#if defined DEBUG
46#include <cassert>
47#endif
48#include <vector>
49#include <string>
50#include <tuple>
51#include <array>
52
53// lal includes
54#include <lal/io/treebank_error.hpp>
55#include <lal/io/process_treebank_base.hpp>
56
57namespace lal {
58namespace io {
59
111public:
112 // SETTERS
113
118 void set_join_files(bool v) noexcept { m_join_files = v; }
119
121 void set_number_threads(std::size_t n_threads) noexcept {
122#if defined DEBUG
123 assert(n_threads != 0);
124#endif
125 m_num_threads = n_threads;
126 }
127
128 // GETTERS
129
131 std::size_t get_num_errors() const noexcept
132 { return m_errors_from_processing.size(); }
133
139 const treebank_error& get_error_type(std::size_t i) const noexcept
140 { return std::get<0>(m_errors_from_processing[i]); }
141
148 const std::string& get_error_treebank_filename(std::size_t i) const noexcept
149 { return std::get<1>(m_errors_from_processing[i]); }
150
157 const std::string& get_error_treebank_name(std::size_t i) const noexcept
158 { return std::get<2>(m_errors_from_processing[i]); }
159
169 void set_join_to_file_name(const std::string& join_to) noexcept {
170 m_join_to_file = join_to;
171 }
172
174 void set_treebank_column_name(const std::string& name) noexcept {
176 }
177
178 // PROCESS THE TREEBANK COLLECTION
179
190 (const std::string& main_file, const std::string& output_directory)
191 noexcept;
192
220
221private:
227
228private:
230 std::vector<std::string> m_all_individual_treebank_ids;
232 std::string m_join_to_file = "";
234 bool m_join_files = true;
236 std::string m_treebank_column_name = "treebank";
238 std::size_t m_num_threads = 1;
240 std::string m_column_join_name = "";
241
243 std::vector<std::tuple<treebank_error, std::string, std::string>>
245
247 std::string m_out_dir = "none";
249 std::string m_main_file = "none";
250};
251
267inline
269 const std::string& treebank_collection_main_file,
270 const std::string& output_directory,
271 std::size_t num_threads = 1
272)
273noexcept
274{
276 auto err = tbcolproc.init(treebank_collection_main_file, output_directory);
277 tbcolproc.set_number_threads(num_threads);
279 return err;
280 }
281 return tbcolproc.process();
282}
283
284} // -- namespace io
285} // -- namespace lal
The processor base class.
Definition: process_treebank_base.hpp:61
Automatic processing of treebank collections.
Definition: treebank_collection_processor.hpp:110
treebank_error process() noexcept
Process the treebank collection.
const treebank_error & get_error_type(std::size_t i) const noexcept
Get the ith error.
Definition: treebank_collection_processor.hpp:139
void set_join_files(bool v) noexcept
Join the resulting files into a single file.
Definition: treebank_collection_processor.hpp:118
std::vector< std::string > m_all_individual_treebank_ids
The list of names of the treebanks.
Definition: treebank_collection_processor.hpp:230
treebank_error init(const std::string &main_file, const std::string &output_directory) noexcept
Initialise the processor with a new collection.
bool m_join_files
Join the files into a single file.
Definition: treebank_collection_processor.hpp:234
const std::string & get_error_treebank_filename(std::size_t i) const noexcept
Get the treebank's file name where the ith error happened.
Definition: treebank_collection_processor.hpp:148
treebank_error join_all_files() const noexcept
Joins all resulting files into a single file.
std::vector< std::tuple< treebank_error, std::string, std::string > > m_errors_from_processing
Set of errors resulting from processing the treebank collection.
Definition: treebank_collection_processor.hpp:244
std::string m_treebank_column_name
Name of the column that identifies each treebank.
Definition: treebank_collection_processor.hpp:236
const std::string & get_error_treebank_name(std::size_t i) const noexcept
Get the name of the treebank where the ith error happened.
Definition: treebank_collection_processor.hpp:157
std::size_t get_num_errors() const noexcept
Returns the number of errors that arose during processing.
Definition: treebank_collection_processor.hpp:131
void set_treebank_column_name(const std::string &name) noexcept
Sets the name of the column used to group lines according to the treebank.
Definition: treebank_collection_processor.hpp:174
std::string m_main_file
File containing the list of languages and their treebanks.
Definition: treebank_collection_processor.hpp:249
std::string m_out_dir
Output directory.
Definition: treebank_collection_processor.hpp:247
std::string m_join_to_file
The name of the file that joins all result files.
Definition: treebank_collection_processor.hpp:232
void set_join_to_file_name(const std::string &join_to) noexcept
Sets the name of the file where all values are going to be stored.
Definition: treebank_collection_processor.hpp:169
std::string m_column_join_name
The name of the column in the join file.
Definition: treebank_collection_processor.hpp:240
std::size_t m_num_threads
Number of threads to use.
Definition: treebank_collection_processor.hpp:238
void set_number_threads(std::size_t n_threads) noexcept
Set the number of threads.
Definition: treebank_collection_processor.hpp:121
Treebank error report class.
Definition: treebank_error.hpp:64
@ no_error
No error occurred.
treebank_error process_treebank_collection(const std::string &treebank_collection_main_file, const std::string &output_directory, std::size_t num_threads=1) noexcept
Automatically process a treebank collection.
Definition: treebank_collection_processor.hpp:268
Main namespace of the library.
Definition: basic_types.hpp:50