LAL: Linear Arrangement Library 24.10.00
A library focused on algorithms on linear arrangements of graphs.
Loading...
Searching...
No Matches
treebank_collection_processor.hpp
1/*********************************************************************
2 *
3 * Linear Arrangement Library - A library that implements a collection
4 * of algorithms for linear arrangements of graphs.
5 *
6 * Copyright (C) 2019 - 2024
7 *
8 * This file is part of Linear Arrangement Library. The full code is available
9 * at:
10 * https://github.com/LAL-project/linear-arrangement-library.git
11 *
12 * Linear Arrangement Library is free software: you can redistribute it
13 * and/or modify it under the terms of the GNU Affero General Public License
14 * as published by the Free Software Foundation, either version 3 of the
15 * License, or (at your option) any later version.
16 *
17 * Linear Arrangement Library is distributed in the hope that it will be
18 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Affero General Public License for more details.
21 *
22 * You should have received a copy of the GNU Affero General Public License
23 * along with Linear Arrangement Library. If not, see <http://www.gnu.org/licenses/>.
24 *
25 * Contact:
26 *
27 * Lluís Alemany Puig (lluis.alemany.puig@upc.edu)
28 * LQMC (Quantitative, Mathematical, and Computational Linguistics)
29 * CQL (Complexity and Quantitative Linguistics Lab)
30 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
31 * Webpage: https://cqllab.upc.edu/people/lalemany/
32 *
33 * Ramon Ferrer i Cancho (rferrericancho@cs.upc.edu)
34 * LQMC (Quantitative, Mathematical, and Computational Linguistics)
35 * CQL (Complexity and Quantitative Linguistics Lab)
36 * Office 220, Omega building
37 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
38 * Webpage: https://cqllab.upc.edu/people/rferrericancho/
39 *
40 ********************************************************************/
41
42#pragma once
43
44// C++ includes
45#if defined DEBUG
46#include <cassert>
47#endif
48#include <vector>
49#include <string>
50#include <tuple>
51
52// lal includes
53#include <lal/io/treebank_file_error.hpp>
54#include <lal/io/treebank_processor_base.hpp>
55
56namespace lal {
57namespace io {
58
110public:
111 // SETTERS
112
117 void set_join_files(const bool v) noexcept {
118 m_join_files = v;
119 }
120
122 void set_number_threads(const std::size_t n_threads) noexcept {
123#if defined DEBUG
124 assert(n_threads != 0);
125#endif
126 m_num_threads = n_threads;
127 }
128
129 // GETTERS
130
136 [[nodiscard]] std::size_t get_num_errors() const noexcept {
137 return m_errors_from_processing.size();
138 }
139
147 [[nodiscard]] const treebank_file_error& get_error_type(const std::size_t i)
148 const noexcept
149 {
150 return std::get<0>(m_errors_from_processing[i]);
151 }
152
161 [[nodiscard]] const std::string& get_error_treebank_filename(const std::size_t i)
162 const noexcept
163 {
164 return std::get<1>(m_errors_from_processing[i]);
165 }
166
175 [[nodiscard]] const std::string& get_error_treebank_name(const std::size_t i)
176 const noexcept
177 {
178 return std::get<2>(m_errors_from_processing[i]);
179 }
180
190 void set_join_to_file_name(const std::string& join_to) noexcept {
191 m_join_to_file = join_to;
192 }
193
195 void set_treebank_column_name(const std::string& name) noexcept {
197 }
198
199 // PROCESS THE TREEBANK COLLECTION
200
211 (const std::string& main_file, const std::string& output_directory)
212 noexcept;
213
240 [[nodiscard]] treebank_file_error process() noexcept;
241
242private:
247 [[nodiscard]] treebank_file_error join_all_files() const noexcept;
248
249private:
/// The list of names of the treebanks.
std::vector<std::string> m_all_individual_treebank_ids;
/// The name of the file that joins all result files.
std::string m_join_to_file{""};
/// Join the files into a single file.
bool m_join_files{true};
/// Name of the column that identifies each treebank.
std::string m_treebank_column_name{"treebank"};
/// Number of threads to use.
std::size_t m_num_threads{1};
/// The name of the column in the join file.
std::string m_column_join_name{""};
262
274 std::vector<std::tuple<treebank_file_error, std::string, std::string>>
276
/// Output directory.
std::string m_out_dir{"none"};
/// File containing the list of languages and their treebanks.
std::string m_main_file{"none"};
281};
282
299(
300 const std::string& treebank_collection_main_file,
301 const std::string& output_directory,
302 const std::size_t num_threads = 1
303)
304noexcept
305{
307 auto err = tbcolproc.init(treebank_collection_main_file, output_directory);
308 if (not err.is_error()) { return err; }
309 tbcolproc.set_number_threads(num_threads);
310 return tbcolproc.process();
311}
312
313} // -- namespace io
314} // -- namespace lal
The processor base class.
Definition treebank_processor_base.hpp:61
Automatic processing of treebank collections.
Definition treebank_collection_processor.hpp:109
const treebank_file_error & get_error_type(const std::size_t i) const noexcept
Get the ith error.
Definition treebank_collection_processor.hpp:147
treebank_file_error join_all_files() const noexcept
Joins all resulting files into a single file.
const std::string & get_error_treebank_name(const std::size_t i) const noexcept
Get the treebank's name for where the ith error happened.
Definition treebank_collection_processor.hpp:175
std::vector< std::string > m_all_individual_treebank_ids
The list of names of the treebanks.
Definition treebank_collection_processor.hpp:251
const std::string & get_error_treebank_filename(const std::size_t i) const noexcept
Get the treebank's file name where the ith error happened.
Definition treebank_collection_processor.hpp:161
bool m_join_files
Join the files into a single file.
Definition treebank_collection_processor.hpp:255
void set_join_files(const bool v) noexcept
Join the resulting files into a single file.
Definition treebank_collection_processor.hpp:117
std::vector< std::tuple< treebank_file_error, std::string, std::string > > m_errors_from_processing
Set of errors resulting from processing the treebank collection.
Definition treebank_collection_processor.hpp:275
std::string m_treebank_column_name
Name of the column that identifies each treebank.
Definition treebank_collection_processor.hpp:257
treebank_file_error process() noexcept
Process the treebank collection.
std::size_t get_num_errors() const noexcept
Returns the number of errors that arose during processing.
Definition treebank_collection_processor.hpp:136
void set_treebank_column_name(const std::string &name) noexcept
Sets the name of the column used to group lines according to the treebank.
Definition treebank_collection_processor.hpp:195
std::string m_main_file
File containing the list of languages and their treebanks.
Definition treebank_collection_processor.hpp:280
std::string m_out_dir
Output directory.
Definition treebank_collection_processor.hpp:278
void set_number_threads(const std::size_t n_threads) noexcept
Set the number of threads.
Definition treebank_collection_processor.hpp:122
std::string m_join_to_file
The name of the file that joins all result files.
Definition treebank_collection_processor.hpp:253
void set_join_to_file_name(const std::string &join_to) noexcept
Sets the name of the file where all values are going to be stored.
Definition treebank_collection_processor.hpp:190
std::string m_column_join_name
The name of the column in the join file.
Definition treebank_collection_processor.hpp:261
std::size_t m_num_threads
Number of threads to use.
Definition treebank_collection_processor.hpp:259
treebank_file_error init(const std::string &main_file, const std::string &output_directory) noexcept
Initialize the processor with a new collection.
Treebank file error report class.
Definition treebank_file_error.hpp:64
treebank_file_error process_treebank_collection(const std::string &treebank_collection_main_file, const std::string &output_directory, const std::size_t num_threads=1) noexcept
Automatically process a treebank collection.
Definition treebank_collection_processor.hpp:299
Main namespace of the library.
Definition basic_types.hpp:48