54#include <lal/io/treebank_error.hpp>
55#include <lal/io/process_treebank_base.hpp>
123 assert(n_threads != 0);
190 (
const std::string& main_file,
const std::string& output_directory)
269 const std::string& treebank_collection_main_file,
270 const std::string& output_directory,
271 std::size_t num_threads = 1
276 auto err = tbcolproc.init(treebank_collection_main_file, output_directory);
The processor base class.
Definition: process_treebank_base.hpp:61
Automatic processing of treebank collections.
Definition: treebank_collection_processor.hpp:110
treebank_error process() noexcept
Process the treebank collection.
const treebank_error & get_error_type(std::size_t i) const noexcept
Get the ith error.
Definition: treebank_collection_processor.hpp:139
void set_join_files(bool v) noexcept
Join the resulting files into a single file.
Definition: treebank_collection_processor.hpp:118
std::vector< std::string > m_all_individual_treebank_ids
The list of names of the treebanks.
Definition: treebank_collection_processor.hpp:230
treebank_error init(const std::string &main_file, const std::string &output_directory) noexcept
Initialise the processor with a new collection.
bool m_join_files
Join the files into a single file.
Definition: treebank_collection_processor.hpp:234
const std::string & get_error_treebank_filename(std::size_t i) const noexcept
Get the treebank's file name where the ith error happened.
Definition: treebank_collection_processor.hpp:148
treebank_error join_all_files() const noexcept
Joins all resulting files into a single file.
std::vector< std::tuple< treebank_error, std::string, std::string > > m_errors_from_processing
Set of errors resulting from processing the treebank collection.
Definition: treebank_collection_processor.hpp:244
std::string m_treebank_column_name
Name of the column that identifies each treebank.
Definition: treebank_collection_processor.hpp:236
const std::string & get_error_treebank_name(std::size_t i) const noexcept
Get the name of the treebank where the ith error happened.
Definition: treebank_collection_processor.hpp:157
std::size_t get_num_errors() const noexcept
Returns the number of errors that arose during processing.
Definition: treebank_collection_processor.hpp:131
void set_treebank_column_name(const std::string &name) noexcept
Sets the name of the column used to group lines according to the treebank.
Definition: treebank_collection_processor.hpp:174
std::string m_main_file
File containing the list of languages and their treebanks.
Definition: treebank_collection_processor.hpp:249
std::string m_out_dir
Output directory.
Definition: treebank_collection_processor.hpp:247
std::string m_join_to_file
The name of the file that joins all result files.
Definition: treebank_collection_processor.hpp:232
void set_join_to_file_name(const std::string &join_to) noexcept
Sets the name of the file where all values are going to be stored.
Definition: treebank_collection_processor.hpp:169
std::string m_column_join_name
The name of the column in the join file.
Definition: treebank_collection_processor.hpp:240
std::size_t m_num_threads
Number of threads to use.
Definition: treebank_collection_processor.hpp:238
void set_number_threads(std::size_t n_threads) noexcept
Set the number of threads.
Definition: treebank_collection_processor.hpp:121
Treebank error report class.
Definition: treebank_error.hpp:64
@ no_error
No error occurred.
treebank_error process_treebank_collection(const std::string &treebank_collection_main_file, const std::string &output_directory, std::size_t num_threads=1) noexcept
Automatically process a treebank collection.
Definition: treebank_collection_processor.hpp:268
Main namespace of the library.
Definition: basic_types.hpp:50