LAL: Linear Arrangement Library 24.10.00
A library focused on algorithms on linear arrangements of graphs.
Loading...
Searching...
No Matches
treebank_processor.hpp
1/*********************************************************************
2 *
3 * Linear Arrangement Library - A library that implements a collection
4 * of algorithms for linear arrangements of graphs.
5 *
6 * Copyright (C) 2019 - 2024
7 *
8 * This file is part of Linear Arrangement Library. The full code is available
9 * at:
10 * https://github.com/LAL-project/linear-arrangement-library.git
11 *
12 * Linear Arrangement Library is free software: you can redistribute it
13 * and/or modify it under the terms of the GNU Affero General Public License
14 * as published by the Free Software Foundation, either version 3 of the
15 * License, or (at your option) any later version.
16 *
17 * Linear Arrangement Library is distributed in the hope that it will be
18 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Affero General Public License for more details.
21 *
22 * You should have received a copy of the GNU Affero General Public License
23 * along with Linear Arrangement Library. If not, see <http://www.gnu.org/licenses/>.
24 *
25 * Contact:
26 *
27 * Lluís Alemany Puig (lluis.alemany.puig@upc.edu)
28 * LQMC (Quantitative, Mathematical, and Computational Linguistics)
29 * CQL (Complexity and Quantitative Linguistics Lab)
30 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
31 * Webpage: https://cqllab.upc.edu/people/lalemany/
32 *
33 * Ramon Ferrer i Cancho (rferrericancho@cs.upc.edu)
34 * LQMC (Quantitative, Mathematical, and Computational Linguistics)
35 * CQL (Complexity and Quantitative Linguistics Lab)
36 * Office 220, Omega building
37 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
38 * Webpage: https://cqllab.upc.edu/people/rferrericancho/
39 *
40 ********************************************************************/
41
42#pragma once
43
44// C++ includes
45#include <string>
46
47// lal includes
48#include <lal/linear_arrangement.hpp>
49#include <lal/io/treebank_file_error.hpp>
50#include <lal/io/treebank_processor_base.hpp>
51
52namespace lal {
53namespace io {
54
100public:
101 // PROCESS THE TREEBANK collection
102
/**
 * @brief Initialize the processor with a new collection.
 *
 * NOTE(review): the line carrying the return type and the name of this
 * declaration (listing line 112) was missing from the listing; it is
 * restored here from the signature shown in this file's member index.
 *
 * @param treebank_input_file Name of the input treebank file.
 * @param output_file Name of the output file.
 * @param treebank_id Identifier of the treebank; defaults to the empty string.
 * @returns A treebank file error report.
 */
112 treebank_file_error init
113 (
114 const std::string& treebank_input_file,
115 const std::string& output_file,
116 const std::string& treebank_id = ""
117 )
118 noexcept;
119
/**
 * @brief Process the treebank file.
 *
 * @returns A treebank file error report. The value is marked [[nodiscard]],
 * so callers are expected to inspect it.
 */
137 [[nodiscard]] treebank_file_error process() noexcept;
138
139private:
/**
 * @brief Process a single tree in a treebank.
 *
 * NOTE(review): the line carrying the return type and the name of this
 * declaration (listing line 142) was missing from the listing; it is
 * restored here from the signature shown in this file's member index.
 *
 * @tparam TREE Type of the tree to be processed.
 * @tparam OUT_STREAM Type of the output stream.
 * @param rT The tree to be processed.
 * @param props Array of feature values; presumably indexed with the `*_idx`
 * constants of this class -- TODO confirm against the implementation.
 * @param prop_set Array of flags, presumably one per feature in @e props --
 * TODO confirm against the implementation.
 * @param out_lab_file Output stream.
 */
141 template <class TREE, class OUT_STREAM>
142 void process_tree
143 (
144 const TREE& rT,
145 double * const props,
146 char * const prop_set,
147 OUT_STREAM& out_lab_file
148 )
149 noexcept;
150
151 // HEADER
152
/**
 * @brief Output the header for the tree types.
 *
 * @tparam OUT_STREAM Type of the output stream.
 * @param out_lab_file Output stream.
 */
154 template <class OUT_STREAM>
155 void output_tree_type_header(OUT_STREAM& out_lab_file)
156 const noexcept;
157
/**
 * @brief Output the header for the syntactic dependency tree types.
 *
 * Header counterpart of output_syndepstruct_type_values.
 *
 * @tparam OUT_STREAM Type of the output stream.
 * @param out_lab_file Output stream.
 */
159 template <class OUT_STREAM>
160 void output_syndepstruct_type_header(OUT_STREAM& out_lab_file)
161 const noexcept;
162
163 // VALUES
164
/**
 * @brief Output the values for the tree types.
 *
 * @tparam TREE_TYPE Type of the tree.
 * @tparam OUT_STREAM Type of the output stream.
 * @param t The tree; note that it is taken by non-const reference.
 * @param out_lab_file Output stream.
 */
166 template <class TREE_TYPE, class OUT_STREAM>
167 void output_tree_type_values(TREE_TYPE& t, OUT_STREAM& out_lab_file)
168 const noexcept;
169
/**
 * @brief Output the values for the syntactic dependency tree types.
 *
 * NOTE(review): the line carrying the return type and the name of this
 * declaration (listing line 172) was missing from the listing; it is
 * restored here from the signature shown in this file's member index.
 *
 * @tparam TREE_TYPE Type of the tree.
 * @tparam OUT_STREAM Type of the output stream.
 * @param t The tree.
 * @param C Presumably the number of edge crossings of the tree (the name
 * matches `C_idx`) -- TODO confirm against the implementation.
 * @param out_lab_file Output stream.
 */
171 template <class TREE_TYPE, class OUT_STREAM>
172 void output_syndepstruct_type_values
173 (
174 const TREE_TYPE& t,
175 const uint64_t C,
176 OUT_STREAM& out_lab_file
177 )
178 const noexcept;
179
180private:
/// Treebank file name. The generated documentation describes it as the
/// "file containing the list of languages and their treebanks".
/// NOTE(review): confirm which description is the intended one.
182 std::string m_treebank_filename = "none";
/// Output file. The generated documentation describes it as the
/// "output directory". NOTE(review): confirm which one is meant.
184 std::string m_output_file = "none";
/// Treebank identifier.
186 std::string m_treebank_id = "";
187
// Compile-time index of every treebank feature, as returned by
// treebank_feature_to_index for the corresponding treebank_feature_type.
// NOTE(review): presumably used to address the per-tree arrays of feature
// values handled by this class -- confirm against the implementation.
188 static constexpr std::size_t n_idx = treebank_feature_to_index(treebank_feature_type::num_nodes);
189 static constexpr std::size_t k2_idx = treebank_feature_to_index(treebank_feature_type::second_moment_degree);
190 static constexpr std::size_t k2_out_idx = treebank_feature_to_index(treebank_feature_type::second_moment_degree_out);
191 static constexpr std::size_t k3_idx = treebank_feature_to_index(treebank_feature_type::third_moment_degree);
192 static constexpr std::size_t k3_out_idx = treebank_feature_to_index(treebank_feature_type::third_moment_degree_out);
193 static constexpr std::size_t SK2_idx = treebank_feature_to_index(treebank_feature_type::sum_squared_degrees);
194 static constexpr std::size_t SK2_out_idx = treebank_feature_to_index(treebank_feature_type::sum_squared_out_degrees);
195 static constexpr std::size_t SK3_idx = treebank_feature_to_index(treebank_feature_type::sum_cubed_degrees);
196 static constexpr std::size_t SK3_out_idx = treebank_feature_to_index(treebank_feature_type::sum_cubed_out_degrees);
197 static constexpr std::size_t num_pairs_independent_edges_idx = treebank_feature_to_index(treebank_feature_type::num_pairs_independent_edges);
198 static constexpr std::size_t head_initial_idx = treebank_feature_to_index(treebank_feature_type::head_initial);
199 static constexpr std::size_t hubiness_idx = treebank_feature_to_index(treebank_feature_type::hubiness);
200 static constexpr std::size_t sum_hierarchical_distance_idx = treebank_feature_to_index(treebank_feature_type::sum_hierarchical_distances);
201 static constexpr std::size_t mean_hierarchical_distance_idx = treebank_feature_to_index(treebank_feature_type::mean_hierarchical_distance);
202 static constexpr std::size_t tree_centre_idx = treebank_feature_to_index(treebank_feature_type::tree_centre);
203 static constexpr std::size_t tree_centroid_idx = treebank_feature_to_index(treebank_feature_type::tree_centroid);
204 static constexpr std::size_t tree_diameter_idx = treebank_feature_to_index(treebank_feature_type::tree_diameter);
205 static constexpr std::size_t tree_caterpillar_distance_idx = treebank_feature_to_index(treebank_feature_type::tree_caterpillar_distance);
206 static constexpr std::size_t C_idx = treebank_feature_to_index(treebank_feature_type::num_crossings);
207 static constexpr std::size_t C_predicted_idx = treebank_feature_to_index(treebank_feature_type::predicted_num_crossings);
208 static constexpr std::size_t C_expected_idx = treebank_feature_to_index(treebank_feature_type::exp_num_crossings);
209 static constexpr std::size_t C_variance_idx = treebank_feature_to_index(treebank_feature_type::var_num_crossings);
210 static constexpr std::size_t C_z_score_idx = treebank_feature_to_index(treebank_feature_type::z_score_num_crossings);
211 static constexpr std::size_t D_idx = treebank_feature_to_index(treebank_feature_type::sum_edge_lengths);
212 static constexpr std::size_t D_expected_idx = treebank_feature_to_index(treebank_feature_type::exp_sum_edge_lengths);
213 static constexpr std::size_t D_expected_bipartite_idx = treebank_feature_to_index(treebank_feature_type::exp_sum_edge_lengths_bipartite);
214 static constexpr std::size_t D_expected_projective_idx = treebank_feature_to_index(treebank_feature_type::exp_sum_edge_lengths_projective);
215 static constexpr std::size_t D_expected_planar_idx = treebank_feature_to_index(treebank_feature_type::exp_sum_edge_lengths_planar);
216 static constexpr std::size_t D_variance_idx = treebank_feature_to_index(treebank_feature_type::var_sum_edge_lengths);
217 static constexpr std::size_t D_z_score_idx = treebank_feature_to_index(treebank_feature_type::z_score_sum_edge_lengths);
218 static constexpr std::size_t Dmin_Unconstrained_idx = treebank_feature_to_index(treebank_feature_type::min_sum_edge_lengths);
219 static constexpr std::size_t Dmin_Bipartite_idx = treebank_feature_to_index(treebank_feature_type::min_sum_edge_lengths_bipartite);
220 static constexpr std::size_t Dmin_Planar_idx = treebank_feature_to_index(treebank_feature_type::min_sum_edge_lengths_planar);
221 static constexpr std::size_t Dmin_Projective_idx = treebank_feature_to_index(treebank_feature_type::min_sum_edge_lengths_projective);
222 static constexpr std::size_t DMax_Unconstrained_idx = treebank_feature_to_index(treebank_feature_type::max_sum_edge_lengths);
223 static constexpr std::size_t DMax_1_thistle_idx = treebank_feature_to_index(treebank_feature_type::max_sum_edge_lengths_1_thistle);
224 static constexpr std::size_t DMax_Bipartite_idx = treebank_feature_to_index(treebank_feature_type::max_sum_edge_lengths_bipartite);
225 static constexpr std::size_t DMax_Planar_idx = treebank_feature_to_index(treebank_feature_type::max_sum_edge_lengths_planar);
226 static constexpr std::size_t DMax_Projective_idx = treebank_feature_to_index(treebank_feature_type::max_sum_edge_lengths_projective);
227 static constexpr std::size_t mean_dependency_distance_idx = treebank_feature_to_index(treebank_feature_type::mean_dependency_distance);
228 static constexpr std::size_t flux_max_weight_idx = treebank_feature_to_index(treebank_feature_type::flux_max_weight);
229 static constexpr std::size_t flux_mean_weight_idx = treebank_feature_to_index(treebank_feature_type::flux_mean_weight);
230 static constexpr std::size_t flux_min_weight_idx = treebank_feature_to_index(treebank_feature_type::flux_min_weight);
231 static constexpr std::size_t flux_max_left_span_idx = treebank_feature_to_index(treebank_feature_type::flux_max_left_span);
232 static constexpr std::size_t flux_mean_left_span_idx = treebank_feature_to_index(treebank_feature_type::flux_mean_left_span);
233 static constexpr std::size_t flux_min_left_span_idx = treebank_feature_to_index(treebank_feature_type::flux_min_left_span);
234 static constexpr std::size_t flux_max_right_span_idx = treebank_feature_to_index(treebank_feature_type::flux_max_right_span);
235 static constexpr std::size_t flux_mean_right_span_idx = treebank_feature_to_index(treebank_feature_type::flux_mean_right_span);
236 static constexpr std::size_t flux_min_right_span_idx = treebank_feature_to_index(treebank_feature_type::flux_min_right_span);
237 static constexpr std::size_t flux_max_RL_ratio_idx = treebank_feature_to_index(treebank_feature_type::flux_max_RL_ratio);
238 static constexpr std::size_t flux_mean_RL_ratio_idx = treebank_feature_to_index(treebank_feature_type::flux_mean_RL_ratio);
239 static constexpr std::size_t flux_min_RL_ratio_idx = treebank_feature_to_index(treebank_feature_type::flux_min_RL_ratio);
240 static constexpr std::size_t flux_max_WS_ratio_idx = treebank_feature_to_index(treebank_feature_type::flux_max_WS_ratio);
241 static constexpr std::size_t flux_mean_WS_ratio_idx = treebank_feature_to_index(treebank_feature_type::flux_mean_WS_ratio);
242 static constexpr std::size_t flux_min_WS_ratio_idx = treebank_feature_to_index(treebank_feature_type::flux_min_WS_ratio);
243 static constexpr std::size_t flux_max_size_idx = treebank_feature_to_index(treebank_feature_type::flux_max_size);
244 static constexpr std::size_t flux_mean_size_idx = treebank_feature_to_index(treebank_feature_type::flux_mean_size);
245 static constexpr std::size_t flux_min_size_idx = treebank_feature_to_index(treebank_feature_type::flux_min_size);
246};
247
/**
 * @brief Automatically process a treebank.
 *
 * Creates a treebank_processor, initializes it with the given files and,
 * only when the initialization reports no error, processes the treebank.
 *
 * NOTE(review): the line carrying the return type and the name of this
 * function (listing line 259) was missing from the listing; it is restored
 * here from the signature shown in this file's member index. The function is
 * declared `inline` because it is defined in a header at namespace scope.
 *
 * @param treebank_file Name of the treebank file, forwarded to
 * treebank_processor::init.
 * @param output_file Name of the output file, forwarded to
 * treebank_processor::init.
 * @returns The error report produced by the initialization when it fails;
 * otherwise, the error report produced by treebank_processor::process.
 */
259inline treebank_file_error process_treebank
260(const std::string& treebank_file, const std::string& output_file)
261noexcept
262{
263 treebank_processor tbproc;
264 auto err = tbproc.init(treebank_file, output_file);
// A failed initialization must be reported immediately; processing is only
// meaningful on a successfully initialized processor.
265 if (err.is_error()) { return err; }
266 return tbproc.process();
267}
268
269} // -- namespace io
270} // -- namespace lal
The processor base class.
Definition treebank_processor_base.hpp:61
Treebank file error report class.
Definition treebank_file_error.hpp:64
Automatic processing of treebank files.
Definition treebank_processor.hpp:99
std::string m_treebank_filename
File containing the list of languages and their treebanks.
Definition treebank_processor.hpp:182
std::string m_treebank_id
Treebank identifier.
Definition treebank_processor.hpp:186
treebank_file_error init(const std::string &treebank_input_file, const std::string &output_file, const std::string &treebank_id="") noexcept
Initialize the processor with a new collection.
treebank_file_error process() noexcept
Process the treebank file.
void output_tree_type_values(TREE_TYPE &t, OUT_STREAM &out_lab_file) const noexcept
Output the values for the tree types.
void output_tree_type_header(OUT_STREAM &out_lab_file) const noexcept
Output the header for the tree types.
void output_syndepstruct_type_header(OUT_STREAM &out_lab_file) const noexcept
Output the header for the syntactic dependency tree types.
void output_syndepstruct_type_values(const TREE_TYPE &t, const uint64_t C, OUT_STREAM &out_lab_file) const noexcept
Output the values for the syntactic dependency tree types.
void process_tree(const TREE &rT, double *const props, char *const prop_set, OUT_STREAM &out_lab_file) noexcept
Process a single tree in a treebank.
std::string m_output_file
Output directory.
Definition treebank_processor.hpp:184
treebank_file_error process_treebank(const std::string &treebank_file, const std::string &output_file) noexcept
Automatically process a treebank.
Definition treebank_processor.hpp:260
treebank_feature_type
The features that can be computed in automatic processing of treebanks.
Definition treebank_feature_type.hpp:68
@ exp_num_crossings
First moment of expectation of $C$, $E[C]$.
@ flux_min_size
Minimum flux size.
@ exp_sum_edge_lengths_bipartite
Expectation of $D$ constrained to bipartite arrangements, $E_{\mathrm{bip}}[D]$.
@ tree_centroid
Centroid of the tree.
@ z_score_num_crossings
z-score of $C$, $\frac{C - E[C]}{\sqrt{V[C]}}$.
@ second_moment_degree_out
Second moment of out-degree $\langle k_{\mathrm{out}}^2 \rangle$.
@ num_nodes
Number of nodes of the tree.
@ flux_mean_right_span
Mean right span.
@ mean_hierarchical_distance
Mean hierarchical distance of the tree.
@ third_moment_degree
Third moment of degree $\langle k^3 \rangle$.
@ min_sum_edge_lengths_projective
Minimum sum of length of edges under the projectivity constraint.
@ flux_mean_RL_ratio
Mean R/L ratio.
@ flux_max_size
Maximum flux size.
@ max_sum_edge_lengths_1_thistle
Maximum sum of length of edges over arrangements with 1 thistle vertex.
@ head_initial
Headedness of the tree.
@ flux_mean_WS_ratio
Mean W/S ratio.
@ predicted_num_crossings
Prediction of the number of crossings $C$.
@ max_sum_edge_lengths_planar
Maximum sum of length of edges under the planarity constraint.
@ sum_cubed_out_degrees
Sum of cube out-degrees.
@ hubiness
Hubiness of the tree.
@ flux_mean_weight
Mean flux weight.
@ flux_min_left_span
Minimum left span.
@ flux_min_RL_ratio
Minimum R/L ratio.
@ flux_mean_size
Mean flux size.
@ max_sum_edge_lengths_bipartite
Maximum sum of length of edges among bipartite arrangements.
@ third_moment_degree_out
Third moment of out-degree $\langle k_{\mathrm{out}}^3 \rangle$.
@ sum_edge_lengths
Sum of length of edges $D$.
@ flux_max_WS_ratio
Maximum W/S ratio.
@ mean_dependency_distance
Mean dependency distance of the tree.
@ max_sum_edge_lengths_projective
Maximum sum of length of edges under the projectivity constraint.
@ flux_min_right_span
Minimum right span.
@ num_crossings
Number of edge crossings $C$.
@ flux_max_left_span
Maximum left span.
@ sum_squared_degrees
Sum of squared degrees.
@ var_num_crossings
Variance of $C$, $V[C]$.
@ flux_max_RL_ratio
Maximum R/L ratio.
@ flux_max_right_span
Maximum right span.
@ sum_cubed_degrees
Sum of cube degrees.
@ second_moment_degree
Second moment of degree $\langle k^2 \rangle$.
@ tree_centre
Centre of the tree.
@ min_sum_edge_lengths_planar
Minimum sum of length of edges under the planarity constraint.
@ num_pairs_independent_edges
Size of the set $Q(T)$ of pairs of independent edges of this tree $T$.
@ z_score_sum_edge_lengths
z-score of $D$, $\frac{D - E[D]}{\sqrt{V[D]}}$.
@ exp_sum_edge_lengths_planar
Expectation of $D$ constrained to planar arrangements, $E_{\mathrm{pl}}[D]$.
@ exp_sum_edge_lengths
Expectation of $D$, $E[D]$.
@ var_sum_edge_lengths
Variance of $D$, $V[D]$.
@ tree_caterpillar_distance
Caterpillar distance of the tree.
@ exp_sum_edge_lengths_projective
Expectation of $D$ constrained to projective arrangements, $E_{\mathrm{pr}}[D]$.
@ min_sum_edge_lengths_bipartite
Minimum sum of length of edges over bipartite arrangements.
@ flux_max_weight
Maximum flux weight.
@ min_sum_edge_lengths
Unconstrained minimum sum of length of edges.
@ sum_squared_out_degrees
Sum of squared out-degrees.
@ sum_hierarchical_distances
Sum of hierarchical distances of the tree.
@ flux_mean_left_span
Mean left span.
@ flux_min_WS_ratio
Minimum W/S ratio.
@ tree_diameter
Diameter of the tree.
@ flux_min_weight
Minimum flux weight.
@ max_sum_edge_lengths
Maximum sum of length of edges over all arrangements.
constexpr std::size_t treebank_feature_to_index(const io::treebank_feature_type &tf) noexcept
Returns the index of the input treebank feature.
Definition treebank_feature_type.hpp:639
Main namespace of the library.
Definition basic_types.hpp:48