LAL: Linear Arrangement Library 23.01.00
A library focused on algorithms on linear arrangements of graphs.
Loading...
Searching...
No Matches
treebank_processor.hpp
1/*********************************************************************
2 *
3 * Linear Arrangement Library - A library that implements a collection
4 * of algorithms for linear arrangements of graphs.
5 *
6 * Copyright (C) 2019 - 2023
7 *
8 * This file is part of Linear Arrangement Library. The full code is available
9 * at:
10 * https://github.com/LAL-project/linear-arrangement-library.git
11 *
12 * Linear Arrangement Library is free software: you can redistribute it
13 * and/or modify it under the terms of the GNU Affero General Public License
14 * as published by the Free Software Foundation, either version 3 of the
15 * License, or (at your option) any later version.
16 *
17 * Linear Arrangement Library is distributed in the hope that it will be
18 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Affero General Public License for more details.
21 *
22 * You should have received a copy of the GNU Affero General Public License
23 * along with Linear Arrangement Library. If not, see <http://www.gnu.org/licenses/>.
24 *
25 * Contact:
26 *
27 * Lluís Alemany Puig (lalemany@cs.upc.edu)
28 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
29 * CQL (Complexity and Quantitative Linguistics Lab)
30 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
31 * Webpage: https://cqllab.upc.edu/people/lalemany/
32 *
33 * Ramon Ferrer i Cancho (rferrericancho@cs.upc.edu)
34 * LARCA (Laboratory for Relational Algorithmics, Complexity and Learning)
35 * CQL (Complexity and Quantitative Linguistics Lab)
36 * Office S124, Omega building
37 * Jordi Girona St 1-3, Campus Nord UPC, 08034 Barcelona. CATALONIA, SPAIN
38 * Webpage: https://cqllab.upc.edu/people/rferrericancho/
39 *
40 ********************************************************************/
41
42#pragma once
43
44// C++ includes
45#if defined DEBUG
46#include <cassert>
47#endif
48#include <vector>
49#include <string>
50
51// lal includes
52#include <lal/linear_arrangement.hpp>
53#include <lal/io/treebank_error.hpp>
54#include <lal/io/process_treebank_base.hpp>
55
56namespace lal {
57namespace io {
58
104public:
105 // PROCESS THE TREEBANK collection
106
117 const std::string& treebank_input_file,
118 const std::string& output_file,
119 const std::string& treebank_id = ""
120 )
121 noexcept;
122
141
142private:
144 template <class TREE, class OUT_STREAM>
146 (const TREE& rT, double *props, char *prop_set, OUT_STREAM& out_lab_file)
147 noexcept;
148
149 // HEADER
150
152 template <class OUT_STREAM>
154 (OUT_STREAM& out_lab_file)
155 const noexcept;
156
158 template <class OUT_STREAM>
160 (OUT_STREAM& out_lab_file)
161 const noexcept;
162
163 // VALUES
164
166 template <class TREE_TYPE, class OUT_STREAM>
168 (TREE_TYPE& t, OUT_STREAM& out_lab_file)
169 const noexcept;
170
172 template <class TREE_TYPE, class OUT_STREAM>
174 (const TREE_TYPE& t, uint64_t C, OUT_STREAM& out_lab_file)
175 const noexcept;
176
177private:
// Name of the input file; the generated documentation describes it as the
// file containing the list of languages and their treebanks. Holds the
// placeholder "none" until it is assigned.
179 std::string m_treebank_filename = "none";
// Where the results are written; holds the placeholder "none" until assigned.
// NOTE(review): the generated documentation calls this an "output directory"
// while the name and the `output_file` parameter suggest a file — confirm.
181 std::string m_output_file = "none";
// Identifier of the treebank; empty by default.
183 std::string m_treebank_id = "";
184
// Compile-time cache of the index of every treebank_feature value, each
// obtained once through treebank_feature_to_index(). The constant names follow
// the quantity they refer to: n = number of nodes, k2/k3 = second/third moment
// of degree, SK2/SK3 = sum of squared/cubed degrees, C = number of crossings,
// D = sum of edge lengths, flux_* = flux statistics; the *_out variants refer
// to out-degrees.
// NOTE(review): presumably these index the `props` / `prop_set` arrays handed
// to process_tree — confirm against the implementation file.
185 static constexpr std::size_t n_idx = treebank_feature_to_index(treebank_feature::num_nodes);
186 static constexpr std::size_t k2_idx = treebank_feature_to_index(treebank_feature::second_moment_degree);
187 static constexpr std::size_t k2_out_idx = treebank_feature_to_index(treebank_feature::second_moment_degree_out);
188 static constexpr std::size_t k3_idx = treebank_feature_to_index(treebank_feature::third_moment_degree);
189 static constexpr std::size_t k3_out_idx = treebank_feature_to_index(treebank_feature::third_moment_degree_out);
190 static constexpr std::size_t SK2_idx = treebank_feature_to_index(treebank_feature::sum_squared_degrees);
191 static constexpr std::size_t SK2_out_idx = treebank_feature_to_index(treebank_feature::sum_squared_out_degrees);
192 static constexpr std::size_t SK3_idx = treebank_feature_to_index(treebank_feature::sum_cubed_degrees);
193 static constexpr std::size_t SK3_out_idx = treebank_feature_to_index(treebank_feature::sum_cubed_out_degrees);
194 static constexpr std::size_t num_pairs_independent_edges_idx = treebank_feature_to_index(treebank_feature::num_pairs_independent_edges);
195 static constexpr std::size_t head_initial_idx = treebank_feature_to_index(treebank_feature::head_initial);
196 static constexpr std::size_t hubiness_idx = treebank_feature_to_index(treebank_feature::hubiness);
197 static constexpr std::size_t sum_hierarchical_distance_idx = treebank_feature_to_index(treebank_feature::sum_hierarchical_distances);
198 static constexpr std::size_t mean_hierarchical_distance_idx = treebank_feature_to_index(treebank_feature::mean_hierarchical_distance);
199 static constexpr std::size_t tree_centre_idx = treebank_feature_to_index(treebank_feature::tree_centre);
200 static constexpr std::size_t tree_centroid_idx = treebank_feature_to_index(treebank_feature::tree_centroid);
201 static constexpr std::size_t tree_diameter_idx = treebank_feature_to_index(treebank_feature::tree_diameter);
202 static constexpr std::size_t tree_caterpillar_distance_idx = treebank_feature_to_index(treebank_feature::tree_caterpillar_distance);
203 static constexpr std::size_t C_idx = treebank_feature_to_index(treebank_feature::num_crossings);
204 static constexpr std::size_t C_predicted_idx = treebank_feature_to_index(treebank_feature::predicted_num_crossings);
205 static constexpr std::size_t C_expected_idx = treebank_feature_to_index(treebank_feature::exp_num_crossings);
206 static constexpr std::size_t C_variance_idx = treebank_feature_to_index(treebank_feature::var_num_crossings);
207 static constexpr std::size_t C_z_score_idx = treebank_feature_to_index(treebank_feature::z_score_num_crossings);
208 static constexpr std::size_t D_idx = treebank_feature_to_index(treebank_feature::sum_edge_lengths);
209 static constexpr std::size_t D_expected_idx = treebank_feature_to_index(treebank_feature::exp_sum_edge_lengths);
210 static constexpr std::size_t D_expected_projective_idx = treebank_feature_to_index(treebank_feature::exp_sum_edge_lengths_projective);
211 static constexpr std::size_t D_expected_planar_idx = treebank_feature_to_index(treebank_feature::exp_sum_edge_lengths_planar);
212 static constexpr std::size_t D_variance_idx = treebank_feature_to_index(treebank_feature::var_sum_edge_lengths);
213 static constexpr std::size_t D_z_score_idx = treebank_feature_to_index(treebank_feature::z_score_sum_edge_lengths);
214 static constexpr std::size_t Dmin_Unconstrained_idx = treebank_feature_to_index(treebank_feature::min_sum_edge_lengths);
215 static constexpr std::size_t Dmin_Planar_idx = treebank_feature_to_index(treebank_feature::min_sum_edge_lengths_planar);
216 static constexpr std::size_t Dmin_Projective_idx = treebank_feature_to_index(treebank_feature::min_sum_edge_lengths_projective);
217 static constexpr std::size_t DMax_Planar_idx = treebank_feature_to_index(treebank_feature::max_sum_edge_lengths_planar);
218 static constexpr std::size_t DMax_Projective_idx = treebank_feature_to_index(treebank_feature::max_sum_edge_lengths_projective);
219 static constexpr std::size_t mean_dependency_distance_idx = treebank_feature_to_index(treebank_feature::mean_dependency_distance);
220 static constexpr std::size_t flux_max_weight_idx = treebank_feature_to_index(treebank_feature::flux_max_weight);
221 static constexpr std::size_t flux_mean_weight_idx = treebank_feature_to_index(treebank_feature::flux_mean_weight);
222 static constexpr std::size_t flux_min_weight_idx = treebank_feature_to_index(treebank_feature::flux_min_weight);
223 static constexpr std::size_t flux_max_left_span_idx = treebank_feature_to_index(treebank_feature::flux_max_left_span);
224 static constexpr std::size_t flux_mean_left_span_idx = treebank_feature_to_index(treebank_feature::flux_mean_left_span);
225 static constexpr std::size_t flux_min_left_span_idx = treebank_feature_to_index(treebank_feature::flux_min_left_span);
226 static constexpr std::size_t flux_max_right_span_idx = treebank_feature_to_index(treebank_feature::flux_max_right_span);
227 static constexpr std::size_t flux_mean_right_span_idx = treebank_feature_to_index(treebank_feature::flux_mean_right_span);
228 static constexpr std::size_t flux_min_right_span_idx = treebank_feature_to_index(treebank_feature::flux_min_right_span);
229 static constexpr std::size_t flux_max_RL_ratio_idx = treebank_feature_to_index(treebank_feature::flux_max_RL_ratio);
230 static constexpr std::size_t flux_mean_RL_ratio_idx = treebank_feature_to_index(treebank_feature::flux_mean_RL_ratio);
231 static constexpr std::size_t flux_min_RL_ratio_idx = treebank_feature_to_index(treebank_feature::flux_min_RL_ratio);
232 static constexpr std::size_t flux_max_WS_ratio_idx = treebank_feature_to_index(treebank_feature::flux_max_WS_ratio);
233 static constexpr std::size_t flux_mean_WS_ratio_idx = treebank_feature_to_index(treebank_feature::flux_mean_WS_ratio);
234 static constexpr std::size_t flux_min_WS_ratio_idx = treebank_feature_to_index(treebank_feature::flux_min_WS_ratio);
235 static constexpr std::size_t flux_max_size_idx = treebank_feature_to_index(treebank_feature::flux_max_size);
236 static constexpr std::size_t flux_mean_size_idx = treebank_feature_to_index(treebank_feature::flux_mean_size);
237 static constexpr std::size_t flux_min_size_idx = treebank_feature_to_index(treebank_feature::flux_min_size);
238};
239
251inline
253(const std::string& treebank_file, const std::string& output_file)
254noexcept
255{
256 treebank_processor tbproc;
257 auto err = tbproc.init(treebank_file, output_file);
259 return err;
260 }
261 return tbproc.process();
262}
263
264} // -- namespace io
265} // -- namespace lal
The processor base class.
Definition: process_treebank_base.hpp:61
Treebank error report class.
Definition: treebank_error.hpp:64
Automatic processing of treebank files.
Definition: treebank_processor.hpp:103
std::string m_treebank_filename
File containing the list of languages and their treebanks.
Definition: treebank_processor.hpp:179
std::string m_treebank_id
Treebank identifier.
Definition: treebank_processor.hpp:183
void output_tree_type_values(TREE_TYPE &t, OUT_STREAM &out_lab_file) const noexcept
Output the values for the tree types.
treebank_error process() noexcept
Process the treebank file.
void output_tree_type_header(OUT_STREAM &out_lab_file) const noexcept
Output the header for the tree types.
void output_syndepstruct_type_header(OUT_STREAM &out_lab_file) const noexcept
Output the header for the syntactic dependency tree types.
void process_tree(const TREE &rT, double *props, char *prop_set, OUT_STREAM &out_lab_file) noexcept
Process a single tree in a treebank.
treebank_error init(const std::string &treebank_input_file, const std::string &output_file, const std::string &treebank_id="") noexcept
Initialise the processor with a new collection.
void output_syndepstruct_type_values(const TREE_TYPE &t, uint64_t C, OUT_STREAM &out_lab_file) const noexcept
Output the values for the syntactic dependency tree types.
std::string m_output_file
Output file.
Definition: treebank_processor.hpp:181
treebank_feature
The features that can be computed in automatic processing of treebanks.
Definition: treebank_feature.hpp:68
@ exp_num_crossings
Expectation (first moment) of $C$, $\mathbb{E}[C]$.
@ flux_min_size
Minimum flux size.
@ tree_centroid
Centroid of the tree.
@ z_score_num_crossings
z-score of $C$, $\frac{C - \mathbb{E}[C]}{\sqrt{\mathbb{V}[C]}}$.
@ second_moment_degree_out
Second moment of out-degree $\langle k_{out}^2 \rangle$.
@ num_nodes
Number of nodes of the tree.
@ flux_mean_right_span
Mean right span.
@ mean_hierarchical_distance
Mean hierarchical distance of the tree.
@ third_moment_degree
Third moment of degree $\langle k^3 \rangle$.
@ min_sum_edge_lengths_projective
Minimum sum of length of edges under the projectivity constraint.
@ flux_mean_RL_ratio
Mean R/L ratio.
@ flux_max_size
Maximum flux size.
@ head_initial
Headedness of the tree.
@ flux_mean_WS_ratio
Mean W/S ratio.
@ predicted_num_crossings
Prediction of the number of crossings $C$.
@ max_sum_edge_lengths_planar
Maximum sum of length of edges under the planarity constraint.
@ sum_cubed_out_degrees
Sum of cubed out-degrees.
@ hubiness
Hubiness of the tree.
@ flux_mean_weight
Mean flux weight.
@ flux_min_left_span
Minimum left span.
@ flux_min_RL_ratio
Minimum R/L ratio.
@ flux_mean_size
Mean flux size.
@ third_moment_degree_out
Third moment of out-degree $\langle k_{out}^3 \rangle$.
@ sum_edge_lengths
Sum of length of edges $D$.
@ flux_max_WS_ratio
Maximum W/S ratio.
@ mean_dependency_distance
Mean dependency distance of the tree.
@ max_sum_edge_lengths_projective
Maximum sum of length of edges under the projectivity constraint.
@ flux_min_right_span
Minimum right span.
@ num_crossings
Number of edge crossings $C$.
@ flux_max_left_span
Maximum left span.
@ sum_squared_degrees
Sum of squared degrees.
@ var_num_crossings
Variance of $C$, $\mathbb{V}[C]$.
@ flux_max_RL_ratio
Maximum R/L ratio.
@ flux_max_right_span
Maximum right span.
@ sum_cubed_degrees
Sum of cubed degrees.
@ second_moment_degree
Second moment of degree $\langle k^2 \rangle$.
@ tree_centre
Centre of the tree.
@ min_sum_edge_lengths_planar
Minimum sum of length of edges under the planarity constraint.
@ num_pairs_independent_edges
Size of the set $Q(T)$ of pairs of independent edges of this tree $T$.
@ z_score_sum_edge_lengths
z-score of $D$, $\frac{D - \mathbb{E}[D]}{\sqrt{\mathbb{V}[D]}}$.
@ exp_sum_edge_lengths_planar
Expectation of $D$ constrained to planar arrangements, $\mathbb{E}_{\mathrm{pl}}[D]$.
@ exp_sum_edge_lengths
Expectation of $D$, $\mathbb{E}[D]$.
@ var_sum_edge_lengths
Variance of $D$, $\mathbb{V}[D]$.
@ tree_caterpillar_distance
Caterpillar distance of the tree.
@ exp_sum_edge_lengths_projective
Expectation of $D$ constrained to projective arrangements, $\mathbb{E}_{\mathrm{pr}}[D]$.
@ flux_max_weight
Maximum flux weight.
@ min_sum_edge_lengths
Unconstrained minimum sum of length of edges.
@ sum_squared_out_degrees
Sum of squared out-degrees.
@ sum_hierarchical_distances
Sum of hierarchical distances of the tree.
@ flux_mean_left_span
Mean left span.
@ flux_min_WS_ratio
Minimum W/S ratio.
@ tree_diameter
Diameter of the tree.
@ flux_min_weight
Minimum flux weight.
@ no_error
No error occurred.
treebank_error process_treebank(const std::string &treebank_file, const std::string &output_file) noexcept
Automatically process a treebank.
Definition: treebank_processor.hpp:253
constexpr std::size_t treebank_feature_to_index(const io::treebank_feature &tf) noexcept
Returns the index of the input treebank feature.
Definition: treebank_feature.hpp:603
Main namespace of the library.
Definition: basic_types.hpp:50