1 #ifndef SEMSPEAR_TREE_PARSER_H_
2 #define SEMSPEAR_TREE_PARSER_H_
18 #include <boost/shared_ptr.hpp>
19 #include <boost/tuple/tuple.hpp>
92 ) : lexer_(syn_stream), learn_(learn)
94 initialize_lexicon(lex_stream);
108 boost::tie(contents, code) = lexem();
115 std::cerr <<
"parse_constituent() encountered EOF"
126 "Syntax error: Left parenthesis expected",
127 lexer_.getLineCount()
133 Constituent_ptr result(
new T(at_root ? T::TOP : T::DEPENDENCY, learn_));
136 boost::tie(contents,code) = lexem();
144 result -> set_label(nt_lexicon().
encode(contents, learn_));
146 boost::tie(contents, code) = lexem();
155 int head_position = -1;
161 unget(contents, code);
166 result -> add_child(child);
168 boost::tie(contents, code) = lexem();
174 head_position < (
int) result -> children().size() )
177 for(
typename T::Child_list::const_iterator it =
178 result -> children().begin();
179 it != result -> children().end();
180 it++, head_position--)
183 if(head_position == 0)
185 result -> set_head(it);
189 }
else unget(contents, code);
193 result -> set_word(lexicon().
encode(contents, learn_));
197 "Syntax error: Left parenthesis or string expected",
198 lexer_.getLineCount()));
201 boost::tie(contents, code) = lexem();
205 "Syntax error: Right parenthesis expected",
206 lexer_.getLineCount()
212 if(VERBOSE) std::cerr <<
"Setting events." << std::endl;
213 result -> preprocess_tree();
214 result -> complete_tree();
217 std::cerr <<
"Resulting tree is:" << std::endl;
218 result -> print_dependency_tree(std::cerr);
231 if(buffer_.empty() ==
true)
236 Token_contents contents;
237 Token_code code = lexer_.lexem(contents);
238 return boost::tie(contents, code);
246 Token_tuple last = buffer_.back();
253 void Semspear_tree_parser<T>::unget(
254 Semspear_tree_parser<T>::Token_contents& contents,
255 Semspear_tree_parser<T>::Token_code code
258 buffer_.push_back(Token_tuple(contents, code));
262 void Semspear_tree_parser<T>::initialize_lexicon(Input_stream& input)
264 std::string word, tag;
266 std::string line_string;
270 std::istringstream line(line_string);
275 T::lf_word_map()[wcode] = is_hi_freq;
284 bool Semspear_tree_parser<T>::VERBOSE =
false;
boost::tuple< Token_contents, Token_code > Token_tuple
Definition: Semspear_tree_parser.h:42
Constituent_ptr parse_constituent(bool at_root=true)
parse the next edge in the stream
Definition: Semspear_tree_parser.h:99
static const int TOKEN_STRING
Definition: EdgeLexer.h:24
int toInteger(const String &s)
Definition: CharUtils.cc:74
static Nonterminal_db & nt_lexicon()
Definition: Semspear_tree_parser.h:47
std::list< Token_tuple > Tuple_stack
Definition: Semspear_tree_parser.h:43
std::string Token_contents
Definition: Semspear_tree_parser.h:40
static const int TOKEN_RP
Definition: EdgeLexer.h:26
static bool VERBOSE
Static initialization.
Definition: Semspear_tree_parser.h:48
#define encode(triedge)
Definition: triangle.c:819
size_t Val_type
Definition: Token_map.h:24
Definition: Exception.h:15
Definition: EdgeLexer.h:19
Definition: Lexicon_db.h:17
boost::shared_ptr< T > Constituent_ptr
Definition: Semspear_tree_parser.h:44
int getline(FILE *fp, std::string *line, char EOL= '\n')
Like C's fgets but with std::string, or C++'s getline but with FILE*.
std::istream Input_stream
Definition: Semspear_tree_parser.h:39
static const int TOKEN_EOF
Definition: EdgeLexer.h:23
Definition: Nonterminal_db.h:16
static const int TOKEN_LP
Definition: EdgeLexer.h:25
Definition: Semspear_tree_parser.h:36
static Lexicon_db & lexicon()
Definition: Semspear_tree_parser.h:46
int Token_code
Definition: Semspear_tree_parser.h:41
Semspear_tree_parser(Input_stream &tree_stream, Input_stream &lexicon_stream, bool learn=true)
construct a parser object with text input 'istream'
Definition: Semspear_tree_parser.h:88