KJB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
EdgeParser.h
Go to the documentation of this file.
1 #ifndef EDGE_PARSER_H_
2 #define EDGE_PARSER_H_
3 
11 #include <iostream>
12 #include <list>
13 #include <string>
14 
15 #include "spear/Pair.h"
16 #include "spear/EdgeLexer.h"
17 #include "spear/Lexicon.h"
18 #include "spear/CharUtils.h"
19 #include "spear/Exception.h"
20 
/**
 * Consume the next token and require it to be of kind `type`.
 *
 * The macro expands in place, so the expansion site must provide:
 *   - `text`   : a String lvalue passed to lexem(),
 *   - `lexem`  : a callable taking `text` and returning the token kind,
 *   - `lexer_` : an object exposing getLineCount().
 *
 * If the token kind differs from `type`, a spear::Exception built from
 * `msg` and the lexer's current line count is thrown.
 *
 * Both parameters are parenthesized so that an expression argument
 * (e.g. `A | B`) is compared as a whole instead of being split by
 * operator precedence. The expansion stays a plain brace block, so call
 * sites written with or without a trailing semicolon keep compiling.
 */
#define EXTRACT_TOKEN(type, msg) { \
    if (lexem(text) != (type)) { \
        throw spear::Exception((msg), lexer_.getLineCount()); \
    } \
}
26 
27 namespace spear {
28 
35  template <class T>
36  class EdgeParser
37  {
38  public:
41  EdgeParser(IStream& syn_stream);
42 
47 
48  private:
49 
50  spear::EdgeLexer lexer_;
57  int lexem(String& text);
58 
61  void unget(String& text, int lex);
62 
64  std::list< spear::Pair<String, int> > buffer_;
65 
66  };
67 
69 
70  template<class T> EdgeParser<T>::EdgeParser(
71  IStream& syn_stream
72  ) : lexer_(syn_stream)
73  {
74  };
75 
76  template<class T>
78  {
79  String text;
80  int lex;
81 
82  // return value (lex) is code for what type of token
83  // (paren, EOL or content string);
84  // if content encountered, writes to text.
85  // lexem() is defined in EdgeLexer.cc
86  lex = lexem(text);
87 
88  // end of file reached
89  if(lex == EdgeLexer::TOKEN_EOF)
90  {
91  return spear::RCIPtr<T>();
92  }
93 
94  // should start with left paren
96  {
97  throw(
98  Exception(
99  "Syntax error: Left parenthesis expected",
100  lexer_.getLineCount()
101  )
102  );
103  }
104 
105  // if so, create an edge pointer
106  spear::RCIPtr<T> edge(new T);
107 
108  // read in the next token
109  lex = lexem(text);
110 
111  // Phrase label
112  // (have already found left paren, so next
113  // string is a phrase label)
115  {
116  // text contains the phrase label,
117  // so attach this label to the edge object
118  edge -> setLabel(text);
119  // read in the next token
120  lex = lexem(text);
121  } else
122  {
123  // after a left paren, the only time a string
124  // is not encountered is at the root
125  edge -> setLabel("TOP");
126  }
127 
128  // This is a non-terminal phrase
129  // (if not, should encounter another string,
130  // not a left paren)
131  if(lex == spear::EdgeLexer::TOKEN_LP)
132  {
133  // The head position might be specified immediately
134  // after the children
135  int headPosition = -1;
136 
137  // Parse all children
138  while(lex == spear::EdgeLexer::TOKEN_LP)
139  {
140  // put the node on the stack
141  unget(text, lex);
142  // recursively parse the nonterminal below
143  spear::RCIPtr<T> child = parseEdge();
144  // once parsed, add the subtree as a child
145  // of this node
146  edge -> addChild(child);
147  // get the next token
148  lex = lexem(text);
149  }
150 
151  // This token might be the head position
152  if(lex == spear::EdgeLexer::TOKEN_STRING &&
153  (headPosition = toInteger(text)) >= 0 &&
154  headPosition < (int) edge->getChildren().size())
155  {
156  // Set the head
157  for(typename std::list< spear::RCIPtr<T> >::
158  const_iterator it = edge->getChildren().begin();
159  it != edge->getChildren().end();
160  it ++, headPosition --)
161  {
162  // Found the head child
163  if(headPosition == 0)
164  {
165  edge->setHead(*it);
166  break;
167  }
168  }
169  } else unget(text, lex);
170 
171  // Phrase word for terminal phrases
172  } else if(lex == spear::EdgeLexer::TOKEN_STRING) {
173  edge->setWord(text);
174  } else {
175  throw(
177  "Syntax error: Left parenthesis or string expected",
178  lexer_.getLineCount()));
179  }
180 
183  "Syntax error: Right parenthesis expected");
184 
185  return edge;
186  }
187 
188  template<class T>
189  int EdgeParser<T>::lexem(String& text)
190  {
191  if(buffer_.empty() == true)
192  {
193  // call EdgeLexer lexem method
194  // once the buffer is empty
195  return lexer_.lexem(text);
196  }
197 
198  // buffer stack contains pairs whose first
199  // element is a string, and whose second is a
200  // code denoting the element type
201 
202  // last is at the top of the buffer stack
203  spear::Pair<String, int> last = buffer_.back();
204  buffer_.pop_back();
205 
206  // text is the actual string
207  text = last.getFirst();
208  // return value is code for type of string
209  return last.getSecond();
210  }
211 
212  template<class T>
213  void EdgeParser<T>::unget(String& text, int lex)
214  {
215  buffer_.push_back(spear::Pair<String, int>(text, lex));
216  }
217 
218 
219 
220 
221 } // end namespace spear
222 
223 #endif
EdgeParser(IStream &syn_stream)
construct a parser object with text input 'syn_stream'
Definition: EdgeParser.h:70
static const int TOKEN_STRING
Definition: EdgeLexer.h:24
int toInteger(const String &s)
Definition: CharUtils.cc:74
const T1 & getFirst() const
Definition: Pair.h:23
Definition: RCIPtr.h:67
Definition: EdgeParser.h:36
static const int TOKEN_RP
Definition: EdgeLexer.h:26
const T2 & getSecond() const
Definition: Pair.h:25
Definition: Exception.h:15
Definition: EdgeLexer.h:19
#define EXTRACT_TOKEN(type, msg)
Definition: EdgeParser.h:21
#define String
Definition: Wide.h:36
spear::RCIPtr< T > parseEdge()
parse the next edge in the stream
Definition: EdgeParser.h:77
static const int TOKEN_EOF
Definition: EdgeLexer.h:23
static const int TOKEN_LP
Definition: EdgeLexer.h:25
#define IStream
Definition: Wide.h:39
Definition: Pair.h:17