KJB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Semspear_tree.h
Go to the documentation of this file.
1 #ifndef SEMSPEAR_TREE_H_
2 #define SEMSPEAR_TREE_H_
3 
11 #include "m_cpp/m_vector.h"
15 #include "semantics/SemanticIO.h"
16 #include "semantics/Event_parser.h"
17 #include <boost/make_shared.hpp>
18 #include <list>
19 #include <map>
20 
21 
22 namespace semantics
23 {
24 
25  class Semantic_step_proposal;
26 
28 
33  {
34  public:
35  /*------------------------------------------------------------
36  * TYPEDEFS
37  *------------------------------------------------------------*/
38  typedef boost::shared_ptr<Semspear_tree> Self_ptr;
40  typedef boost::shared_ptr<Word> Word_ptr;
42  typedef boost::shared_ptr<Label> Label_ptr;
43  typedef std::list<Self_ptr> Child_list;
47  typedef boost::shared_ptr<Elaboration_tree> Sem_tree_ptr;
51  typedef std::map<Lexicon_db::Val_type, bool> LF_map_t;
52  typedef std::map<Word, size_t> Freq_map;
53  typedef std::list<Token_map::Key_type> Symbol_list;
54 
56 
59  static bool VERBOSE;
60 
61  public:
62 
63  /*------------------------------------------------------------
64  * CONSTRUCTORS/DESTRUCTOR
65  *------------------------------------------------------------*/
66 
71  const Role& role,
72  const bool& learn = true,
73  const bool& collins = false
74  );
75 
82  const Word& word,
83  const Label& label,
84  const Role& role,
85  const bool& learn = true,
86  const bool& collins = false
87  );
88 
92  const Label& label,
93  const Role& role,
94  const bool& learn = true,
95  const bool& collins = false
96  );
97 
106  const Self_ptr other,
107  const bool& learn = false
108  );
109 
112  ~Semspear_tree();
113 
114 
115  /*------------------------------------------------------------
116  * ACCESSORS
117  *------------------------------------------------------------*/
118 
119  Word word() const {if(word_ != NULL) return *word_; else return Word();}
120 
123  Label label() const
124  {
125  if(label_ != NULL) return *label_;
126  else return Label();
127  }
128 
131  const Self_ptr head() const
132  {
133  if(head_ == children_.end()) return Self_ptr();
134  else return *head_;
135  }
136 
139  const Child_list& children() const { return children_; }
140 
143  Word head_word() const;
144 
147  Label head_tag() const;
148 
152 
155  const Hash_pair& semantic_codes() const { return semantic_codes_; }
156 
159  const Syn_event_ptr syntactic_event() const { return event_; }
160 
161  /*------------------------------------------------------------
162  * QUERIES
163  *------------------------------------------------------------*/
164 
167  bool is_terminal() const {return children_.empty() && word_ != NULL;}
168 
171  bool is_punctuation() const {return is_punc_;}
172 
175  bool is_coordination() const {return is_coord_;}
176 
177  /*------------------------------------------------------------
178  * CALCULATION
179  *------------------------------------------------------------*/
180 
183  double node_log_probability() const;
184 
187 #ifdef USE_SEMANTICS
189  const Elab_ptr_const semantic_parent
190  ) const;
191 #else
192  double subtree_log_probability() const;
193 #endif
194 
195  /*------------------------------------------------------------
196  * MEMBER ASSIGNMENT
197  *------------------------------------------------------------*/
198 
201  void set_word(const Word& word);
202 
205  void set_label(const Label& label);
206 
209  void set_head(const Child_list::const_iterator head);
210 
213  void set_role(const Role& role) { role_ = role; }
214 
217  void set_coord(const bool new_value)
218  {
219  is_coord_ = new_value;
220  if(is_coord_ && !is_base_np_) set_role(COORD);
221  }
222 
225  void set_semantic_tree(const Sem_tree_ptr new_tree);
226 
229  void set_semantic_data(
230  const Elab_ptr_const semantic_parent,
231  const Step_code_t& step_code
232  );
233 
234 
235  /*------------------------------------------------------------
236  * MANIPULATION
237  *------------------------------------------------------------*/
238 
241  void add_child(const Self_ptr& new_child, bool on_left = false);
242 
245  void preprocess_tree();
246 
250  {
251  add_stops();
252  rebuild_events_recursively(NULL, this);
253  }
254 
256  {
257  if(event_ != NULL) event_ -> release_view_counts();
258  if(sem_event_ != NULL) sem_event_ -> release_view_counts();
259  learn_ = false;
260  std::for_each(
261  children_.begin(), children_.end(),
262  boost::bind(&Semspear_tree::release_event_counts, _1)
263  );
264  }
265 
267  {
268  if(event_ != NULL) event_->reacquire_view_counts();
269  if(sem_event_ != NULL) sem_event_->reacquire_view_counts();
270  learn_ = true;
271  }
272 
274  {
276  std::for_each(
277  children_.begin(), children_.end(),
278  boost::bind(
280  _1)
281  );
282  }
283 
285 
286  /*------------------------------------------------------------
287  * DISPLAY
288  *------------------------------------------------------------*/
289 
293  std::ostream& os,
294  bool is_head = false,
295  int indent_level = 0
296  ) const;
297 
301  std::ostream& os,
302  bool is_head = false,
303  int indent_level = 0
304  ) const;
305 
308  void print_events_with_probabilities(std::ostream& os) const;
309 
312  void print_subtree_view_counts(std::ostream& os) const;
313 
314 
315  /*------------------------------------------------------------
316  * FRIEND FUNCTIONS
317  *------------------------------------------------------------*/
318 
319  friend void initialize_special_symbols();
320 
321  friend boost::tuple<Self_ptr, kjb::Vector> propose_new_tree(
322  const Self_ptr source
323  );
324 
325  friend void resample_event_tables(Self_ptr& source);
326 
327  public:
328  /*------------------------------------------------------------
329  * STATIC ACCESSORS
330  *------------------------------------------------------------*/
331 
335 
338  static Nonterminal_db& nt_lexicon();
339 
343 
347  {
348  static boost::shared_ptr<Freq_map> wfm(new Freq_map());
349  return *wfm;
350  };
351 
352  private:
353 
354 
357  Child_list::iterator head_iterator()
358  {
359  return head_;
360  }
361 
364  Child_list::reverse_iterator head_riterator()
365  {
366  return std::reverse_iterator<Child_list::iterator>(head_);
367  }
368 
371  void add_stops();
372 
376  double resample_semantic_move(
377  const Elab_ptr_const& semantic_parent,
378  bool& altered
379  );
380 
383  double evaluate_reverse_move(
384  const Elab_ptr_const& semantic_parent,
385  const bool tree_is_altered
386  ) const;
387 
388  private:
389 
390  /*------------------------------------------------------------
391  * UPDATE-RELATED HELPERS
392  *------------------------------------------------------------*/
393 
406  void update_local_event(
407  Semspear_tree* parent,
408  Semspear_tree* sister_head,
409  const int& dist_code,
410  bool punc_flag,
411  const Node_data& punc_data,
412  bool coord_flag,
413  const Node_data& coord_data,
414  const int& depth
415  );
416 
423  void update_semantic_context(
424  const Hash_pair& parent_semantics,
425  const Hash_pair& head_semantics
426  );
427 
437  void update_semantic_association
438  (
439  Elab_ptr_const semantic_parent,
440  bool resample_step_codes = true
441  );
442 
449  void rebuild_events_recursively(
450  Semspear_tree* const parent = NULL,
451  Semspear_tree* const sister_head = NULL,
452  const int& dist_code = 0,
453  bool punc_flag = false,
454  const Node_data& punc_data = Node_data(0, 0, 0),
455  bool coord_flag = false,
456  const Node_data& coord_data = Node_data(0, 0, 0),
457  const int& depth = 0
458  );
459 
469  template<typename Iter>
470  void update_events_in_child_range(
471  const Iter& start,
472  const Iter& end,
473  const int& dist_base,
474  const int& depth
475  );
476 
477  /*------------------------------------------------------------
478  * PROPOSAL HELPERS
479  *------------------------------------------------------------*/
480 
495  template<typename Iter>
496  static kjb::Vector propose_associations_in_child_range(
497  const Self_ptr source,
498  Self_ptr& dest,
499  Iter source_start,
500  Iter source_end,
501  Iter dest_start,
502  Iter dest_end,
503  bool tree_is_altered
504  );
505 
516  static kjb::Vector propose_new_associations(
517  const Self_ptr source,
518  Self_ptr& dest,
519  const Elab_ptr_const source_semantic_parent,
520  const Elab_ptr_const dest_semantic_parent,
521  const bool tree_is_altered
522  );
523 
524  /*------------------------------------------------------------
525  * PREPROCESSING HELPER FUNCTIONS
526  *------------------------------------------------------------*/
527 
530  bool contains_verb() const;
531 
534  bool should_be_npb() const;
535 
538  void prune_unnecessary_nodes();
539 
542  void process_npbs();
543 
546  void insert_npb_parents();
547 
550  void raise_punctuation(
551  Child_list& siblings,
552  Child_list::iterator begin,
553  Child_list::iterator end
554  );
555 
558  void mark_coordinated_phrase_children();
559 
560  private:
561  Role role_;
562  Word_ptr word_;
563  Label_ptr label_;
564  Child_list children_;
565  Child_list::iterator head_;
566  Sem_tree_ptr semantic_tree_;
567  Elab_ptr_const semantic_node_;
568  Hash_pair semantic_codes_;
569  Step_code_t step_code_;
570  Syn_event_ptr event_;
571  Sem_event_ptr sem_event_;
572  bool is_punc_;
573  bool is_coord_;
574  bool is_base_np_;
575  bool learn_;
576  bool collins_;
577  };
578 
579  /*------------------------------------------------------------
580  * FREE FUNCTIONS
581  *------------------------------------------------------------*/
582 
583  boost::tuple<Semspear_tree::Self_ptr, kjb::Vector>
585 
587 
588  /*------------------------------------------------------------
589  * TEMPLATE MEMBER DEFINITIONS
590  *------------------------------------------------------------*/
591 
592  template<typename Iter>
593  void Semspear_tree::update_events_in_child_range(
594  const Iter& start,
595  const Iter& end,
596  const int& dist_base,
597  const int& depth
598  )
599  {
600  Semspear_tree* hh = head().get();
601  bool adjacent_to_head = true;
602  bool has_intervening_verb = false;
603  bool next_punc_flag = false;
604  Node_data next_punc_data = Node_data(0, 0, 0);
605  bool next_coord_flag = false;
606  Node_data next_coord_data = Node_data(0, 0, 0);
607  int dist = dist_base + 10*adjacent_to_head + 1*has_intervening_verb;
608  for(Iter it = start; it != end; it++)
609  {
610  if(*it == head()) continue;
611  (*it) ->
612  rebuild_events_recursively(
613  this,
614  hh,
615  dist,
616  next_punc_flag,
617  next_punc_data,
618  next_coord_flag,
619  next_coord_data,
620  depth + 1
621  );
622  if(!((*it) -> is_punctuation())
623  && (!(*it) -> is_coordination() || is_base_np_))
624  {
625  adjacent_to_head = false;
626  next_punc_flag = false;
627  next_punc_data = Node_data(0, 0, 0);
628  next_coord_flag = false;
629  next_coord_data = Node_data(0, 0, 0);
630  if(is_base_np_)
631  {
632  hh = (*it).get();
633  adjacent_to_head = true;
634  has_intervening_verb = false;
635  } else if((*it) -> contains_verb())
636  {
637  has_intervening_verb = true;
638  }
639  } else if((*it) -> is_punctuation() && !next_punc_flag) {
640  assert((*it) -> word() != 0);
641  assert((*it) -> label() != 0);
642  next_punc_flag = true;
643  next_punc_data =
644  Node_data((*it) -> word(), (*it) -> label(), 0);
645  } else if((*it) -> is_coordination() && !next_coord_flag) {
646  next_coord_flag = true;
647  next_coord_data =
648  Node_data((*it) -> word(), (*it) -> label(), 0);
649  }
650  dist = dist_base + 10*adjacent_to_head + 1*has_intervening_verb;
651  }
652  }
653 
654 /* /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ /\ */
655 
656  template<typename Iter>
657  kjb::Vector Semspear_tree::propose_associations_in_child_range(
658  const Self_ptr source,
659  Self_ptr& dest,
660  Iter source_child_start,
661  Iter source_child_end,
662  Iter dest_child_start,
663  Iter dest_child_end,
664  const bool tree_is_altered
665  )
666  {
667  typedef kjb::Vector Result;
668  Result result(0.0, 0.0);
669  Semspear_tree* hh = dest->head().get();
670  Iter s_it = source_child_start, d_it = dest_child_start;
671  for( ;
672  s_it != source_child_end && d_it != dest_child_end;
673  ++s_it, ++d_it)
674  {
675  if((*s_it) == source->head() || (*s_it)->is_punctuation()) continue;
676  (*d_it)->update_semantic_context(
677  dest->semantic_codes(),
678  hh->semantic_codes()
679  );
680  result +=
681  Semspear_tree::propose_new_associations(
682  *s_it, *d_it,
683  source->semantic_node_,
684  dest->semantic_node_,
685  tree_is_altered
686  );
687  if(dest->is_base_np_)
688  {
689  hh = (*d_it).get();
690  }
691  }
692  return result;
693  }
694 };
695 
696 #endif
void update_event_views_recursively()
Definition: Semspear_tree.cpp:538
boost::shared_ptr< Syntactic_event > Event_ptr
Definition: Syntactic_event.h:27
void set_label(const Label &label)
assign a new value to the label field
Definition: Semspear_tree.cpp:374
friend boost::tuple< Self_ptr, kjb::Vector > propose_new_tree(const Self_ptr source)
Definition: Semspear_tree.cpp:70
bool is_coordination() const
is this a coordinating conjunction?
Definition: Semspear_tree.h:175
void preprocess_tree()
do some preprocessing in accordance with Collins' model
Definition: Semspear_tree.cpp:1033
const Hash_pair & semantic_codes() const
get the pair of semantic codes at the root of this subtree
Definition: Semspear_tree.h:155
Role
Definition: Semspear_tree.h:55
boost::shared_ptr< Semspear_tree > Self_ptr
Definition: Semspear_tree.h:38
void set_semantic_data(const Elab_ptr_const semantic_parent, const Step_code_t &step_code)
set semantic data for this node
Definition: Semspear_tree.cpp:410
Word head_word() const
get the head word for this subtree
Definition: Semspear_tree.cpp:330
friend void resample_event_tables(Self_ptr &source)
Definition: Semspear_tree.cpp:87
boost::shared_ptr< Word > Word_ptr
Definition: Semspear_tree.h:40
std::list< Self_ptr > Child_list
Definition: Semspear_tree.h:43
std::list< Token_map::Key_type > Symbol_list
Definition: Semspear_tree.h:53
Definition: Semspear_tree.h:55
static LF_map_t & lf_word_map()
accessor to a global map flagging low frequency words
Definition: Event_parser.h:93
Label head_tag() const
get the head tag for this subtree
Definition: Semspear_tree.cpp:339
boost::shared_ptr< Elaboration_tree > Sem_tree_ptr
Definition: Semspear_tree.h:47
The main syntactic tree object for semantic parsing.
Definition: Semspear_tree.h:32
Definition: Semspear_tree.h:55
void set_coord(const bool new_value)
set the coordinated phrase flag
Definition: Semspear_tree.h:217
void initialize_special_symbols()
This class implements vectors, in the linear-algebra sense, with real-valued elements.
Definition: m_vector.h:87
Elab_ptr_const semantic_root() const
get a pointer to the root of the associated semantic tree
Definition: Semspear_tree.cpp:350
Semantic_elaboration::Self_ptr Elab_ptr
Definition: Elaboration_tree.h:33
Definition: Semspear_tree.h:55
boost::tuple< Semspear_tree::Self_ptr, kjb::Vector > propose_new_tree(const Semspear_tree::Self_ptr source)
Definition: Semspear_tree.cpp:70
Semspear_tree(const Role &role, const bool &learn=true, const bool &collins=false)
default constructor, creates an empty tree
Definition: Semspear_tree.cpp:182
double dist(const pt a, const pt b)
compute approx. Great Circle distance between two UTM points
Definition: layer.cpp:45
Value_type Label_type
Definition: Tree_event.h:39
Step_code::Code Step_code_t
Definition: SemanticIO.h:73
~Semspear_tree()
decrements word count when destructed (if learn_ is true)
Definition: Semspear_tree.cpp:304
void set_role(const Role &role)
set the syntactic role of this node
Definition: Semspear_tree.h:213
Syntactic_event::Word_type Word
Definition: Semspear_tree.h:39
Syntactic_event::Node_data Node_data
Definition: Semspear_tree.h:46
void add_child(const Self_ptr &new_child, bool on_left=false)
append a new child subtree
Definition: Semspear_tree.cpp:318
void reacquire_event_counts()
Definition: Semspear_tree.h:266
void set_semantic_tree(const Sem_tree_ptr new_tree)
associate this syntactic tree w/ a new semantic tree
Definition: Semspear_tree.cpp:397
Syntactic_event::Label_type Label
Definition: Semspear_tree.h:41
static Freq_map & word_freq_map()
return a reference to the map indexing actual word frequencies
Definition: Semspear_tree.h:346
const Child_list & children() const
returns the list of children
Definition: Semspear_tree.h:139
Semantic_step_proposal::Elab_ptr_const Elab_ptr_const
Definition: Semantic_step_proposal.cpp:35
void set_head(const Child_list::const_iterator head)
assign a new head and acquire its info
Definition: Semspear_tree.cpp:384
boost::shared_ptr< Label > Label_ptr
Definition: Semspear_tree.h:42
void release_event_counts()
Definition: Semspear_tree.h:255
void print_constituency_tree(std::ostream &os, bool is_head=false, int indent_level=0) const
print as constituency tree (marking head)
Definition: Semspear_tree.cpp:676
double node_log_probability() const
compute log prob of this node by itself
Definition: Semspear_tree.cpp:626
boost::shared_ptr< Semantic_step_event > Event_ptr
Definition: Semantic_step_event.h:18
Elaboration_tree::Elab_ptr Elab_ptr
Definition: Semspear_tree.h:48
static Nonterminal_db & nt_lexicon()
return a reference to the underlying nonterminal lexicon
Definition: Semspear_tree.cpp:309
Definition: Lexicon_db.h:17
std::map< Word, size_t > Freq_map
Definition: Semspear_tree.h:52
static Lexicon_db & lexicon()
return a reference to the underlying terminal lexicon
Definition: Semspear_tree.h:334
friend void initialize_special_symbols()
Syntactic_event::Event_ptr Syn_event_ptr
Definition: Semspear_tree.h:44
void set_word(const Word &word)
assign a new value to the word field
Definition: Semspear_tree.cpp:360
const Syn_event_ptr syntactic_event() const
returns a pointer to the probabilistic event at the root
Definition: Semspear_tree.h:159
#define dest(triedge, pointptr)
Definition: triangle.c:938
void print_events_with_probabilities(std::ostream &os) const
print events and associated log probabilities
Definition: Semspear_tree.cpp:778
double subtree_log_probability() const
compute log prob of this tree according to events
Definition: Semspear_tree.cpp:633
Semantic_step_event::Event_ptr Sem_event_ptr
Definition: Semspear_tree.h:45
void print_dependency_tree(std::ostream &os, bool is_head=false, int indent_level=0) const
display as dependency tree
Definition: Semspear_tree.cpp:714
Elaboration_tree::Hash_pair Hash_pair
Definition: Semspear_tree.h:50
const Self_ptr head() const
returns a pointer to the head subtree
Definition: Semspear_tree.h:131
bool is_terminal() const
is this subtree a leaf?
Definition: Semspear_tree.h:167
Definition: Semspear_tree.h:55
Label label() const
returns the label field
Definition: Semspear_tree.h:123
Definition: Semspear_tree.h:55
void complete_tree()
signals that tree is fully built, and to process data
Definition: Semspear_tree.h:249
void reacquire_event_counts_recursively()
Definition: Semspear_tree.h:273
void print_subtree_view_counts(std::ostream &os) const
print count info at each node
Definition: Semspear_tree.cpp:760
boost::tuple< Word_type, Label_type, Label_type > Node_data
Definition: Tree_event.h:41
Word word() const
Definition: Semspear_tree.h:119
Definition: Semspear_tree.h:55
static Lexicon_db & lexicon()
Definition: Tree_event.cpp:20
Semantic_data_base::Hash_pair Hash_pair
Definition: Elaboration_tree.h:32
Elaboration_tree::Elab_ptr_const Elab_ptr_const
Definition: Semspear_tree.h:49
Definition: Nonterminal_db.h:16
Semantic_elaboration::Self_ptr_const Elab_ptr_const
Definition: Elaboration_tree.h:34
Code
Definition: SemanticIO.h:54
static LF_map_t & lf_word_map()
return a reference to the map indexing whether words are rare
Definition: Semspear_tree.h:342
Definition for the Vector class, a thin wrapper on the KJB Vector struct and its related functionalit...
void resample_event_tables(std::vector< Semspear_tree::Self_ptr > &trees)
Definition: sample_associations.cpp:89
Definition: Semspear_tree.h:55
bool is_punctuation() const
is this a punctuation node?
Definition: Semspear_tree.h:171
Value_type Word_type
Definition: Tree_event.h:37
static bool VERBOSE
should debugging messages be printed?
Definition: Semspear_tree.h:59
std::map< Lexicon_db::Val_type, bool > LF_map_t
Definition: Semspear_tree.h:51