# Source code for alex.components.nlg.tectotpl.block.t2a.cs.marksubject

#!/usr/bin/env python
# coding=utf-8
#
# A Treex block
#
from __future__ import unicode_literals

from alex.components.nlg.tectotpl.core.block import Block
from alex.components.nlg.tectotpl.core.exception import LoadingException
import re


__author__ = "Ondřej Dušek"
__date__ = "2012"


class MarkSubject(Block):
    """
    Marks the subject of each clause with the Afun 'Sb'.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    def __init__(self, scenario, args):
        """Constructor, just checking the argument values.

        Raises:
            LoadingException: if no language has been set for the block.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')

    def process_ttree(self, ttree):
        """Mark all subjects in a sentence.

        For every t-node whose formeme matches ``^v.+(fin|rc)$`` and which
        has a lexical a-node, the best subject candidate among that a-node's
        children (if any) gets its ``afun`` set to 'Sb'.
        """
        tnodes = ttree.get_descendants()
        # filter out oblique casus and TWHEN etc. expressions,
        # e.g. "minulé pondělí" etc. and create a set of viable a-nodes
        nominatives = set(
            cand.lex_anode for cand in tnodes
            if cand.formeme in ('n:1', 'drop')
            and not cand.functor.startswith('T'))
        # find all verbs (collected eagerly, before any a-node is modified)
        verbs = [tnode for tnode in ttree.get_descendants()
                 if re.match(r'^v.+(fin|rc)$', tnode.formeme)]
        # mark the subject for each of them
        for tnode in verbs:
            if not tnode.lex_anode:
                continue
            asubj = self.__find_subject(tnode.lex_anode, nominatives)
            if asubj is not None:
                asubj.afun = 'Sb'

    def __find_subject(self, anode, nominatives):
        """Find the subject of a verbal a-node.

        Only children contained in the candidate set are considered.

        Arguments:
            anode: the a-node of the verb whose subject is searched for
            nominatives: the set of a-nodes that may become subjects

        Returns:
            The first suitable child of `anode`, or None if there is none.
        """
        # select all children in the right order: preceding ones reversed
        # (presumably so that the nearest one comes first), then following
        candidates = list(reversed(anode.get_echildren(preceding_only=True))) \
            + anode.get_echildren(following_only=True)
        # discard those which are not "proper" nominatives; lists are built
        # explicitly since filter() returns a lazy iterator in Python 3,
        # which is always truthy and cannot be indexed
        candidates = [a for a in candidates if a in nominatives]
        # filter copula verb candidates - demonstrative pronouns
        if anode.lemma == 'být':
            candidates = [a for a in candidates
                          if a.lemma not in ('ten', 'tento')]
        # return the first (i.e. best) candidate, if available
        return candidates[0] if candidates else None