Source code for alex.components.nlg.tectotpl.block.t2a.cs.initmorphcat

#!/usr/bin/env python
# coding=utf-8
#
# Initializing the a-layer morphcat structure from t-layer grammatemes.
#
from __future__ import unicode_literals

from alex.components.nlg.tectotpl.core.block import Block
from alex.components.nlg.tectotpl.core.exception import LoadingException
import re

__author__ = "Ondřej Dušek"
__date__ = "2012"


class InitMorphcat(Block):
    """
    According to t-layer grammatemes, this initializes the morphcat
    structure at the a-layer that is the basis for a later POS tag
    limiting in the word form generation.

    Arguments:
        language: the language of the target tree
        selector: the selector of the target tree
    """

    # Lookup tables translating grammateme values (keys) to the
    # single-character morphcat values (values). A `None` key covers an unset
    # grammateme. Values not listed in a table raise KeyError on lookup.
    GENDER = {None: '.', 'anim': 'M', 'inan': 'I', 'fem': 'F', 'neut': 'N',
              'nr': '.', 'inher': '.'}
    NUMBER = {None: '.', 'sg': 'S', 'pl': 'P', 'nr': '.', 'inher': '.'}
    DEGREE = {None: '.', 'pos': '1', 'comp': '2', 'acomp': '2', 'sup': '3',
              'nr': '.'}
    PERSON = {None: '.', '1': '1', '2': '2', '3': '3', 'inher': '.'}
    NEGATION = {None: 'A', 'neg0': 'A', 'neg1': 'N'}
    VOICE = {None: '.', 'active': 'A', 'passive': 'P', 'act': 'A', 'pas': 'P',
             'deagent': 'A'}

    def __init__(self, scenario, args):
        """\
        Constructor, checking the argument values.

        Passes `scenario` and `args` on to the `Block` constructor and
        raises a LoadingException if no language has been set.
        """
        Block.__init__(self, scenario, args)
        if self.language is None:
            raise LoadingException('Language must be defined!')

    def process_tnode(self, tnode):
        """\
        Initialize the morphcat structure in the a-node that is the lexical
        counterpart of the given t-node.

        Nothing is done if the t-node has no lexical a-node. For nodes of
        type 'coap', 'atom' and 'dphr', only the POS is set (to '!') and
        all other categories are left untouched.
        """
        # obtain anode
        anode = tnode.lex_anode
        if not anode:
            return

        # rule out uninflected words
        if tnode.nodetype in ['coap', 'atom', 'dphr']:
            anode.morphcat_pos = '!'
            return

        # POS (more or less not needed); later assignments override
        # earlier ones
        anode.morphcat_pos = tnode.mlayer_pos or '.'
        if tnode.formeme.startswith('v'):
            anode.morphcat_pos = 'V'
        if tnode.t_lemma == ':':
            anode.morphcat_pos = 'Z'

        # set 'simple' things
        anode.morphcat_person = self.PERSON[tnode.gram_person]
        anode.morphcat_number = self.NUMBER[tnode.gram_number]
        anode.morphcat_gender = self.GENDER[tnode.gram_gender]

        # set detailed categories for personal pronouns
        if tnode.t_lemma == '#PersPron':
            self.set_perspron_categories(tnode, anode)

        # set case
        self.set_case(tnode, anode)

        # set degree of comparison -- not for pronouns and quantifiers.
        # The 'pron'/'quant' marker is looked for anywhere in the sempos
        # (re.search): the negation pattern below expects it after the
        # leading n/a/v part, so an anchored re.match could never find it.
        degree = tnode.gram_degcmp
        if re.search(r'quant|pron', tnode.gram_sempos or ''):
            degree = None
        anode.morphcat_grade = self.DEGREE[degree]

        # set negation -- only for sempos starting with n/a/v that do not
        # contain 'pron' or 'quant', and never for personal pronouns
        if re.match(r'^[nav](?!.*(pron|quant))', tnode.gram_sempos or '') and \
                tnode.t_lemma != '#PersPron':
            anode.morphcat_negation = self.NEGATION[tnode.gram_negation]

        # set voice (the `voice` attribute takes precedence over the
        # diathesis grammateme)
        if (tnode.gram_sempos or '').startswith('v'):
            anode.morphcat_voice = self.VOICE[tnode.voice or tnode.gram_diathesis]

    def set_case(self, tnode, anode):
        """\
        Set the morphological case for an a-node according to the
        corresponding t-node's formeme, where applicable.

        If none of the rules below applies, `anode.morphcat_case` is left
        untouched.
        """
        # take the case directly from the formeme, if possible
        # (the first digit found in the formeme)
        case = re.search(r'(\d)', tnode.formeme)
        if case:
            anode.morphcat_case = case.group(1)
        # pro-drop pronouns are assumed to have nominative case
        # (since this is how a subject is searched for)
        elif tnode.formeme == 'drop':
            anode.morphcat_case = '1'
        # this is actually a hack for older formemes which lacked case
        # in adjectives with prepositions; the '+' must be escaped to match
        # the literal formeme 'adj:za+X'
        elif re.match(r'^adj:za\+X$', tnode.formeme):
            anode.morphcat_case = '4'

    def set_perspron_categories(self, tnode, anode):
        """\
        Set detailed morphological categories of personal pronouns
        of various types (possessive, reflexive, personal per se).

        Always sets `anode.morphcat_subpos`; depending on the pronoun type,
        it may also reset `morphcat_person` and set `morphcat_possnumber`
        and `morphcat_possgender`.
        """
        # possessive/reflexive possessive pronouns
        if re.search(r'poss|attr', tnode.formeme):
            # reflexive possessive: the pronoun has grammatical coreference
            # and its first effective parent has no '1' in its formeme
            tnouns = tnode.get_eparents()
            if tnouns and tnode.coref_gram_nodes and \
                    not (tnouns[0].formeme and '1' in tnouns[0].formeme):
                anode.morphcat_person = '.'
                anode.morphcat_subpos = '8'
                return
            # other possessive
            anode.morphcat_possnumber = self.NUMBER[tnode.gram_number]
            if anode.morphcat_person == '3':
                anode.morphcat_possgender = self.GENDER[tnode.gram_gender]
            anode.morphcat_subpos = 'S'
            return

        # plain reflexive pronouns
        if tnode.is_reflexive:
            anode.morphcat_person = '.'
            if re.search(r'[^+][34]', tnode.formeme):
                # no preposition - short 'se', 'si'
                anode.morphcat_subpos = '7'
            else:
                # after preposition - long 'sebe', 'sobě'
                anode.morphcat_subpos = '6'
            return

        # personal pronouns after prepositions -- 'něho', 'něm' etc.
        if re.match(r'n:.*\+', tnode.formeme) and anode.morphcat_person == '3':
            anode.morphcat_subpos = '3'
        # personal pronoun 'ho', 'mu'
        elif re.search(r'[34]', tnode.formeme) and \
                tnode.gram_number == 'sg' and tnode.gram_gender != 'fem':
            anode.morphcat_subpos = 'H'
        # default form
        else:
            anode.morphcat_subpos = 'P'