Source code for alex.components.nlg.tectotpl.block.t2a.cs.vocalizeprepos

#!/usr/bin/env python
# coding=utf-8
from __future__ import unicode_literals

from alex.components.nlg.tectotpl.core.block import Block
from alex.components.nlg.tectotpl.core.exception import LoadingException
import re

__author__ = "Ondřej Dušek"
__date__ = "2012"

class VocalizePrepos(Block):
    """\
    This block replaces the forms of prepositions 'k', 'v', 'z', 's'
    with their vocalized variants 'ke'/'ku', 've', 'ze', 'se' according to
    the following word.
    """

    # Lemmas of the prepositions that have a vocalized variant.
    _VOCALIZABLE = frozenset(['k', 'v', 'z', 's'])

    # Ordered vocalization rules: (preposition lemma, vocalized form,
    # compiled pattern matched against the start of the following word).
    # The order matters for 'k': the 'ku' rule must be tried before 'ke'.
    # Patterns are raw strings so that '\d' / '\D' reach the regex engine
    # without relying on invalid string escape sequences, and they are
    # compiled once here instead of on every call to vocalize().
    _RULES = (
        ('k', 'ku', re.compile(r'^(prospěch|příklad)')),
        ('k', 'ke', re.compile(
            r'^(k|g|sp|sn|zv|zm|sc|zl|sl|sk|zp|zk|šk|'
            r'zd|zt|zb|zr|sv|mn|vš|vs|ct|sj|dv|zř|zh|'
            r'vč|šp|lá|šť|mř|zc|št|vk|sta|vzn|stu|'
            r'vzd|smí|stě|dnu|vzo|sti|sty|sro|dnů|'
            r'sdr|sbl|sbí|čty|zná)')),
        ('v', 've', re.compile(
            r'^(v|f|st|sp|čt|sk|sv|kt|fr|fi|sl|sn|fu|'
            r'zl|fo|šv|zn|zp|šk|wa|ii|hř|dv|zd|sb|šp|'
            r'sh|št|zb|fa|fá|rw|zk|wi|tm|jm|we|fs|fy|'
            r'fó|žď|hv|gy|mz|žd|šl|gi|zh|sj|zt|žr|šr|'
            r'cv|sw|sro|sml|tří|tva|srá|obž|zví|psa|'
            r'smr|žlu|sca|zrů|sce|zvo|zme|mně$|mne$)')),
        ('s', 'se', re.compile(
            r'^(s|z|kt|vz|vš|mn|šk|že|čt|šv|št|ps|vs|'
            r'šp|ži|cm|ža|ct|cv|dž|šl|še|bý|čle|jmě|'
            r'ple|šam|lst|prs|dvě|dře|7|17$|1\d\d\D?)')),
        ('z', 'ze', re.compile(
            r'^(s|z|kt|dn|šk|vs|šv|vš|št|šu|dř|mz|ži|'
            r'tm|kb|šp|pé|ša|kč|hv|nk|ši|rt|lh|ký|ža|'
            r'lv|šl|žď|žl|hry|vzd|tří|rom|jmě|šes|'
            r'mne|řet|hři|lan|žel|pan|wil|dou|thp|'
            r'pak|půt|cih|brá|hrd|mik|idy|psů|mst|'
            r'mag|vas|4|7|17|1\d\d\D?)')),
    )

    def __init__(self, scenario, args):
        """\
        Constructor, checking the argument values.

        Passes `scenario` and `args` on to the Block constructor and raises
        LoadingException if no language is set afterwards.
        """
        Block.__init__(self, scenario, args)
        # NOTE(review): self.language is presumably populated by
        # Block.__init__ from args -- confirm against the base class.
        if self.language is None:
            raise LoadingException('Language must be defined!')

    def process_atree(self, aroot):
        """\
        Find and vocalize prepositions according to their context.

        Walks over adjacent pairs of the nodes returned by
        aroot.get_descendants(ordered=True). For each node whose
        morphcat_pos is 'R' and whose lemma is one of 'k', 'v', 'z', 's',
        the node's form is overwritten with the result of vocalize(),
        called with the lemma and the lowercased form of the next node.
        The last node has no follower and is therefore never changed.
        """
        anodes = aroot.get_descendants(ordered=True)
        for anode, anext in zip(anodes[:-1], anodes[1:]):
            if (anode.morphcat_pos == 'R' and
                    anode.lemma in self._VOCALIZABLE):
                # the patterns are lowercase and matched case-sensitively,
                # hence the following form is lowercased first
                anode.form = self.vocalize(anode.lemma, anext.form.lower())

    def vocalize(self, prep, follow):
        """\
        Given a preposition lemma and the form of the word following it,
        return the appropriate form (base or vocalized).

        The first rule whose lemma equals `prep` and whose pattern matches
        the beginning of `follow` determines the result. If no rule applies
        (including any `prep` other than 'k', 'v', 'z', 's'), `prep` itself
        is returned unchanged. `follow` is matched case-sensitively against
        lowercase patterns, so callers should pass it lowercased.
        """
        for lemma, vocalized, pattern in self._RULES:
            if prep == lemma and pattern.match(follow):
                return vocalized
        return prep