#!/usr/bin/env python
# coding=utf-8
#
# Classes related to Treex documents, bundles and zones
#
from __future__ import unicode_literals
from alex.components.nlg.tectotpl.core.exception import RuntimeException
import alex.components.nlg.tectotpl.core.node
__author__ = "Ondřej Dušek"
__date__ = "2012"
class Document(object):
    """\
    This represents a Treex document, i.e. a sequence of bundles.
    It contains an index of node IDs.

    Two indexes are maintained:

    * a forward index mapping a node ID to the node itself,
    * a backward-reference index mapping, for each reference type, the ID
      of a referenced (target) node to the list of IDs of the (source)
      nodes that refer to it.

    Nodes can also be accessed using the subscript syntax
    (``doc[node_id]``, ``doc[node_id] = node``, ``del doc[node_id]``).
    """

    def __init__(self, filename=None, data=None):
        """\
        Constructor. The data should contain a list of bundles that will be
        passed to the constructor of Bundle.

        Bundles are numbered from 1 in the order given in the data.
        """
        data = data or []
        # both indexes are created before the bundles are built, so that
        # they already exist while the bundles are being constructed
        self.__index = {}
        self.__backref = {}
        self.filename = filename
        self.bundles = [Bundle(self, data=bundle_data, b_ord=b_ord)
                        for b_ord, bundle_data in enumerate(data, start=1)]

    def index_node(self, node):
        """\
        Index a node by its id. Also index the node's references in the
        backwards reference index.

        The node must provide an `id` attribute and a `get_referenced_ids()`
        method returning a dictionary (reference type -> target ID or list
        of target IDs).
        """
        self.__index[node.id] = node
        # items() (unlike iteritems()) is available on Python 2 and 3
        for ref_type, value in node.get_referenced_ids().items():
            self.index_backref(ref_type, node.id, value)

    def remove_node(self, node_id):
        """\
        Remove a node from all indexes.

        The node is deleted from the ID index, its own outgoing references
        are dropped from the backward index and all nodes still referring
        to it are asked to remove their reference (`remove_reference()`).

        Raises KeyError if there is no node with the given ID.
        """
        # delete from normal index
        node = self.__index.pop(node_id)
        # forget the references leading *from* the removed node, so that
        # no stale source ID is looked up when its targets are removed later
        for ref_type, value in node.get_referenced_ids().items():
            self.remove_backref(ref_type, node_id, value)
        # using backward references, remove all references *to* the node;
        # the list is detached from the index first, so that it cannot
        # change under our hands should the referencing node update
        # the backward index itself
        for backref_type, targets in list(self.__backref.items()):
            for ref in targets.pop(node_id, ()):
                referencing_node = self.get_node_by_id(ref)
                referencing_node.remove_reference(backref_type, node_id)

    def get_node_by_id(self, node_id):
        """\
        Return the node indexed under the given ID; raises KeyError if there
        is no such node.
        """
        return self.__index[node_id]

    def __getitem__(self, key):
        "Subscript access to nodes by ID, same as get_node_by_id."
        return self.get_node_by_id(key)

    def __setitem__(self, key, value):
        """\
        Index the given node under its ID. The key must be equal to the
        node's ID, otherwise ValueError is raised.
        """
        if value.id != key:
            raise ValueError('Key does not match the node ID')
        self.index_node(value)

    def __delitem__(self, key):
        "Remove the node with the given ID, same as remove_node."
        self.remove_node(key)

    def index_backref(self, attr_name, source_id, target_ids):
        """\
        Keep track of a backward reference (source, target node IDs are in the
        direction of the original reference)

        The target_ids may be a single ID or a list/tuple of IDs.
        """
        # work always with lists of IDs, but handle also single IDs by
        # putting them into a list
        if not isinstance(target_ids, (list, tuple)):
            target_ids = [target_ids]
        # create the backward index if it does not exist
        targets = self.__backref.setdefault(attr_name, {})
        # save the backward references (note the direction target -> source)
        for target_id in target_ids:
            targets.setdefault(target_id, []).append(source_id)

    def remove_backref(self, attr_name, source_id, target_ids):
        """\
        Remove references from the backwards index.

        The target_ids may be a single ID or a list/tuple of IDs.
        References that are not present in the index are silently ignored.
        """
        targets = self.__backref.get(attr_name)
        # return if the index does not exist at all
        if not targets:
            return
        # work always with lists of IDs, but handle also single IDs
        # by putting them into a list
        if not isinstance(target_ids, (list, tuple)):
            target_ids = [target_ids]
        # delete all references
        for target_id in target_ids:
            try:
                targets[target_id].remove(source_id)
            except (KeyError, ValueError):
                # if the reference is not there, we don't care
                pass

    def create_bundle(self, data=None):
        """\
        Append a new bundle and return it.

        The new bundle's order is its 1-based position in the document.
        """
        bundle = Bundle(self, data, b_ord=len(self.bundles) + 1)
        self.bundles.append(bundle)
        return bundle
class Bundle(object):
    """\
    Represents a bundle, i.e. a list of zones pertaining
    to the same sentence (in different variations).

    The zones are stored under (language, selector) keys.
    """

    def __init__(self, document, data=None, b_ord=None):
        """\
        Constructor. The data should contain a list of zones
        that will be passed to the constructor of Zone.

        The b_ord parameter is the order of the bundle in the document.
        """
        data = data or []
        self.__document = document
        # if no order is given, default to -1 (an explicit 0 is preserved)
        self.__ord = b_ord if b_ord is not None else -1
        self.__zones = {}
        # sort zones according to language and selector
        for zone_data in data:
            zone = Zone(data=zone_data, bundle=self)
            self.__zones[(zone.language, zone.selector)] = zone

    def get_all_zones(self):
        """\
        Return all zones contained in this bundle (as a list).
        """
        return list(self.__zones.values())

    def get_zone(self, language, selector):
        """\
        Returns the corresponding zone for a language and selector;
        raises an exception (KeyError) if the zone does not exist.
        """
        return self.__zones[(language, selector)]

    def get_or_create_zone(self, language, selector):
        """\
        Returns the zone for a language and selector; if it does
        not exist, creates an empty zone.
        """
        if self.has_zone(language, selector):
            return self.get_zone(language, selector)
        return self.create_zone(language, selector)

    def has_zone(self, language, selector):
        """\
        Returns True if the bundle has a zone for the
        given language and selector.
        """
        return (language, selector) in self.__zones

    def create_zone(self, language, selector):
        """\
        Creates a zone at the given language and selector.
        Will overwrite any existing zones.

        Returns the newly created zone.
        """
        zone = Zone(bundle=self, language=language, selector=selector)
        self.__zones[(language, selector)] = zone
        return zone

    @property
    def document(self):
        "The document this bundle belongs to."
        return self.__document

    @property
    def ord(self):
        "The order of this bundle in the document, as given by constructor"
        return self.__ord
class Zone(object):
    """\
    Represents a zone, i.e. a sentence and corresponding trees.

    A zone may hold at most one tree on each of the layers t, a, n, p;
    the trees are accessible as the `ttree`, `atree`, `ntree` and `ptree`
    properties, or through `get_tree()` given the layer name.
    """

    def __init__(self, data=None, language=None, selector=None, bundle=None):
        """\
        Constructor. The data should contain a dictionary with
        the following keys: language, selector, sentence, Xtree (where X
        is one of t, a, n, p).

        Language and selector given in the data take precedence over
        the `language` and `selector` parameters; the selector defaults
        to an empty string.
        """
        data = data or {}
        self.__bundle = bundle
        # a zone outside of any bundle does not belong to any document
        self.__document = (bundle.document or None) if bundle else None
        self.language = data.get('language') or language
        self.selector = data.get('selector') or selector or ''
        self.sentence = data.get('sentence')
        # build all trees for which there are data
        for layer in ('t', 'a', 'n', 'p'):
            if layer + 'tree' in data:
                self.create_tree(layer, data[layer + 'tree'])

    @property
    def bundle(self):
        "The bundle in which this zone is located"
        return self.__bundle

    @property
    def document(self):
        "The document in which this zone is located"
        return self.__document

    def has_tree(self, layer):
        """\
        Return True if this zone has a tree on the given layer, False
        otherwise.
        """
        # the tree properties raise AttributeError until a tree is set
        return hasattr(self, layer + 'tree')

    def get_tree(self, layer):
        """\
        Return a tree this node has on the given layer or raise an
        exception if the tree does not exist.
        """
        return getattr(self, layer + 'tree')

    def create_tree(self, layer, data=None):
        """\
        Create a tree on the given layer, filling it with the given data
        (if applicable).

        If no data are given, an empty root is created, with an ID composed
        of the layer, the zone's language and selector and the order
        of the bundle (if the zone is located in a bundle). If the data
        contain the key 'nodes', its value is used as a list of data for
        the individual non-root nodes (each with a 'parent_id' key).
        The passed data dictionary itself is left unchanged.

        Returns the root of the new tree; raises a RuntimeException if
        the zone already has a tree on the given layer.
        """
        if data is None:
            nodes_data = None
            data = {'id': layer + '-node-' + self.language_and_selector +
                    ('-s' + str(self.bundle.ord) if self.bundle else '') +
                    '-root'}
        else:
            # store data for child nodes for later use; work on a copy
            # so that the caller's dictionary is not modified
            data = dict(data)
            nodes_data = data.pop('nodes', None)
        # call the appropriate constructor of the corresponding
        # class from alex.components.nlg.tectotpl.core.node (A, T, N, P)
        node_type = getattr(alex.components.nlg.tectotpl.core.node,
                            layer.upper())
        # create the root
        root = node_type(data=data, zone=self)
        # set root's ord to 0 if not set in data
        if hasattr(root, 'ord') and root.ord is None:
            root.ord = 0
        # this goes through the property setter, which refuses
        # to overwrite an existing tree
        setattr(self, layer + 'tree', root)
        # create all the children given in data
        if nodes_data is not None:
            # all nodes are created under the root first and moved
            # to their actual parents only when all of them exist
            nodes = [(node_data['parent_id'],
                      node_type(data=node_data, parent=root, zone=self))
                     for node_data in nodes_data]
            # NOTE(review): this requires the zone to be located in
            # a document where the created nodes can be found by ID
            doc = self.document
            for (parent_id, node) in nodes:
                node.parent = doc.get_node_by_id(parent_id)
        return self.get_tree(layer)

    def has_ttree(self):
        "Return true if this zone has a t-tree."
        return self.has_tree('t')

    def has_atree(self):
        "Return true if this zone has an a-tree."
        return self.has_tree('a')

    def has_ntree(self):
        "Return true if this zone has an n-tree."
        return self.has_tree('n')

    def has_ptree(self):
        "Return true if this zone has a p-tree."
        return self.has_tree('p')

    @property
    def ttree(self):
        """\
        Direct access to t-tree (will raise an exception if the
        tree does not exist).
        """
        return self.__ttree

    @ttree.setter
    def ttree(self, value):
        # an existing tree must not be overwritten
        if self.has_ttree():
            raise RuntimeException('Can\'t create a t-tree: tree exists')
        self.__ttree = value

    @property
    def atree(self):
        """\
        Direct access to a-tree (will raise an exception if the tree
        does not exist).
        """
        return self.__atree

    @atree.setter
    def atree(self, value):
        # an existing tree must not be overwritten
        if self.has_atree():
            raise RuntimeException('Can\'t create an a-tree: tree exists')
        self.__atree = value

    @property
    def ntree(self):
        """\
        Direct access to n-tree (will raise an exception if the tree
        does not exist).
        """
        return self.__ntree

    @ntree.setter
    def ntree(self, value):
        # an existing tree must not be overwritten
        if self.has_ntree():
            raise RuntimeException('Can\'t create an n-tree: tree exists')
        self.__ntree = value

    @property
    def ptree(self):
        """\
        Direct access to p-tree (will raise an exception if the tree
        does not exist).
        """
        return self.__ptree

    @ptree.setter
    def ptree(self, value):
        # an existing tree must not be overwritten
        if self.has_ptree():
            raise RuntimeException('Can\'t create a p-tree: tree exists')
        self.__ptree = value

    def create_ttree(self):
        "Create a tree on the t-layer"
        return self.create_tree('t')

    def create_atree(self):
        "Create a tree on the a-layer"
        return self.create_tree('a')

    def create_ntree(self):
        "Create a tree on the n-layer"
        return self.create_tree('n')

    def create_ptree(self):
        "Create a tree on the p-layer"
        return self.create_tree('p')

    @property
    def language_and_selector(self):
        """\
        Return string concatenation of the zone's language and selector.

        The two parts are joined by an underscore; if the selector is
        empty, only the language is returned.
        """
        ret = str(self.language)
        if self.selector != '':
            ret += '_' + str(self.selector)
        return ret