#!/usr/bin/env python
# -*- coding: utf-8 -*-
import collections
import __init__
from alex.utils.text import split_by_comma
class CUEDSlot:
    """One slot item of a CUED dialogue act, e.g. ``food="chinese"``.

    The constructor only stores the raw text in ``slot``.  Call
    :meth:`parse` to populate ``name``, ``equal`` and ``value``; until then
    those attributes do not exist and the string conversions raise
    ``AttributeError``.
    """

    def __init__(self, slot):
        """Remember the raw, unparsed slot text."""
        self.slot = slot

    def __str__(self):
        """Return the slot as a native string with non-ASCII replaced by '?'."""
        encoded = self.__unicode__().encode('ascii', 'replace')
        # On Python 2 ``str`` is the byte string type, so the encoded form is
        # already the native string.  On Python 3 ``__str__`` must return
        # text, so the ASCII bytes are decoded back.
        if str is bytes:
            return encoded
        return encoded.decode('ascii')

    def __unicode__(self):
        """Return ``name`` alone, or ``name<op>"value"`` when a value is set."""
        if self.value:
            return self.name + self.equal + '"' + self.value + '"'
        return self.name

    def parse(self):
        """Split the raw slot text into ``name``, ``equal`` and ``value``.

        ``!=`` is searched for first, then ``=``.  When neither occurs the
        whole text becomes ``name`` and both ``equal`` and ``value`` are
        empty strings.  Otherwise everything before the operator is the name
        and the remainder, with every ``!``, ``=`` and ``"`` character
        removed, is the value.

        Raises:
            ValueError: if the resulting value is the literal ``value``.
                ``name``, ``equal`` and ``value`` are already assigned when
                this is raised.
        """
        raw = self.slot
        operator = '!='
        position = raw.find(operator)
        if position == -1:
            operator = '='
            position = raw.find(operator)
        if position == -1:
            # No operator at all: a bare slot name.
            self.name = raw
            self.equal = ''
            self.value = ''
            return

        self.equal = operator
        self.name = raw[:position]
        value = raw[position:]
        # Removes the operator and the surrounding quotes; note that this
        # also removes these characters anywhere inside the value.
        for char in ('!', '=', '"'):
            value = value.replace(char, '')
        self.value = value

        if value == 'value':
            raise ValueError('FIXME: Ignore slots for which no values were '
                             'found in the database.')
class CUEDDialogueAct:
    """A dialogue act in CUED notation together with its transcription.

    The constructor only stores its arguments.  :meth:`parse` must be called
    to populate ``dialogue_act_type`` and ``slots``, which every other method
    reads.
    """

    # Dialogue act types that are emitted as ``type()`` with slots ignored.
    _BARE_TYPES = ('silence', 'thankyou', 'ack', 'bye', 'hangup', 'repeat',
                   'help', 'restart', 'null')
    # Dialogue act types emitted as ``type()`` followed by one inform/deny
    # item per slot.
    _BARE_TYPES_WITH_SLOTS = ('hello', 'affirm', 'negate', 'reqalts',
                              'reqmore')
    # (substring of the transcription, item added when it is present).
    _PHRASE_ITEMS = (
        ('thank you', 'thankyou()'),
        ('thanks', 'thankyou()'),
        ('thank', 'thankyou()'),
        ('good bye', 'bye()'),
        ('goodbye', 'bye()'),
        ('no thank', 'negate()'),
        ('no good', 'negate()'),
        ('no i ', 'negate()'),
        ("no i'm ", 'negate()'),
        ('hello', 'hello()'),
        (' hi ', 'hello()'),
        (' looking ', 'inform(task="find")'),
    )
    # Transcriptions whose whole text maps to one fixed item, replacing
    # everything derived from the dialogue act.
    _WHOLE_TEXT_ITEMS = {
        'not': 'negate()',
        'it does not matter': 'inform(="dontcare")',
        'type of food': 'request(food)',
        "addenbrooke's": 'request(name="addenbrookes")',
    }

    def __init__(self, text, da, database=None, dictionary=None):
        """Store the transcription, the raw dialogue act and the database.

        ``dictionary`` is accepted but not used by this class.
        """
        self.text = text
        self.cuedDA = da
        self.db = database

    def __str__(self):
        """Return the act as a native string with non-ASCII replaced by '?'."""
        encoded = self.__unicode__().encode('ascii', 'replace')
        # Python 2 wants bytes from ``__str__``; Python 3 wants text.
        if str is bytes:
            return encoded
        return encoded.decode('ascii')

    def __unicode__(self):
        """Return the act type followed by one slot per line."""
        s = self.dialogue_act_type
        try:
            # The slots are objects, not strings, so their text has to be
            # rendered before joining.
            s += '\n' + '\n'.join([slt.__unicode__() for slt in self.slots])
        except (AttributeError, TypeError, UnicodeError):
            # Best effort: fall back to the bare dialogue act type.
            pass
        return s

    def get_slots_and_values(self):
        """Return a ``defaultdict`` mapping each slot name to its value set."""
        slots = collections.defaultdict(set)
        for slt in self.slots:
            slots[slt.name].add(slt.value)
        return slots

    def get_cued_da(self):
        """Return the act re-serialised as ``type(slot,slot,...)``."""
        try:
            inner = ','.join([str(slt) for slt in self.slots])
        except (AttributeError, TypeError, UnicodeError):
            # Best effort: an act whose slots cannot be rendered is emitted
            # with an empty slot list.
            inner = ''
        return self.dialogue_act_type + '(' + inner + ')'

    @staticmethod
    def _inform_or_deny(slt):
        """Render a slot as ``inform(...)`` for '=' and ``deny(...)`` otherwise."""
        if slt.equal == '=':
            return 'inform(' + str(slt) + ')'
        return 'deny(' + slt.name + '="' + slt.value + '"' + ')'

    def get_ufal_da(self):
        """Convert the parsed act into a ``&``-joined, sorted item string.

        The items are first derived from ``dialogue_act_type`` and ``slots``,
        then extended with items triggered by phrases found in ``text``, and
        finally replaced altogether for a few fixed transcriptions.  When
        nothing was produced a diagnostic is printed and ``null()`` is
        returned.

        Raises:
            IndexError: for a ``select`` act with fewer than two slots.
        """
        da_type = self.dialogue_act_type
        items = []

        if da_type == 'inform':
            for slt in self.slots:
                items.append(self._inform_or_deny(slt))
        elif da_type == 'request':
            for slt in self.slots:
                if slt.value:
                    items.append(self._inform_or_deny(slt))
                else:
                    items.append('request(' + slt.name + ')')
        elif da_type == 'confirm':
            for slt in self.slots:
                prefix = 'inform(' if slt.name == 'name' else 'confirm('
                items.append(prefix + str(slt) + ')')
        elif da_type == 'select':
            items.append('select(' + str(self.slots[0]) + ','
                         + str(self.slots[1]) + ')')
        elif da_type in self._BARE_TYPES:
            items.append(da_type + '()')
        elif da_type in self._BARE_TYPES_WITH_SLOTS:
            items.append(da_type + '()')
            for slt in self.slots:
                items.append(self._inform_or_deny(slt))
        elif da_type == 'deny':
            for position, slt in enumerate(self.slots):
                if position == 0:
                    # The first slot is always denied, whatever its operator.
                    items.append('deny(' + slt.name + '="' + slt.value + '")')
                else:
                    items.append(self._inform_or_deny(slt))

        # normalise data
        text = self.text
        for phrase, item in self._PHRASE_ITEMS:
            if phrase in text and item not in items:
                items.append(item)
        # A greeting at the very start has no leading space.  Matching the
        # bare substring "hi " instead would also fire inside words such as
        # "sushi ".
        if text.startswith('hi ') and 'hello()' not in items:
            items.append('hello()')

        if text in self._WHOLE_TEXT_ITEMS:
            items = [self._WHOLE_TEXT_ITEMS[text]]

        result = '&'.join(sorted(items))
        if not result:
            # Single-argument print calls give the same output on Python 2
            # and Python 3.
            print('# CUEDDialogueAct.get_ufal_da()')
            print('#' + '=' * 120)
            print('# %s' % (self.text,))
            print('# %s' % (self.cuedDA,))
            print('# null()')
            print('#' + '.' * 120)
            result = 'null()'
        return result

    def parse(self):
        """Parse ``cuedDA`` into ``dialogue_act_type`` and ``slots``.

        The text before the first ``(`` is the dialogue act type.  The rest
        is lower-cased, stripped of all parentheses and split into slots.
        Slots whose parsing raises ``ValueError`` are skipped.  The two
        attributes are assigned only after the whole act parsed.

        Raises:
            ValueError: if the text holds more than one dialogue act or has
                no opening parenthesis.
        """
        cued_da = self.cuedDA
        if len(split_by_comma(cued_da)) > 1:
            raise ValueError('Too many DAs in input text.')

        # get the dialogue act type
        paren = cued_da.find('(')
        if paren == -1:
            raise ValueError('Missing "(" in dialogue act: %r' % (cued_da,))
        dialogue_act_type = cued_da[:paren]

        slot_text = cued_da[paren:].lower()
        slot_text = slot_text.replace('(', '').replace(')', '')

        parsed_slots = []
        if slot_text:
            for raw_slot in split_by_comma(slot_text):
                slot = CUEDSlot(raw_slot)
                try:
                    slot.parse()
                except ValueError:
                    # check for invalid slot items
                    continue
                parsed_slots.append(slot)

        self.dialogue_act_type = dialogue_act_type
        self.slots = parsed_slots