#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import re
def findall(text, char, start=0, end=-1):
    """Return the start indices of all occurrences of `char` in `text`.

    The search is restricted to ``text[start:end]``.  Occurrences may overlap,
    because the search resumes one position after each hit.

    :param text: the string to search in
    :param char: the substring to look for
    :param start: index at which the search starts (default: 0)
    :param end: index at which the search stops; the value -1 is a sentinel
        meaning "up to the end of the text" (default: -1)
    :return: a list of indices in ascending order, empty if there is no match
    """
    if end == -1:
        end = len(text)

    indices = []
    position = text.find(char, start, end)
    while position != -1:
        indices.append(position)
        # Resume right after the start of the last hit.
        position = text.find(char, position + 1, end)
    return indices
def split_by_comma(text):
    """Split `text` at every comma that is not enclosed in parentheses.

    Each resulting segment is stripped of surrounding whitespace.  The part
    after the last top-level comma is always appended, so an empty text yields
    ``['']``.  Parentheses left open at the end of the text are not reported.

    :param text: the string to split
    :return: a list of the stripped segments
    :raises ValueError: if a closing parenthesis has no matching opening one,
        or if a segment would start directly with a top-level comma
    """
    depth = 0
    segments = []
    segment_start = 0

    for idx, char in enumerate(text):
        if char == '(':
            depth += 1
        elif char == ')':
            depth -= 1
            if depth < 0:
                raise ValueError("Missing a left parenthesis.")
        elif char == ',' and depth == 0:
            if segment_start == idx:
                raise ValueError(
                    "Split segment must not start with a comma.")
            segments.append(text[segment_start:idx].strip())
            segment_start = idx + 1

    # Whatever follows the last top-level comma forms the final segment.
    segments.append(text[segment_start:].strip())
    return segments
def split_by(text, splitter,
             opening_parentheses='',
             closing_parentheses='',
             quotes="'\""):
    """
    Splits the input text at each occurrence of the splitter only if it is not
    enclosed in parentheses or quotes.

    text - the input text string
    splitter - multi-character string which is used to determine the position
               of splitting of the text; must not be empty
    opening_parentheses - a string of opening parentheses that have to be
                          respected when splitting, e.g. "{(" (default: '')
    closing_parentheses - a string of closing parentheses that have to be
                          respected when splitting, e.g. "})" (default: '');
                          paired with opening_parentheses by position
    quotes - a string of quotes that have to come in pairs, e.g. '"'

    Returns the list of segments, each stripped of surrounding whitespace.
    The text after the last splitter is always included, even if it is empty.

    Raises ValueError if the splitter is empty or if a closing parenthesis
    is met without a matching opening one.

    A character that is a parenthesis or a quote is always interpreted as
    such; it is never tested as the beginning of the splitter.

    """
    if not splitter:
        # An empty splitter would match everywhere without consuming input.
        raise ValueError("The splitter must not be empty.")

    split_list = []

    # Interpret the arguments: one counter per opening parenthesis (nesting
    # depth) and one per quote (0 = closed, 1 = open).
    counters = dict((char, 0) for char in opening_parentheses + quotes)
    closing_to_opening = dict(zip(closing_parentheses, opening_parentheses))

    text_len = len(text)
    segment_start = 0
    pos = 0
    while pos < text_len:
        cur_char = text[pos]
        if cur_char in opening_parentheses:
            counters[cur_char] += 1
        elif cur_char in closing_parentheses:
            opening = closing_to_opening[cur_char]
            counters[opening] -= 1
            if counters[opening] < 0:
                raise ValueError(("Missing an opening parenthesis for: {par} "
                                  "in the text: {text}").format(par=cur_char,
                                                                text=text))
        elif cur_char in quotes:
            counters[cur_char] = (counters[cur_char] + 1) % 2
        elif (text.startswith(splitter, pos)
              and not any(counters.values())):
            # A splitter outside of all parentheses and quotes.
            split_list.append(text[segment_start:pos].strip())
            pos += len(splitter)
            segment_start = pos
            # Do not advance any further: the character right after the
            # splitter must be examined too (it may be a quote, a
            # parenthesis or the start of another splitter).
            continue
        pos += 1

    split_list.append(text[segment_start:].strip())
    return split_list
def parse_command(command):
    """Parse the command name(var1="val1",...) into a dictionary structure:

    E.g. call(destination="1245",opt="X") will be parsed into:

        { "__name__":    "call",
          "destination": "1245",
          "opt":         "X"}

    The command name is stored under the "__name__" key.  Every argument has
    to have the form name="value"; the first and the last character of the
    value part (expected to be the quotes) are dropped.  Commas and equal
    signs are located with split_by, which ignores those enclosed in double
    quotes.

    Return the parsed command in a dictionary.

    Raises Exception if the opening or the closing parenthesis is missing, or
    if an argument does not consist of exactly a name and a value.

    """
    try:
        open_idx = command.index('(')
    except ValueError:
        raise Exception(
            "Parsing error in: %s. Missing opening parenthesis." % command)

    # The last character is cut off below as the closing parenthesis, so make
    # sure it really is one instead of silently truncating the last value.
    if not command.endswith(')'):
        raise Exception(
            "Parsing error in: %s. Missing closing parenthesis." % command)

    parsed = {"__name__": command[:open_idx]}

    # remove the parentheses
    arguments = command[open_idx + 1:-1]
    if not arguments:
        # there are no parameters
        return parsed

    for argument in split_by(arguments, ',', '', '', '"'):
        parts = split_by(argument, '=', '', '', '"')
        if len(parts) == 1:
            raise Exception(("Parsing error in: {cmd}: {slot}. There is only "
                             "variable name")
                            .format(cmd=command, slot=parts))
        elif len(parts) == 2:
            # There is a slot name and a value; strip the enclosing quotes.
            parsed[parts[0]] = parts[1][1:-1]
        else:
            raise Exception(
                "Parsing error in: %s: %s" % (command, str(parts)))

    return parsed
def min_edit_dist(target, source):
    """ Computes the min edit distance from target to source.

    Insertions and deletions cost 1, a substitution costs 2 and a match is
    free.  All elements of both sequences are taken into account, and either
    sequence may be empty.

    :param target: a target sequence
    :param source: a source sequence
    :return: the minimum edit distance as a float
    """
    n = len(target)
    m = len(source)

    # distance[i][j] is the distance between the first i elements of target
    # and the first j elements of source; row 0 and column 0 stand for the
    # empty prefixes.
    distance = [[0.0] * (m + 1) for _ in range(n + 1)]

    for i in range(1, n + 1):
        distance[i][0] = distance[i - 1][0] + 1
    for j in range(1, m + 1):
        distance[0][j] = distance[0][j - 1] + 1

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            substitution_cost = 0 if target[i - 1] == source[j - 1] else 2
            distance[i][j] = min(
                distance[i - 1][j] + 1,
                distance[i][j - 1] + 1,
                distance[i - 1][j - 1] + substitution_cost)

    return distance[n][m]
def min_edit_ops(target, source, cost=lambda insertions, deletions, substitutions: insertions + deletions + 2.0 * substitutions):
    """ Computes the min edit operations from target to source.

    An element present only in the target counts as an insertion, an element
    present only in the source counts as a deletion.  When several operations
    lead to the same cost, a match/substitution is preferred to an insertion,
    which is preferred to a deletion.

    :param target: a target sequence
    :param source: a source sequence
    :param cost: an expression for computing cost of the edit operations; it
        is called with the numbers of insertions, deletions and substitutions
    :return: a tuple of (insertions, deletions, substitutions)
    :raises Exception: if the costs cannot be ordered (e.g. the cost function
        returns NaN)
    """
    n = len(target)
    m = len(source)

    # ops[i][j] holds the operation counts for the first i elements of target
    # versus the first j elements of source.
    ops = [[(0, 0, 0)] * (m + 1) for _ in range(n + 1)]

    for i in range(1, n + 1):
        ins, dels, subs = ops[i - 1][0]
        ops[i][0] = (ins + 1, dels, subs)
    for j in range(1, m + 1):
        ins, dels, subs = ops[0][j - 1]
        ops[0][j] = (ins, dels + 1, subs)

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            ins, dels, subs = ops[i - 1][j]
            after_insertion = (ins + 1, dels, subs)

            ins, dels, subs = ops[i][j - 1]
            after_deletion = (ins, dels + 1, subs)

            ins, dels, subs = ops[i - 1][j - 1]
            if source[j - 1] != target[i - 1]:
                after_substitution = (ins, dels, subs + 1)
            else:
                # A match: the diagonal step is free.
                after_substitution = (ins, dels, subs)

            insertion = cost(*after_insertion)
            deletion = cost(*after_deletion)
            substitution = cost(*after_substitution)

            if substitution <= insertion and substitution <= deletion:
                ops[i][j] = after_substitution
            elif insertion <= deletion:
                # No need to compare with the substitution: the previous
                # branch failed, so the cheaper of insertion and deletion is
                # cheaper than the substitution.
                ops[i][j] = after_insertion
            elif deletion <= insertion and deletion <= substitution:
                ops[i][j] = after_deletion
            else:
                raise Exception("min_edit_ops unexpected state")

    return ops[n][m]
class Escaper(object):
    """
    Creates a customised escaper for strings.  The characters that need
    escaping, as well as the one used for escaping can be specified.

    """
    # TODO Write tests.

    # Constants for types of characters in a text that has been escaped.
    ESCAPER = 0
    ESCAPED = 1
    NORMAL = 2

    def __init__(self, chars="'\"", escaper='\\', re_flags=0):
        """Constructs an escaper for escaping the specified characters.

        Arguments:
            chars -- a collection of characters to escape (default: "'\"")
            escaper -- the character used as the escaper (default: '\\');
                the escaper itself is always escaped too
            re_flags -- any regex flags (as defined in the built-in `re'
                module) to use when building the escaping regexp (default: 0)

        """
        # Matches any single character that has to be escaped.
        self.rx = re.compile(Escaper.re_literal_list(chars + escaper),
                             re_flags)
        # `sub' is a replacement template, not a regular expression: the only
        # character with a special meaning in it is the backslash.
        self.sub = escaper.replace('\\', '\\\\') + '\\g<0>'
        # Matches the escaper followed by the character it escapes.
        self.unrx = re.compile(Escaper.re_literal(escaper) + '(.)')
        self.unsub = '\\1'

    # Characters with a special meaning inside a [] group of a regexp.
    _re_br_spec_chars_rx = re.compile('[]\\\\^-]')

    @staticmethod
    def re_literal_list(chars):
        """
        Builds a [] group for a regular expression that matches exactly the
        characters specified.

        """
        return '[{esced}]'.format(
            esced=Escaper._re_br_spec_chars_rx.sub('\\\\\\g<0>', chars))

    # Kept for backward compatibility only; `re_literal' no longer uses it.
    _re_combining_chars = '1234567890AbBdDsSwWZafnrtvx'

    @staticmethod
    def re_literal(char):
        """
        Escapes the character so that when it is used in a regexp, it matches
        itself.

        """
        # re.escape leaves alphanumeric characters alone.  Prefixing a letter
        # with a backslash would either change its meaning or build an
        # invalid escape sequence.
        return re.escape(char)

    def escape(self, text):
        """Escapes the text using the parameters defined in the constructor."""
        return self.rx.sub(self.sub, text)

    def unescape(self, text):
        """
        Unescapes the text using the parameters defined in the constructor."""
        # TODO Test whether this picks disjunct matches (yes, it should).
        return self.unrx.sub(self.unsub, text)

    def annotate(self, esced):
        """
        Annotates each character of a text that has been escaped whether:

            Escaper.ESCAPER - it is the escape character
            Escaper.ESCAPED - it is a character that was escaped
            Escaper.NORMAL  - otherwise.

        It is expected that only parts of the text may have actually been
        escaped.

        Returns a list with the annotation values, co-indexed with characters
        of the input text.

        """
        annion = [Escaper.NORMAL] * len(esced)
        for match in self.unrx.finditer(esced):
            # Every match spans exactly two characters: the escaper and the
            # character escaped by it.
            first = match.start()
            annion[first] = Escaper.ESCAPER
            annion[first + 1] = Escaper.ESCAPED
        return annion
def escape_special_characters_shell(text, characters="'\""):
    """
    Simple function that tries to escape quotes.  Not guaranteed to produce
    the correct result!!  If that is needed, use the new `Escaper' class.

    Every occurrence of one of the `characters' in the text is prefixed with
    a backslash.  The text is processed in a single pass, so backslashes
    inserted by this function are never escaped again, even if the backslash
    itself is among the `characters'.

    text - the text to escape
    characters - the characters to be escaped (default: "'\"")

    Returns the escaped text.

    """
    # Empty entries would match between every two characters of the text.
    specials = [character for character in characters if character]
    if not specials:
        return text

    pattern = '|'.join(re.escape(character) for character in specials)
    return re.sub(pattern, lambda match: '\\' + match.group(0), text)