# Source code for alex.applications.PublicTransportInfoCS.data.add_cities_to_stops

#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
A script that creates a compatibility table from a list of stops in a certain city and
its neighborhood and a list of towns and cities.

Usage:

./add_cities_to_stops.py [-d "Main city"] stops.txt cities.txt cities_stops.tsv
"""

from __future__ import unicode_literals
import codecs
import sys
from getopt import getopt


def load_list(filename, suppress_comments=False, cols=1):
    """Load a UTF-8, tab-separated text file into a list.

    Every line is stripped of surrounding whitespace, split on tabs and
    truncated to the first `cols` columns. Lines that are empty after this
    cleanup are skipped, so that no empty names end up in the result.

    :param filename: path of the file to read
    :param suppress_comments: if False (default), lines starting with '#' \
        are skipped; if True, the leading '#' characters (and any whitespace \
        following them) are removed and the rest of the line is loaded as data
    :param cols: maximum number of tab-separated columns to keep per line
    :return: a list with one item per loaded line; a line that ends up with \
        exactly one column is stored as a bare string, any other line as a \
        list of column values
    """
    data = []
    with codecs.open(filename, 'r', 'UTF-8') as fh_in:
        for line in fh_in:
            line = line.strip()
            # handle comments (strip the comment marker or skip the line)
            if line.startswith('#'):
                if not suppress_comments:
                    continue
                # strip again so that '# Name' yields 'Name', not ' Name'
                line = line.lstrip('#').strip()
            # blank lines (or bare '#' lines) carry no data; an empty entry
            # would later be matched as if it were a valid (empty) name
            if not line:
                continue
            # handle columns -- split on tabs, delete superfluous ones
            fields = line.split("\t")[:cols]
            data.append(fields[0] if len(fields) == 1 else fields)
    return data
def get_city_for_stop(cities, stop, main_city):
    """Find the city that a stop name belongs to.

    The stop is resolved in this order:

    1. the whole stop name is a known city;
    2. the stop name is cut at the first occurrence of a separator (tried in
       a fixed order) and the part in front of it is a known city;
    3. for the '/' separator only, the part behind the separator is resolved
       recursively (without any fallback city);
    4. `main_city` is used as a fallback.

    :param cities: collection of known city names (tested with `in`)
    :param stop: the stop name to resolve
    :param main_city: fallback city name, or None for no fallback
    :return: the resolved city name, or None if nothing matched and no \
        fallback city was given
    """
    # the stop name is a city name by itself
    if stop in cities:
        return stop

    # punctuation, prepositions and words that typically follow the city part
    # of a stop name (train stops often use no punctuation at all); the order
    # is significant, the first separator yielding a known city wins
    separators = (
        ',', '-', ';', ' u ', ' nad ', ' pod ', ' v ', ' ve ',
        'zastávka', 'město', '{', '[', '/', 'hlavní nádraží', 'hl. n.',
        ' na ', 'klášter', 'obec', 'severní', 'jižní', 'západ', 'východ',
        'jih', 'sever', 'západní', 'východní', 'centrum', 'střed',
        'zámecká zahrada', 'zálesí', 'kolonie', 'lázně', 'hlavní',
        'střelnice', 'bazén', 'koupaliště', 'předměstí', 'místní', 'zámek',
        'horní', 'dolní', 'Cihelna', 'jeskyně', 'dílny', 'rybník',
        'bažantnice', 'nemocnice', 'Masarykovo', 'jedna', 'dvě', 'závod',
        # 'obec' is listed a second time; retrying it cannot change the result
        'obec',
    )

    for separator in separators:
        if separator not in stop:
            continue
        head, _, tail = stop.partition(separator)
        head = head.strip()
        if head in cities:
            return head
        # with '/', the city may also be the part after the separator
        if separator == '/':
            found = get_city_for_stop(cities, tail.strip(), None)
            if found is not None:
                return found

    # fall back to the main city (None lets the caller record an unresolved stop)
    return main_city
def add_cities_to_stops(cities, stops, main_city):
    """Group stops by the city they are found to belong to.

    Each stop is resolved with `get_city_for_stop`; stops for which no
    (non-empty) city is returned are collected separately.

    :param cities: collection of known city names
    :param stops: iterable of stop names
    :param main_city: fallback city for stops that match no known city, \
        or None for no fallback
    :return: a tuple `(mapping, unresolved)`, where `mapping` is a dict from \
        city name to the set of its stops and `unresolved` is the list of \
        stops without a city, in input order
    """
    stops_by_city = {}
    leftovers = []

    for stop_name in stops:
        found = get_city_for_stop(cities, stop_name, main_city)
        if not found:
            leftovers.append(stop_name)
            continue
        # create the city's set of stops on first use, then add to it
        stops_by_city.setdefault(found, set()).add(stop_name)

    return stops_by_city, leftovers
def main():
    """Command-line entry point.

    Parses the optional `-d "Main city"` switch and three positional
    arguments (stops file, cities file, output file), builds the mapping of
    cities to stops and writes it to the output file as UTF-8, one
    `city<TAB>stop` pair per line, sorted by city and then by stop. Stops
    that could not be resolved are listed on the standard error output.

    Exits with the module usage message if the number of positional
    arguments is not exactly three.
    """
    opts, files = getopt(sys.argv[1:], 'd:')

    main_city = None
    for opt, arg in opts:
        if opt == '-d':
            main_city = arg

    # sanity check
    if len(files) != 3:
        sys.exit(__doc__)

    # initialization
    file_stops, file_cities, file_out = files
    # The UTF-8 writer emits encoded bytes: on Python 3 these must go to the
    # underlying byte stream (sys.stderr.buffer); Python 2's sys.stderr has no
    # such attribute and accepts the bytes directly.
    stderr = codecs.getwriter('UTF-8')(getattr(sys.stderr, 'buffer', sys.stderr))

    # load list of cities (commented-out cities are loaded as well)
    cities = set(load_list(file_cities, suppress_comments=True))
    # load list of stops
    stops = load_list(file_stops, cols=1)

    mapping, unresolved = add_cities_to_stops(cities, stops, main_city)

    # write the result; plain write() calls are used instead of the
    # Python-2-only `print >> stream` statement
    with codecs.open(file_out, 'w', 'UTF-8') as fh_out:
        for city in sorted(mapping):
            for stop in sorted(mapping[city]):
                fh_out.write(city + "\t" + stop + "\n")

    # print any errors
    if unresolved:
        stderr.write('Could not resolve:' + "\n")
        for stop in unresolved:
            stderr.write(stop + "\n")
# run the conversion only when executed as a script, not when imported
if __name__ == '__main__':
    main()