subway/clustered_path.py at master · rywit/subway · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from Subway import *
import pandas as pd
import numpy as np


def main():
    """Export a borough-clustered distance matrix to ``data/clustered.csv``.

    A ``SubwayLinkSystem`` is built from ``"data"`` and its segment
    distances are calculated. The stations are then taken in sorted order
    and grouped by borough; within each borough every station is assigned
    a successor (the next station in the group, with the last one wrapping
    around to the first).

    One matrix row is produced per station, one column per station:

    * ``0`` when the column station is the row station itself or the row
      station's successor,
    * ``None`` (an empty CSV cell) for any other station in the same
      borough,
    * otherwise the ``DistanceType.Segments`` distance measured from the
      row station's successor to the column station.

    The CSV is written without an index column, using the station ids as
    the header row.
    """

    def keep_station(station):
        # Every station is accepted. A narrower filter could be used here,
        # e.g. ``station.get_division() == "IRT"``.
        return True

    def matrix_entry(origin, origin_borough, follower, target):
        # Zero cost for staying put or stepping to the designated successor.
        if origin == target or target == follower:
            return 0
        # Any other station of the same borough gets no value at all.
        if origin_borough == target.get_borough():
            return None
        # Leaving the borough: distance is measured from the successor.
        return follower.get_distance(DistanceType.Segments, target)

    # Load the data from disk
    network = SubwayLinkSystem("data", keep_station)
    network.calc_distances(DistanceType.Segments)

    ordered = sorted(network.get_stations())

    # Bucket the stations per borough, keeping the sorted order in each bucket
    borough_members = {}
    for stop in ordered:
        key = stop.get_borough()
        if key not in borough_members:
            borough_members[key] = []
        borough_members[key].append(stop)

    # Map every station to its successor inside its own borough. Rotating
    # the member list by one position pairs the last station with the first.
    successor = {}
    for members in borough_members.values():
        rotated = members[1:] + members[:1]
        successor.update(zip(members, rotated))

    matrix = []
    for origin in ordered:
        origin_borough = origin.get_borough()
        follower = successor[origin]
        matrix.append(
            [matrix_entry(origin, origin_borough, follower, target)
             for target in ordered]
        )

    column_ids = [stop.get_id() for stop in ordered]

    frame = pd.DataFrame(np.asarray(matrix))
    frame.to_csv("data/clustered.csv", index=False, header=column_ids)

    print("Num stations: %d" % len(column_ids))

    print("Done!")


# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()