-
Notifications
You must be signed in to change notification settings - Fork 0
/
topo_import.py
executable file
·192 lines (147 loc) · 6.27 KB
/
topo_import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#!/usr/bin/env python3
# Copyright © 2018 Exatel S.A.
# Contact: [email protected]
# LICENSE: GPL-3.0-or-later, See COPYING file
# Author: Tomasz Fortuna
import argparse
import os.path
import sys
from time import time
import psycopg2
import psycopg2.extras
from IPython import embed
from osmpbf import AddressExtractor, GeometryMatcher, PBFParser, StreetMatcher, TopologyMigrator
def parse_args():
    """Parse and validate the command-line arguments.

    Exactly one of ``--topo-import`` and ``--address-import`` must be
    passed, and ``--topo-import`` additionally needs both ``--username``
    and ``--password``. Violations are reported through
    ``ArgumentParser.error``. If the ``--pbf`` path does not exist, a
    message is printed and the process exits with status 1.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--pbf", required=True, help="pbf file to import")
    p.add_argument("--host", default="127.0.0.1", type=str,
                   help="postgresql database address")
    p.add_argument("--db", default="pgroute", type=str,
                   help="database name")

    p.add_argument("--topo-import", action="store_true",
                   help="Import topology into postgresql DB")
    p.add_argument("--address-import", action="store_true",
                   help="Import address data")
    p.add_argument("--cache-mem", action="store_true",
                   help="Cache nodes/points in memory instead of a file")

    p.add_argument("--port", default=None,
                   help="database connection port")
    p.add_argument("--username", required=False,
                   help="database connection username")
    # TODO: Read the password from a file or prompt for it instead.
    p.add_argument("--password", required=False,
                   help="database connection password (WARN: will be listed in process list)")

    p.add_argument("--max-meters", default=None,
                   required=False,
                   type=int,
                   help="split ways exceeding X meters")

    # Adjacent string literals are concatenated verbatim, so every fragment
    # except the last must end with a space to keep the sentences apart.
    p.add_argument("--output-path", nargs="?", default=None, type=str, const="output.csv",
                   help="Use with address import. "
                        "The path for the csv file where the output from the address importer "
                        "should be written. "
                        "If not specified, an ipython embed will open at the end.")

    args = p.parse_args()

    # The two store_true flags are equal when both or neither were passed.
    if args.topo_import == args.address_import:
        p.error("Pass either --topo-import or --address-import")

    if args.topo_import and (args.username is None or args.password is None):
        p.error("--topo-import requires DB username/password")

    if not os.path.exists(args.pbf):
        print("The binary file %s cannot be found" % args.pbf)
        sys.exit(1)

    return args
def connect(args):
    """Open a PostgreSQL connection described by the parsed CLI arguments.

    Reads ``host``, ``db``, ``port``, ``username`` and ``password`` from
    *args* and returns the psycopg2 connection, configured with
    ``psycopg2.extras.NamedTupleCursor`` as its cursor factory.
    """
    print("Connect to database")
    settings = {
        "host": args.host,
        "dbname": args.db,
        "port": args.port,
        "user": args.username,
        "password": args.password,
        "cursor_factory": psycopg2.extras.NamedTupleCursor,
    }
    return psycopg2.connect(**settings)
def address_import(args):
    """Import address geolocalization data.

    Feeds ``args.pbf`` through an ``AddressExtractor``, then a
    ``GeometryMatcher`` and a ``StreetMatcher`` built on that extractor,
    and finally calls ``extractor.finish()``; the duration of every stage
    is printed. A ``KeyboardInterrupt`` raised during any stage abandons
    the remaining stages, prints the extractor stats and continues with
    the step below.

    Afterwards, when ``args.output_path`` is set, the result is written
    with ``extractor.save_to_csv`` and the process exits with status 0;
    otherwise an IPython shell is opened via ``embed()``.
    """
    # Node location index: in memory on request, otherwise backed by a file.
    idx = 'flex_mem' if args.cache_mem else 'sparse_file_array,node-cache.data'
    extractor = AddressExtractor()

    try:
        # Stage 1: the extractor itself reads the file.
        extractor.start_inner = time()
        extractor.apply_file(args.pbf, locations=True, idx=idx)
        took = time() - extractor.start_inner
        print(f"Aggregating places took {took:.1f} seconds")
        print(f"Final stats {dict(extractor.stats)}")

        # Stage 2: relation geometries.
        extractor.start_inner = time()
        matcher = GeometryMatcher(extractor)
        print("Starting Relations geometry matcher")
        matcher.apply_file(args.pbf, locations=True, idx=idx)
        took = time() - extractor.start_inner
        print(f"Matching Relations geometry took {took:.1f} seconds")
        print(f"Matcher stats {dict(matcher.stats)}")

        # Stage 3: streets.
        extractor.start_inner = time()
        matcher = StreetMatcher(extractor)
        print("Starting street matcher")
        matcher.apply_file(args.pbf, locations=True, idx=idx)
        took = time() - extractor.start_inner
        print(f"Matching streets took {took:.1f} seconds")
        print(f"Matcher stats {dict(matcher.stats)}")

        # Stage 4: final matching done by the extractor.
        extractor.start_inner = time()
        print("Starting matching cities (areas) to places.")
        extractor.finish()
        took = time() - extractor.start_inner
        print(f"Matching addresses to administrative boundaries took {took:.1f} seconds")
        print(f"Total extract time is {extractor.took:.1f}s")
    except KeyboardInterrupt:
        print("Starting shell after interrupt")
        print(dict(extractor.stats))

    if not args.output_path:
        # Allow manipulation of loaded data before quitting.
        embed()
        return

    extractor.save_to_csv(args.output_path)
    sys.exit(0)
def topo_import(args):
    """Import the topology from the PBF file into the PostgreSQL database.

    The file is parsed twice with ``PBFParser``: the first pass lets the
    ``TopologyMigrator`` collect node ids, the second gathers node
    coordinates and imports the ways. Afterwards the migrator imports the
    nodes and finishes the import.

    If either parse reports failure, an error message is printed and the
    function returns early. The database connection is closed on every
    exit path. Always returns ``None``.
    """
    conn = connect(args)
    try:
        # 1) First pass - migrate ways and aggregate node ids
        migrator = TopologyMigrator(conn, args.max_meters)

        print("Create an empty scheme")
        migrator.create_db()

        print()
        print("1st-pass: Aggregate node ids of intersections and parts of ways:")
        with open(args.pbf, "rb") as fpbf:
            # While going through ways (way_callback) call node_optimisation_cb
            # function to gather node ids and intersections required later.
            p = PBFParser(fpbf,
                          way_callback=migrator.node_optimisation_cb)
            if not p.parse():
                print("Error while parsing the file")
                return

        print()
        print("2nd-pass: Gather node coordinates and import ways:")
        with open(args.pbf, "rb") as fpbf:
            # node_callback will simply aggregate latitude and longitude
            # of previously marked nodes in RAM.
            # way_callback aggregates way data with all the geometry and stores in
            # the DB as it reads them. It holds most logic as it can split imported
            # ways into smaller parts.
            p = PBFParser(fpbf,
                          node_callback=migrator.node_cb,
                          way_callback=migrator.way_cb)
            if not p.parse():
                print("Error while parsing the file")
                return
        print()

        # Import intersection nodes
        migrator.import_nodes()

        # Convert length and index data
        migrator.finish()
    finally:
        # Release the connection explicitly, including on the early returns
        # above and when an exception propagates.
        conn.close()
def main():
    """Script entry point: parse the CLI options and run each selected import."""
    args = parse_args()
    print("Loading the PBF file: %s" % args.pbf)

    # Handlers run in this order; each flag is read right before its
    # handler would be invoked.
    handlers = (
        ("topo_import", topo_import),
        ("address_import", address_import),
    )
    for flag, handler in handlers:
        if getattr(args, flag):
            handler(args)
# Run the importer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()