Python/Script for snapping nodes to nearby nodes

From OpenStreetMap Wiki
Jump to navigation Jump to search

Developed for the New Zealand bulk import of the LINZ dataset.

  • Do not expect this script to be efficient or robust.
  • It has hardly been tested at all.

TODO:

  • This will not merge node tags (yet). A possible workaround is to reverse the roles of the two inputs, e.g. new=roads, established=gates.
  • this will not deal with </node> on a separate line in the new-comer input file
  • this will not deal with ' as the value quoting char in the new-comer input file
  • update to do real XML parsing with the expat library or similar


#!/usr/bin/env python
############################################################################
#
# MODULE:       osm_sync_nodes.py
# AUTHOR:       Hamish Bowman, Dunedin, New Zealand
# PURPOSE:      Replace what nodes we can with nearby established ones
# COPYRIGHT:    (c) 2010 Hamish Bowman, and the OpenStreetMap Foundation
#
#               This program is free software under the GNU General Public
#               License (>=v2) and comes with ABSOLUTELY NO WARRANTY.
#               See http://www.gnu.org/licenses/gpl2.html for details.
#
#############################################################################
# do not expect this script to be efficient or robust.
# this is hardly tested at all
# TODO:
#  this will not merge node tags! (yet) maybe try reversed: new=roads established=gates
#  this will not deal with </node> on a separate line in the new-comer input file
#  this will not deal with ' as the value quoting char in the new-comer input file

import sys
import os
from math import cos, radians
 
def _attr_span(line, name):
    """Locate the quoted value of XML attribute *name* inside *line*.

    Both ' and " are accepted as the quoting character.  The attribute name
    must start the line or follow whitespace, so looking for 'id' does not
    match 'uid' and looking for 'lat' does not match some other '...lat'.

    Returns a (start, end) pair such that line[start:end] is the attribute
    value without its quotes, or None when the attribute is not present.
    """
    key = name + '='
    pos = line.find(key)
    while pos != -1:
        quote_i = pos + len(key)
        whole_name = pos == 0 or line[pos - 1].isspace()
        if whole_name and quote_i < len(line) and line[quote_i] in '"\'':
            end = line.find(line[quote_i], quote_i + 1)
            if end != -1:
                return quote_i + 1, end
        pos = line.find(key, pos + 1)
    return None


def _attr_value(line, name):
    """Return the value of XML attribute *name* in *line*, or None."""
    span = _attr_span(line, name)
    if span is None:
        return None
    return line[span[0]:span[1]]


def _node_position(line):
    """Parse a one-line <node ...> element into (id, lon, lat).

    Returns None for lines that are not node elements or that lack one of
    the id/lat/lon attributes.  The id is kept as a string; lon and lat are
    converted to float (a malformed number raises ValueError).
    """
    if 'node id=' not in line:
        return None
    node_id = _attr_value(line, 'id')
    lat = _attr_value(line, 'lat')
    lon = _attr_value(line, 'lon')
    if node_id is None or lat is None or lon is None:
        return None
    return node_id, float(lon), float(lat)


def _write_synced(lines, replacements, outf):
    """Write the new-comer *lines* to *outf* with snapped nodes substituted.

    lines:        new-comer file content, one string per line, no newline
    replacements: dict mapping new-comer node id -> established node id
    outf:         writable text file object

    Nodes that have a replacement are dropped; <nd ref=...> members that
    point at them are rewritten to the established id, keeping whatever
    quoting character the line used.  All other lines pass through as-is.
    """
    inside_dropped_node = False
    for line in lines:
        if inside_dropped_node:
            # Still within a multi-line <node>...</node> that is being
            # dropped: discard its children up to and including </node>.
            # Node tags are not merged into the established node.
            if '</node>' in line:
                inside_dropped_node = False
            continue

        if 'node id=' in line:
            if _attr_value(line, 'id') in replacements:
                # Replaced by an established node, so do not write it.  If
                # the element is not closed on this line, its child lines
                # have to be skipped as well or the output is not valid XML.
                inside_dropped_node = (not line.rstrip().endswith('/>')
                                       and '</node>' not in line)
                continue
        elif 'nd ref=' in line:
            span = _attr_span(line, 'ref')
            if span is not None:
                old_id = replacements.get(line[span[0]:span[1]])
                if old_id is not None:
                    # way member calls a new-comer node: use the
                    # established node instead
                    line = line[:span[0]] + old_id + line[span[1]:]

        outf.write(line + '\n')


def main(input_new='bridge_cl.osm',
         input_established='chatham_roads_dl.osm',
         output='bridge_cl_newnodes.osm',
         threshold_m=0.025):
    """Replace nodes of a new .osm file with nearby established nodes.

    Every node in *input_new* lying within *threshold_m* of a node in
    *input_established* (compared separately along latitude and longitude)
    is removed from the output, and way members referring to it are pointed
    at the established node instead.  When several established nodes are
    within range the first one found in the established file is used.

    input_new:          path of the new-comer .osm file
    input_established:  path of the .osm file holding the established nodes
    output:             path of the result file; an empty value writes the
                        result to stdout
    threshold_m:        snapping distance in meters (2.5cm ~ 1")

    Exits with status 1 if either input file does not exist.

    Limitations: input is scanned line by line rather than parsed as XML, so
    each <node>/<nd> start tag must sit on a single line, and tags of a
    replaced node are dropped rather than merged.
    """
    # one arc-minute of latitude is one nautical mile (1852 m)
    thresh_lat = threshold_m / (1852. * 60)

    #### check input files
    if not os.path.exists(input_new):
        print("ERROR: Unable to read new input data")
        sys.exit(1)
    if not os.path.exists(input_established):
        print("ERROR: Unable to read established input data")
        sys.exit(1)

    #### read established file: table of (id, lon, lat)
    print("old-timer input file=[%s]" % input_established)
    old_nodes = []
    with open(input_established) as inf_old:
        for line in inf_old:
            position = _node_position(line)
            if position is not None:
                old_nodes.append(position)

    #### read new file once; it is used both for matching and for output
    print("new-comer input file=[%s]" % input_new)
    with open(input_new) as inf_new:
        new_lines = [line.rstrip('\n') for line in inf_new]

    # new-comer node id -> established node id
    replacements = {}
    for line in new_lines:
        position = _node_position(line)
        if position is None:
            continue
        new_id, new_lon, new_lat = position
        if new_id in replacements:
            continue

        # A degree of longitude shrinks with cos(latitude), so scale the
        # longitude difference by the node's latitude before comparing.
        cos_lat = abs(cos(radians(new_lat)))

        for old_id, old_lon, old_lat in old_nodes:
            if abs(new_lon - old_lon) * cos_lat < thresh_lat and \
               abs(new_lat - old_lat) < thresh_lat:
                replacements[new_id] = old_id
                break

    ##### with the replacement table populated, write output
    if output:
        outf = open(output, 'w')
        print("output file=[%s] (new-comer input reduced to only contain unique nodes)" % output)
    else:
        outf = sys.stdout

    try:
        _write_synced(new_lines, replacements, outf)
    finally:
        if outf is not sys.stdout:
            outf.close()
 
 
# Script entry point: run the node sync only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()