# This script generates an XML file of all the train stations in the
# Netherlands, using the Lines.xml file output by get_all_lines.py.
#
# The script is resumable: stations already present in Stations.xml are
# skipped, and whatever has been retrieved so far is written back to
# Stations.xml even when a retrieval fails part-way through.

# NOTE: xml.etree.cElementTree was removed in Python 3.9, while ET.indent()
# (used below) only exists from Python 3.9 on, so the plain ElementTree module
# is the only import that can work here.
import xml.etree.ElementTree as ET

from retrieve_station import retrieve_station

# Read Lines.xml and collect the set of station links referenced by any line.
# The second child of every line element holds that line's station elements,
# each of which carries its link in the "Link" attribute.
line_tree = ET.parse("../data/Lines.xml")
lines_element = line_tree.getroot()
station_links = {
    station.attrib["Link"]
    for line_element in lines_element
    for station in line_element[1]
}

# Try to read a previously written Stations.xml. Only the parse itself is
# guarded: a missing or unparsable file simply means we start from scratch.
try:
    stations_element = ET.parse("Stations.xml").getroot()
except (OSError, ET.ParseError):
    print("Could not open Stations.xml")
    stations_element = ET.Element("Stations")

# Remove every station that is already present in the file from the work set.
# The link is read from the text of the second child of each station element.
# This loop is deliberately outside the try block above: an unexpected entry
# must not silently replace the already-downloaded stations with an empty
# document.
for station_element in stations_element:
    link = station_element[1].text
    station_links.discard(link)
    print(f"Discarded {link}")

print(f"Still need retrieve {len(station_links)} stations")

for n, station_link in enumerate(station_links):
    print(f"{n}:{station_link}")
    try:
        station_element = retrieve_station(station_link)
    except (Exception, KeyboardInterrupt) as error:
        # Stop at the first failure (or on Ctrl-C) but still fall through to
        # the write below, so the stations retrieved so far are kept.
        print(f"failed to read {station_link}: {error!r}")
        break
    stations_element.append(station_element.toXML())

# Write back everything gathered so far, indented with tabs.
tree = ET.ElementTree(stations_element)
ET.indent(tree, '\t')
tree.write("Stations.xml")