diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/QMs.py | 31 | ||||
-rw-r--r-- | parsers/survex.py | 11 |
2 files changed, 26 insertions, 16 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py index f3044ff..5a0be79 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -141,16 +141,26 @@ def parse_KH_QMs(kh, inputFile, ticked): with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile: khQMs = khQMfile.readlines() nqms = 0 - for line in khQMs: + line = 0 + fails = 0 + for dataline in khQMs: # <dt><a href="sibria.htm#qC1997-161-27" name="C1997-161-27">C1997-161-27</a> A<dd>Sib: pitch at end of Fuzzy Logic [Paradox Rift - continues] [sep.fuzzy.13] + line += 1 res = re.search( - r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABCDX])<dd>(?P<location_description>.*)\[(?P<nearest_station_name>.*)\]", - line, + r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a>\s*(?P<grade>[ABCDX?V])<dd>(?P<location_description>.*)(\[(?P<station_name>.*)\])?", + dataline, ) if res: res = res.groupdict() year = int(res["year"]) - + + nearest_station_name = "" + resolution_station_name = "" + if res["station_name"]: + if ticked: + resolution_station_name = res["station_name"].replace("<a href=\"","<a href=\"/1623/161/") + else: + nearest_station_name = res["station_name"] lookupAttribs = { #'found_by':placeholder, "blockname": "", @@ -163,13 +173,20 @@ def parse_KH_QMs(kh, inputFile, ticked): "ticked": ticked, "page_ref": "", "completion_description": "", - "nearest_station_name": res["nearest_station_name"], - "location_description": res["location_description"], + "nearest_station_name": nearest_station_name, + "resolution_station_name": resolution_station_name, + "location_description": res["location_description"].replace("<a href=\"","<a href=\"/1623/161/"), } # Create new. We know it doesn't exist as we deleted evrything when we started. instance = QM.objects.create(**nonLookupAttribs, **lookupAttribs) - nqms += 1 + else: + if dataline.startswith("<dt><a href"): + fails += 1 + message = f" ! - {inputFile} line {line} Parse error \n{str(dataline)} " + print(message) + DataIssue.objects.create(parser="QMs", message=message) + print(f" - {fails:2g} parsing errors in {inputFile}") return nqms diff --git a/parsers/survex.py b/parsers/survex.py index 79e90d1..1e961c8 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -1313,7 +1313,7 @@ class LoadingSurvex: blockname = survexblock.name else: blockname = survexblock.name[:6] + survexblock.name[-1:] - # logslug = f'D{int(qmyear)}_{blockname}_{int(qm_no):03d}' + # logslug = f'D{int(qmyear)}_{blockname}_{int(qm_no):03d}' qm_ticked = False # default qm_no = qmline.group(1) # this is NOT unique across multiple survex files @@ -1340,14 +1340,7 @@ class LoadingSurvex: else: qm_ticked = True # print(f"{survexblock.survexfile.cave} {survexblock}:{qm_no}{qm_grade} {qmline.group(4)}", file=sys.stderr) - if resolution_station_name: - qm_ticked = True - # if qmline.group(6) and qmline.group(6) != "-": - # resolution_station_name = qmline.group(6) - # if qmline.group(7): - # resolution_station_name = resolution_station_name + "." + qmline.group(7) - # else: - # resolution_station_name = "" + qm_notes = qmline.group(5) # qm_notes = qmline.group(8) |