summary | refs | log | tree | commit | diff | stats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/QMs.py | 31
-rw-r--r-- parsers/survex.py | 11
2 files changed, 26 insertions, 16 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py
index f3044ff..5a0be79 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -141,16 +141,26 @@ def parse_KH_QMs(kh, inputFile, ticked):
with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
khQMs = khQMfile.readlines()
nqms = 0
- for line in khQMs:
+ line = 0
+ fails = 0
+ for dataline in khQMs:
# <dt><a href="sibria.htm#qC1997-161-27" name="C1997-161-27">C1997-161-27</a> A<dd>Sib: pitch at end of Fuzzy Logic [Paradox Rift - continues] [sep.fuzzy.13]
+ line += 1
res = re.search(
- r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABCDX])<dd>(?P<location_description>.*)\[(?P<nearest_station_name>.*)\]",
- line,
+ r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a>\s*(?P<grade>[ABCDX?V])<dd>(?P<location_description>.*)(\[(?P<station_name>.*)\])?",
+ dataline,
)
if res:
res = res.groupdict()
year = int(res["year"])
-
+
+ nearest_station_name = ""
+ resolution_station_name = ""
+ if res["station_name"]:
+ if ticked:
+ resolution_station_name = res["station_name"].replace("<a href=\"","<a href=\"/1623/161/")
+ else:
+ nearest_station_name = res["station_name"]
lookupAttribs = {
#'found_by':placeholder,
"blockname": "",
@@ -163,13 +173,20 @@ def parse_KH_QMs(kh, inputFile, ticked):
"ticked": ticked,
"page_ref": "",
"completion_description": "",
- "nearest_station_name": res["nearest_station_name"],
- "location_description": res["location_description"],
+ "nearest_station_name": nearest_station_name,
+ "resolution_station_name": resolution_station_name,
+ "location_description": res["location_description"].replace("<a href=\"","<a href=\"/1623/161/"),
}
# Create new. We know it doesn't exist as we deleted evrything when we started.
instance = QM.objects.create(**nonLookupAttribs, **lookupAttribs)
-
nqms += 1
+ else:
+ if dataline.startswith("<dt><a href"):
+ fails += 1
+ message = f" ! - {inputFile} line {line} Parse error \n{str(dataline)} "
+ print(message)
+ DataIssue.objects.create(parser="QMs", message=message)
+ print(f" - {fails:2g} parsing errors in {inputFile}")
return nqms
diff --git a/parsers/survex.py b/parsers/survex.py
index 79e90d1..1e961c8 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -1313,7 +1313,7 @@ class LoadingSurvex:
blockname = survexblock.name
else:
blockname = survexblock.name[:6] + survexblock.name[-1:]
- # logslug = f'D{int(qmyear)}_{blockname}_{int(qm_no):03d}'
+ # logslug = f'D{int(qmyear)}_{blockname}_{int(qm_no):03d}'
qm_ticked = False # default
qm_no = qmline.group(1) # this is NOT unique across multiple survex files
@@ -1340,14 +1340,7 @@ class LoadingSurvex:
else:
qm_ticked = True
# print(f"{survexblock.survexfile.cave} {survexblock}:{qm_no}{qm_grade} {qmline.group(4)}", file=sys.stderr)
- if resolution_station_name:
- qm_ticked = True
- # if qmline.group(6) and qmline.group(6) != "-":
- # resolution_station_name = qmline.group(6)
- # if qmline.group(7):
- # resolution_station_name = resolution_station_name + "." + qmline.group(7)
- # else:
- # resolution_station_name = ""
+
qm_notes = qmline.group(5)
# qm_notes = qmline.group(8)