summary | refs | log | tree | commit | diff | stats
path: root/parsers/QMs.py
diff options
context:
space:
mode:
author: Philip Sargent <philip.sargent@gmail.com> 2023-01-28 14:04:32 +0000
committer: Philip Sargent <philip.sargent@gmail.com> 2023-01-28 14:04:32 +0000
commit: 2704fc42d4912b361481a19f525e9d1c508dd4b7 (patch)
tree: 6350e41670dff173e08575154ed3a4b81d0076df /parsers/QMs.py
parent: d9a40696627b408170a0bb8f84fe18ae33a1feec (diff)
download: troggle-2704fc42d4912b361481a19f525e9d1c508dd4b7.tar.gz
download: troggle-2704fc42d4912b361481a19f525e9d1c508dd4b7.tar.bz2
download: troggle-2704fc42d4912b361481a19f525e9d1c508dd4b7.zip
faster db creation, safer file reading with 'with'
Diffstat (limited to 'parsers/QMs.py')
-rw-r--r-- parsers/QMs.py 189
1 files changed, 91 insertions, 98 deletions
diff --git a/parsers/QMs.py b/parsers/QMs.py
index da3d6c7..9fa5bc7 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -1,6 +1,7 @@
import csv
import os
import re
+from pathlib import Path
from django.conf import settings
@@ -65,112 +66,104 @@ def parseCaveQMs(cave, inputFile, ticked=False):
return nqms
# qmPath = settings.EXPOWEB+inputFile
- qmPath = os.path.join(settings.EXPOWEB, inputFile) # why not use the pathlib stuff ?
-
- qmCSVContents = open(qmPath, "r")
- dialect = csv.Sniffer().sniff(qmCSVContents.read())
- qmCSVContents.seek(0, 0)
- qmReader = csv.reader(qmCSVContents, dialect=dialect)
- next(qmReader) # Skip header row
- n = 0
- nqms = 0
- for line in qmReader:
- try:
- n += 1
- year = int(line[0][1:5])
- f"PH_{int(year)}_{int(n):02d}"
- QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
- newQM = QM()
- # newQM.found_by=placeholder
- newQM.number = QMnum
- newQM.cave = caveid
- newQM.blockname = ""
- if line[1] == "Dig":
- newQM.grade = "D"
- else:
- newQM.grade = line[1]
- newQM.area = line[2]
- newQM.location_description = line[3]
-
- # In the table, completion is indicated by the presence of a completion discription.
- newQM.completion_description = line[4]
- newQM.nearest_station_description = line[5]
- if newQM.completion_description:
- newQM.ticked = True
- else:
- newQM.ticked = False
-
- newQM.comment = line[6]
+ qmPath = Path(settings.EXPOWEB, inputFile)
+
+ with open(qmPath, "r") as qmCSVContents:
+ dialect = csv.Sniffer().sniff(qmCSVContents.read())
+ qmCSVContents.seek(0, 0)
+ qmReader = csv.reader(qmCSVContents, dialect=dialect)
+ next(qmReader) # Skip header row
+ n = 0
+ nqms = 0
+ for line in qmReader:
try:
- # year and number are unique for a cave in CSV imports
- preexistingQM = QM.objects.get(
- number=QMnum, found_by__date__year=year
- ) # if we don't have this one in the DB, save it
- if (
- preexistingQM.new_since_parsing is False
- ): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
- preexistingQM.delete()
+ n += 1
+ year = int(line[0][1:5])
+ f"PH_{int(year)}_{int(n):02d}"
+ QMnum = re.match(r".*?-\d*?-X?(?P<numb>\d*)", line[0]).group("numb")
+ newQM = QM()
+ # newQM.found_by=placeholder
+ newQM.number = QMnum
+ newQM.cave = caveid
+ newQM.blockname = ""
+ if line[1] == "Dig":
+ newQM.grade = "D"
+ else:
+ newQM.grade = line[1]
+ newQM.area = line[2]
+ newQM.location_description = line[3]
+
+ # In the table, completion is indicated by the presence of a completion discription.
+ newQM.completion_description = line[4]
+ newQM.nearest_station_description = line[5]
+ if newQM.completion_description:
+ newQM.ticked = True
+ else:
+ newQM.ticked = False
+
+ newQM.comment = line[6]
+ try:
+ # year and number are unique for a cave in CSV imports
+ preexistingQM = QM.objects.get(
+ number=QMnum, found_by__date__year=year
+ ) # if we don't have this one in the DB, save it
+ if (
+ preexistingQM.new_since_parsing is False
+ ): # if the pre-existing QM has not been modified, overwrite it - VERY OLD THING
+ preexistingQM.delete()
+ newQM.expoyear = year
+ newQM.save()
+ else: # otherwise, print that it was ignored
+ print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
+
+ except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
newQM.expoyear = year
newQM.save()
- else: # otherwise, print that it was ignored
- print((" - preserving " + str(preexistingQM) + ", which was edited in admin \r"))
-
- except QM.DoesNotExist: # if there is no pre-existing QM, save the new one
- newQM.expoyear = year
- newQM.save()
- nqms += 1
- except KeyError: # check on this one
- message = f" ! - {qmPath} KeyError {str(line)} "
- print(message)
- DataIssue.objects.create(parser="QMs", message=message)
- continue
- except IndexError:
- message = f" ! - {qmPath} IndexError {str(line)} "
- print(message)
- DataIssue.objects.create(parser="QMs", message=message)
- continue
+ nqms += 1
+ except KeyError: # check on this one
+ message = f" ! - {qmPath} KeyError {str(line)} "
+ print(message)
+ DataIssue.objects.create(parser="QMs", message=message)
+ continue
+ except IndexError:
+ message = f" ! - {qmPath} IndexError {str(line)} "
+ print(message)
+ DataIssue.objects.create(parser="QMs", message=message)
+ continue
return nqms
def parse_KH_QMs(kh, inputFile, ticked):
"""import QMs from the 1623-161 (Kaninchenhohle) html pages, different format"""
- khQMs = open(os.path.join(settings.EXPOWEB, inputFile), "r")
- khQMs = khQMs.readlines()
- nqms = 0
- for line in khQMs:
- res = re.search(
- r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
- line,
- )
- if res:
- res = res.groupdict()
- year = int(res["year"])
- # logbook placeholder code was previously here. No longer needed.
- # check if placeholder exists for given year, create it if not
- # message = " ! - "+ str(year) + " logbook: placeholder entry for '161 KH' created. DUMMY EXPEDITION ID. Should be re-attached to the actual trip."
- # placeholder, hadToCreate = LogbookEntry.objects.get_or_create(date__year=year, place="161", title="placeholder for QMs in 161", text=message, entry_type="DUMMY", expedition_id=1, defaults={"date": date((year), 1, 1),"cave_slug":str(kh)})
- # # if hadToCreate:
- # print(message)
- # DataIssue.objects.create(parser='QMs', message=message)
- lookupArgs = {
- #'found_by':placeholder,
- "blockname": "",
- "expoyear": year,
- "number": res["number"],
- "cave": kh,
- "grade": res["grade"],
- }
- nonLookupArgs = {
- "ticked": ticked,
- "nearest_station_name": res["nearest_station"],
- "location_description": res["description"],
- }
- instance, created = save_carefully(QM, lookupArgs, nonLookupArgs)
- # if created:
- # message = f" ! - {instance.code()} QM entry for '161 KH' created. ticked: {ticked}"
- # print(message)
- # DataIssue.objects.create(parser='QMs', message=message)
- nqms += 1
+ with open(os.path.join(settings.EXPOWEB, inputFile), "r") as khQMfile:
+ khQMs = khQMfile.readlines()
+ nqms = 0
+ for line in khQMs:
+ res = re.search(
+ r"name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]",
+ line,
+ )
+ if res:
+ res = res.groupdict()
+ year = int(res["year"])
+
+ lookupAttribs = {
+ #'found_by':placeholder,
+ "blockname": "",
+ "expoyear": year,
+ "number": res["number"],
+ "cave": kh,
+ "grade": res["grade"],
+ }
+ nonLookupAttribs = {
+ "ticked": ticked,
+ "nearest_station_name": res["nearest_station"],
+ "location_description": res["description"],
+ }
+ # Create new. We know it doesn't exist as we deleted evrything when we started.
+ instance = QM.objects.create(**nonLookupAttribs, **lookupAttribs)
+
+ nqms += 1
return nqms