summary refs log tree commit diff stats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/caves.py 218
-rw-r--r-- parsers/survex.py 57
2 files changed, 158 insertions, 117 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 9458d7a..6ddbea7 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -86,78 +86,115 @@ def do_pending_cave(k, url, area):
Note that at this point in importing the data we have not yet seen the survex files, so we can't
look inside the relevant survex file to find the year and so we can't provide helpful links.
'''
+ def get_survex_file(k):
+ '''Guesses at and finds a survex file for this pending cave.
+ Convoluted. Sorry. Needs rewriting
+ '''
+ if k[0:3] == "162":
+ id = Path(k[5:])
+ else:
+ id = Path(k)
+
+ survex_file = f"caves-{area.short_name}/{id}/{id}.svx"
+ if Path(settings.SURVEX_DATA, survex_file).is_file():
+ return survex_file
+ else:
+ survex_file = f"caves-{area.short_name}/{id}.svx"
+ if Path(settings.SURVEX_DATA, survex_file).is_file():
+ return survex_file
+
+ survex_file = ""
+ d = Path(settings.SURVEX_DATA, f"caves-{area.short_name}/{id}")
+ if d.is_dir():
+ prime_suspect = ""
+ dir = d.iterdir()
+ for f in dir:
+ if f.suffix == ".svx":
+ survex_file = f.relative_to(settings.SURVEX_DATA)
+ chk = min(5, len(f.name)-1)
+ if str(f.name)[:chk].lower() == str(id.name)[:chk].lower(): # bodge which mostly works
+ prime_suspect = survex_file
+ if prime_suspect:
+ survex_file = prime_suspect
+ # message = f" ! {k:14} Found a survex file which might be the right one: {survex_file}"
+ # DataIssue.objects.create(parser='caves', message=message, url=url)
+ # print(message)
+ return survex_file
+
slug = k
g = GetCaveLookup()
- if slug in g:
- message = f" ! {k} cave listed in pendingcaves.txt already exists."
- DataIssue.objects.create(parser='caves', message=message, url=url)
- print(message)
- return
+ with transaction.atomic():
+ if slug in g:
+ message = f" ! {k:18} cave listed in pendingcaves.txt already exists."
+ DataIssue.objects.create(parser='caves', message=message, url=url)
+ print(message)
+ return
-
+
- default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
- default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
- default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
- default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
- default_note += f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
- default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, "
- default_note += f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
- default_note += f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
- default_note += f"just in case a vital trip was not transcribed, then <br>\n - "
- default_note += f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
- default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now."
- default_note += f"<br><br>\n\n - Only two fields on this form are essential. "
- default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
- default_note += f"<br><br>\n\n - "
- default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
- default_note += f"<br><br>\n\n - "
- default_note += f"When you Submit it will create a new file in expoweb/cave_data/ "
- default_note += f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
- default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
- default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
- default_note += f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
+ default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
+ default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
+ default_note += f"<br><br>\n\n - (1) search in the survex file for the *ref to find a "
+ default_note += f"relevant wallet, e.g.<a href='/survey_scans/2009%252311/'>2009#11</a> and read the notes image files <br>\n - "
+ default_note += f"<br><br>\n\n - (2) search in the Expo for that year e.g. <a href='/expedition/2009'>2009</a> to find a "
+ default_note += f"relevant logbook entry, remember that the date may have been recorded incorrectly, "
+ default_note += f"so check for trips i.e. logbook entries involving the same people as were listed in the survex file, "
+ default_note += f"and you should also check the scanned copy of the logbook (linked from each logbook entry page) "
+ default_note += f"just in case a vital trip was not transcribed, then <br>\n - "
+ default_note += f"click on 'Edit this cave' and copy the information you find in the survex file and the logbook"
+ default_note += f"and delete all the text in the 'Notes' section - which is the text you are reading now."
+ default_note += f"<br><br>\n\n - Only two fields on this form are essential. "
+ default_note += f"Documentation of all the fields on 'Edit this cave' form is in <a href='/handbook/survey/caveentryfields.html'>handbook/survey/caveentryfields</a>"
+ default_note += f"<br><br>\n\n - "
+ default_note += f"You will also need to create a new entrance from the 'Edit this cave' page. Ignore the existing dummy one, it will evaporate on the next full import."
+ default_note += f"<br><br>\n\n - "
+ default_note += f"When you Submit it will create a new file in expoweb/cave_data/ "
+ default_note += f"<br><br>\n\n - Now you can edit the entrance info: click on Edit below for the dummy entrance. "
+ default_note += f"and then Submit to save it (if you forget to do this, a dummy entrance will be created for your new cave description)."
+ default_note += f"<br><br>\n\n - Finally, you need to find a nerd to edit the file '<var>expoweb/cave_data/pending.txt</var>' "
+ default_note += f"to remove the line <br><var>{slug}</var><br> as it is no longer 'pending' but 'done. Well Done."
-
- cave = Cave(
- unofficial_number = k,
- underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
- survex_file = f"caves-{area.short_name}/{k[5:]}/{k[5:]}.svx",
- url = url,
- notes = default_note)
- if cave:
- cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
- cave.area.add(area)
- cave.save()
- message = f" ! {k:14} {cave.underground_description} url: {url}"
- DataIssue.objects.create(parser='caves', message=message, url=url)
- print(message)
-
- try: # Now create a cave slug ID
- cs = CaveSlug.objects.update_or_create(cave = cave,
- slug = slug, primary = False)
- except:
- message = f" ! {k:11s} PENDING cave SLUG create failure"
+ survex_file = get_survex_file(k)
+
+ cave = Cave(
+ unofficial_number = k,
+ underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
+ survex_file = survex_file,
+ url = url,
+ notes = default_note)
+ if cave:
+ cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key.
+ cave.area.add(area)
+ cave.save()
+ message = f" ! {k:18} {cave.underground_description} url: {url}"
+ DataIssue.objects.create(parser='caves', message=message, url=url)
+ print(message)
+
+ try: # Now create a cave slug ID
+ cs = CaveSlug.objects.update_or_create(cave = cave,
+ slug = slug, primary = False)
+ except:
+ message = f" ! {k:11s} PENDING cave SLUG create failure"
+ DataIssue.objects.create(parser='caves', message=message)
+ print(message)
+ else:
+ message = f' ! {k:11s} PENDING cave create failure'
DataIssue.objects.create(parser='caves', message=message)
print(message)
- else:
- message = f' ! {k:11s} PENDING cave create failure'
- DataIssue.objects.create(parser='caves', message=message)
- print(message)
- try:
- ent = dummy_entrance(k, slug, msg="PENDING")
- ceinsts = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = ent)
- for ceinst in ceinsts:
- if str(ceinst) == str(cave): # magic runes... why is the next value a Bool?
- ceinst.cave = cave
- ceinst.save()
- break
- except:
- message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
- DataIssue.objects.create(parser='caves', message=message)
- print(message)
+ try:
+ ent = dummy_entrance(k, slug, msg="PENDING")
+ ceinsts = CaveAndEntrance.objects.update_or_create(cave = cave, entrance_letter = "", entrance = ent)
+ for ceinst in ceinsts:
+ if str(ceinst) == str(cave): # magic runes... why is the next value a Bool?
+ ceinst.cave = cave
+ ceinst.save()
+ break
+ except:
+ message = f" ! {k:11s} PENDING entrance + cave UNION create failure '{cave}' [{ent}]"
+ DataIssue.objects.create(parser='caves', message=message)
+ print(message)
@@ -391,7 +428,7 @@ def readcave(filename):
if description_file[0]: # if not an empty string
- message = f' - {slug:12} complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
+ message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
DataIssue.objects.create(parser='caves ok', message=message, url=f'/{slug}_cave_edit/')
print(message)
@@ -434,7 +471,7 @@ def readcaves():
with open(fpending, "r") as fo:
cids = fo.readlines()
for cid in cids:
- pending.add(cid.rstrip('\n'))
+ pending.add(cid.strip().rstrip('\n').upper())
with transaction.atomic():
print(" - Deleting Caves and Entrances")
@@ -460,16 +497,12 @@ def readcaves():
print(" - Creating Areas 1623, 1624, 1627 and 1626")
# This crashes on the server with MariaDB even though a null parent is explicitly allowed.
area_1623= Area.objects.create(short_name = "1623", super=None)
- print(" - Saving Area 1623")
area_1623.save()
area_1624= Area.objects.create(short_name = "1624", super=None)
- print(" - Saving Area 1624")
area_1624.save()
area_1626= Area.objects.create(short_name = "1626", super=None)
- print(" - Saving Area 1626")
area_1626.save()
area_1627= Area.objects.create(short_name = "1627", super=None)
- print(" - Saving Area 1627")
area_1627.save()
@@ -495,29 +528,32 @@ def readcaves():
print (" - Setting pending caves")
# Do this last, so we can detect if they are created and no longer 'pending'
- for k in pending:
-
- if k[0:3] == "162":
- areanum = k[0:4]
- url = f'{areanum}/{k[5:]}' # Note we are not appending the .htm as we are modern folks now.
- else:
- areanum = "1623"
- url = f'1623/{k}'
-
+ with transaction.atomic():
+ for k in pending:
+
+ if k[0:3] == "162":
+ areanum = k[0:4]
+ url = f'{areanum}/{k[5:]}' # Note we are not appending the .htm as we are modern folks now.
+ else:
+ areanum = "1623"
+ url = f'1623/{k}'
+
- area = area_1623
- if areanum == "1623":
area = area_1623
- if areanum == "1624":
- area = area_1624
- if areanum == "1626":
- area = area_1626
- try:
- do_pending_cave(k, url, area)
- except:
- message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
- DataIssue.objects.create(parser='caves', message=message)
- print(message)
- raise
+ if areanum == "1623":
+ area = area_1623
+ if areanum == "1624":
+ area = area_1624
+ if areanum == "1626":
+ area = area_1626
+ if areanum == "1627":
+ area = area_1627
+ try:
+ do_pending_cave(k, url, area)
+ except:
+ message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
+ DataIssue.objects.create(parser='caves', message=message)
+ print(message)
+ raise
diff --git a/parsers/survex.py b/parsers/survex.py
index 3789cb7..406f08a 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -523,7 +523,7 @@ class LoadingSurvex():
yr, letterx, wallet = argsgps.groups()
else:
perps = get_people_on_trip(survexblock)
- message = f" ! Wallet *REF '{args}' malformed id in '{survexblock.survexfile.path}' {perps}"
+ message = f" ! Wallet *REF bad in '{survexblock.survexfile.path}' malformed id '{args}' {perps}"
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=url)
return
@@ -549,19 +549,25 @@ class LoadingSurvex():
message = " ! Wallet *REF {} - not numeric in '{}'".format(refscan, survexblock.survexfile.path)
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=url)
+
manywallets = Wallet.objects.filter(walletname=refscan) # assumes all wallets found in earlier pass of data import
if manywallets:
- survexblock.scanswallet = manywallets[0] # this is a ForeignKey field
- message = f' - Wallet {manywallets[0]=} successfully found in db. *ref in {survexblock.survexfile.path}'
- # print(self.insp+message)
- survexblock.save()
if len(manywallets) > 1:
- message = " ! Wallet *REF {} - more than one found {} wallets in block {}".format(refscan, len(manywallets), survexblock.survexfile.path)
+ message = " ! Wallet *REF {} - more than one found {} wallets in db with same id {}".format(refscan, len(manywallets), survexblock.survexfile.path)
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=url)
+
+ if survexblock.scanswallet:
+ if survexblock.scanswallet.walletname != refscan:
+ message = f" ! Wallet *REF {refscan} in {survexblock.survexfile.path} - Already a DIFFERENT wallet is set for this block '{survexblock.scanswallet.walletname}'"
+ print(self.insp+message)
+ DataIssue.objects.create(parser='survex', message=message, url=url)
+ else:
+ survexblock.scanswallet = manywallets[0] # this is a ForeignKey field
+ survexblock.save()
else:
perps = get_people_on_trip(survexblock)
- message = f" ! Wallet *REF '{refscan}' in '{survexblock.survexfile.path}' {perps} NOT in database i.e. wallet does not exist."
+ message = f" ! Wallet *REF bad in '{survexblock.survexfile.path}' '{refscan}' NOT in database i.e. wallet does not exist {perps}."
print(self.insp+message)
DataIssue.objects.create(parser='survex', message=message, url=url)
@@ -994,7 +1000,7 @@ class LoadingSurvex():
blockcount +=1
if blockcount % 20 ==0 :
print(".", file=sys.stderr,end='')
- if blockcount % 400 ==0 :
+ if blockcount % 800 ==0 :
print("\n", file=sys.stderr,end='')
mem=get_process_memory()
print(" - MEM:{:7.3f} MB in use".format(mem),file=sys.stderr)
@@ -1603,16 +1609,12 @@ def FindAndLoadSurvex(survexblockroot):
-
-
# Before doing this, it would be good to identify the *equate and *entrance we need that are relevant to the
# entrance locations currently loaded after this by LoadPos(), but could better be done before ?
# look in MapLocations() for how we find the entrances
print('\n - Loading All Survex Blocks (LinearLoad)',file=sys.stderr)
svx_load = LoadingSurvex()
- mem1 = get_process_memory()
- print(" - MEM:{:7.2f} MB after creating empty loading object.".format(mem1),file=sys.stderr)
svx_load.survexdict[survexfileroot.survexdirectory] = []
svx_load.survexdict[survexfileroot.survexdirectory].append(survexfileroot)
@@ -1624,6 +1626,8 @@ def FindAndLoadSurvex(survexblockroot):
svxlines = fcollate.read().splitlines()
#pr2 = cProfile.Profile()
#pr2.enable()
+ mem1 = get_process_memory()
+ print(f" - MEM:{mem1:7.2f} MB immediately after reading '{collatefilename}' into memory.",file=sys.stderr)
print(" ", file=sys.stderr,end='')
#----------------------------------------------------------------
svx_load.LinearLoad(survexblockroot,survexfileroot.path, svxlines)
@@ -1633,6 +1637,7 @@ def FindAndLoadSurvex(survexblockroot):
# ps = pstats.Stats(pr2, stream=f)
# ps.sort_stats(SortKey.CUMULATIVE)
# ps.print_stats()
+ svxlines = [] # empty 30MB of stashed file
mem1 = get_process_memory()
print("\n - MEM:{:7.2f} MB STOP".format(mem1),file=sys.stderr)
print(" - MEM:{:7.3f} MB ADDITIONALLY USED".format(mem1-mem0),file=sys.stderr)
@@ -1748,24 +1753,24 @@ def LoadPositions():
capture_output=True, check=False, text=True) #check=False means exception not raised
if sp.returncode != 0:
message = f' ! Error: cavern: creating {file3d} in runcavern3()'
- DataIssue.objects.create(parser='survex', message=message)
+ DataIssue.objects.create(parser='entrances', message=message)
print(message)
# find the errors in the 1623.log file
sp = subprocess.run(["grep", "error:", f"{topdata}.log"],
capture_output=True, check=False, text=True) #check=False means exception not raised
- message = f' ! Error: cavern: {sp.stdout}'
- DataIssue.objects.create(parser='survex', message=message)
+ message = f' ! Error: cavern: {sp.stdout} creating {file3d} '
+ DataIssue.objects.create(parser='entrances', message=message)
print(message)
except:
- message = " ! CalledProcessError 'cavern' in runcavern3() at {topdata}."
- DataIssue.objects.create(parser='survex', message=message)
+ message = f" ! CalledProcessError 'cavern' in runcavern3() at {topdata}."
+ DataIssue.objects.create(parser='entrances', message=message)
print(message)
if file3d.is_file():
- message = " ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}"
- DataIssue.objects.create(parser='survex', message=message)
+ message = f" ! CalledProcessError. File permissions {file3d.stat().st_mode} on {str(file3d)}"
+ DataIssue.objects.create(parser='entrances', message=message)
print(message)
if file3d.is_file(): # might be an old one though
@@ -1776,7 +1781,7 @@ def LoadPositions():
if sp.returncode != 0:
print(f' ! Error: survexport creating {topdata}.pos in runcavern3().\n\n' + str(sp.stdout) + '\n\nreturn code: ' + str(sp.returncode))
except:
- message = " ! CalledProcessError 'survexport' in runcavern3() at {file3d}."
+ message = f" ! CalledProcessError 'survexport' in runcavern3() at {file3d}."
DataIssue.objects.create(parser='entrances', message=message)
print(message)
else:
@@ -1837,9 +1842,9 @@ def LoadPositions():
try:
survexblockroot = SurvexBlock.objects.get(id=1)
except:
- message = ' ! FAILED to find root SurvexBlock'
+ message = f' ! FAILED to find root SurvexBlock'
print(message)
- DataIssue.objects.create(parser='survex', message=message)
+ DataIssue.objects.create(parser='entrances', message=message)
raise
for line in posfile.readlines():
r = poslineregex.match(line)
@@ -1859,17 +1864,17 @@ def LoadPositions():
if len(sbqs)>1:
message = " ! MULTIPLE SurvexBlocks {:3} matching Entrance point {} {} '{}'".format(len(sbqs), blockpath, sid, id)
print(message)
- DataIssue.objects.create(parser='survex', message=message)
+ DataIssue.objects.create(parser='entrances', message=message)
sb = sbqs[0]
elif len(sbqs)<=0:
message = " ! ZERO SurvexBlocks matching Entrance point {} {} '{}'".format(blockpath, sid, id)
print(message)
- DataIssue.objects.create(parser='survex', message=message)
+ DataIssue.objects.create(parser='entrances', message=message)
sb = survexblockroot
except:
message = ' ! FAIL in getting SurvexBlock matching Entrance point {} {}'.format(blockpath, sid)
print(message)
- DataIssue.objects.create(parser='survex', message=message)
+ DataIssue.objects.create(parser='entrances', message=message)
try:
ss = SurvexStation(name=id, block=survexblockroot)
ss.x = float(x)
@@ -1880,7 +1885,7 @@ def LoadPositions():
except:
message = ' ! FAIL to create SurvexStation Entrance point {} {}'.format(blockpath, sid)
print(message)
- DataIssue.objects.create(parser='survex', message=message)
+ DataIssue.objects.create(parser='entrances', message=message)
raise
print(" - {} SurvexStation entrances found.".format(found))