diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/caves.py | 68 | ||||
-rw-r--r-- | parsers/drawings.py | 2 | ||||
-rw-r--r-- | parsers/scans.py | 74 | ||||
-rw-r--r-- | parsers/survex.py | 46 |
4 files changed, 135 insertions, 55 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 3549c75..9d95f32 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -13,23 +13,16 @@ from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSl '''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html ) and creating the various Cave, Entrance and necessary Area objects. -This is the first import that happens after the dabase is reinitialised. +This is the first import that happens after the database is reinitialised. So is the first thing that creates tables. -BUT in Django 2.0 and later we cannot do any queries on data we have just entered -because this is all happening inside one transaction. Bummer. - -django.db.transaction.TransactionManagementError: -An error occurred in the current transaction. You can't execute queries until the end of the 'atomic' block. ''' -todo='''- db Update does not work when a cave id is in the pending list but a proper cave description file exists - and is being imported. It should work. But currently Django aborts and he file is not read in. - +todo=''' - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file. So we will need a separate file-editing capability just for this configuration file ?! -- crashes on MariaDB on server when deleting Caves and complains Area needs a non null parent, But this is not true. +- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true. The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo) and then restart the databasereset.py again. (status as of July 2022) ''' @@ -91,6 +84,15 @@ def do_pending_cave(k, url, area): in expoweb/cave_data/1623-"k".html ''' slug = k + + g = GetCaveLookup() + if slug in g: + message = f" ! {k} cave listed in pendingcaves.txt already exists." + DataIssue.objects.create(parser='caves', message=message, url=url) + print(message) + return + + default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n" default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then " @@ -118,7 +120,7 @@ def do_pending_cave(k, url, area): cave = Cave( unofficial_number = k, underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.", - survex_file = f"caves-{area.short_name}/{k}/{k}.svx", + survex_file = f"caves-{area.short_name}/{k[5:]}/{k[5:]}.svx", url = url, notes = default_note) if cave: @@ -465,27 +467,6 @@ def readcaves(): print(" - Saving Area 1626") area_1626.save() - print (" - Setting pending caves") - # Do this first, so that these empty entries are overwritten as they get properly created. - - for k in pending: - - area = area_1623 - areanum = k[0:4] - url = areanum + "/" + k[5:] # Note we are not appending the .htm as we are modern folks now. - if areanum == "1623": - area = area_1623 - if areanum == "1624": - area = area_1624 - if areanum == "1626": - area = area_1626 - try: - do_pending_cave(k[5:], url, area) - except: - message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}" - DataIssue.objects.create(parser='caves', message=message) - print(message) - raise with transaction.atomic(): print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS) @@ -505,4 +486,27 @@ def readcaves(): print (" - Setting up all the variously useful alias names") mycavelookup = GetCaveLookup() + + print (" - Setting pending caves") + # Do this last, so we can detect if they are created and no longer 'pending' + + for k in pending: + + area = area_1623 + areanum = k[0:4] + url = areanum + "/" + k[5:] # Note we are not appending the .htm as we are modern folks now. + if areanum == "1623": + area = area_1623 + if areanum == "1624": + area = area_1624 + if areanum == "1626": + area = area_1626 + try: + do_pending_cave(k, url, area) + except: + message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}" + DataIssue.objects.create(parser='caves', message=message) + print(message) + raise + diff --git a/parsers/drawings.py b/parsers/drawings.py index 88a6ca3..4b3e44d 100644 --- a/parsers/drawings.py +++ b/parsers/drawings.py @@ -50,7 +50,7 @@ def find_dwg_file(dwgfile, path): scansfile = scansfilel[0] if wallet: - dwgfile.manywallets.add(wallet) + dwgfile.dwgwallets.add(wallet) if scansfile: dwgfile.scans.add(scansfile) diff --git a/parsers/scans.py b/parsers/scans.py index fdded82..b78f76f 100644 --- a/parsers/scans.py +++ b/parsers/scans.py @@ -1,12 +1,15 @@ import sys import os +import subprocess import types import stat import csv import re import datetime +import shutil, filecmp from functools import reduce +from pathlib import Path import settings from troggle.core.models.survex import SingleScan, Wallet, DrawingFile @@ -18,7 +21,9 @@ from troggle.core.utils import save_carefully, GetListDir contentsjson = "contents.json" indexhtml = "walletindex.html" +git = settings.GIT +# to do: create a 'low priority' field, so that any such wallet does not appear in summary reports wallet_blank_json = { "cave": "", "date": "", @@ -54,6 +59,22 @@ wallet_blank_html = '''<html><body><H1>Wallet WALLET</H1> </UL> </body></html> ''' + +def CheckEmptyDate(wallet): + '''If date is not set, get it from a linked survex file. If several, pick the earliest. + + Maybe also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying. + ''' + return + +def CheckEmptyPeople(wallet): + '''If people list is empty, copy them from the survex files: all of them + + To be a Troggle model change; a many:many relationship between wallets and people, + as well as being a list in the JSON file (which is the permanent repository). We want the many:many + relationship so that we can filter wallets based on a person. + ''' + return def LoadListScansFile(wallet): gld = [ ] @@ -73,7 +94,45 @@ def LoadListScansFile(wallet): if c>=10: print(".", end='') c = 0 +def CopyWalletData(wallet): + '''Copies all the contents.json to a parallel set of folders in the drawings repo + refreshes everything during a ful import, but it shoudl all be up to date as every time + wallet data gets saved it should also be copied across and committed. + ''' + year = wallet.walletname[0:4] + destfolder = Path(settings.DRAWINGS_DATA,'walletjson', year, wallet.walletname) + destjson = destfolder / contentsjson + sourcejson = Path(wallet.fpath, contentsjson) + if not os.path.exists(Path(destfolder)): + try: + os.makedirs(destfolder) + print(f' - created folder {destfolder}..') + except PermissionError: + print(f"CANNOT save this JSON file.\nPERMISSIONS incorrectly set on server for this folder {destfolder}. Ask a nerd to fix this.") + if os.path.isfile(sourcejson): + try: + if not os.path.isfile(destjson) or not filecmp.cmp(sourcejson, destjson): + shutil.copy(sourcejson, destjson) + print(f' - Copied {sourcejson} to {destjson}') + dr_add = subprocess.run([git, "add", contentsjson], cwd=destfolder, capture_output=True, text=True) + if dr_add.returncode != 0: + msgdata = 'Ask a nerd to fix this.\n\n' + dr_add.stderr + '\n\n' + dr_add.stdout + '\n\nreturn code: ' + str(dr_add.returncode) + message = f'CANNOT git on server for this file {contentsjson}. Edits saved but not added to git.\n\n' + msgdata + print(message) + else: + # ideally we would commit many chnages to many wallets just once. But most of the time only a couple of files will change. + dr_commit = subprocess.run([git, "commit", "-m", f'Update of {contentsjson} in wallet'], cwd=destfolder, capture_output=True, text=True) + # This produces return code = 1 if it commits OK + if dr_commit.returncode != 0: + msgdata = 'Ask a nerd to fix this.\n\n' + dr_commit.stderr + '\n\n' + dr_commit.stdout + '\n\nreturn code: ' + str(dr_commit.returncode) + message = f'Error code with git on server for this {contentsjson}. File is copied, added to git, but NOT committed.\n\n' + msgdata + print(message) + + except PermissionError: + print(f"CANNOT copy this JSON file.\nPERMISSIONS incorrectly set on server for this file {destjson}. Ask a nerd to fix this.") + + # this iterates through the scans directories (either here or on the remote server) # and builds up the models we can access later @@ -109,17 +168,20 @@ def load_all_scans(): if fisdir: wallet = Wallet(fpath=fpath, walletname=walletname) # this is where we should load the contents.json for people so we can report on them later - # this is where we shoudl record the year explicitly + # this is where we should record the year explicitly # line 347 of view/uploads.py and needs refactoring for loading contentsjson wallet.save() LoadListScansFile(wallet) + CheckEmptyDate(wallet) + CheckEmptyPeople(wallet) + CopyWalletData(wallet) # what is this? - elif walletname != "thumbs": - print(f'\n - Wallet {walletname} - {fpath}') - wallet = Wallet(fpath=fpath, walletname=walletname) - wallet.save() - LoadListScansFile(wallet) + # elif walletname != "thumbs": + # print(f'\n - Wallet {walletname} - {fpath}') + # wallet = Wallet(fpath=fpath, walletname=walletname) + # wallet.save() + # LoadListScansFile(wallet) else: print(f'\n - IGNORE {walletname} - {fpath}') diff --git a/parsers/survex.py b/parsers/survex.py index 7b94005..39d42dc 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -37,7 +37,6 @@ todo = '''Also walk the entire tree in the :loser: repo looking for unconnected - LoadSurvexFile() Creates a new current survexfile and valid .survexdirectory The survexblock passed-in is not necessarily the parent. FIX THIS. -- rx_qm recognises only simple survey point ids. EXTEND to cover more naming formats and test fully for 2023 ''' survexblockroot = None ROOTBLOCK = "rootblock" @@ -131,8 +130,8 @@ class LoadingSurvex(): rx_cave = re.compile(r'(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)') rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$') - rx_comminc = re.compile(r'(?i)^\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include - rx_commcni = re.compile(r'(?i)^\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni + rx_comminc = re.compile(r'(?i)^\|\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include + rx_commcni = re.compile(r'(?i)^\|\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$') rx_commref = re.compile(r'(?i)^\s*ref(?:erence)?[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)') rx_wallet = re.compile(r'(?i)^\s*wallet[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)') @@ -178,13 +177,14 @@ class LoadingSurvex(): callcount = 0 caverncount = 0 ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"] - ignorenoncave = ["caves-1623", "caves-1623/2007-neu"] + ignorenoncave = ["caves-1623", "caves-1626", "caves-1623/2007-neu"] includedfilename ="" currentsurvexblock = None currentsurvexfile = None currentcave = None caverndate = None currentpersonexped = [] + pending = [] def __init__(self): self.caveslist = GetCaveLookup() @@ -690,9 +690,7 @@ class LoadingSurvex(): def IdentifyCave(self, cavepath): if cavepath.lower() in self.caveslist: return self.caveslist[cavepath.lower()] - # TO DO - some of this is already done in generating self.caveslist so simplify this - # esp. as it is in a loop. - # TO DO recognise cave if different name, e.g. gruenstein == 281 + # TO DO - this predates the big revision to Gcavelookup so look at this again carefully path_match = self.rx_cave.search(cavepath) if path_match: sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2)) @@ -724,31 +722,46 @@ class LoadingSurvex(): def ReportNonCaveIncludes(self, headpath, includelabel, depth): """Ignore surface, kataser and gpx *include survex files """ + if not self.pending: + self.pending = set() + fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt") + if fpending.is_file(): + with open(fpending, "r") as fo: + cids = fo.readlines() + for cid in cids: + self.pending.add(cid.rstrip('\n').upper()) + if headpath in self.ignorenoncave: - #message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)" + message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)" #print("\n"+message) #print("\n"+message,file=sys.stderr) return for i in self.ignoreprefix: if headpath.startswith(i): message = f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)" - #print("\n"+message) - #print("\n"+message,file=sys.stderr) + # print("\n"+message) + # print("\n"+message,file=sys.stderr) return - message = f" ! Error: FAILURE '{headpath}' while creating '{includelabel}' at depth:[{depth}]. Not a cave or in the ignore list:'{self.ignoreprefix}'" - # getting this triggered for gpx/2018 (cavern error) but not for gpx/2017 (no content). + caveid = f'{headpath[6:10]}-{headpath[11:]}'.upper() + if caveid in self.pending: + # Yes we didn't find this cave, but we know it is a pending one. So not an error. + # print(f'! ALREADY PENDING {caveid}',file=sys.stderr) + return + + message = f" ! Error: not a cave nor ignorable. headpath:'{headpath}' while parsing '{includelabel=}.svx' at depth:[{len(depth)}]. ignore prefix list:'{self.ignoreprefix}'" print("\n"+message) print("\n"+message,file=sys.stderr) DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(headpath)) print(f' # datastack in LoadSurvexFile:{includelabel} type:', end="",file=sys.stderr) for dict in self.datastack: - print(f'{dict["type"].upper()} ', end="",file=sys.stderr) + print(f'<{dict["type"].upper()} >', end="",file=sys.stderr) def LoadSurvexFile(self, svxid): """Creates SurvexFile in the database, and SurvexDirectory if needed with links to 'cave' Creates a new current survexfile and valid .survexdirectory + Inspects the parent folder of the survexfile and uses that to decide if this is a cave we know The survexblock passed-in is not necessarily the parent. FIX THIS. """ if debugprint: @@ -780,7 +793,7 @@ class LoadingSurvex(): if cave: newdirectory.cave = cave newfile.cave = cave - # print(f"\n - New directory {newdirectory} for cave {newdirectory.cave}",file=sys.stderr) + # print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr) else: # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list self.ReportNonCaveIncludes(headpath, svxid, depth) @@ -862,6 +875,7 @@ class LoadingSurvex(): included = self.rx_comminc.match(comment) # ;*include means 'we have been included'; whereas *include means 'proceed to include' + # bug, If the original survex file contians the line ;*include then we pick it up ! So fix our special code to be ;|*include if included: self.ProcessIncludeLine(included) @@ -1211,7 +1225,7 @@ class LoadingSurvex(): #-------------------------------------------------------- self.depthinclude += 1 fininclude = open(fullpath,'r') - fcollate.write(";*include {}\n".format(includepath)) + fcollate.write(";|*include {}\n".format(includepath)) flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath)) push = includepath.lower() self.includestack.append(push) @@ -1226,7 +1240,7 @@ class LoadingSurvex(): print(message,file=sys.stderr) DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path)) flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop)) - fcollate.write(";*edulcni {}\n".format(pop)) + fcollate.write(";|*edulcni {}\n".format(pop)) fininclude.close() self.depthinclude -= 1 #-------------------------------------------------------- |