summaryrefslogtreecommitdiffstats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r--parsers/caves.py68
-rw-r--r--parsers/drawings.py2
-rw-r--r--parsers/scans.py74
-rw-r--r--parsers/survex.py46
4 files changed, 135 insertions, 55 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 3549c75..9d95f32 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -13,23 +13,16 @@ from troggle.core.models.caves import Area, Cave, Entrance, CaveSlug, EntranceSl
'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave_data/1623-161.html )
and creating the various Cave, Entrance and necessary Area objects.
-This is the first import that happens after the dabase is reinitialised.
+This is the first import that happens after the database is reinitialised.
So is the first thing that creates tables.
-BUT in Django 2.0 and later we cannot do any queries on data we have just entered
-because this is all happening inside one transaction. Bummer.
-
-django.db.transaction.TransactionManagementError:
-An error occurred in the current transaction. You can't execute queries until the end of the 'atomic' block.
'''
-todo='''- db Update does not work when a cave id is in the pending list but a proper cave description file exists
- and is being imported. It should work. But currently Django aborts and he file is not read in.
-
+todo='''
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
So we will need a separate file-editing capability just for this configuration file ?!
-- crashes on MariaDB on server when deleting Caves and complains Area needs a non null parent, But this is not true.
+- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true.
The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo)
and then restart the databasereset.py again. (status as of July 2022)
'''
@@ -91,6 +84,15 @@ def do_pending_cave(k, url, area):
in expoweb/cave_data/1623-"k".html
'''
slug = k
+
+ g = GetCaveLookup()
+ if slug in g:
+ message = f" ! {k} cave listed in pendingcaves.txt already exists."
+ DataIssue.objects.create(parser='caves', message=message, url=url)
+ print(message)
+ return
+
+
default_note = f"_Survex file found in loser repo but no description in expoweb <br><br><br>\n"
default_note += f"INSTRUCTIONS: First open 'This survex file' (link above the CaveView panel) to find the date and info. Then "
@@ -118,7 +120,7 @@ def do_pending_cave(k, url, area):
cave = Cave(
unofficial_number = k,
underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.",
- survex_file = f"caves-{area.short_name}/{k}/{k}.svx",
+ survex_file = f"caves-{area.short_name}/{k[5:]}/{k[5:]}.svx",
url = url,
notes = default_note)
if cave:
@@ -465,27 +467,6 @@ def readcaves():
print(" - Saving Area 1626")
area_1626.save()
- print (" - Setting pending caves")
- # Do this first, so that these empty entries are overwritten as they get properly created.
-
- for k in pending:
-
- area = area_1623
- areanum = k[0:4]
- url = areanum + "/" + k[5:] # Note we are not appending the .htm as we are modern folks now.
- if areanum == "1623":
- area = area_1623
- if areanum == "1624":
- area = area_1624
- if areanum == "1626":
- area = area_1626
- try:
- do_pending_cave(k[5:], url, area)
- except:
- message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
- DataIssue.objects.create(parser='caves', message=message)
- print(message)
- raise
with transaction.atomic():
print(" - settings.CAVEDESCRIPTIONS: ", CAVEDESCRIPTIONS)
@@ -505,4 +486,27 @@ def readcaves():
print (" - Setting up all the variously useful alias names")
mycavelookup = GetCaveLookup()
+
+ print (" - Setting pending caves")
+ # Do this last, so we can detect if they are created and no longer 'pending'
+
+ for k in pending:
+
+ area = area_1623
+ areanum = k[0:4]
+ url = areanum + "/" + k[5:] # Note we are not appending the .htm as we are modern folks now.
+ if areanum == "1623":
+ area = area_1623
+ if areanum == "1624":
+ area = area_1624
+ if areanum == "1626":
+ area = area_1626
+ try:
+ do_pending_cave(k, url, area)
+ except:
+ message = f" ! Error. Cannot create pending cave and entrance, pending-id:{k} in area {areanum}"
+ DataIssue.objects.create(parser='caves', message=message)
+ print(message)
+ raise
+
diff --git a/parsers/drawings.py b/parsers/drawings.py
index 88a6ca3..4b3e44d 100644
--- a/parsers/drawings.py
+++ b/parsers/drawings.py
@@ -50,7 +50,7 @@ def find_dwg_file(dwgfile, path):
scansfile = scansfilel[0]
if wallet:
- dwgfile.manywallets.add(wallet)
+ dwgfile.dwgwallets.add(wallet)
if scansfile:
dwgfile.scans.add(scansfile)
diff --git a/parsers/scans.py b/parsers/scans.py
index fdded82..b78f76f 100644
--- a/parsers/scans.py
+++ b/parsers/scans.py
@@ -1,12 +1,15 @@
import sys
import os
+import subprocess
import types
import stat
import csv
import re
import datetime
+import shutil, filecmp
from functools import reduce
+from pathlib import Path
import settings
from troggle.core.models.survex import SingleScan, Wallet, DrawingFile
@@ -18,7 +21,9 @@ from troggle.core.utils import save_carefully, GetListDir
contentsjson = "contents.json"
indexhtml = "walletindex.html"
+git = settings.GIT
+# to do: create a 'low priority' field, so that any such wallet does not appear in summary reports
wallet_blank_json = {
"cave": "",
"date": "",
@@ -54,6 +59,22 @@ wallet_blank_html = '''<html><body><H1>Wallet WALLET</H1>
</UL>
</body></html>
'''
+
+def CheckEmptyDate(wallet):
+ '''If date is not set, get it from a linked survex file. If several, pick the earliest.
+
+ Maybe also look at filedates for the scans in expofiles/surveyscans/ , but these can be re-set by copying.
+ '''
+ return
+
+def CheckEmptyPeople(wallet):
+ '''If people list is empty, copy them from the survex files: all of them
+
+ To be a Troggle model change; a many:many relationship between wallets and people,
+ as well as being a list in the JSON file (which is the permanent repository). We want the many:many
+ relationship so that we can filter wallets based on a person.
+ '''
+ return
def LoadListScansFile(wallet):
gld = [ ]
@@ -73,7 +94,45 @@ def LoadListScansFile(wallet):
if c>=10:
print(".", end='')
c = 0
+def CopyWalletData(wallet):
+ '''Copies all the contents.json to a parallel set of folders in the drawings repo
+    refreshes everything during a full import, but it should all be up to date as every time
+ wallet data gets saved it should also be copied across and committed.
+ '''
+ year = wallet.walletname[0:4]
+ destfolder = Path(settings.DRAWINGS_DATA,'walletjson', year, wallet.walletname)
+ destjson = destfolder / contentsjson
+ sourcejson = Path(wallet.fpath, contentsjson)
+ if not os.path.exists(Path(destfolder)):
+ try:
+ os.makedirs(destfolder)
+ print(f' - created folder {destfolder}..')
+ except PermissionError:
+ print(f"CANNOT save this JSON file.\nPERMISSIONS incorrectly set on server for this folder {destfolder}. Ask a nerd to fix this.")
+ if os.path.isfile(sourcejson):
+ try:
+ if not os.path.isfile(destjson) or not filecmp.cmp(sourcejson, destjson):
+ shutil.copy(sourcejson, destjson)
+ print(f' - Copied {sourcejson} to {destjson}')
+ dr_add = subprocess.run([git, "add", contentsjson], cwd=destfolder, capture_output=True, text=True)
+ if dr_add.returncode != 0:
+ msgdata = 'Ask a nerd to fix this.\n\n' + dr_add.stderr + '\n\n' + dr_add.stdout + '\n\nreturn code: ' + str(dr_add.returncode)
+ message = f'CANNOT git on server for this file {contentsjson}. Edits saved but not added to git.\n\n' + msgdata
+ print(message)
+ else:
+                    # ideally we would commit many changes to many wallets just once. But most of the time only a couple of files will change.
+ dr_commit = subprocess.run([git, "commit", "-m", f'Update of {contentsjson} in wallet'], cwd=destfolder, capture_output=True, text=True)
+ # This produces return code = 1 if it commits OK
+ if dr_commit.returncode != 0:
+ msgdata = 'Ask a nerd to fix this.\n\n' + dr_commit.stderr + '\n\n' + dr_commit.stdout + '\n\nreturn code: ' + str(dr_commit.returncode)
+ message = f'Error code with git on server for this {contentsjson}. File is copied, added to git, but NOT committed.\n\n' + msgdata
+ print(message)
+
+ except PermissionError:
+ print(f"CANNOT copy this JSON file.\nPERMISSIONS incorrectly set on server for this file {destjson}. Ask a nerd to fix this.")
+
+
# this iterates through the scans directories (either here or on the remote server)
# and builds up the models we can access later
@@ -109,17 +168,20 @@ def load_all_scans():
if fisdir:
wallet = Wallet(fpath=fpath, walletname=walletname)
# this is where we should load the contents.json for people so we can report on them later
- # this is where we shoudl record the year explicitly
+ # this is where we should record the year explicitly
# line 347 of view/uploads.py and needs refactoring for loading contentsjson
wallet.save()
LoadListScansFile(wallet)
+ CheckEmptyDate(wallet)
+ CheckEmptyPeople(wallet)
+ CopyWalletData(wallet)
# what is this?
- elif walletname != "thumbs":
- print(f'\n - Wallet {walletname} - {fpath}')
- wallet = Wallet(fpath=fpath, walletname=walletname)
- wallet.save()
- LoadListScansFile(wallet)
+ # elif walletname != "thumbs":
+ # print(f'\n - Wallet {walletname} - {fpath}')
+ # wallet = Wallet(fpath=fpath, walletname=walletname)
+ # wallet.save()
+ # LoadListScansFile(wallet)
else:
print(f'\n - IGNORE {walletname} - {fpath}')
diff --git a/parsers/survex.py b/parsers/survex.py
index 7b94005..39d42dc 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -37,7 +37,6 @@ todo = '''Also walk the entire tree in the :loser: repo looking for unconnected
- LoadSurvexFile() Creates a new current survexfile and valid .survexdirectory
The survexblock passed-in is not necessarily the parent. FIX THIS.
-- rx_qm recognises only simple survey point ids. EXTEND to cover more naming formats and test fully for 2023
'''
survexblockroot = None
ROOTBLOCK = "rootblock"
@@ -131,8 +130,8 @@ class LoadingSurvex():
rx_cave = re.compile(r'(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)')
rx_comment = re.compile(r'([^;]*?)\s*(?:;\s*(.*))?\n?$')
- rx_comminc = re.compile(r'(?i)^\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include
- rx_commcni = re.compile(r'(?i)^\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni
+ rx_comminc = re.compile(r'(?i)^\|\*include[\s]*([-\w/]*).*$') # inserted by linear collate ;*include
+ rx_commcni = re.compile(r'(?i)^\|\*edulcni[\s]*([-\w/]*).*$') # inserted by linear collate ;*edulcni
rx_include = re.compile(r'(?i)^\s*(\*include[\s].*)$')
rx_commref = re.compile(r'(?i)^\s*ref(?:erence)?[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
rx_wallet = re.compile(r'(?i)^\s*wallet[\s.:]*(\d+)\s*#\s*(X)?\s*(\d+)')
@@ -178,13 +177,14 @@ class LoadingSurvex():
callcount = 0
caverncount = 0
ignoreprefix = ["surface", "kataster", "fixedpts", "gpx"]
- ignorenoncave = ["caves-1623", "caves-1623/2007-neu"]
+ ignorenoncave = ["caves-1623", "caves-1626", "caves-1623/2007-neu"]
includedfilename =""
currentsurvexblock = None
currentsurvexfile = None
currentcave = None
caverndate = None
currentpersonexped = []
+ pending = []
def __init__(self):
self.caveslist = GetCaveLookup()
@@ -690,9 +690,7 @@ class LoadingSurvex():
def IdentifyCave(self, cavepath):
if cavepath.lower() in self.caveslist:
return self.caveslist[cavepath.lower()]
- # TO DO - some of this is already done in generating self.caveslist so simplify this
- # esp. as it is in a loop.
- # TO DO recognise cave if different name, e.g. gruenstein == 281
+ # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
path_match = self.rx_cave.search(cavepath)
if path_match:
sluggy = '{}-{}'.format(path_match.group(1), path_match.group(2))
@@ -724,31 +722,46 @@ class LoadingSurvex():
def ReportNonCaveIncludes(self, headpath, includelabel, depth):
        """Ignore surface, kataster and gpx *include survex files
"""
+ if not self.pending:
+ self.pending = set()
+ fpending = Path(settings.CAVEDESCRIPTIONS, "pendingcaves.txt")
+ if fpending.is_file():
+ with open(fpending, "r") as fo:
+ cids = fo.readlines()
+ for cid in cids:
+ self.pending.add(cid.rstrip('\n').upper())
+
if headpath in self.ignorenoncave:
- #message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
+ message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
#print("\n"+message)
#print("\n"+message,file=sys.stderr)
return
for i in self.ignoreprefix:
if headpath.startswith(i):
message = f" - {headpath} starts with <ignoreprefix> (while creating '{includelabel}' sfile & sdirectory)"
- #print("\n"+message)
- #print("\n"+message,file=sys.stderr)
+ # print("\n"+message)
+ # print("\n"+message,file=sys.stderr)
return
- message = f" ! Error: FAILURE '{headpath}' while creating '{includelabel}' at depth:[{depth}]. Not a cave or in the ignore list:'{self.ignoreprefix}'"
- # getting this triggered for gpx/2018 (cavern error) but not for gpx/2017 (no content).
+ caveid = f'{headpath[6:10]}-{headpath[11:]}'.upper()
+ if caveid in self.pending:
+ # Yes we didn't find this cave, but we know it is a pending one. So not an error.
+ # print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
+ return
+
+ message = f" ! Error: not a cave nor ignorable. headpath:'{headpath}' while parsing '{includelabel=}.svx' at depth:[{len(depth)}]. ignore prefix list:'{self.ignoreprefix}'"
print("\n"+message)
print("\n"+message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(headpath))
print(f' # datastack in LoadSurvexFile:{includelabel} type:', end="",file=sys.stderr)
for dict in self.datastack:
- print(f'{dict["type"].upper()} ', end="",file=sys.stderr)
+ print(f'<{dict["type"].upper()} >', end="",file=sys.stderr)
def LoadSurvexFile(self, svxid):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
with links to 'cave'
Creates a new current survexfile and valid .survexdirectory
+ Inspects the parent folder of the survexfile and uses that to decide if this is a cave we know
The survexblock passed-in is not necessarily the parent. FIX THIS.
"""
if debugprint:
@@ -780,7 +793,7 @@ class LoadingSurvex():
if cave:
newdirectory.cave = cave
newfile.cave = cave
- # print(f"\n - New directory {newdirectory} for cave {newdirectory.cave}",file=sys.stderr)
+ # print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
else: # probably a surface survey, or a cave in a new area e.g. 1624 not previously managed, and not in the pending list
self.ReportNonCaveIncludes(headpath, svxid, depth)
@@ -862,6 +875,7 @@ class LoadingSurvex():
included = self.rx_comminc.match(comment)
# ;*include means 'we have been included'; whereas *include means 'proceed to include'
+        # bug: if the original survex file contains the line ;*include then we pick it up ! So fix our special code to be ;|*include
if included:
self.ProcessIncludeLine(included)
@@ -1211,7 +1225,7 @@ class LoadingSurvex():
#--------------------------------------------------------
self.depthinclude += 1
fininclude = open(fullpath,'r')
- fcollate.write(";*include {}\n".format(includepath))
+ fcollate.write(";|*include {}\n".format(includepath))
flinear.write("{:2} {} *include {}\n".format(self.depthinclude, indent, includepath))
push = includepath.lower()
self.includestack.append(push)
@@ -1226,7 +1240,7 @@ class LoadingSurvex():
print(message,file=sys.stderr)
DataIssue.objects.create(parser='survex', message=message, url=get_offending_filename(path))
flinear.write("{:2} {} *edulcni {}\n".format(self.depthinclude, indent, pop))
- fcollate.write(";*edulcni {}\n".format(pop))
+ fcollate.write(";|*edulcni {}\n".format(pop))
fininclude.close()
self.depthinclude -= 1
#--------------------------------------------------------