summary | refs | log | tree | commit | diff | stats
path: root/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'parsers')
-rw-r--r-- parsers/caves.py 4
-rw-r--r-- parsers/imports.py 2
-rw-r--r-- parsers/logbooks.py 10
-rw-r--r-- parsers/scans.py 1
-rw-r--r-- parsers/survex.py 50
5 files changed, 43 insertions, 24 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index b376801..f8b39e9 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -483,7 +483,7 @@ def read_cave(filename, cave=None):
cave=cave, entrance_letter=letter, entrance=entrance
)
except:
- message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"'
+ message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {e} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"'
DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_edit/")
print(message)
def reload_entrances():
@@ -651,7 +651,7 @@ def read_cave(filename, cave=None):
print(message)
if description_file[0]: # if not an empty string
- message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
+ message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "cave_data/{filename}"'
DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
print(message)
diff --git a/parsers/imports.py b/parsers/imports.py
index 50b45cf..9191194 100644
--- a/parsers/imports.py
+++ b/parsers/imports.py
@@ -27,7 +27,7 @@ def import_people():
troggle.parsers.people.load_people_expos()
def import_surveyscans():
- print("-- Importing Survey Scans")
+ print("-- Importing Survey Scans and Wallets")
with transaction.atomic():
troggle.parsers.scans.load_all_scans()
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index e701183..84d0f5a 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -57,7 +57,7 @@ LOGBOOK_PARSER_SETTINGS = {
LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
ENTRIES = {
- "2023": 27,
+ "2023": 32,
"2022": 90,
"2019": 55,
"2018": 95,
@@ -305,8 +305,8 @@ def parser_html(year, expedition, txt, seq=""):
endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
endpara = endmatch.groups()[0].strip()
- #print(f" - endpara:\n'{endpara}'")
if len(endpara) > 0:
+ print(f"\n - {year} endpara:\n'{endpara}'")
endpath = Path(settings.EXPOWEB, LOGBOOKS_DIR, year, "endmatter.html")
with open(endpath, "w") as end:
end.write(endpara + "\n")
@@ -560,7 +560,8 @@ def parse_logbook_for_expedition(expedition, blog=False):
if logbook_parseable:
# --------------------
parser = globals()[parsefunc]
- print(f" - {year} parsing with {parsefunc} - {lb}")
+ # print(f" - {year} parsing with {parsefunc} - {lb}")
+ print(" .", end="")
logentries = parser(year, expedition, txt, sq) # this launches the right parser
# --------------------
@@ -665,7 +666,7 @@ def LoadLogbooks():
logentries = parse_logbook_for_expedition(b, blog=True) # loads the blog logbook for one expo
allentries += logentries
- print(f"total {len(allentries):,} log entries parsed in all expeditions")
+ print(f"\n - {len(allentries):,} log entries parsed in all expeditions")
mem = get_process_memory()
print(f" - MEM: {mem:7.2f} MB in use, {mem-mem1:7.2f} MB more", file=sys.stderr)
duration = time.time() - start
@@ -682,6 +683,7 @@ def LoadLogbooks():
for expo in expos:
expo.save() # to save logbook name property
mem = get_process_memory()
+ print(f" - {len(allentries):,} log entries saved into database")
print(f" - MEM: {mem:7.2f} MB in use, {mem-mem1:7.2f} MB more", file=sys.stderr)
duration = time.time() - start
print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
diff --git a/parsers/scans.py b/parsers/scans.py
index 3f042da..9929209 100644
--- a/parsers/scans.py
+++ b/parsers/scans.py
@@ -38,6 +38,7 @@ def load_all_scans():
Wallet.objects.all().delete()
print(" - deleting all Wallet and SingleScan objects")
DataIssue.objects.filter(parser="scans").delete()
+ DataIssue.objects.filter(parser="wallets").delete()
# These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
valids = [
diff --git a/parsers/survex.py b/parsers/survex.py
index b3db9e7..2be2217 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -271,7 +271,7 @@ class LoadingSurvex:
rx_names = re.compile(r"(?i)names")
rx_flagsnot = re.compile(r"not\s")
rx_linelen = re.compile(r"[\d\-+.]+$")
- instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)"
+ instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|shoot|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)"
rx_teammem = re.compile(r"(?i)" + instruments + "?(?:es|s)?\s+(.*)$")
rx_teamold = re.compile(r"(?i)(.*)\s+" + instruments + "?(?:es|s)?$")
rx_teamabs = re.compile(r"(?i)^\s*(" + instruments + ")?(?:es|s)?\s*$")
@@ -435,23 +435,34 @@ class LoadingSurvex:
self.currentdate = self.inheritdate # unecessary duplication
# Not an error, so not put in DataIssues, but is printed to debug output
message = (
- f"- No *date. INHERITING date from ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}"
+ f"- No *date. INHERITING date '{self.inheritdate:%Y-%m-%d}' from ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}"
)
print(self.insp + message)
# stash_data_issue(
# parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) # child
# )
if survexblock.survexfile != survexblock.parent.survexfile:
- # This is noteworthy, however.
- message = (
- f"- Warning *date INHERITED from DIFFERENT file:\n ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}\n {self.stackbegin} {self.inheritdate:%Y-%m-%d}"
- )
- print(self.insp + message)
- stash_data_issue(
- parser="survex", message=message, url=None, sb=(survexblock.parent.survexfile.path) # PARENT
- )
-
- return self.inheritdate
+ # This is noteworthy, however.
+
+ if survexblock.parent.name == "rootblock":
+ # Not a sensible thing to inherit a date from, even if a date exists, which it shouldn't...
+ message = (
+ f"- No *date. But not sensible to inherit from rootblock. From ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}"
+ )
+ print(self.insp + message)
+ # stash_data_issue(
+ # parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
+ # )
+ return
+ else:
+ message = (
+ f"- Warning *date '{self.inheritdate:%Y-%m-%d}' INHERITED from DIFFERENT file:\n ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}\n {self.stackbegin} {self.inheritdate:%Y-%m-%d}"
+ )
+ print(self.insp + message)
+ stash_data_issue(
+ parser="survex", message=message, url=None, sb=(survexblock.parent.survexfile.path) # PARENT
+ )
+ return self.inheritdate
else:
# This is not an error in the Expo dataset.
# Many files just holding *include lines do not have dates.
@@ -1550,9 +1561,9 @@ class LoadingSurvex:
nonlocal blockcount
blockcount += 1
- if blockcount % 20 == 0:
+ if blockcount % 40 == 0:
print(".", file=sys.stderr, end="")
- if blockcount % 800 == 0:
+ if blockcount % 1600 == 0:
print("\n", file=sys.stderr, end="")
mem = get_process_memory()
print(f" - MEM: {mem:7.2f} MB in use", file=sys.stderr)
@@ -2186,7 +2197,7 @@ def FindAndLoadSurvex(survexblockroot):
)
unseensroot = re.sub(r"\.svx$", "", UNSEENS)
- excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", unseensroot]
+ excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", "deprecated", "subsections", unseensroot]
removals = []
for x in unseens:
for o in excpts:
@@ -2202,7 +2213,8 @@ def FindAndLoadSurvex(survexblockroot):
file=sys.stderr,
)
check_team_cache()
- print(" -- Now loading the previously-omitted survex files.", file=sys.stderr)
+ print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr)
+ print(f" - (except: {excpts})", file=sys.stderr)
with open(Path(settings.SURVEX_DATA, UNSEENS), "w") as u:
u.write(
@@ -2498,7 +2510,11 @@ def MakeFileRoot(fn):
print(f" - Making/finding a new root survexfile for this import: {fn}")
fileroot = SurvexFile(path=fn, cave=cave)
- fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # default
+ try:
+ fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # default
+ except:
+ fileroot.survexdirectory = None
+
if cave:
# But setting the SurvexDirectory does work !