Diffstat (limited to 'parsers')
-rw-r--r--  parsers/caves.py    |  4
-rw-r--r--  parsers/imports.py  |  2
-rw-r--r--  parsers/logbooks.py | 10
-rw-r--r--  parsers/scans.py    |  1
-rw-r--r--  parsers/survex.py   | 50
5 files changed, 43 insertions, 24 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index b376801..f8b39e9 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -483,7 +483,7 @@ def read_cave(filename, cave=None):
                     cave=cave, entrance_letter=letter, entrance=entrance
                 )
             except:
-                message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {entrance} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"'
+                message = f' ! Entrance setting failure, slug:"{slug}" #entrances:{len(entrances)} {e} letter:"{letter}" cave:"{cave}" filename:"cave_data/{filename}"'
                 DataIssue.objects.create(parser="entrances", message=message, url=f"{cave.url}_edit/")
                 print(message)
 def reload_entrances():
@@ -651,7 +651,7 @@ def read_cave(filename, cave=None):
             print(message)
 
         if description_file[0]: # if not an empty string
-            message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "{CAVEDESCRIPTIONS}/{filename}"'
+            message = f' - {slug:12} Note (not an error): complex description filename "{description_file[0]}" inside "cave_data/{filename}"'
             DataIssue.objects.create(parser="caves ok", message=message, url=f"/{slug}_cave_edit/")
             print(message)
diff --git a/parsers/imports.py b/parsers/imports.py
index 50b45cf..9191194 100644
--- a/parsers/imports.py
+++ b/parsers/imports.py
@@ -27,7 +27,7 @@ def import_people():
     troggle.parsers.people.load_people_expos()
 
 def import_surveyscans():
-    print("-- Importing Survey Scans")
+    print("-- Importing Survey Scans and Wallets")
     with transaction.atomic():
         troggle.parsers.scans.load_all_scans()
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index e701183..84d0f5a 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -57,7 +57,7 @@ LOGBOOK_PARSER_SETTINGS = {
 LOGBOOKS_DIR = "years" # subfolder of settings.EXPOWEB
 
 ENTRIES = {
-    "2023": 27,
+    "2023": 32,
     "2022": 90,
     "2019": 55,
     "2018": 95,
@@ -305,8 +305,8 @@ def parser_html(year, expedition, txt, seq=""):
         endmatch = re.match(r"(?i)(?s).*<hr\s*/>([\s\S]*?)(?=</body)", txt)
         endpara = endmatch.groups()[0].strip()
-        #print(f" - endpara:\n'{endpara}'")
         if len(endpara) > 0:
+            print(f"\n - {year} endpara:\n'{endpara}'")
             endpath = Path(settings.EXPOWEB, LOGBOOKS_DIR, year, "endmatter.html")
             with open(endpath, "w") as end:
                 end.write(endpara + "\n")
@@ -560,7 +560,8 @@ def parse_logbook_for_expedition(expedition, blog=False):
     if logbook_parseable:
         # --------------------
         parser = globals()[parsefunc]
-        print(f" - {year} parsing with {parsefunc} - {lb}")
+        # print(f" - {year} parsing with {parsefunc} - {lb}")
+        print(" .", end="")
         logentries = parser(year, expedition, txt, sq) # this launches the right parser
         # --------------------
@@ -665,7 +666,7 @@ def LoadLogbooks():
         logentries = parse_logbook_for_expedition(b, blog=True) # loads the blog logbook for one expo
         allentries += logentries
 
-    print(f"total {len(allentries):,} log entries parsed in all expeditions")
+    print(f"\n - {len(allentries):,} log entries parsed in all expeditions")
     mem = get_process_memory()
     print(f" - MEM: {mem:7.2f} MB in use, {mem-mem1:7.2f} MB more", file=sys.stderr)
     duration = time.time() - start
@@ -682,6 +683,7 @@ def LoadLogbooks():
     for expo in expos:
         expo.save() # to save logbook name property
     mem = get_process_memory()
+    print(f" - {len(allentries):,} log entries saved into database")
     print(f" - MEM: {mem:7.2f} MB in use, {mem-mem1:7.2f} MB more", file=sys.stderr)
     duration = time.time() - start
     print(f" - TIME: {duration:7.2f} s", file=sys.stderr)
diff --git a/parsers/scans.py b/parsers/scans.py
index 3f042da..9929209 100644
--- a/parsers/scans.py
+++ b/parsers/scans.py
@@ -38,6 +38,7 @@ def load_all_scans():
     Wallet.objects.all().delete()
     print(" - deleting all Wallet and SingleScan objects")
     DataIssue.objects.filter(parser="scans").delete()
+    DataIssue.objects.filter(parser="wallets").delete()
 
     # These are valid old file types to be visible, they are not necessarily allowed to be uploaded to a new wallet.
     valids = [
diff --git a/parsers/survex.py b/parsers/survex.py
index b3db9e7..2be2217 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -271,7 +271,7 @@ class LoadingSurvex:
     rx_names = re.compile(r"(?i)names")
     rx_flagsnot = re.compile(r"not\s")
     rx_linelen = re.compile(r"[\d\-+.]+$")
-    instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)"
+    instruments = "(bitch|bodger|bolt|bolter|bolting|book|clino|comp|compass|consultant|disto|distox|distox2|dog|dogsbody|drawing|drill|gps|helper|inst|instr|instrument|monkey|nagging|nail|nail_polish|nail_polish_bitch|nail_polish_monkey|nail_varnish|nail_varnish_bitch|note|paint|photo|pic|point|polish|powerdrill|rig|rigger|rigging|shoot|sketch|slacker|something|tape|topodroid|unknown|useless|varnish|waiting_patiently)"
     rx_teammem = re.compile(r"(?i)" + instruments + "?(?:es|s)?\s+(.*)$")
     rx_teamold = re.compile(r"(?i)(.*)\s+" + instruments + "?(?:es|s)?$")
     rx_teamabs = re.compile(r"(?i)^\s*(" + instruments + ")?(?:es|s)?\s*$")
@@ -435,23 +435,34 @@ class LoadingSurvex:
             self.currentdate = self.inheritdate # unecessary duplication
             # Not an error, so not put in DataIssues, but is printed to debug output
             message = (
-                f"- No *date. INHERITING date from ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}"
+                f"- No *date. INHERITING date '{self.inheritdate:%Y-%m-%d}' from ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}"
             )
             print(self.insp + message)
             # stash_data_issue(
             #     parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) # child
             # )
             if survexblock.survexfile != survexblock.parent.survexfile:
-                # This is noteworthy, however.
-                message = (
-                    f"- Warning *date INHERITED from DIFFERENT file:\n ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}\n {self.stackbegin} {self.inheritdate:%Y-%m-%d}"
-                )
-                print(self.insp + message)
-                stash_data_issue(
-                    parser="survex", message=message, url=None, sb=(survexblock.parent.survexfile.path) # PARENT
-                )
-
-            return self.inheritdate
+                # This is noteworthy, however.
+
+                if survexblock.parent.name == "rootblock":
+                    # Not a sensible thing to inherit a date from, even if a date exists, which it shouldn't...
+                    message = (
+                        f"- No *date. But not sensible to inherit from rootblock. From ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}"
+                    )
+                    print(self.insp + message)
+                    # stash_data_issue(
+                    #     parser="survex", message=message, url=None, sb=(survexblock.survexfile.path)
+                    # )
+                    return
+                else:
+                    message = (
+                        f"- Warning *date '{self.inheritdate:%Y-%m-%d}' INHERITED from DIFFERENT file:\n ({survexblock.parent})-{survexblock.parent.survexfile.path} to ({survexblock})-{survexblock.survexfile.path} {self.inheritdate:%Y-%m-%d}\n {self.stackbegin} {self.inheritdate:%Y-%m-%d}"
+                    )
+                    print(self.insp + message)
+                    stash_data_issue(
+                        parser="survex", message=message, url=None, sb=(survexblock.parent.survexfile.path) # PARENT
+                    )
+            return self.inheritdate
         else:
             # This is not an error in the Expo dataset.
             # Many files just holding *include lines do not have dates.
@@ -1550,9 +1561,9 @@ class LoadingSurvex:
             nonlocal blockcount
             blockcount += 1
-            if blockcount % 20 == 0:
+            if blockcount % 40 == 0:
                 print(".", file=sys.stderr, end="")
-            if blockcount % 800 == 0:
+            if blockcount % 1600 == 0:
                 print("\n", file=sys.stderr, end="")
                 mem = get_process_memory()
                 print(f" - MEM: {mem:7.2f} MB in use", file=sys.stderr)
@@ -2186,7 +2197,7 @@ def FindAndLoadSurvex(survexblockroot):
     )
 
     unseensroot = re.sub(r"\.svx$", "", UNSEENS)
-    excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", unseensroot]
+    excpts = ["surface/terrain", "kataster/kataster-boundaries", "template", "docs", "deprecated", "subsections", unseensroot]
     removals = []
     for x in unseens:
         for o in excpts:
@@ -2202,7 +2213,8 @@ def FindAndLoadSurvex(survexblockroot):
         file=sys.stderr,
     )
     check_team_cache()
-    print(" -- Now loading the previously-omitted survex files.", file=sys.stderr)
+    print(f" -- Now loading the previously-omitted survex files.", file=sys.stderr)
+    print(f" - (except: {excpts})", file=sys.stderr)
 
     with open(Path(settings.SURVEX_DATA, UNSEENS), "w") as u:
         u.write(
@@ -2498,7 +2510,11 @@ def MakeFileRoot(fn):
     print(f" - Making/finding a new root survexfile for this import: {fn}")
     fileroot = SurvexFile(path=fn, cave=cave)
-    fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # default
+    try:
+        fileroot.survexdirectory = SurvexDirectory.objects.get(id=1) # default
+    except:
+        fileroot.survexdirectory = None
+
     if cave: # But setting the SurvexDirectory does work !
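
For context on the MakeFileRoot() change above: the new try/except keeps the import running when no SurvexDirectory row with id=1 exists yet, falling back to None instead of raising. A minimal sketch of the same idea using the Django ORM directly; the import path and helper name below are illustrative assumptions, not part of this commit:

    # Sketch only: QuerySet.first() returns None when no row matches,
    # which behaves like the try/except-to-None fallback in the patch.
    from troggle.core.models.survex import SurvexDirectory  # assumed import path

    def default_survexdirectory():
        # Return the default SurvexDirectory (id=1), or None if it does not exist yet.
        return SurvexDirectory.objects.filter(id=1).first()

Either form avoids an unhandled SurvexDirectory.DoesNotExist when the database is still empty at the start of a full re-import.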