Diffstat (limited to 'parsers')
-rw-r--r--  parsers/survex.py  138
1 file changed, 70 insertions(+), 68 deletions(-)
diff --git a/parsers/survex.py b/parsers/survex.py
index d712394..681dc48 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -22,20 +22,19 @@ It also scans the Loser repo for all the svx files, which it loads individually
"""
todo = """
+- Obscure bug in the *team inheritance and rootblock initialization needs tracking down,
+ probably in the team cache which should NOT be global, but should be an instance variable of
+ LoadingSurvex
+
- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. Should
speed it up noticeably.
-
-- Obscure bug in the *team inheritance and rootblock initialization needs tracking down
- Learn to use Django .select_related() and .prefetch_related() to speed things up
https://zerotobyte.com/how-to-use-django-select-related-and-prefetch-related/
- LoadSurvexFile() Creates a new current survexfile
- The survexblock passed-in is not necessarily the parent. FIX THIS.
-
-- Finish writing the parse_one_file() function for survexfiles edited online. Perhaps
- easier if this is a completely new file rather than an existing file.. nasty.
-
+ The survexblock passed-in is not necessarily the survex parent. FIX THIS.
+
- When Olly implements LEG in the 'dump3d --legs' utility, we can use that to get the length of
all the legs in a survex block instead of adding them up ourselves. That means we can
ignore all the Units and offset stuff, and troggle will work with survex files with backsights,
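
The select_related() item in the todo above is the easiest one to illustrate. A minimal sketch, not code from this commit: it assumes SurvexPersonRole reaches its SurvexFile through the survexblock foreign key, which matches the block.survexfile.path usage later in this diff; "blocks" is a hypothetical iterable of SurvexBlock objects.

    # Hedged sketch of the N+1 fix the todo points at. The relation names
    # are assumptions; only SurvexPersonRole and survexfile.path appear
    # in this diff.
    roles = (
        SurvexPersonRole.objects.filter(survexblock__in=blocks)
        .select_related("survexblock__survexfile")  # one JOINed query
    )
    for role in roles:
        # no per-row query: the related rows arrived with the JOIN
        print(role.survexblock.survexfile.path)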
@@ -62,27 +61,10 @@ class SurvexLeg:
compass = 0.0
clino = 0.0
-def IdentifyCave(cavepath):
- """Given a file path for a survex file, or a survex-block path,
- return the cave object
- """
- caveslist = GetCaveLookup()
- if cavepath.lower() in caveslist:
- return caveslist[cavepath.lower()]
- # TO DO - this predates the big revision to Gcavelookup so look at this again carefully
- path_match = LoadingSurvex.rx_cave.search(cavepath) # use as Class method
- if path_match:
- sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
- guesses = [sluggy.lower(), path_match.group(2).lower()]
- for g in guesses:
- if g in caveslist:
- caveslist[cavepath] = caveslist[g]
- return caveslist[g]
- print(f" ! Failed to find cave for {cavepath.lower()}")
- else:
- # not a cave, but that is fine.
- # print(f' ! No regex(standard identifier) cave match for {cavepath.lower()}')
- return None
+
+
+
+
def datewallet(w, earliest):
"""Gets the date of the youngest survexblock associated with the wallet
@@ -141,7 +123,8 @@ def get_offending_filename(path):
"""
return "/survexfile/" + path + ".svx"
-trip_people_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
+# THIS SHOULD NOT BE GLOBAL! Should be per instance of the file loader.
+trip_people_cache = {} # indexed by survexblock, so never needs cleaning out
def get_team_on_trip(survexblock):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for complete team."""
@@ -165,8 +148,9 @@ def get_people_on_trip(survexblock):
return list(set(people))
-trip_person_record = {} # per survexblock, so robust wrt PUSH/POP begin/end
-trip_team_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
+# THIS SHOULD NOT BE GLOBAL! Should be per instance of the file loader.
+trip_person_record = {} # indexed by (survexblock, personexpedition) - so never needs cleaning out
+trip_team_cache = {} # indexed by survexblock, so never needs cleaning out
def put_person_on_trip(survexblock, personexpedition, tm):
"""Uses a cache to avoid a database query if it doesn't need to.
Only used for a single person"""
@@ -206,18 +190,17 @@ def confirm_team_on_trip(survexblock):
SurvexPersonRole.objects.bulk_create(trip_team_cache[survexblock])
trip_team_cache[survexblock] = [] # in database now, so empty cache
-def check_team_cache():
+def check_team_cache(label=None):
global trip_team_cache
-
message = f"! check_team_cache() called.. "
print(message)
-
+ print(message, file=sys.stderr)
for block in trip_team_cache:
- message = f"! *team CACHEFAIL, already created {block.survexfile.path} ({block}) "
+ message = f"! *team CACHEFAIL, trip_team_cache {block.survexfile.path} ({block}). label:{label}"
print(message)
-
+ print(message, file=sys.stderr)
-person_pending_cache = {} # per survexblock, so robust wrt PUSH/POP begin/end
+person_pending_cache = {} # indexed by survexblock, so robust wrt PUSH/POP begin/end
def add_to_pending(survexblock, tm):
"""Collects team names before we have a date so cannot validate against
expo attendance yet"""
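
A minimal sketch of the de-globalising refactor that the "THIS SHOULD NOT BE GLOBAL" comments (and the todo at the top of this diff) call for. The cache names and keys are the ones above; the query inside get_team_on_trip() is an assumption standing in for the real lookup.

    class LoadingSurvex:
        def __init__(self):
            # Caches become instance state, so every parser run starts clean
            # and two runs can never see each other's entries.
            self.trip_people_cache = {}     # indexed by survexblock
            self.trip_person_record = {}    # indexed by (survexblock, personexpedition)
            self.trip_team_cache = {}       # indexed by survexblock
            self.person_pending_cache = {}  # team names seen before any *date

        def get_team_on_trip(self, survexblock):
            """Same caching contract as the module-level function above."""
            if survexblock in self.trip_people_cache:
                return self.trip_people_cache[survexblock]
            qpeople = SurvexPersonRole.objects.filter(survexblock=survexblock)  # assumed query
            self.trip_people_cache[survexblock] = qpeople
            return qpeople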
@@ -1180,7 +1163,45 @@ class LoadingSurvex:
self.svxprim[headpath.lower()] = primary
return self.svxprim[headpath.lower()]
- def ReportNonCaveIncludes(self, headpath, includelabel, depth):
+ def IdentifyCave(self, cavepath, svxid, depth):
+ """Given a file path for a survex file, e.g. /1626/107/107.svx, or a survex-block path,
+ return the cave object
+
+ REWRITE ALL THIS and make a method on the class
+ """
+ caveslist = GetCaveLookup()
+ if cavepath.lower() in caveslist: # will only work after we load in full paths as indexes, see below
+ return caveslist[cavepath.lower()]
+ # rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")
+ path_match = self.rx_cave.search(cavepath) # use as Class method.
+ if path_match:
+ sluggy = f"{path_match.group(1)}-{path_match.group(2)}"
+ # guesses = [sluggy.lower(), path_match.group(2).lower()] # this looks for JUST "107" and ignores 1626..
+ guesses = [sluggy.lower()] # full 1626-107 search, don't use short-forms
+ for g in guesses:
+ if g in caveslist:
+ caveslist[cavepath] = caveslist[g] # set "caves-1626/107/107.svx" as index to cave 1626-107
+ return caveslist[g]
+ print(f" ! Failed to find cave for {cavepath.lower()}", file=sys.stderr)
+ else:
+ # not a cave, but that is fine.
+ if self.is_it_already_pending(cavepath, svxid, depth):
+ pass
+ else:
+ # It is too late to add it to the pending caves list here, they were already
+ # processed in parsers/caves.py. So we have to do a bespoke creation.
+ cave = create_new_cave(svxid)
+
+ message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
+ print("\n" + message)
+ print("\n" + message, file=sys.stderr)
+ print(f"{self.pending}", end="", file=sys.stderr)
+ stash_data_issue(parser="survex", message=message, url=None, sb=(svxid))
+
+ print(f' ! No regex (standard identifier) cave match for {cavepath.lower()}', file=sys.stderr)
+ return None
+
+ def is_it_already_pending(self, headpath, includelabel, depth):
"""Ignore surface, kataser and gpx *include survex files"""
if not self.pending:
self.pending = set()
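
For reference, the path-to-slug matching that IdentifyCave() performs above, as a standalone sketch using the rx_cave pattern quoted verbatim in its comment:

    import re

    # pattern copied from the commented-out rx_cave line in IdentifyCave()
    rx_cave = re.compile(r"(?i)caves-(\d\d\d\d)/([-\d\w]+|\d\d\d\d-?\w+-\d+)")

    m = rx_cave.search("caves-1626/107/107.svx")
    assert m is not None
    assert f"{m.group(1)}-{m.group(2)}" == "1626-107"  # the full area-cave slug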
@@ -1199,7 +1220,7 @@ class LoadingSurvex:
message = f" - {headpath} is <ignorenoncave> (while creating '{includelabel}' sfile & sdirectory)"
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
- return
+ return True
for i in self.ignoreprefix:
if headpath.startswith(i):
message = (
@@ -1207,28 +1228,17 @@ class LoadingSurvex:
)
# print("\n"+message)
# print("\n"+message,file=sys.stderr)
- return
+ return True
caveid = f"{headpath[6:10]}-{headpath[11:]}".upper()
if caveid in self.pending:
# Yes we didn't find this cave, but we know it is a pending one. So not an error.
- # print(f'! ALREADY PENDING {caveid}',file=sys.stderr)
- return
+ print(f"! ALREADY PENDING caveid {caveid}", file=sys.stderr)
+ return True
id = caveid[5:]
if id in self.pending:
- print(f"! ALREADY PENDING {id}", file=sys.stderr)
- return
+ print(f"! ALREADY PENDING id {id}", file=sys.stderr)
+ return True
- # It is too late to add it to the pending caves list here, they were already
- # processed in parsers/caves.py So we have to do a bespoke creation.
- svxpath= includelabel
- cave = create_new_cave(svxpath)
-
- message = f" ! Warning: cave identifier '{caveid}'or {id} (guessed from file path) is not a known cave. Need to add to expoweb/cave_data/pendingcaves.txt ? In '{includelabel}.svx' at depth:[{len(depth)}]."
- print("\n" + message)
- print("\n" + message, file=sys.stderr)
- print(f"{self.pending}", end="", file=sys.stderr)
- stash_data_issue(parser="survex", message=message, url=None, sb=(includelabel))
-
def LoadSurvexFile(self, svxid):
"""Creates SurvexFile in the database, and SurvexDirectory if needed
Creates a new current survexfile and valid .survexdirectory
@@ -1267,15 +1277,9 @@ class LoadingSurvex:
newfile.save() # until we do this there is no internal id so no foreign key works
self.currentsurvexfile = newfile
newfile.primary = self.set_primary(headpath)
-
- # REPLACE all this IdentifyCave() stuff with GCaveLookup ?
- cave = IdentifyCave(headpath) # cave already exists in db
- if not cave:
- # probably a surface survey, or a cave in a new area
- # e.g. 1624 not previously managed, and not in the pending list
- self.ReportNonCaveIncludes(headpath, svxid, depth)
- #try again
- cave = IdentifyCave(headpath)
+
+ # refactor this !
+ cave = self.IdentifyCave(headpath, svxid, depth) # cave already exists in db?
if cave:
newfile.cave = cave
# print(f"\n - New directory '{newdirectory}' for cave '{cave}'",file=sys.stderr)
@@ -1530,7 +1534,7 @@ class LoadingSurvex:
slengthtotal = 0.0
nlegstotal = 0
self.relativefilename = path
- IdentifyCave(path) # this will produce null for survex files which are geographic collections
+ #self.IdentifyCave(path, svxid, depth) # this will produce null for survex files which are geographic collections
self.currentsurvexfile = survexblock.survexfile
self.currentsurvexfile.save() # django insists on this although it is already saved !?
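
The double .save() flagged here is part of the database cost the todo wants to cut. One standard mitigation, sketched under the assumption that only the cave field changed (update_fields is stock Django; nothing else here is from the commit):

    # Narrow the UPDATE to the columns actually touched instead of
    # rewriting every field on the row a second time.
    newfile.cave = cave
    newfile.save(update_fields=["cave"])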
@@ -2198,7 +2202,6 @@ def FindAndLoadSurvex():
)
print(f" -- (but ignoring {len(removals)} of them)", file=sys.stderr)
- check_team_cache()
s_date = date.today().isoformat().replace('-','.')
print(f" -- Now loading the previously-omitted survex files as {UNSEENS} *date {s_date}", file=sys.stderr)
print(f" - (except: {excpts})", file=sys.stderr)
@@ -2250,8 +2253,6 @@ def FindAndLoadSurvex():
flinear.write(f"{omit_scan.depthinclude:2} {indent} *edulcni {unseensroot}\n")
fcollate.write(f";*edulcni {UNSEENS}\n")
-
- check_team_cache()
mem1 = get_process_memory()
flinear.write(f"\n - MEM:{mem1:.2f} MB STOP {UNSEENS} Unseen Oddments\n")
@@ -2294,6 +2295,7 @@ def FindAndLoadSurvex():
# ps = pstats.Stats(pr2, stream=f)
# ps.sort_stats(SortKey.CUMULATIVE)
# ps.print_stats()
+
mem1 = get_process_memory()
print(f"\n - MEM:{mem1:7.2f} MB STOP", file=sys.stderr)
print(f" - MEM:{mem1 - mem0:7.3f} MB ADDITIONALLY USED", file=sys.stderr)