diff options
Diffstat (limited to 'parsers')
-rw-r--r-- | parsers/survex.py | 156 |
1 files changed, 102 insertions, 54 deletions
diff --git a/parsers/survex.py b/parsers/survex.py index 985517b..b741e30 100644 --- a/parsers/survex.py +++ b/parsers/survex.py @@ -21,6 +21,8 @@ It also scans the Loser repo for all the svx files, which it loads individually """ todo = """ +- Lots to do to cut down on unnecessary .save() calls to avoid hitting the db so much. SHould +speed it up noticably. - LoadSurvexFile() Creates a new current survexfile and valid .survexdirectory The survexblock passed-in is not necessarily the parent. FIX THIS. @@ -119,7 +121,7 @@ def get_offending_filename(path): """ return "/survexfile/" + path + ".svx" -trip_people_cache = {} +trip_people_cache = {} # DANGEROUS, should clean it on PUSH/POP begin/end def get_team_on_trip(survexblock): """Uses a cache to avoid a database query if it doesn't need to. Only used for complete team.""" @@ -143,7 +145,7 @@ def get_people_on_trip(survexblock): return list(set(people)) -trip_person_cache = {} +trip_person_cache = {} # pre survexblock, so robust wrt PUSH/POP begin/end def put_person_on_trip(survexblock, personexpedition, tm): """Uses a cache to avoid a database query if it doesn't need to. Only used for a single person""" @@ -168,21 +170,20 @@ def put_person_on_trip(survexblock, personexpedition, tm): trip_person_cache[(survexblock, personexpedition)] = 1 return False -person_pending_cache = {} +person_pending_cache = {} # pre survexblock, so robust wrt PUSH/POP begin/end def add_to_pending(survexblock, tm): + """Collects team names before we have a date so cannot validate against + expo attendance yet""" global person_pending_cache if survexblock not in person_pending_cache: person_pending_cache[survexblock] = set() person_pending_cache[survexblock].add(tm) - # personexpedition = None - # personrole, created = SurvexPersonRole.objects.update_or_create( - # survexblock=survexblock, personexpedition=personexpedition, personname=tm) - # personrole.save() def get_team_pending(survexblock): - """A set of *team names before we get to the *date line in a survexblock""" + """A set of *team names before we get to the *date line in a survexblock + """ global person_pending_cache if survexblock in person_pending_cache: @@ -267,7 +268,8 @@ class LoadingSurvex: unitsstack = [] legsnumberstack = [] slengthstack = [] - teamexpedstack = [] + teaminheritstack = [] + teamcurrentstack = [] stackbegin = [] flagsstack = [] datastack = [] @@ -297,7 +299,8 @@ class LoadingSurvex: currentsurvexfile = None currentcave = None caverndate = None - currentteamexped = [] + currentteam = set() + inheritteam = set() pending = [] def __init__(self): @@ -327,6 +330,47 @@ class LoadingSurvex: parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) ) + def get_team_inherited(self, survexblock): # survexblock only used for debug mesgs + """See get_team_pending(survexblock) which gets called at the same time, + when we see a *date line""" + global person_pending_cache + + if self.inheritteam: + message = ( + f"- INHERITING ({survexblock.parent})>({survexblock}) {survexblock.survexfile.path} '{self.inheritteam}'" + ) + print(self.insp + message) + # stash_data_issue( + # parser="survex", message=message, url=None, sb=(survexblock.survexfile.path) + # ) + return self.inheritteam + + def fix_anonymous(self, survexblock): + """Called when we reach *end of a block + Checks to see if the block has no team attached, in which case it uses the + inherited team. + If the block has no date, then it is assumed to be an abstract container, + with no relevant team, and anyway we can't attach a PersonExpedition without + knowing the year. Unless its parent has an identified expo""" + + if survexblock.parent.name == "troggle_unseens": + # Bolluxed up if we try to inherit from this random junk + return + + expo = survexblock.expedition # may be None if no *date yet + if not expo: + expo = survexblock.parent.expedition # immediate parent works mostly + if not expo: + return + + if not self.currentteam: # i.e. if it is a dated block and has no team + if teamnames := self.get_team_inherited(survexblock):# WALRUS + for tm in teamnames: + personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower()) + if personexpedition: + put_person_on_trip(survexblock, personexpedition, tm) + return + def LoadSurvexTeam(self, survexblock, line): """Interpeting the *team fields means interpreting older style survex as well as current survex standard, *team Insts Anthony Day - this is how most of our files specify the team member @@ -341,17 +385,14 @@ class LoadingSurvex: def record_team_member(tm, survexblock): tm = tm.strip("\"'").strip() # Refactor. The dict GetPersonExpeditionNameLookup(expo) indexes by name and has values of personexpedition - # This is convoluted, the whole personexpedition concept is unnecessary. + # This is convoluted # we need the current expedition, but if there has been no date yet in the survex file, we don't know which one it is. # so we can't validate whether the person was on expo or not. # we will have to attach them to the survexblock anyway, and then do a # later check on whether they are valid when we get the date. - # We have hundreds of updated Django database updates when the same person is - # on the same trip in multiple roles. We should de-duplicate these ourselves in Python - # instead of using SurvexPersonRole.objects.update_or_create() which is expensive. - + self.currentteam.add(tm) # used in push/pop block code expo = survexblock.expedition # may be None if no *date yet if expo: @@ -366,7 +407,7 @@ class LoadingSurvex: personexpedition = GetPersonExpeditionNameLookup(expo).get(tm.lower()) if personexpedition: put_person_on_trip(survexblock, personexpedition, tm) - self.currentteamexped.append(personexpedition) # used in push/pop block code + elif known_foreigner(tm): # note, not using .lower() message = f"- *team {expo.year} '{tm}' known foreigner on *team {survexblock.survexfile.path} ({survexblock}) in '{line}'" print(self.insp + message) @@ -523,7 +564,7 @@ class LoadingSurvex: pe = GetPersonExpeditionNameLookup(expo).get(tm.lower()) if pe: put_person_on_trip(survexblock, pe, tm) - self.currentteamexped.append(pe) + else: message = f"! *team {year} '{tm}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) " print(self.insp + message) @@ -531,38 +572,39 @@ class LoadingSurvex: parser="survex", message=message, url=None, sb=(survexblock.survexfile.path), - ) - + ) + + # All this next section should not happen unless there are >1 *date lines in a block - for pr in team: # pr is a PersonRole object - if not pr.expeditionday: # *date and *team in 'wrong' order. - - pr.expeditionday = survexblock.expeditionday - pr.save() - - if not pr.personexpedition: - pe = GetPersonExpeditionNameLookup(expo).get(pr.personname.lower()) - if pe: # pe is a PersonExpedition - # message = "! {} ({}) Fixing undated personexpedition '{}'".format(survexblock.survexfile.path, survexblock, p.personname) - # print(self.insp+message) - # stash_data_issue(parser='survex', message=message) - pr.personexpedition = pe - pr.person = pr.personexpedition.person - pr.save() - self.currentteamexped.append(pe) # used in push/pop block code - elif known_foreigner(pr.personname): # note, not using .lower() - message = f"- *team {expo.year} '{pr.personname}' known foreigner on *date {survexblock.survexfile.path} ({survexblock}) in '{line}'" - print(self.insp + message) - # stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path)) - else: - message = f"! *team {year} '{pr.personname}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) '{pr.personname}'" - print(self.insp + message) - stash_data_issue( - parser="survex", - message=message, - url=None, sb=(survexblock.survexfile.path), - ) + # for pr in team: # pr is a PersonRole object + # if not pr.expeditionday: # *date and *team in 'wrong' order. + + # pr.expeditionday = survexblock.expeditionday + # pr.save() + + # if not pr.personexpedition: + # pe = GetPersonExpeditionNameLookup(expo).get(pr.personname.lower()) + # if pe: # pe is a PersonExpedition + # # message = "! {} ({}) Fixing undated personexpedition '{}'".format(survexblock.survexfile.path, survexblock, p.personname) + # # print(self.insp+message) + # # stash_data_issue(parser='survex', message=message) + # pr.personexpedition = pe + # pr.person = pr.personexpedition.person + # pr.save() + + # elif known_foreigner(pr.personname): # note, not using .lower() + # message = f"- *team {expo.year} '{pr.personname}' known foreigner on *date {survexblock.survexfile.path} ({survexblock}) in '{line}'" + # print(self.insp + message) + # # stash_data_issue(parser='survex', message=message, url=None, sb=(survexblock.survexfile.path)) + # else: + # message = f"! *team {year} '{pr.personname}' FAIL personexpedition lookup on *date {survexblock.survexfile.path} ({survexblock}) '{pr.personname}'" + # print(self.insp + message) + # stash_data_issue( + # parser="survex", + # message=message, + # url=None, sb=(survexblock.survexfile.path), + # ) oline = line if len(line) > 10: @@ -1218,12 +1260,12 @@ class LoadingSurvex: # rx_ref2 = re.compile(r'(?i)\s*ref[.;]?') # This should also check that the QM survey point rxists in the block + depth = " " * self.depthbegin refline = self.rx_commref.match(comment) if refline: - # comment = re.sub('(?i)\s*ref[.;]?',"",comment.strip()) comment = self.rx_ref2.sub("", comment.strip()) - print(f"rx_ref2 -- {comment=} in {survexblock.survexfile.path} :: {survexblock}") + print(f"{self.depthbegin:2}{depth} - rx_ref2 -- {comment=} in {survexblock.survexfile.path} :: {survexblock}") self.LoadSurvexRef(survexblock, comment) # handle @@ -1411,17 +1453,20 @@ class LoadingSurvex: if self.rx_begin.match(cmd): blkid = args.lower() # PUSH state ++++++++++++++ + self.depthbegin += 1 self.stackbegin.append(blkid) self.unitsstack.append((self.units, self.unitsfactor)) self.legsnumberstack.append(self.legsnumber) self.slengthstack.append(self.slength) - self.teamexpedstack.append(self.currentteamexped) # just one person?! + self.teaminheritstack.append(self.inheritteam) + self.teamcurrentstack.append(self.currentteam) pushblock() # PUSH state ++++++++++++++ self.legsnumber = 0 self.slength = 0.0 self.units = "metres" - self.currentteamexped = [] + self.inheritteam = self.currentteam + self.currentteam = set() # zero the current team when we start a new block printbegin() newsurvexblock = SurvexBlock( name=blkid, @@ -1447,6 +1492,8 @@ class LoadingSurvex: printend() slengthtotal += self.slength nlegstotal += self.legsnumber + + self.fix_anonymous(survexblock) try: survexblock.parent.save() # django insists on this although it is already saved !? @@ -1460,7 +1507,8 @@ class LoadingSurvex: raise # POP state ++++++++++++++ popblock() - self.currentteamexped = self.teamexpedstack.pop() # just one person?! + self.inheritteam = self.teaminheritstack.pop() + self.currentteam = self.teamcurrentstack.pop() self.legsnumber = self.legsnumberstack.pop() self.units, self.unitsfactor = self.unitsstack.pop() self.slength = self.slengthstack.pop() @@ -1917,10 +1965,10 @@ def FindAndLoadSurvex(survexblockroot): ) u.write(f"; autogenerated by parser/survex.py from databasereset.py on '{datetime.now(timezone.utc)}'\n") u.write(f"; omitting any file beginning with {excpts}\n\n") - u.write("*begin unseens\n") + u.write("*begin troggle_unseens\n") for x in sorted(unseens): u.write(f" *include {x}\n") - u.write("*end unseens\n") + u.write("*end troggle_unseens\n") survexfileroot = survexblockroot.survexfile # i.e. SURVEX_TOPNAME only |