diff options
author | Philip Sargent <philip.sargent@gmail.com> | 2023-09-11 20:38:14 +0300 |
---|---|---|
committer | Philip Sargent <philip.sargent@gmail.com> | 2023-09-11 20:38:14 +0300 |
commit | d323ff270083ead337f8297653bb833100306999 (patch) | |
tree | cb234cd494092c14f5f878bbc2cf060b8a5f85a9 /parsers/caves.py | |
parent | 47db19f1a24d22610de2a5782737650f4b0941dc (diff) | |
download | troggle-d323ff270083ead337f8297653bb833100306999.tar.gz troggle-d323ff270083ead337f8297653bb833100306999.tar.bz2 troggle-d323ff270083ead337f8297653bb833100306999.zip |
debugging Cave page links..
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- | parsers/caves.py | 66 |
1 files changed, 41 insertions, 25 deletions
diff --git a/parsers/caves.py b/parsers/caves.py index 70f92cb..974f73e 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -27,6 +27,9 @@ So is the first thing that creates tables. """ todo = """ +- When reading cave data, to start off wit we do not know the cave id (slug) so we can't give a useful url in + the error message, but we do have the filename. Systematize this, and the same thing with reading entrance files. + - Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file. So we will need a separate file-editing capability just for this configuration file ?! @@ -35,15 +38,15 @@ todo = """ - rewrite archaic regex re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S) in modern form and pre-compile it. - -- Semi-automagically import all the 1627- pending caves and create HTML files for them to be - edited individually. (These are caves we only know about because we have German survex files.) - crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a non null parent, But this is not true. The only solution we have found is to let it crash, then stop and restart MariaDB (requires a logon able to sudo) and then restart the databasereset.py - again. (status as of July 2022) + again. (status as of July 2022). May not happen now that class Area is removed (Sept.2023). """ +AREACODES = {"1623", "1624", "1626", "1627"} # NB set not dict +ARGEAREAS = {"1626", "1627"} + entrances_xslug = {} caves_xslug = {} areas_xslug = {} @@ -105,7 +108,7 @@ def create_new_cave(svxpath, msg=None): :loser: repo which is not recognised as a known cave. ALSO called by survex parser when it finds a cave it doesn't recognise """ - # e.g. svxpath = "caves-1623/666/antig" + # e.g. svxpath = "caves-1623/666/beast" .svx print(f"Create new cave at {svxpath} - {msg}") # survex_file = svxpath + ".svx" @@ -117,16 +120,13 @@ def create_new_cave(svxpath, msg=None): if a[0:3] == "162": areacode = a[0:4] url = f"{areacode}/{caveid}.html" # Note we are appending the .html as we are believe in backwards compatability. - #url = f"{areacode}/{a[5:]}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls else: print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'? {msg}") areacode = "1623" url = f"1623/{caveid}.html" - #url = f"1623/{k}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls - + k = f"{areacode}-{caveid}" - caves = Cave.objects.filter(unofficial_number=caveid, areacode =areacode) if caves: message = f" ! Already exists, caveid:{k} in areacode {areacode} {caves} - {msg}" @@ -149,14 +149,15 @@ def create_new_cave(svxpath, msg=None): cave.save() return cave -def do_ARGE_cave(slug, caveid, url, areacode, svxid): - """Only called by survex parser. +def do_ARGE_cave(slug, caveid, svxurl, areacode, svxid): + """Only called by survex parser, NOT the cave parser. Creates a new Cave object, but with abbreviated data as the survex file (from ARGE) is all we have. We already know the survex file. - We already know that it doesn't exist... though there are bugs.. + We already know that the cave doesn't exist... though there are bugs.. """ default_note = "This is an ARGE cave where we only have the survex file and no other information" + url = f"{areacode}/{caveid}.html" urltest = Cave.objects.filter(url=url) if urltest: @@ -171,11 +172,21 @@ def do_ARGE_cave(slug, caveid, url, areacode, svxid): DataIssue.objects.create(parser="caves", message=message, url=url) print(message) return numtest[0] - + + sv = Path(settings.SURVEX_DATA, svxid + ".svx") + if sv.is_file: + with open(sv, "r") as s: + line1 = s.readline() + line2 = s.readline() + line3 = s.readline() + else: + print(f"not correct {sv}", file=sys.stderr) + cave = Cave( - underground_description="ARGE cave.", - survex_file= f"{svxid}.svx", # or is this svxurl ?! - # url=url, No, the url spplied is that of the survexfile not of the cave file, e.g. /1626/254/254 + underground_description="ARGE cave.\n3 lines of the survexfile:\n" + line1 +line2 +line3, + unofficial_number="ARGE", + survex_file= svxurl, + url=url, notes=default_note, areacode=areacode, ) @@ -427,7 +438,7 @@ def read_entrance(filename, ent=None): DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/") print(message) - lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=context) + lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/") alt = getXMLmax1("alt") approach = getXMLmax1("approach") @@ -611,12 +622,12 @@ def read_cave(filename, cave=None): # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. fn = settings.CAVEDESCRIPTIONS / filename - context = filename + context = f"/cave_data/{filename}_edit" # print(f" - Reading Cave from cave descriptions file {fn}") if not fn.exists(): message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'" - DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit") + DataIssue.objects.create(parser="caves", message=message, url=context) print(message) return None @@ -626,7 +637,7 @@ def read_cave(filename, cave=None): if len(cavecontentslist) != 1: message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.' - DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit") + DataIssue.objects.create(parser="caves", message=message, url=context) print(message) return None @@ -634,10 +645,11 @@ def read_cave(filename, cave=None): slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context) if len(slugs) > 1: message = f" ! - More than one slug for a cave: {cave}, slugs: {slugs}. Ignoring all except first." - DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}_edit/") + DataIssue.objects.create(parser="caves", message=message, url=context) print(message) slug = slugs[0] - + context = url=f"/{slug[0:4]}/{slug}_cave_edit/" + non_public = getXMLmax1("non_public") official_name = getXMLmax1("official_name") kataster_code = getXMLmax1("kataster_code") @@ -668,7 +680,7 @@ def read_cave(filename, cave=None): kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up for k in kaves: message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug()) - DataIssue.objects.create(parser="caves", message=message) + DataIssue.objects.create(parser="caves", message=message, url=context) print(message) for k in kaves: if k.slug() is not None: @@ -679,6 +691,7 @@ def read_cave(filename, cave=None): # From here on the code applies to both edited and newly-imported caves (mostly!) do_caveslugstuff() # needs cave!=None + cave.non_public=boolify(non_public) cave.official_name=official_name[0] cave.kataster_code=kataster_code[0] @@ -701,11 +714,14 @@ def read_cave(filename, cave=None): areas = getXML(cavecontents, "area", context=context) # can be multiple <area> tags for area_slug in areas: - if area_slug in ["1623", "1624", "1626", "1627"]: # ignore sub areas which are in another <area> tag + if area_slug in AREACODES: # ignore sub areas which are in another <area> tag cave.areacode = area_slug else: cave.subarea = area_slug - + if not cave.areacode: + if slug[0:4] in AREACODES: + cave.areacode = slug[0:4] + entrances = getXML(cavecontents, "entrance", context=context) do_entrances() # print(f"- {entrances_xslug=}") |