summary refs log tree commit diff stats
path: root/parsers/caves.py
diff options
context:
space:
mode:
author Philip Sargent <philip.sargent@gmail.com> 2024-07-02 12:23:33 +0300
committer Philip Sargent <philip.sargent@gmail.com> 2024-07-02 12:23:33 +0300
commit e4d3e33dec40fe9b494c46a486172055a2e80e00 (patch)
tree 5579c0a87a2a314bed31f95930d3e95008bd04a2 /parsers/caves.py
parent 208c0ffa1f1d3af3959e2bec8f837d6c03935799 (diff)
download troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.gz
troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.tar.bz2
troggle-e4d3e33dec40fe9b494c46a486172055a2e80e00.zip
bug fix
Diffstat (limited to 'parsers/caves.py')
-rw-r--r-- parsers/caves.py 63
1 files changed, 24 insertions, 39 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 0bf3520..f2176e0 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -479,12 +479,6 @@ def read_entrance(filename, ent=None):
contents = f.read()
context = filename
- # validate filename, check areacode
- ent_area = filename[:4]
- if ent_area not in AREACODES:
- message = f'! BAD AREA CODE in "{filename}". Not recognised.'
- DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
- print(message)
# Derive the letter, entrance slug and cave slug fromthe filename
@@ -497,10 +491,16 @@ def read_entrance(filename, ent=None):
letter_fn = ""
cave_name_fn = caveslug_fn[5:] # remove initial "1623-"
+ ent_area = filename[:4]
ent_edit_url = f"/{caveslug_fn}:{entslug_fn}_entrance_edit"
cave_edit_url = f"/{ent_area}/{cave_name_fn}/{caveslug_fn}_cave_edit"
+ # validate filename, check areacode
+ if ent_area not in AREACODES:
+ message = f'! BAD AREA CODE in "{filename}". Not recognised.'
+ DataIssue.objects.create(parser="entrances", message=message, url=ent_edit_url)
+ print(message)
entrancecontentslist = getXML(contents, "entrance", maxItems=1, context=context)
@@ -726,8 +726,7 @@ def read_cave(filename, mvf=None, cave=None):
def check_slug(areacode, kataster_number, unofficial_number, url):
"""There is a <caveslug> field in the .html file, but we now ignore it as we use the
filename itself to set the slug.
- However we do check it for sanity, pending its removal eventually."""
- # context = f"/{cave.areacode}/{slug}_cave_edit/"
+ However we do check it for sanity, if it is there, pending its removal eventually."""
if kataster_number:
if slug == f"{areacode}-{kataster_number}":
@@ -764,14 +763,20 @@ def read_cave(filename, mvf=None, cave=None):
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
fn = settings.CAVEDESCRIPTIONS / filename
- context = f"/cave_data/{filename}_edit" # the expo html editor, not the specialist cave data editor. oops.FIX THIS
- # print(f" - Reading Cave from cave descriptions file {fn}")
+ #print(f" - Reading Cave from cave descriptions file {fn}")
if not fn.exists():
message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
- DataIssue.objects.create(parser="caves", message=message, url=context)
+ DataIssue.objects.create(parser="caves", message=message, url="")
print(message)
return None
+
+ # ignore <caveslug> read from the filename
+ slug = filename[:-5] # strip off the ".html" at the end of the filename
+ slugs = [slug]
+ areacode = slug[:4]
+
+ context = f"/{areacode}/{slug}_cave_edit"
with open(fn) as f:
contents = f.read()
@@ -779,32 +784,17 @@ def read_cave(filename, mvf=None, cave=None):
if len(cavecontentslist) != 1:
message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.'
- DataIssue.objects.create(parser="caves", message=message, url=context)
+ DataIssue.objects.create(parser="caves", message=message, url="")
print(message)
return None
cavecontents = cavecontentslist[0]
-
- # This is ignored, we are using the filename not this <caveslug> field now
- # New 2024 June 28th.
- # slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
- # if len(slugs) > 1:
- # message = f" ! - More than one slug for a cave: {cave}, slugs: {slugs}."
- # DataIssue.objects.create(parser="caves", message=message, url=context)
- # print(message)
- # slug = slugs[0]
-
- # but ignore <caveslug> read from the file
- slug = filename[:-5] # strip off the ".html" at the end of the filename
- slugs = [slug]
+
#print(f"{filename=} {slug=}")
- #print(slug[-1].lower(), slug)
if slug[-1].lower() in LETTERS:
message = f" ! Cave name ends in a letter not a number. Fix this! in file {filename} "
DataIssue.objects.create(parser="caves", message=message, url=context) # url here is for where the file actually is, for editing
print(message)
-
-
non_public = getXMLmax1("non_public")
official_name = getXMLmax1("official_name")
@@ -822,14 +812,11 @@ def read_cave(filename, mvf=None, cave=None):
survex_file = getXMLmax1("survex_file")
description_file = getXMLmax1("description_file")
+ # Optional, but probably deprecated as we should just derive this from the survex data
length = getXMLmin0("length")
depth = getXMLmin0("depth")
extent = getXMLmin0("extent")
-
- areacode = slug[:4]
-
- contextguess = f"/{slug[0:4]}/{slug}_cave_edit/"
-
+
manual_edit = True
if not cave:
# we are parsing using databaseReset.py not an online edit
@@ -843,7 +830,7 @@ def read_cave(filename, mvf=None, cave=None):
kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up
for k in kaves:
message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
- DataIssue.objects.create(parser="caves", message=message, url=contextguess)
+ DataIssue.objects.create(parser="caves", message=message, url=context)
print(message)
for k in kaves:
if k.slug() is not None:
@@ -855,15 +842,13 @@ def read_cave(filename, mvf=None, cave=None):
do_caveslugstuff() # needs cave!=None
# We no longer need the <area> tag to define 1623 etc as we get that from the filename.
- areas = getXML(cavecontents, "area", context=contextguess, minItems=0) # can be multiple <area> tags
+ areas = getXML(cavecontents, "area", context=context, minItems=0) # can be multiple <area> tags
for area_slug in areas:
if area_slug not in AREACODES: # only detect subareas
cave.subarea = area_slug
if not cave.areacode:
if areacode in AREACODES:
cave.areacode = areacode
- context = f"/{cave.areacode}/{slug}_cave_edit/"
-
cave.non_public=boolify(non_public)
cave.official_name=official_name[0]
@@ -886,13 +871,13 @@ def read_cave(filename, mvf=None, cave=None):
cave.extent=extent[0]
cave.survex_file=survex_file[0]
cave.description_file=description_file[0]
- # cave.url=url[0] # set algorithically:
+ # cave.url=url[0] # set algorithmically now:
cave.url = f"{cave.areacode}/{cave.number()}/{cave.number()}.html"
check_directory(cave.areacode, cave.number(), cave.url, cave)
slug = check_slug(cave.areacode, cave.kataster_number, cave.unofficial_number, cave.url) #NB cave.slug is not a field on Cave
- # Thsi whole way of doing entrances can be replaced by simply knowing formthe entrance_data filename what the cave is.
+ # This whole way of doing entrances can be replaced by simply deriving the cave from the entrance_data filename. To do.
entrances = getXML(cavecontents, "entrance", context=context)
do_entrances()
# print(f"- {entrances_xslug=}")