summaryrefslogtreecommitdiffstats
path: root/parsers/caves.py
diff options
context:
space:
mode:
authorPhilip Sargent <philip.sargent@gmail.com>2023-09-11 20:38:14 +0300
committerPhilip Sargent <philip.sargent@gmail.com>2023-09-11 20:38:14 +0300
commitd323ff270083ead337f8297653bb833100306999 (patch)
treecb234cd494092c14f5f878bbc2cf060b8a5f85a9 /parsers/caves.py
parent47db19f1a24d22610de2a5782737650f4b0941dc (diff)
downloadtroggle-d323ff270083ead337f8297653bb833100306999.tar.gz
troggle-d323ff270083ead337f8297653bb833100306999.tar.bz2
troggle-d323ff270083ead337f8297653bb833100306999.zip
debugging Cave page links..
Diffstat (limited to 'parsers/caves.py')
-rw-r--r--parsers/caves.py66
1 files changed, 41 insertions, 25 deletions
diff --git a/parsers/caves.py b/parsers/caves.py
index 70f92cb..974f73e 100644
--- a/parsers/caves.py
+++ b/parsers/caves.py
@@ -27,6 +27,9 @@ So is the first thing that creates tables.
"""
todo = """
+- When reading cave data, to start off with we do not know the cave id (slug) so we can't give a useful url in
+ the error message, but we do have the filename. Systematize this, and the same thing with reading entrance files.
+
- Cannot use Edit This Page for pendingcaves.txt_edit as Edit This Page is expecting an html file.
So we will need a separate file-editing capability just for this configuration file ?!
@@ -35,15 +38,15 @@ todo = """
- rewrite archaic regex
re.findall("<%(itemname)s>(.*?)</%(itemname)s>" % {"itemname": itemname}, text, re.S)
in modern form and pre-compile it.
-
-- Semi-automagically import all the 1627- pending caves and create HTML files for them to be
- edited individually. (These are caves we only know about because we have German survex files.)
- crashes on MariaDB in databasereset.py on server when deleting Caves and complains Area needs a
non null parent, But this is not true. The only solution we have found is to let it crash, then
stop and restart MariaDB (requires a logon able to sudo) and then restart the databasereset.py
- again. (status as of July 2022)
+ again. (status as of July 2022). May not happen now that class Area is removed (Sept.2023).
"""
+AREACODES = {"1623", "1624", "1626", "1627"} # NB set not dict
+ARGEAREAS = {"1626", "1627"}
+
entrances_xslug = {}
caves_xslug = {}
areas_xslug = {}
@@ -105,7 +108,7 @@ def create_new_cave(svxpath, msg=None):
:loser: repo which is not recognised as a known cave.
ALSO called by survex parser when it finds a cave it doesn't recognise
"""
- # e.g. svxpath = "caves-1623/666/antig"
+ # e.g. svxpath = "caves-1623/666/beast" .svx
print(f"Create new cave at {svxpath} - {msg}")
#
survex_file = svxpath + ".svx"
@@ -117,16 +120,13 @@ def create_new_cave(svxpath, msg=None):
if a[0:3] == "162":
areacode = a[0:4]
url = f"{areacode}/{caveid}.html" # Note we are appending the .html as we believe in backwards compatibility.
- #url = f"{areacode}/{a[5:]}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
else:
print(f"WARNING: parsers/caves/create_new_cave called with svxpath '{svxpath}'. Surely it should start 'caves-162*'? {msg}")
areacode = "1623"
url = f"1623/{caveid}.html"
- #url = f"1623/{k}.html" # This is original code, but a above is only defined as being 4 characters long, so it did not make sense and produced non unique urls
-
+
k = f"{areacode}-{caveid}"
-
caves = Cave.objects.filter(unofficial_number=caveid, areacode =areacode)
if caves:
message = f" ! Already exists, caveid:{k} in areacode {areacode} {caves} - {msg}"
@@ -149,14 +149,15 @@ def create_new_cave(svxpath, msg=None):
cave.save()
return cave
-def do_ARGE_cave(slug, caveid, url, areacode, svxid):
- """Only called by survex parser.
+def do_ARGE_cave(slug, caveid, svxurl, areacode, svxid):
+ """Only called by survex parser, NOT the cave parser.
Creates a new Cave object, but with abbreviated data as the survex file (from ARGE) is all we have.
We already know the survex file.
- We already know that it doesn't exist... though there are bugs..
+ We already know that the cave doesn't exist... though there are bugs..
"""
default_note = "This is an ARGE cave where we only have the survex file and no other information"
+ url = f"{areacode}/{caveid}.html"
urltest = Cave.objects.filter(url=url)
if urltest:
@@ -171,11 +172,21 @@ def do_ARGE_cave(slug, caveid, url, areacode, svxid):
DataIssue.objects.create(parser="caves", message=message, url=url)
print(message)
return numtest[0]
-
+
+ sv = Path(settings.SURVEX_DATA, svxid + ".svx")
+ if sv.is_file:
+ with open(sv, "r") as s:
+ line1 = s.readline()
+ line2 = s.readline()
+ line3 = s.readline()
+ else:
+ print(f"not correct {sv}", file=sys.stderr)
+
cave = Cave(
- underground_description="ARGE cave.",
- survex_file= f"{svxid}.svx", # or is this svxurl ?!
- # url=url, No, the url spplied is that of the survexfile not of the cave file, e.g. /1626/254/254
+ underground_description="ARGE cave.\n3 lines of the survexfile:\n" + line1 +line2 +line3,
+ unofficial_number="ARGE",
+ survex_file= svxurl,
+ url=url,
notes=default_note,
areacode=areacode,
)
@@ -427,7 +438,7 @@ def read_entrance(filename, ent=None):
DataIssue.objects.create(parser="entrances", message=message, url=f"/cave/{slug}/edit/")
print(message)
- lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=context)
+ lastvisit = getXML(entrancecontents, "lastvisit", maxItems=1, minItems=0, context=f"/cave/{slug}/edit/")
alt = getXMLmax1("alt")
approach = getXMLmax1("approach")
@@ -611,12 +622,12 @@ def read_cave(filename, cave=None):
# Note: these are HTML files in the EXPOWEB repo, not from the loser repo.
fn = settings.CAVEDESCRIPTIONS / filename
- context = filename
+ context = f"/cave_data/{filename}_edit"
# print(f" - Reading Cave from cave descriptions file {fn}")
if not fn.exists():
message = f" ! Cave_data file reading problem filename:'cave_data/{filename}'"
- DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit")
+ DataIssue.objects.create(parser="caves", message=message, url=context)
print(message)
return None
@@ -626,7 +637,7 @@ def read_cave(filename, cave=None):
if len(cavecontentslist) != 1:
message = f'! BAD CAVE DATA in "{filename}". More than one cave. Edit file manually, click.'
- DataIssue.objects.create(parser="caves", message=message, url=f"/cave_data/{filename}_edit")
+ DataIssue.objects.create(parser="caves", message=message, url=context)
print(message)
return None
@@ -634,10 +645,11 @@ def read_cave(filename, cave=None):
slugs = getXML(cavecontents, "caveslug", maxItems=1, context=context)
if len(slugs) > 1:
message = f" ! - More than one slug for a cave: {cave}, slugs: {slugs}. Ignoring all except first."
- DataIssue.objects.create(parser="caves", message=message, url=f"{cave.url}_edit/")
+ DataIssue.objects.create(parser="caves", message=message, url=context)
print(message)
slug = slugs[0]
-
+ context = url=f"/{slug[0:4]}/{slug}_cave_edit/"
+
non_public = getXMLmax1("non_public")
official_name = getXMLmax1("official_name")
kataster_code = getXMLmax1("kataster_code")
@@ -668,7 +680,7 @@ def read_cave(filename, cave=None):
kaves = Cave.objects.all().filter(filename=filename) # replace with slug when CaveSlug tidied up
for k in kaves:
message = " ! - DUPLICATES in db. kataster:" + str(k.kataster_number) + ", slug:" + str(k.slug())
- DataIssue.objects.create(parser="caves", message=message)
+ DataIssue.objects.create(parser="caves", message=message, url=context)
print(message)
for k in kaves:
if k.slug() is not None:
@@ -679,6 +691,7 @@ def read_cave(filename, cave=None):
# From here on the code applies to both edited and newly-imported caves (mostly!)
do_caveslugstuff() # needs cave!=None
+
cave.non_public=boolify(non_public)
cave.official_name=official_name[0]
cave.kataster_code=kataster_code[0]
@@ -701,11 +714,14 @@ def read_cave(filename, cave=None):
areas = getXML(cavecontents, "area", context=context) # can be multiple <area> tags
for area_slug in areas:
- if area_slug in ["1623", "1624", "1626", "1627"]: # ignore sub areas which are in another <area> tag
+ if area_slug in AREACODES: # ignore sub areas which are in another <area> tag
cave.areacode = area_slug
else:
cave.subarea = area_slug
-
+ if not cave.areacode:
+ if slug[0:4] in AREACODES:
+ cave.areacode = slug[0:4]
+
entrances = getXML(cavecontents, "entrance", context=context)
do_entrances()
# print(f"- {entrances_xslug=}")