diff options
-rw-r--r-- | core/views/expo.py | 14 | ||||
-rw-r--r-- | parsers/caves.py | 156 | ||||
-rw-r--r-- | parsers/imports.py | 4 | ||||
-rw-r--r-- | settings.py | 8 |
4 files changed, 102 insertions, 80 deletions
diff --git a/core/views/expo.py b/core/views/expo.py index 1be813d..e051135 100644 --- a/core/views/expo.py +++ b/core/views/expo.py @@ -87,30 +87,30 @@ def expowebpage(request, expowebpath, path): if not Path(expowebpath / path).is_file(): return render(request, 'pagenotfound.html', {'path': path}) - with open(os.path.normpath(expowebpath / path), "rb") as o: + with open(os.path.normpath(expowebpath / path), "r") as o: html = o.read() - m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE) + m = re.search(r'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE) if m: preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups() else: - return HttpResponse(default_head + html.decode() + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full HTML format </body' ) - m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE) + return HttpResponse(default_head + html + '<h3>HTML Parsing failure:<br>Page could not be parsed into header and body:<br>failure detected in expowebpage in views.expo.py</h3> Please edit this <var>:expoweb:</var> page to be in the expected full HTML format </body' ) + m = re.search(r"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE) if m: title, = m.groups() else: title = "" - m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE) + m = re.search(r"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE) if m: editable = False else: editable = True has_menu = False - menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE) + menumatch = re.match(r'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE) if menumatch: has_menu = True - menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE) + menumatch = re.match(r'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE) if menumatch: has_menu = True return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title, diff --git a/parsers/caves.py b/parsers/caves.py index bfb51d6..533bf61 100644 --- a/parsers/caves.py +++ b/parsers/caves.py @@ -2,78 +2,98 @@ import os import re from django.conf import settings +from django.db import transaction from troggle.core.models import DataIssue, get_process_memory -import troggle.core.models_caves as models_caves +from troggle.core.models_caves import Area, Cave, Entrance, CaveSlug, EntranceSlug, CaveAndEntrance +'''Reads all the cave description data by parsing the xml files (stored as e.g. :EXPOWEB:/cave-data/1623-161.html ) +and creating the various Cave, Entrance and necessary Area objects. + +BUT in Django 2.0 and later we cannot do any queries on data we have just entered +because this is all happening inside one transaction. Bummer. + +django.db.transaction.TransactionManagementError: +An error occurred in the current transaction. You can't execute queries until the end of the 'atomic' block. +''' entrances_xslug = {} caves_xslug = {} areas_xslug = {} def readcaves(): - print(" - Deleting Caves and Entrances") - models_caves.Cave.objects.all().delete() - models_caves.Entrance.objects.all().delete() - # Clear the cave data issues and the caves as we are reloading - DataIssue.objects.filter(parser='caves').delete() - DataIssue.objects.filter(parser='entrances').delete() + '''Reads the xml-format HTML files in the EXPOWEB repo, not from the loser repo. + ''' + with transaction.atomic(): + print(" - Deleting Caves and Entrances") + Cave.objects.all().delete() + Entrance.objects.all().delete() + # Clear the cave data issues and the caves as we are reloading + DataIssue.objects.filter(parser='caves').delete() + DataIssue.objects.filter(parser='entrances').delete() - # Note: these are HTML files in the EXPOWEB repo, not from the loser repo. - area_1623 = models_caves.Area.objects.update_or_create(short_name = "1623", parent = None) - area_1626 = models_caves.Area.objects.update_or_create(short_name = "1626", parent = None) + + area_1623 = Area.objects.update_or_create(short_name = "1623", parent = None) + # This seems to retrun a tuple, not a single object! i.e. (<Area: 1623>, True) + #print(f' ! - READ CAVES: {area_1623}') - print (" - Setting pending caves") - # Do this first, so that these empty entries are overwritten as they get properly created. - - # For those caves which do not have XML files even though they exist and have surveys - # also needs to be done *before* entrances so that the entrance-cave links work properly. - pending = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", - "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", - "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", - "2018-pf-01", "2018-pf-02", "haldenloch", "gruenstein"] - for k in pending: - try: - cave = models_caves.Cave( - unofficial_number = k, -# official_name = "", - underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.", - notes="_Survex file found in loser repo but no description in expoweb") - if cave: - cave.save() # must save to have id before foreign keys work - cave.area = area_1623 - cave.save() - message = " ! {:11s} {}".format(cave.unofficial_number, cave.underground_description) - DataIssue.objects.create(parser='caves', message=message) - print(message) - - try: # Now create a slug ID - cs = models_caves.CaveSlug.objects.update_or_create(cave = cave, - slug = "TEMP-" + k, - primary = False) - except: - message = " ! {:11s} {} PENDING cave slug create failure".format(k) + area_1626 = Area.objects.update_or_create(short_name = "1626", parent = None) + + print (" - Setting pending caves") + # Do this first, so that these empty entries are overwritten as they get properly created. + + # For those caves which do not have XML files even though they exist and have surveys + # also needs to be done *before* entrances so that the entrance-cave links work properly. + pending = ["2007-04", "2007-05", "2007-06", "2007-07", "2007-12", "2009-01", "2009-02", + "2010-06", "2010-07", "2012-ns-01", "2012-ns-02", "2010-04", "2012-ns-05", "2012-ns-06", + "2012-ns-07", "2012-ns-08", "2012-ns-12", "2012-ns-14", "2012-ns-15", "2014-bl888", + "2018-pf-01", "2018-pf-02", "haldenloch", "gruenstein"] + for k in pending: + try: + cave = Cave( + unofficial_number = k, + # official_name = "", + underground_description = "Pending cave write-up - creating as empty object. No XML file available yet.", + notes="_Survex file found in loser repo but no description in expoweb") + if cave: + cave.save() # must save to have id before foreign keys work. This is also a ManyToMany key. + #print(f' ! - READ CAVES: cave {k} {cave}') + cave.area.add(area_1623[0]) + cave.save() + message = " ! {:11s} {}".format(cave.unofficial_number, cave.underground_description) DataIssue.objects.create(parser='caves', message=message) print(message) - else: - print("Failed to create cave {} ".format(k)) - + + try: # Now create a cave slug ID + cs = CaveSlug.objects.update_or_create(cave = cave, + slug = "TEMP-" + k, + primary = False) + except: + message = " ! {:11s} {} PENDING cave slug create failure".format(k) + DataIssue.objects.create(parser='caves', message=message) + print(message) + else: + message = f' ! {k:11s} PENDING cave slug create failure' + DataIssue.objects.create(parser='caves', message=message) + print(message) + - except: - message = " ! Error. Cannot create pending cave, pending-id:{}".format(k) - DataIssue.objects.create(parser='caves', message=message) - print(message) - raise + except: + message = " ! Error. Cannot create pending cave, pending-id:{}".format(k) + DataIssue.objects.create(parser='caves', message=message) + print(message) + raise - print(" - Reading Entrances from entrance descriptions xml files") - print(" - settings.CAVEDESCRIPTIONS: ", settings.CAVEDESCRIPTIONS) - for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files - if filename.endswith('.html'): - readentrance(filename) + with transaction.atomic(): + print(" - Reading Entrances from entrance descriptions xml files") + print(" - settings.CAVEDESCRIPTIONS: ", settings.CAVEDESCRIPTIONS) + for filename in next(os.walk(settings.ENTRANCEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + if filename.endswith('.html'): + readentrance(filename) - print(" - Reading Caves from cave descriptions xml files") - for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files - if filename.endswith('.html'): - readcave(filename) + print(" - Reading Caves from cave descriptions xml files") + for filename in next(os.walk(settings.CAVEDESCRIPTIONS))[2]: #Should be a better way of getting a list of files + if filename.endswith('.html'): + readcave(filename) def readentrance(filename): global entrances_xslug @@ -112,7 +132,7 @@ def readentrance(filename): bearings = getXML(entrancecontents, "bearings", maxItems = 1, context = context) url = getXML(entrancecontents, "url", maxItems = 1, context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(name) >= 1 and len(entrance_description) == 1 and len(explorers) == 1 and len(map_description) == 1 and len(location_description) == 1 and len(approach) == 1 and len(underground_description) == 1 and len(marking) == 1 and len(marking_comment) == 1 and len(findability) == 1 and len(findability_description) == 1 and len(alt) == 1 and len(northing) == 1 and len(easting) == 1 and len(tag_station) == 1 and len(exact_station) == 1 and len(other_station) == 1 and len(other_description) == 1 and len(bearings) == 1 and len(url) == 1: - e, state = models_caves.Entrance.objects.update_or_create(name = name[0], + e, state = Entrance.objects.update_or_create(name = name[0], non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], entrance_description = entrance_description[0], explorers = explorers[0], @@ -140,13 +160,13 @@ def readentrance(filename): for slug in slugs: #print("entrance slug:{} filename:{}".format(slug, filename)) try: - cs = models_caves.EntranceSlug.objects.update_or_create(entrance = e, + cs = EntranceSlug.objects.update_or_create(entrance = e, slug = slug, primary = primary) except: # need to cope with duplicates print(" ! FAILED to get only one ENTRANCE when updating using: "+filename) - kents = models_caves.EntranceSlug.objects.all().filter(entrance = e, + kents = EntranceSlug.objects.all().filter(entrance = e, slug = slug, primary = primary) for k in kents: @@ -161,6 +181,8 @@ def readentrance(filename): primary = False def readcave(filename): + '''Assumes any area it hasn't seen before is a subarea of 1623 + ''' global entrances_xslug global caves_xslug global areas_xslug @@ -196,7 +218,7 @@ def readcave(filename): entrances = getXML(cavecontents, "entrance", context = context) if len(non_public) == 1 and len(slugs) >= 1 and len(official_name) == 1 and len(areas) >= 1 and len(kataster_code) == 1 and len(kataster_number) == 1 and len(unofficial_number) == 1 and len(explorers) == 1 and len(underground_description) == 1 and len(equipment) == 1 and len(references) == 1 and len(survey) == 1 and len(kataster_status) == 1 and len(underground_centre_line) == 1 and len(notes) == 1 and len(length) == 1 and len(depth) == 1 and len(extent) == 1 and len(survex_file) == 1 and len(description_file ) == 1 and len(url) == 1 and len(entrances) >= 1: try: - c, state = models_caves.Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], + c, state = Cave.objects.update_or_create(non_public = {"True": True, "False": False, "true": True, "false": False,}[non_public[0]], official_name = official_name[0], kataster_code = kataster_code[0], kataster_number = kataster_number[0], @@ -218,7 +240,7 @@ def readcave(filename): filename = filename) except: print(" ! FAILED to get only one CAVE when updating using: "+filename) - kaves = models_caves.Cave.objects.all().filter(kataster_number=kataster_number[0]) + kaves = Cave.objects.all().filter(kataster_number=kataster_number[0]) for k in kaves: message = " ! - DUPLICATES in db. kataster:"+ str(k.kataster_number) + ", slug:" + str(k.slug()) DataIssue.objects.create(parser='caves', message=message) @@ -233,11 +255,11 @@ def readcave(filename): if area_slug in areas_xslug: newArea = areas_xslug[area_slug] else: - area = models_caves.Area.objects.filter(short_name = area_slug) + area = Area.objects.filter(short_name = area_slug) if area: newArea = area[0] else: - newArea = models_caves.Area(short_name = area_slug, parent = models_caves.Area.objects.get(short_name = "1623")) + newArea = Area(short_name = area_slug, parent = Area.objects.get(short_name = "1623")) newArea.save() areas_xslug[area_slug] = newArea c.area.add(newArea) @@ -247,7 +269,7 @@ def readcave(filename): cs = caves_xslug[slug] else: try: - cs = models_caves.CaveSlug.objects.update_or_create(cave = c, + cs = CaveSlug.objects.update_or_create(cave = c, slug = slug, primary = primary) caves_xslug[slug] = cs @@ -265,9 +287,9 @@ def readcave(filename): if slug in entrances_xslug: entrance = entrances_xslug[slug] else: - entrance = models_caves.Entrance.objects.get(entranceslug__slug = slug) + entrance = Entrance.objects.get(entranceslug__slug = slug) entrances_xslug[slug] = entrance - ce = models_caves.CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) + ce = CaveAndEntrance.objects.update_or_create(cave = c, entrance_letter = letter, entrance = entrance) except: message = " ! Entrance setting failure, slug: %s letter: %s" % (slug, letter) DataIssue.objects.create(parser='caves', message=message) diff --git a/parsers/imports.py b/parsers/imports.py index e2a6224..e07d8fd 100644 --- a/parsers/imports.py +++ b/parsers/imports.py @@ -18,9 +18,7 @@ import troggle.parsers.QMs def import_caves(): print("-- Importing Caves to ",end="") print(django.db.connections.databases['default']['NAME']) - # wrap the entire import in a transaction - with transaction.atomic(): - troggle.parsers.caves.readcaves() + troggle.parsers.caves.readcaves() def import_people(): print("-- Importing People (folk.csv) to ",end="") diff --git a/settings.py b/settings.py index 15f139f..a8bff2e 100644 --- a/settings.py +++ b/settings.py @@ -127,7 +127,9 @@ INSTALLED_APPS = ( ) # See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/ -MIDDLEWARE_CLASSES = ( +# Note that tis is a radically different onion architecture though it looks the same, +# see https://docs.djangoproject.com/en/2.0/topics/http/middleware/#upgrading-pre-django-1-10-style-middleware +MIDDLEWARE = [ #'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this 'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache 'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early @@ -137,8 +139,8 @@ MIDDLEWARE_CLASSES = ( 'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs 'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system 'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header - 'troggle.middleware.SmartAppendSlashMiddleware' # doesn't seem to be working... -) + #'troggle.middleware.SmartAppendSlashMiddleware' # needs adapting after Dj2.0 +] ROOT_URLCONF = 'troggle.urls' |