diff options
author | Philip Sargent <philip.sargent@klebos.com> | 2021-03-31 17:57:43 +0100 |
---|---|---|
committer | Philip Sargent <philip.sargent@klebos.com> | 2021-03-31 17:57:43 +0100 |
commit | a6ed0a964e8d3c99eff918d1fbe1abbd6c222460 (patch) | |
tree | ab086e476dadb3d0ccc9821bd1777c2129ffc71d | |
parent | 3452c2c5d424f39e0dfc02d9c4121cfc434cd216 (diff) | |
download | troggle-a6ed0a964e8d3c99eff918d1fbe1abbd6c222460.tar.gz troggle-a6ed0a964e8d3c99eff918d1fbe1abbd6c222460.tar.bz2 troggle-a6ed0a964e8d3c99eff918d1fbe1abbd6c222460.zip |
making flat files delivery more robust
-rw-r--r-- | core/views_expo.py | 148 | ||||
-rw-r--r-- | middleware.py | 40 | ||||
-rw-r--r-- | settings.py | 12 | ||||
-rw-r--r-- | templates/dirnotfound.html | 8 |
4 files changed, 117 insertions, 91 deletions
diff --git a/core/views_expo.py b/core/views_expo.py index fb4c06c..49f8bc7 100644 --- a/core/views_expo.py +++ b/core/views_expo.py @@ -9,6 +9,7 @@ from django.http import HttpResponse, HttpResponseRedirect, Http404 from django.urls import reverse, resolve from django.template import Context, loader from django.views.decorators.csrf import ensure_csrf_cookie +from django.contrib import admin import django.forms as forms @@ -55,93 +56,96 @@ def expofilesdir(request, dirpath, filepath): fileitems.append((Path(urlpath) / f.parts[-1], str(f.parts[-1]), getmimetype(f))) return render(request, 'dirdisplay.html', { 'filepath': urlpath, 'fileitems':fileitems, 'diritems': diritems,'settings': settings }) +def expowebpage(request, expowebpath, path): + '''Adds memnus and serves an HTML page + ''' + if not Path(expowebpath / path).is_file(): + return render(request, 'pagenotfound.html', {'path': path}) + + with open(os.path.normpath(expowebpath / path), "rb") as o: + html = o.read() + + m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE) + if m: + preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups() + else: + return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expowebpage in views_expo.py") + m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE) + if m: + title, = m.groups() + else: + title = "" + m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE) + if m: + editable = False + else: + editable = True + + has_menu = False + menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE) + if menumatch: + has_menu = True + menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE) + if menumatch: + has_menu = True + return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title, + 'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu}) + + def expopage(request, path): '''Either renders an HTML page from expoweb with all the menus, or serves an unadorned binary file with mime type - - This is a horrible mess and some code is redundant and unreachable because of urls.py setup ''' - # print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path)),flush=True) + #print(" - EXPOPAGES delivering the file: '{}':{} as MIME type: {}".format(request.path, path,getmimetype(path)),flush=True) if path.startswith("noinfo") and settings.PUBLIC_SITE and not request.user.is_authenticated(): - # print((" - EXPOPAGES redirect to logon: flat path noinfo", path)) return HttpResponseRedirect(urljoin(reverse("auth_login"),'?next={}'.format(request.path))) + if path.startswith("admin/"): + # don't even attempt to handle these sorts of mistakes + return HttpResponseRedirect("/admin/") + expowebpath = Path(settings.EXPOWEB) - if path.endswith("/") or path == "": - # print(" - EXPOPAGES the file: {} ENDSWITH ...".format(path)) - try: - o = open(os.path.normpath(expowebpath / path / "index.html"), "rb") - path = path + "index.html" - except IOError: + if path == "": + return expowebpage(request, expowebpath, "index.htm") + + if path.endswith(".htm") or path.endswith(".html"): + return expowebpage(request, expowebpath, path) + + if Path(expowebpath / path ).is_dir(): + for p in ["index.html", "index.htm", "default.html"]: try: - o = open(os.path.normpath(expowebpath / path / "index.htm"), "rb") - path = path + "index.htm" + o = open(os.path.normpath(expowebpath / path / p), "rb") except IOError: - return render(request, 'pagenotfound.html', {'path': path}) - else: - # print(" - EXPOPAGES the file: '{}' ...".format(path)) - if path.startswith('site_media'): - # print(" - MEDIA_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path)) - path = path.replace("site_media", settings.MEDIA_ROOT) - filetobeopened = os.path.normpath(path) - elif path.startswith("static"): - # print(" - STATIC_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path)) - path = path.replace("static", settings.MEDIA_ROOT) - filetobeopened = os.path.normpath(path) - else: - # print(" - NO _ROOT: {} ...".format(expowebpath)) - filetobeopened = os.path.normpath(expowebpath / path) + pass + else: # no exception, so file was found + return expowebpage(request, expowebpath, Path(path) / p) + return render(request, 'pagenotfound.html', {'path': Path(path) / "index.html"}) - # print(" - EXPOPAGES full path : {} ...".format(filetobeopened)) - try: - o = open(filetobeopened, "rb") - #print(" - EXPOPAGES full path no error: {} ...".format(filetobeopened)) - except IOError: - #print(" - EXPOPAGES ERROR: {} ...".format(filetobeopened)) - #o.close() - return render(request, 'pagenotfound.html', {'path': path}) - - - if path.endswith(".htm") or path.endswith(".html"): - # add the menus etc. - with open(os.path.normpath(expowebpath / path), "rb") as o: - html = o.read() - - m = re.search(rb'(.*)<\s*head([^>]*)>(.*)<\s*/head\s*>(.*)<\s*body([^>]*)>(.*)<\s*/body\s*>(.*)', html, re.DOTALL + re.IGNORECASE) - if m: - preheader, headerattrs, head, postheader, bodyattrs, body, postbody = m.groups() - else: - return HttpResponse(html + "HTML Parsing failure: Page could not be split into header and body: failed in expopages.views.py") - m = re.search(rb"<title>(.*)</title>", head, re.DOTALL + re.IGNORECASE) - if m: - title, = m.groups() - else: - title = "" - m = re.search(rb"<meta([^>]*)noedit", head, re.DOTALL + re.IGNORECASE) - if m: - editable = False - else: - editable = True - - has_menu = False - menumatch = re.match(rb'(.*)<div id="menu">', body, re.DOTALL + re.IGNORECASE) - if menumatch: - has_menu = True - menumatch = re.match(rb'(.*)<ul id="links">', body, re.DOTALL + re.IGNORECASE) - if menumatch: - has_menu = True - #body, = menumatch.groups() -# if re.search(rb"iso-8859-1", html): -# body = str(body, "iso-8859-1") -# body.strip - return render(request, 'flatpage.html', {'editable': editable, 'path': path, 'title': title, - 'body': body, 'homepage': (path == "index.htm"), 'has_menu': has_menu}) + if path.endswith("/"): + # we already know it is not a directory. + # the final / may have been appended by middleware if there was no page without it + # do not redirect to a file path without the slash as we may get in a loop. Let the user fix it: + return render(request, 'dirnotfound.html', {'path': path, 'subpath': path[0:-1]}) + + if path.startswith('site_media'): # BUT we may have missing files, directories or .html here too?! + # print(" - MEDIA_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path)) + npath = path.replace("site_media", settings.MEDIA_ROOT) + filetobeopened = os.path.normpath(npath) + elif path.startswith("static"): + # print(" - STATIC_ROOT: {} ...{}".format(settings.MEDIA_ROOT, path)) + npath = path.replace("static", settings.MEDIA_ROOT) + filetobeopened = os.path.normpath(npath) else: - # print(" - EXPOPAGES delivering the file: {} as MIME type: {}".format(path,getmimetype(path))) + filetobeopened = os.path.normpath(expowebpath / path) + + try: return HttpResponse(content=open(filetobeopened, "rb"), content_type=getmimetype(path)) - #return HttpResponse(content=open(singlescan.ffile,"rb"), content_type=getmimetype(path)) + except IOError: + return render(request, 'pagenotfound.html', {'path': path}) + + def getmimetype(path): path = str(path) diff --git a/middleware.py b/middleware.py index 43f7328..56078a9 100644 --- a/middleware.py +++ b/middleware.py @@ -1,7 +1,6 @@ from django.conf import settings from django import http -from django.urls import reverse, resolve -#from django.core.urlresolvers import resolve +from django.urls import reverse, resolve,Resolver404 """Non-standard django middleware is loaded from this file. """ @@ -17,19 +16,31 @@ class SmartAppendSlashMiddleware(object): """ def process_request(self, request): - """ - Rewrite the URL based on settings.SMART_APPEND_SLASH - """ + '''Called for every url so return as quickly as possible + Append a slash if SMART_APPEND_SLASH is set, the resulting URL resolves and it doesn't without the / + ''' + if not settings.SMART_APPEND_SLASH: + return None + + if request.path.endswith('/'): + return None + + if request.path.endswith('_edit'): + return None - # Check for a redirect based on settings.SMART_APPEND_SLASH host = http.HttpRequest.get_host(request) old_url = [host, request.path] - new_url = old_url[:] - # Append a slash if SMART_APPEND_SLASH is set and the resulting URL - # resolves. - if settings.SMART_APPEND_SLASH and (not old_url[1].endswith('/')) and not _resolves(old_url[1]) and _resolves(old_url[1] + '/'): - new_url[1] = new_url[1] + '/' + if _resolves(old_url[1]): + return None + + # So: it does not resolve according to our criteria, i.e. _edit doesn't count + new_url = old_url[:] + new_url[1] = new_url[1] + '/' + if not _resolves(new_url[1]): + return None + else: if settings.DEBUG and request.method == 'POST': + # replace this exception with a redirect to an error page raise RuntimeError("You called this URL via POST, but the URL doesn't end in a slash and you have SMART_APPEND_SLASH set. Django can't redirect to the slash URL while maintaining POST data. Change your form to point to %s%s (note the trailing slash), or set SMART_APPEND_SLASH=False in your Django settings." % (new_url[0], new_url[1])) if new_url != old_url: # Redirect @@ -45,9 +56,12 @@ class SmartAppendSlashMiddleware(object): def _resolves(url): try: - resolve(url) + # If the URL does not resolve, the function raises a Resolver404 exception (a subclass of Http404) + match = resolve(url) + # this will ALWAYS be resolved by expopages because it will produce pagenotfound if not the thing asked for + # so handle this in expopages, not in middleware return True - except http.Http404: + except Resolver404: return False except: print(url) diff --git a/settings.py b/settings.py index af9c18d..f42e8eb 100644 --- a/settings.py +++ b/settings.py @@ -104,7 +104,7 @@ LOGBOOK_PARSER_SETTINGS = { "1982": ("1982/log.htm", "Parseloghtml01"), } -APPEND_SLASH = False +APPEND_SLASH = False # never relevant because we have urls that match unknown files and produce an 'edit this page' response SMART_APPEND_SLASH = True @@ -130,7 +130,7 @@ INSTALLED_APPS = ( # See the recommended order of these in https://docs.djangoproject.com/en/2.2/ref/middleware/ MIDDLEWARE_CLASSES = ( #'django.middleware.security.SecurityMiddleware', # SECURE_SSL_REDIRECT and SECURE_SSL_HOST # we don't use this - 'django.middleware.gzip.GZipMiddleware', # not needed as expofiles and photos served by apache + 'django.middleware.gzip.GZipMiddleware', # not needed when expofiles and photos served by apache 'django.contrib.sessions.middleware.SessionMiddleware', # Manages sessions, if CSRF_USE_SESSIONS then it needs to be early 'django.middleware.common.CommonMiddleware', # DISALLOWED_USER_AGENTS, APPEND_SLASH and PREPEND_WWW 'django.middleware.csrf.CsrfViewMiddleware', # Cross Site Request Forgeries by adding hidden form fields to POST @@ -138,20 +138,20 @@ MIDDLEWARE_CLASSES = ( 'django.contrib.admindocs.middleware.XViewMiddleware', # this and docutils needed by admindocs 'django.contrib.messages.middleware.MessageMiddleware', # Cookie-based and session-based message support. Needed by admin system 'django.middleware.clickjacking.XFrameOptionsMiddleware', # clickjacking protection via the X-Frame-Options header - 'troggle.middleware.SmartAppendSlashMiddleware' # + 'troggle.middleware.SmartAppendSlashMiddleware' # doesn't seem to be working... ) ROOT_URLCONF = 'troggle.urls' -WSGI_APPLICATION = 'troggle.wsgi.application' +WSGI_APPLICATION = 'troggle.wsgi.application' # change to asgi as soon as we upgrade to Django 3.0 ACCOUNT_ACTIVATION_DAYS=3 -AUTH_PROFILE_MODULE = 'core.person' +# AUTH_PROFILE_MODULE = 'core.person' # used by removed profiles app ? QM_PATTERN="\[\[\s*[Qq][Mm]:([ABC]?)(\d{4})-(\d*)-(\d*)\]\]" -# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editflatpage.html +# Re-enable TinyMCE when Dj upgraded to v3. Also templates/editexpopage.html # TINYMCE_DEFAULT_CONFIG = { # 'plugins': "table,spellchecker,paste,searchreplace", # 'theme': "advanced", diff --git a/templates/dirnotfound.html b/templates/dirnotfound.html new file mode 100644 index 0000000..9bf190f --- /dev/null +++ b/templates/dirnotfound.html @@ -0,0 +1,8 @@ +{% extends "expobase.html" %} +{% block title %}Directory not found {{ path }}{% endblock %} +{% block body %} +<h1>Directory not found '{{ path }}'</h1> +<h3>Click here: <a href="/{{ subpath }}">/{{ subpath }}</a> </h3> +<p>i.e. without the final '/' +{% include "menu.html" %} +{% endblock %} |