summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--core/utils.py29
-rw-r--r--core/views/uploads.py24
-rw-r--r--parsers/logbooks.py16
3 files changed, 38 insertions, 31 deletions
diff --git a/core/utils.py b/core/utils.py
index a24a1dc..2aba924 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -1,14 +1,15 @@
+import hashlib
import logging
+import os
import random
import resource
+import string
import subprocess
-import os
from decimal import getcontext
from pathlib import Path
getcontext().prec = 2 # use 2 significant figures for decimal calculations
-
import settings
"""This file declares TROG a globally visible object for caches.
@@ -30,6 +31,8 @@ thread.
"""
TROG = {"pagecache": {"expedition": {}}, "caves": {"gcavelookup": {}, "gcavecount": {}}}
+alphabet = []
+sha = hashlib.new('sha256')
# This is module-level executable. This is a Bad Thing. Especially when it touches the file system.
try:
@@ -50,7 +53,27 @@ def chaosmonkey(n):
return False
# print("CHAOS strikes !", file=sys.stderr)
return True
-
+
def unique_slug(text, n):
    """Return the first ``n`` hex digits of the SHA-256 digest of ``text``.

    This gives an almost-unique id based on the text. 2 hex digits would
    seem adequate, but we might get a collision.

    A fresh hash object is created on every call, so the result depends only
    on ``text``. Feeding one long-lived hash object would make each id depend
    on every string hashed before it, and the same text would then produce a
    different id on each call.

    Args:
        text: the string to hash; it is encoded as UTF-8 first.
        n: how many leading hex digits to keep (a SHA-256 hex digest has 64).

    Returns:
        The leading ``n`` characters of the lowercase hexadecimal digest.
    """
    digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
    return digest[0:n]
+
def alphabet_suffix(n):
    """Return the lowercase-letter suffix for the zero-based index ``n``.

    This is called repeatedly during initial parsing import, so it is kept
    cheap: it indexes the constant ``string.ascii_lowercase`` directly.

    Args:
        n: zero-based position; 0 gives "a" and 25 gives "z".

    Returns:
        A single lowercase letter when ``n`` is in the range 0..25.
        Otherwise "_X_" followed by two randomly chosen lowercase letters;
        that fallback is not reproducible between calls.
    """
    letters = string.ascii_lowercase
    # Check the lower bound too: a negative index would otherwise wrap round
    # and quietly return a letter from the end of the alphabet.
    if 0 <= n < len(letters):
        return letters[n]
    return "_X_" + random.choice(letters) + random.choice(letters)
def only_commit(fname, message):
"""Only used to commit a survex file edited and saved in view/survex.py"""
diff --git a/core/views/uploads.py b/core/views/uploads.py
index f0dedfa..5c2f4f4 100644
--- a/core/views/uploads.py
+++ b/core/views/uploads.py
@@ -1,5 +1,4 @@
import subprocess
-import hashlib
import string
from datetime import datetime
from pathlib import Path
@@ -9,10 +8,11 @@ from django.core.files.storage import FileSystemStorage
from django.shortcuts import render, redirect
import settings
-from troggle.core.models.caves import GetCaveLookup
+
from troggle.core.models.logbooks import LogbookEntry, writelogbook, PersonLogEntry
from troggle.core.models.survex import DrawingFile
from troggle.core.models.troggle import DataIssue, Expedition, PersonExpedition
+from troggle.core.utils import alphabet_suffix
from troggle.parsers.people import GetPersonExpeditionNameLookup, known_foreigner
# from databaseReset import reinit_db # don't do this. databaseReset runs code *at import time*
@@ -45,29 +45,15 @@ todo = """
- Make file rename utility less ugly.
"""
-sha = hashlib.new('sha256')
-
-def unique_slug(text, n):
- """This gives each logbook entry a unique id based on the date+content, so the order of entries on a particular day
- does not matter. This is a change (August 2023) from previous process.
-
- 2 hex digits would seem adequate for each expo day, but we might get a collision.
- The hash is based on the content after substitution of <p> so should be stable. Which means these ids
- can be used elsewhere in the troggle system as permanent slugs.
-
- When SAVING an edited entry (as opposed to a new one) we will have a different hash so we will have to
- delete the original database object
- """
- sha.update(text.encode('utf-8'))
- return sha.hexdigest()[0:n]
def create_new_lbe_slug(date):
onthisdate = LogbookEntry.objects.filter(date=date)
n = len(onthisdate)
# print(f" Already entries on this date: {n}\n {onthisdate}")
- alphabet = list(string.ascii_lowercase)
- tid = f"{date}{alphabet[n]}"
+ suffix = alphabet_suffix(n)
+
+ tid = f"{date}{suffix}"
print(tid)
return tid
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index a5f6631..511ed47 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -15,8 +15,7 @@ from parsers.people import GetPersonExpeditionNameLookup, load_people_expos, kno
from troggle.core.models.caves import GetCaveLookup
from troggle.core.models.logbooks import LogbookEntry, PersonLogEntry
from troggle.core.models.troggle import DataIssue, Expedition
-from troggle.core.utils import get_process_memory
-from troggle.core.views.uploads import unique_slug
+from troggle.core.utils import get_process_memory, alphabet_suffix, unique_slug
"""
Parses and imports logbooks in all their wonderful confusion
@@ -109,7 +108,7 @@ ENTRIES = {
logentries = [] # the entire logbook for one year is a single object: a list of entries
noncaveplaces = ["travel", "Journey", "Loser Plateau", "UNKNOWN", "plateau", "base camp", "basecamp", "top camp", "topcamp"]
tripsdate = {}
-alphabet = []
+
def set_trip_seq_id(year, seq):
'''We have not parsed the trip date yet, so this is a sequence number
@@ -121,15 +120,14 @@ def reset_trip_id(date):
'''Now we have the date, we can set the tripid (the lbe slug) to be in our standard form
of <date><letter>, i.e. '2003-07-30b'
BUT this gets re-set every time the logbook is imported,
- so they are not persistent as we would much prefer.
+ However these are persistent as the entries are ordered on this field.
'''
- global alphabet
already =tripsdate.get(date, 0) # returns zero if none found
- tripsdate[date] = already +1
- if not alphabet:
- alphabet = list(string.ascii_lowercase)
+ n = already + 1
+ tripsdate[date] = n
+ suffix = alphabet_suffix(n)
- tid = f"{date}{alphabet[already]}"
+ tid = f"{date}{suffix}"
# print(tid)
return tid