summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- parsers/logbooks.py 36
-rwxr-xr-x pre-run.sh 2
2 files changed, 25 insertions, 13 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 9007d4f..6b6c121 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -1,10 +1,12 @@
import csv
-from datetime import datetime, date, time
import os
import re
-#import time
import pickle
import shelve
+import time
+from random import randint
+from datetime import datetime, date
+from pathlib import Path
from django.conf import settings
from django.template.defaultfilters import slugify
@@ -24,6 +26,9 @@ Parses and imports logbooks in all their wonderful confusion
todo='''
- Put the object store 'trips' and the 'logdataissues' into TROG global object
+- works parsing logbooks but when reading cache files fails on storing data
+ and is slower than parsing from scratch now!
+
- refactor everything with some urgency, esp. LoadLogbookForExpedition()
- Logbooks log.htm exist for 1983, 84, 85, 87, 88, 89 but have no full-working parser,
@@ -141,7 +146,11 @@ def EnterLogIntoDbase(date, place, title, text, trippeople, expedition, logtime_
# but it is a db query which we should try to avoid - rewrite this
#NEW slug for a logbook entry here! Unique id + slugified title fragment
- slug = tid + "_" + slugify(title)[:10].replace('-','_')
+ # working for all cache files 2019-2005, failed on 2004; but fine when parsing logbook and not reading cache. Hmm.
+ if tid is not None:
+ slug = tid + "_" + slugify(title)[:10].replace('-','_')
+ else:
+ slug = str(randint(1000,10000)) + "_" + slugify(title)[:10].replace('-','_')
nonLookupAttribs={'place':place, 'text':text, 'expedition':expedition, 'cave_slug':str(cave), 'slug': slug, 'entry_type':entry_type}
lbo, created=save_carefully(LogbookEntry, lookupAttribs, nonLookupAttribs)
@@ -565,12 +574,14 @@ def LoadLogbookForExpedition(expedition, expect):
logbookfile = os.path.join(expologbase, expedition.year, settings.DEFAULT_LOGBOOK_FILE)
expedition.logbookfile = settings.DEFAULT_LOGBOOK_FILE
parsefunc = settings.DEFAULT_LOGBOOK_PARSER
- cache_filename = logbookfile + ".cache"
- expedition.save()
+ cache_filename = Path(logbookfile + ".cache")
+ if not cache_filename.is_file():
+ print(" - Cache file does not exist \"" + str(cache_filename) +"\"")
+ expedition.save()
+ now = time.time()
+ bad_cache = True # temporarily disable reading the cache - buggy
try:
- bad_cache = False
- now = time.time()
cache_t = os.path.getmtime(cache_filename)
if os.path.getmtime(logbookfile) - cache_t > 2: # at least 2 secs later
bad_cache= True
@@ -582,7 +593,7 @@ def LoadLogbookForExpedition(expedition, expect):
logentries=[]
print(" ! Removed stale or corrupt cache file")
raise
- print(" - Reading cache: " + cache_filename, end='')
+ print(" - Reading cache: " + str(cache_filename), end='')
try:
with open(cache_filename, "rb") as f:
year,n,logentries = pickle.load(f)
@@ -597,8 +608,8 @@ def LoadLogbookForExpedition(expedition, expect):
os.remove(cache_filename)
logentries=[]
raise
- except : # no cache found
- #print(" - No cache \"" + cache_filename +"\"")
+ except :
+ print(" - Cache de-pickle failure \"" + str(cache_filename) +"\"")
try:
file_in = open(logbookfile,'rb')
txt = file_in.read().decode("latin1")
@@ -627,10 +638,11 @@ def LoadLogbookForExpedition(expedition, expect):
i=0
for entrytuple in range(len(logentries)):
date, tripcave, triptitle, text, trippeople, expedition, logtime_underground, entry_type, tripid1 = logentries[i]
+ #print(" - entry tuple " , i, " tid", tripid1)
EnterLogIntoDbase(date, tripcave, triptitle, text, trippeople, expedition, 0,
- entry_type)
+ entry_type, tripid1)
EnterLogIntoObjStore(expedition.year, date, tripcave, triptitle, text, trippeople, logtime_underground,
- entry_type, tripid1, i)
+ entry_type, tripid1, i)
i +=1
SetDatesFromLogbookEntries(expedition)
return len(logentries)
diff --git a/pre-run.sh b/pre-run.sh
index 4153294..876a374 100755
--- a/pre-run.sh
+++ b/pre-run.sh
@@ -24,4 +24,4 @@ echo ""
echo `tail -1 lines-of-python.txt` non-comment lines of python.
echo `tail -1 lines-of-templates.txt` non-comment lines of HTML templates.
-echo 'If you have an error running manage.py, maybe you are not in an activated venv ?
\ No newline at end of file
+echo 'If you have an error running manage.py, maybe you are not in an activated venv ?'
\ No newline at end of file