summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- core/TESTS/test_imports.py 3
-rw-r--r-- core/utils.py 42
-rw-r--r-- parsers/QMs.py 4
-rw-r--r-- parsers/logbooks.py 3
-rw-r--r-- parsers/people.py 10
-rw-r--r-- parsers/surveys.py 2
-rw-r--r-- unused.py (renamed from utils.py) 43
7 files changed, 54 insertions, 53 deletions
diff --git a/core/TESTS/test_imports.py b/core/TESTS/test_imports.py
index b726ae8..46c590d 100644
--- a/core/TESTS/test_imports.py
+++ b/core/TESTS/test_imports.py
@@ -32,7 +32,7 @@ class SimpleTest(SimpleTestCase):
from troggle.core.models_caves import Cave
def test_import_parsers_surveys(self):
from PIL import Image
- from utils import save_carefully
+ from troggle.core.utils import save_carefully
from functools import reduce
def test_import_parsers_survex(self):
import troggle.settings as settings
@@ -43,7 +43,6 @@ class SimpleTest(SimpleTestCase):
from troggle.core.views.caves import MapLocations
def test_import_parsers_QMs(self):
from troggle.core.models_caves import QM, Cave, LogbookEntry
- from utils import save_carefully
def test_import_parsers_people(self):
from html.parser import HTMLParser
from unidecode import unidecode
diff --git a/core/utils.py b/core/utils.py
index 4dcee68..deca124 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -60,6 +60,48 @@ def chaosmonkey(n):
# print("CHAOS strikes !", file=sys.stderr)
return True
+
+def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
+ """Looks up instance using lookupAttribs and carries out the following:
+ -if instance does not exist in DB: add instance to DB, return (new instance, True)
+ -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
+ -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)
+
+ The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
+ defined in core.models.TroggleModel.
+
+ """
+ try:
+ instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
+ except:
+ print(" !! - SAVE CAREFULLY ===================", objectType)
+ print(" !! - -- objects.get_or_create()")
+ print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs))
+ raise
+ if not created and not instance.new_since_parsing:
+ for k, v in list(nonLookupAttribs.items()): #overwrite the existing attributes from the logbook text (except date and title)
+ setattr(instance, k, v)
+ try:
+ instance.save()
+ except:
+ print(" !! - SAVE CAREFULLY ===================", objectType)
+ print(" !! - -- instance.save()")
+ print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs))
+ raise
+ try:
+ msg = str(instance)
+ except:
+ msg = "FAULT getting __str__ for instance with lookupattribs: {}:".format(lookupAttribs)
+ if created:
+ logging.info(str(instance) + ' was just added to the database for the first time. \n')
+
+ if not created and instance.new_since_parsing:
+ logging.info(str(instance) + " has been modified using Troggle, so the current script left it as is. \n")
+
+ if not created and not instance.new_since_parsing:
+ logging.info(str(instance) + " existed in the database unchanged since last parse. It was overwritten by the current script. \n")
+ return (instance, created)
+
# def get_related_by_wikilinks(wiki_text):
# found=re.findall(settings.QM_PATTERN,wiki_text)
# res=[]
diff --git a/parsers/QMs.py b/parsers/QMs.py
index a95a3cd..7449a8a 100644
--- a/parsers/QMs.py
+++ b/parsers/QMs.py
@@ -7,7 +7,7 @@ from django.conf import settings
from troggle.core.models import DataIssue
from troggle.core.models_caves import QM, Cave, LogbookEntry
-from utils import save_carefully
+from troggle.core.utils import save_carefully
def deleteQMs():
@@ -110,7 +110,7 @@ def parse_KH_QMs(kh, inputFile):
khQMs=open(os.path.join(settings.EXPOWEB, inputFile),'r')
khQMs=khQMs.readlines()
for line in khQMs:
- res=re.search('name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line)
+ res=re.search(r'name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line)
if res:
res=res.groupdict()
year=int(res['year'])
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 00c8b14..7a334d6 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -11,10 +11,9 @@ from django.template.defaultfilters import slugify
from django.utils.timezone import get_current_timezone, make_aware
from troggle.core.models import DataIssue, Expedition
-from troggle.core.utils import TROG
+from troggle.core.utils import TROG, save_carefully
from troggle.core.models_caves import Cave, LogbookEntry, PersonTrip, GetCaveLookup
from parsers.people import GetPersonExpeditionNameLookup
-from utils import save_carefully
#
# When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and
diff --git a/parsers/people.py b/parsers/people.py
index bd67e8a..ecc9272 100644
--- a/parsers/people.py
+++ b/parsers/people.py
@@ -1,11 +1,13 @@
-from django.conf import settings
-import troggle.core.models as models
import csv, re, datetime, os, shutil
-from utils import save_carefully
from html.parser import HTMLParser
from unidecode import unidecode
-'''These functions do not match how the stand-alone script works. So the script produces an HTML file which has
+from django.conf import settings
+
+import troggle.core.models as models
+from troggle.core.utils import save_carefully
+
+'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has
href links to pages in troggle which troggle does not think are right.
The standalone script needs to be renedred defucnt, and all the parsing needs to be in troggle. Either that,
or they should use the same code by importing a module.
diff --git a/parsers/surveys.py b/parsers/surveys.py
index 40a12f0..3207194 100644
--- a/parsers/surveys.py
+++ b/parsers/surveys.py
@@ -8,12 +8,12 @@ import re
import datetime
from PIL import Image
-from utils import save_carefully
from functools import reduce
import settings
from troggle.core.models_survex import SingleScan, ScansFolder, TunnelFile
from troggle.core.models import DataIssue
+from troggle.core.utils import save_carefully
def get_or_create_placeholder(year):
diff --git a/utils.py b/unused.py
index b667e09..e26a212 100644
--- a/utils.py
+++ b/unused.py
@@ -46,48 +46,7 @@ def randomLogbookSentence():
return randSent
-
-def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}):
- """Looks up instance using lookupAttribs and carries out the following:
- -if instance does not exist in DB: add instance to DB, return (new instance, True)
- -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False)
- -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False)
-
- The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field
- defined in core.models.TroggleModel.
-
- """
- try:
- instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs)
- except:
- print(" !! - SAVE CAREFULLY ===================", objectType)
- print(" !! - -- objects.get_or_create()")
- print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs))
- raise
- if not created and not instance.new_since_parsing:
- for k, v in list(nonLookupAttribs.items()): #overwrite the existing attributes from the logbook text (except date and title)
- setattr(instance, k, v)
- try:
- instance.save()
- except:
- print(" !! - SAVE CAREFULLY ===================", objectType)
- print(" !! - -- instance.save()")
- print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs))
- raise
- try:
- msg = str(instance)
- except:
- msg = "FAULT getting __str__ for instance with lookupattribs: {}:".format(lookupAttribs)
- if created:
- logging.info(str(instance) + ' was just added to the database for the first time. \n')
-
- if not created and instance.new_since_parsing:
- logging.info(str(instance) + " has been modified using Troggle, so the current script left it as is. \n")
-
- if not created and not instance.new_since_parsing:
- logging.info(str(instance) + " existed in the database unchanged since last parse. It was overwritten by the current script. \n")
- return (instance, created)
-
+
re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL)
re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL)