diff options
-rw-r--r-- | core/TESTS/test_imports.py | 3 | ||||
-rw-r--r-- | core/utils.py | 42 | ||||
-rw-r--r-- | parsers/QMs.py | 4 | ||||
-rw-r--r-- | parsers/logbooks.py | 3 | ||||
-rw-r--r-- | parsers/people.py | 10 | ||||
-rw-r--r-- | parsers/surveys.py | 2 | ||||
-rw-r--r-- | unused.py (renamed from utils.py) | 43 |
7 files changed, 54 insertions, 53 deletions
diff --git a/core/TESTS/test_imports.py b/core/TESTS/test_imports.py index b726ae8..46c590d 100644 --- a/core/TESTS/test_imports.py +++ b/core/TESTS/test_imports.py @@ -32,7 +32,7 @@ class SimpleTest(SimpleTestCase): from troggle.core.models_caves import Cave def test_import_parsers_surveys(self): from PIL import Image - from utils import save_carefully + from troggle.core.utils import save_carefully from functools import reduce def test_import_parsers_survex(self): import troggle.settings as settings @@ -43,7 +43,6 @@ class SimpleTest(SimpleTestCase): from troggle.core.views.caves import MapLocations def test_import_parsers_QMs(self): from troggle.core.models_caves import QM, Cave, LogbookEntry - from utils import save_carefully def test_import_parsers_people(self): from html.parser import HTMLParser from unidecode import unidecode diff --git a/core/utils.py b/core/utils.py index 4dcee68..deca124 100644 --- a/core/utils.py +++ b/core/utils.py @@ -60,6 +60,48 @@ def chaosmonkey(n): # print("CHAOS strikes !", file=sys.stderr) return True + +def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}): + """Looks up instance using lookupAttribs and carries out the following: + -if instance does not exist in DB: add instance to DB, return (new instance, True) + -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False) + -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False) + + The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field + defined in core.models.TroggleModel. + + """ + try: + instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs) + except: + print(" !! - SAVE CAREFULLY ===================", objectType) + print(" !! - -- objects.get_or_create()") + print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs)) + raise + if not created and not instance.new_since_parsing: + for k, v in list(nonLookupAttribs.items()): #overwrite the existing attributes from the logbook text (except date and title) + setattr(instance, k, v) + try: + instance.save() + except: + print(" !! - SAVE CAREFULLY ===================", objectType) + print(" !! - -- instance.save()") + print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs)) + raise + try: + msg = str(instance) + except: + msg = "FAULT getting __str__ for instance with lookupattribs: {}:".format(lookupAttribs) + if created: + logging.info(str(instance) + ' was just added to the database for the first time. \n') + + if not created and instance.new_since_parsing: + logging.info(str(instance) + " has been modified using Troggle, so the current script left it as is. \n") + + if not created and not instance.new_since_parsing: + logging.info(str(instance) + " existed in the database unchanged since last parse. It was overwritten by the current script. \n") + return (instance, created) + # def get_related_by_wikilinks(wiki_text): # found=re.findall(settings.QM_PATTERN,wiki_text) # res=[] diff --git a/parsers/QMs.py b/parsers/QMs.py index a95a3cd..7449a8a 100644 --- a/parsers/QMs.py +++ b/parsers/QMs.py @@ -7,7 +7,7 @@ from django.conf import settings from troggle.core.models import DataIssue from troggle.core.models_caves import QM, Cave, LogbookEntry -from utils import save_carefully +from troggle.core.utils import save_carefully def deleteQMs(): @@ -110,7 +110,7 @@ def parse_KH_QMs(kh, inputFile): khQMs=open(os.path.join(settings.EXPOWEB, inputFile),'r') khQMs=khQMs.readlines() for line in khQMs: - res=re.search('name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line) + res=re.search(r'name=\"[CB](?P<year>\d*)-(?P<cave>\d*)-(?P<number>\d*).*</a> (?P<grade>[ABDCV])<dd>(?P<description>.*)\[(?P<nearest_station>.*)\]',line) if res: res=res.groupdict() year=int(res['year']) diff --git a/parsers/logbooks.py b/parsers/logbooks.py index 00c8b14..7a334d6 100644 --- a/parsers/logbooks.py +++ b/parsers/logbooks.py @@ -11,10 +11,9 @@ from django.template.defaultfilters import slugify from django.utils.timezone import get_current_timezone, make_aware from troggle.core.models import DataIssue, Expedition -from troggle.core.utils import TROG +from troggle.core.utils import TROG, save_carefully from troggle.core.models_caves import Cave, LogbookEntry, PersonTrip, GetCaveLookup from parsers.people import GetPersonExpeditionNameLookup -from utils import save_carefully # # When we edit logbook entries, allow a "?" after any piece of data to say we've frigged it and diff --git a/parsers/people.py b/parsers/people.py index bd67e8a..ecc9272 100644 --- a/parsers/people.py +++ b/parsers/people.py @@ -1,11 +1,13 @@ -from django.conf import settings -import troggle.core.models as models import csv, re, datetime, os, shutil -from utils import save_carefully from html.parser import HTMLParser from unidecode import unidecode -'''These functions do not match how the stand-alone script works. So the script produces an HTML file which has +from django.conf import settings + +import troggle.core.models as models +from troggle.core.utils import save_carefully + +'''These functions do not match how the stand-alone folk script works. So the script produces an HTML file which has href links to pages in troggle which troggle does not think are right. The standalone script needs to be renedred defucnt, and all the parsing needs to be in troggle. Either that, or they should use the same code by importing a module. diff --git a/parsers/surveys.py b/parsers/surveys.py index 40a12f0..3207194 100644 --- a/parsers/surveys.py +++ b/parsers/surveys.py @@ -8,12 +8,12 @@ import re import datetime from PIL import Image -from utils import save_carefully from functools import reduce import settings from troggle.core.models_survex import SingleScan, ScansFolder, TunnelFile from troggle.core.models import DataIssue +from troggle.core.utils import save_carefully def get_or_create_placeholder(year): @@ -46,48 +46,7 @@ def randomLogbookSentence(): return randSent - -def save_carefully(objectType, lookupAttribs={}, nonLookupAttribs={}): - """Looks up instance using lookupAttribs and carries out the following: - -if instance does not exist in DB: add instance to DB, return (new instance, True) - -if instance exists in DB and was modified using Troggle: do nothing, return (existing instance, False) - -if instance exists in DB and was not modified using Troggle: overwrite instance, return (instance, False) - - The checking is accomplished using Django's get_or_create and the new_since_parsing boolean field - defined in core.models.TroggleModel. - - """ - try: - instance, created = objectType.objects.get_or_create(defaults=nonLookupAttribs, **lookupAttribs) - except: - print(" !! - SAVE CAREFULLY ===================", objectType) - print(" !! - -- objects.get_or_create()") - print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs)) - raise - if not created and not instance.new_since_parsing: - for k, v in list(nonLookupAttribs.items()): #overwrite the existing attributes from the logbook text (except date and title) - setattr(instance, k, v) - try: - instance.save() - except: - print(" !! - SAVE CAREFULLY ===================", objectType) - print(" !! - -- instance.save()") - print(" !! - lookupAttribs:{}\n !! - nonLookupAttribs:{}".format(lookupAttribs,nonLookupAttribs)) - raise - try: - msg = str(instance) - except: - msg = "FAULT getting __str__ for instance with lookupattribs: {}:".format(lookupAttribs) - if created: - logging.info(str(instance) + ' was just added to the database for the first time. \n') - - if not created and instance.new_since_parsing: - logging.info(str(instance) + " has been modified using Troggle, so the current script left it as is. \n") - - if not created and not instance.new_since_parsing: - logging.info(str(instance) + " existed in the database unchanged since last parse. It was overwritten by the current script. \n") - return (instance, created) - + re_body = re.compile(r"\<body[^>]*\>(.*)\</body\>", re.DOTALL) re_title = re.compile(r"\<title[^>]*\>(.*)\</title\>", re.DOTALL) |