# -*- coding: utf-8 -*-
# parsers/cavetab.py -- load noinfo/CAVETAB2.CSV into the troggle database.
#
# Provenance: added by commit b503d3d588474cc41bffc01eca7654bb8c6f4a42
# ("[svn] Initial troggle checkin", Wed, 13 May 2009), copied from
# http://cucc@cucc.survex.com/svn/trunk/expoweb/troggle/, rev. 8034
# by julian @ 10/26/2008.  This is a development site using Django 1.0.
#
# NOTE: importing this module performs the whole import as a side effect:
# it opens the CSV file, creates the 1623/1626 areas if missing, and saves
# one Cave / Entrance / CaveAndEntrance per qualifying row.

import csv
import os
import re
import sqlite3
import time

import settings
import expo.models as models

## Column indices of CAVETAB2.CSV
KatasterNumber = 0
KatStatusCode = 1
Entrances = 2
UnofficialNumber = 3
MultipleEntrances = 4
AutogenFile = 5
LinkFile = 6
LinkEntrance = 7
Name = 8
UnofficialName = 9
Comment = 10
Area = 11
Explorers = 12
UndergroundDescription = 13
Equipment = 14
QMList = 15
KatasterStatus = 16
References = 17
UndergroundCentreLine = 18
UndergroundDrawnSurvey = 19
SurvexFile = 20
Length = 21
Depth = 22
Extent = 23
Notes = 24
EntranceName = 25
TagPoint = 26
OtherPoint = 27
DescriptionOfOtherPoint = 28
ExactEntrance = 29
TypeOfFix = 30
GPSpreSA = 31
GPSpostSA = 32
Northing = 33
Easting = 34
Altitude = 35
Bearings = 36
Map = 37
Location = 38
Approach = 39
EntranceDescription = 40
PhotoOfLocation = 41
Marking = 42
MarkingComment = 43
Findability = 44
FindabilityComment = 45

cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
caveReader.next()  # Strip out column headers


def save(x, max_attempts=10):
    """Call ``x.save()``, retrying on ``sqlite3.OperationalError``.

    There seems to be an intermittent problem with sqlite and Vista; waiting
    a second and trying again works around it.

    x            -- any object with a ``save()`` method.
    max_attempts -- total number of tries before giving up.  The last
                    ``sqlite3.OperationalError`` is re-raised once the
                    budget is spent, so a permanent database error stops
                    the import instead of retrying without end.
    """
    attempt = 1
    while True:
        try:
            x.save()
            return
        except sqlite3.OperationalError:
            if attempt >= max_attempts:
                raise
            print("Error")
            time.sleep(1)
            attempt += 1


# NOTE(review): the HTML tag literals inside the patterns below (<p>, <ul>,
# <ol>, <li> and their closing forms) were stripped from the extract this
# file was recovered from.  They are reconstructed from the surviving pattern
# fragments and from the star/hash variable names -- confirm against the
# repository copy.
_LEADING_CLOSE_P = re.compile(r"^</p>(.*)")
_TRAILING_OPEN_P = re.compile(r"(.*)<p>$")

# (kind, pattern) in tie-break priority order.  Group 1 of every pattern is
# the text that precedes the tag, so its length says how early the tag occurs.
_LIST_PATTERNS = (
    ("open_ul", re.compile(r"^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("open_ol", re.compile(r"^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("item", re.compile(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("close_ul", re.compile(r"^(\s*)</ul>(.*)$", re.DOTALL)),
    ("close_ol", re.compile(r"^(\s*)</ol>(.*)$", re.DOTALL)),
)
_LIST_MARKERS = {"open_ul": "*", "open_ol": "#"}


def html_to_wiki(text):
    """Rewrite HTML list markup in *text* as wiki-style list lines.

    Anything that is not a ``str`` is returned untouched.  A ``str`` is
    decoded as UTF-8, a leading ``</p>`` / trailing ``<p>`` is dropped, and
    every list item becomes a new line prefixed by one ``*`` per enclosing
    unordered list and one ``#`` per enclosing ordered list.  HTML character
    entities are left as they are (the entity-replacement pass that used to
    precede this step was disabled).

    Returns the converted unicode string.
    """
    if not isinstance(text, str):
        return text
    text = unicode(text, "utf-8")

    text = _LEADING_CLOSE_P.sub(r"\1", text)
    text = _TRAILING_OPEN_P.sub(r"\1", text)

    pieces = []
    lists = ""  # current nesting, e.g. "*#" = ordered list inside a bullet list
    while text:
        # Collect every construct that matches and act on the one that
        # occurs earliest in the remaining text (shortest group 1).
        candidates = []
        for rank, (kind, pattern) in enumerate(_LIST_PATTERNS):
            found = pattern.match(text)
            if found:
                candidates.append((len(found.group(1)), rank, kind, found))
        if not candidates:
            # No list markup left: emit the remainder verbatim.
            pieces.append(text)
            break
        # rank is unique, so tuple comparison never reaches kind/found.
        kind, found = min(candidates)[2:]
        if kind in _LIST_MARKERS:
            lists += _LIST_MARKERS[kind]
            pre, val, post = found.groups()
            pieces.append(pre + "\n" + lists + " " + val)
            text = post
        elif kind == "item":
            pre, val, post = found.groups()
            pieces.append("\n" + lists + " " + val)
            text = post
        else:
            # Closing </ul> or </ol>: leave one nesting level.
            lists = lists[:-1]
            text = found.group(2)
    return "".join(pieces)


def _set_text(args, line, column, field):
    """Store the wiki-converted CSV cell in args[field] if it is non-empty."""
    if line[column]:
        args[field] = html_to_wiki(line[column])


def _set_choice(args, line, column, field, choices):
    """Store choices[cell] in args[field] if the CSV cell is non-empty.

    Raises KeyError when the cell holds a value that *choices* does not list.
    """
    if line[column]:
        args[field] = choices[html_to_wiki(line[column])]


def _set_station(args, line, column, field):
    """Create and save a SurveyStation named after the cell; store it in args."""
    if line[column]:
        station = models.SurveyStation(name=line[column])
        save(station)
        args[field] = station


def _cave_args(line):
    """Build the keyword arguments for models.Cave from one CSV row."""
    args = {}
    _set_text(args, line, KatasterNumber, "kataster_number")
    _set_text(args, line, KatStatusCode, "kataster_code")
    _set_text(args, line, UnofficialNumber, "unofficial_number")
    _set_text(args, line, Name, "official_name")
    # NOTE(review): Comment and Notes both target "notes"; when both cells
    # are filled, Notes (set last) replaces Comment.  Confirm this is wanted.
    _set_text(args, line, Comment, "notes")
    _set_text(args, line, Explorers, "explorers")
    _set_text(args, line, UndergroundDescription, "underground_description")
    _set_text(args, line, Equipment, "equipment")
    _set_text(args, line, KatasterStatus, "kataster_status")
    _set_text(args, line, References, "references")
    _set_text(args, line, UndergroundCentreLine, "underground_centre_line")
    _set_text(args, line, UndergroundDrawnSurvey, "survey")
    _set_text(args, line, Length, "length")
    _set_text(args, line, Depth, "depth")
    _set_text(args, line, Extent, "extent")
    _set_text(args, line, SurvexFile, "survex_file")
    _set_text(args, line, Notes, "notes")
    return args


def _entrance_args(line):
    """Build the keyword arguments for models.Entrance from one CSV row.

    Saves a SurveyStation for every non-empty station column as a side effect.
    """
    args = {}
    _set_text(args, line, EntranceName, 'name')
    _set_text(args, line, Explorers, 'explorers')
    _set_text(args, line, Map, 'map_description')
    _set_text(args, line, Location, 'location_description')
    _set_text(args, line, Approach, 'approach')
    _set_text(args, line, EntranceDescription, 'entrance_description')
    _set_text(args, line, UndergroundDescription, 'underground_description')
    _set_text(args, line, PhotoOfLocation, 'photo')
    _set_choice(args, line, Marking, 'marking', {
        "Paint": "P",
        "Paint (?)": "P?",
        "Tag": "T",
        "Tag (?)": "T?",
        "Retagged": "R",
        "Retag": "R",
        "Spit": "S",
        "Spit (?)": "S?",
        "Unmarked": "U",
        "": "?",
    })
    _set_text(args, line, MarkingComment, 'marking_comment')
    _set_choice(args, line, Findability, 'findability', {
        "Surveyed": "S",
        "Lost": "L",
        "Refindable": "R",
        "": "?",
        "?": "?",
    })
    _set_text(args, line, FindabilityComment, 'findability_description')
    _set_text(args, line, Easting, 'easting')
    _set_text(args, line, Northing, 'northing')
    _set_text(args, line, Altitude, 'alt')
    # NOTE(review): 'other_description' and 'other_station' are written
    # several times below; the last non-empty source wins (GPSpostSA over
    # GPSpreSA over OtherPoint over DescriptionOfOtherPoint).  Confirm.
    _set_text(args, line, DescriptionOfOtherPoint, 'other_description')
    _set_station(args, line, TagPoint, 'tag_station')
    _set_station(args, line, ExactEntrance, 'exact_station')
    _set_station(args, line, OtherPoint, 'other_station')
    _set_text(args, line, OtherPoint, 'other_description')
    if line[GPSpreSA]:
        _set_station(args, line, GPSpreSA, 'other_station')
        args['other_description'] = 'pre selective availability GPS'
    if line[GPSpostSA]:
        _set_station(args, line, GPSpostSA, 'other_station')
        args['other_description'] = 'post selective availability GPS'
    _set_text(args, line, Bearings, 'bearings')
    return args


# Make sure both kataster areas exist before any cave refers to them.
for katArea in ['1623', '1626']:
    if not models.Area.objects.filter(short_name=katArea):
        newArea = models.Area(short_name=katArea)
        save(newArea)
area1626 = models.Area.objects.filter(short_name='1626')[0]
area1623 = models.Area.objects.filter(short_name='1623')[0]

counter = 0  # currently unused
try:
    for line in caveReader:
        if not line:
            # csv.reader yields [] for a blank line; nothing to import.
            continue
        if line[Area] == 'nonexistent':
            continue
        # Used in caves that have multiple entrances, which are not
        # described on separate lines (currently unused).
        entranceLetters = []

        # 'yes' starts a multi-entrance cave, '' is a single-entrance cave:
        # both create the Cave record.
        if line[MultipleEntrances] in ('yes', ''):
            newCave = models.Cave(**_cave_args(line))
            save(newCave)  # must be saved before areas can be attached

            if line[Area]:
                if line[Area] == "1626":
                    newCave.area.add(area1626)
                else:
                    area = models.Area.objects.filter(short_name=line[Area])
                    if area:
                        newArea = area[0]
                    else:
                        # Unknown sub-area: create it under 1623.
                        newArea = models.Area(short_name=line[Area],
                                              parent=area1623)
                        save(newArea)
                    newCave.area.add(newArea)
            else:
                newCave.area.add(area1623)

            save(newCave)

            if line[UnofficialName]:
                newUnofficialName = models.OtherCaveName(
                    cave=newCave, name=line[UnofficialName])
                save(newUnofficialName)

        # '' / 'entrance' / 'last entrance' rows describe an entrance; the
        # latter two attach to the Cave created by the preceding 'yes' row.
        if line[MultipleEntrances] in ('', 'entrance', 'last entrance'):
            newEntrance = models.Entrance(**_entrance_args(line))
            save(newEntrance)

            entrance_letter = line[Entrances] or ''

            newCaveAndEntrance = models.CaveAndEntrance(
                cave=newCave, entrance=newEntrance,
                entrance_letter=entrance_letter)
            save(newCaveAndEntrance)
finally:
    cavetab.close()