diff options
Diffstat (limited to 'parsers/cavetab.py')
-rw-r--r-- | parsers/cavetab.py | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/parsers/cavetab.py b/parsers/cavetab.py new file mode 100644 index 0000000..bd3d81b --- /dev/null +++ b/parsers/cavetab.py @@ -0,0 +1,272 @@ +# -*- coding: utf-8 -*-
+
+import settings
+import expo.models as models
+import csv
+import time
+import sqlite3
+import re
+import os
+
# Column layout of CAVETAB2.CSV.
# Each name below is the zero-based index of that column in a CSV row; the
# trailing comment gives the resulting index every few entries for orientation.
(
    KatasterNumber,            # 0
    KatStatusCode,
    Entrances,
    UnofficialNumber,
    MultipleEntrances,
    AutogenFile,               # 5
    LinkFile,
    LinkEntrance,
    Name,
    UnofficialName,
    Comment,                   # 10
    Area,
    Explorers,
    UndergroundDescription,
    Equipment,
    QMList,                    # 15
    KatasterStatus,
    References,
    UndergroundCentreLine,
    UndergroundDrawnSurvey,
    SurvexFile,                # 20
    Length,
    Depth,
    Extent,
    Notes,
    EntranceName,              # 25
    TagPoint,
    OtherPoint,
    DescriptionOfOtherPoint,
    ExactEntrance,
    TypeOfFix,                 # 30
    GPSpreSA,
    GPSpostSA,
    Northing,
    Easting,
    Altitude,                  # 35
    Bearings,
    Map,
    Location,
    Approach,
    EntranceDescription,       # 40
    PhotoOfLocation,
    Marking,
    MarkingComment,
    Findability,
    FindabilityComment,        # 45
) = range(46)
+
# Open the cave table exported as CSV under <EXPOWEB>/noinfo/.
# The handle is deliberately kept at module level: csv.reader pulls rows from
# it lazily, so it has to stay open until every row has been consumed.
cavetab = open(os.path.join(settings.EXPOWEB, "noinfo", "CAVETAB2.CSV"))
caveReader = csv.reader(cavetab)
# Skip the column-header row. The builtin next() works on every Python that
# has it (2.6+), unlike the Python-2-only .next() method, and the default
# argument keeps an empty file from raising StopIteration here.
next(caveReader, None)
+
+
def save(x, max_retries=None, delay=1):
    """Call ``x.save()``, retrying while sqlite reports an OperationalError.

    There seems to be an intermittent problem with sqlite on Vista; waiting a
    moment and trying again works around it.

    The retry is a loop rather than recursion, so a database that stays
    locked for a long time cannot exhaust the interpreter's recursion limit.

    x           -- any object with a ``save()`` method.
    max_retries -- maximum number of retries after the first failure;
                   ``None`` (the default) retries forever, as before.
    delay       -- seconds to sleep between attempts (default 1).

    Returns None. Re-raises sqlite3.OperationalError once ``max_retries`` is
    exceeded; any other exception from ``x.save()`` propagates immediately.
    """
    failures = 0
    while True:
        try:
            x.save()
            return
        except sqlite3.OperationalError:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            # Parenthesised form prints the same text on Python 2 and 3.
            print("Error")
            time.sleep(delay)
+
# List-related tags recognised by html_to_wiki. Group 1 of every pattern is
# the text that precedes the tag, so its length tells how far into the
# remaining input the tag sits. The order mirrors the original if/elif chain.
_LIST_PATTERNS = (
    ("ul", re.compile(r"^(.*?)<ul>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("ol", re.compile(r"^(.*?)<ol>\s*<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("li", re.compile(r"^(\s*)<li[^>]*>(.*?)</li>(.*)$", re.DOTALL)),
    ("/ul", re.compile(r"^(\s*)</ul>(.*)$", re.DOTALL)),
    ("/ol", re.compile(r"^(\s*)</ol>(.*)$", re.DOTALL)),
)

# Wiki bullet character contributed by each kind of list that can be opened.
_LIST_MARKERS = {"ul": "*", "ol": "#"}


def html_to_wiki(text, report_tags=False):
    """Convert HTML ``<ul>``/``<ol>`` lists in *text* to wiki-style bullets.

    Byte strings are decoded as UTF-8 first; text strings are processed as
    they are. Anything that is not a string (``None``, numbers, model
    instances, ...) is returned unchanged.

    A leading ``</p>`` and a trailing ``<p>`` are stripped. Every list item
    then becomes a new line starting with one ``*`` (unordered) or ``#``
    (ordered) per level of nesting currently open, followed by a space and
    the item text. No other markup and no HTML entities are translated.

    text        -- the value to convert.
    report_tags -- when true, print the content of every ``<...>`` tag that
                   is still present in the result (a debugging aid; off by
                   default so normal runs stay quiet).

    Returns the converted text, or *text* itself for non-string input.
    Raises UnicodeDecodeError if a byte string is not valid UTF-8.
    """
    if isinstance(text, bytes):
        text = text.decode("utf-8")
    elif not isinstance(text, type(u"")):
        return text

    text = re.sub(r"^</p>(.*)", r"\1", text)
    text = re.sub(r"(.*)<p>$", r"\1", text)

    pieces = []
    lists = ""  # one marker character per list level currently open
    while text:
        hits = []
        for kind, pattern in _LIST_PATTERNS:
            found = pattern.match(text)
            if found:
                hits.append((len(found.group(1)), kind, found))
        if not hits:
            # No list markup left: the remainder is copied through verbatim.
            pieces.append(text)
            break

        # Handle whichever tag comes first in the remaining text. Picking by
        # position matters: an opening tag further along must not win over a
        # closing tag sitting right at the front.
        _, kind, found = min(hits, key=lambda hit: hit[0])
        if kind in _LIST_MARKERS:
            lists += _LIST_MARKERS[kind]
            pre, val, text = found.groups()
            pieces.append(pre + "\n" + lists + " " + val)
        elif kind == "li":
            val, text = found.group(2), found.group(3)
            pieces.append("\n" + lists + " " + val)
        else:
            # </ul> or </ol>: leave the innermost list.
            lists = lists[:-1]
            text = found.group(2)

    out = "".join(pieces)

    if report_tags:
        for tag in re.findall(r"<(.*?)>", out, re.DOTALL):
            print(tag)
    return out
+
# Make sure both top-level kataster areas have a row before any cave is
# attached to them, creating whichever one is missing.
for katArea in ['1623', '1626']:
    if models.Area.objects.filter(short_name=katArea):
        continue
    newArea = models.Area(short_name=katArea)
    save(newArea)

# Keep a handle on each of the two areas for use while importing caves.
area1623 = models.Area.objects.filter(short_name='1623')[0]
area1626 = models.Area.objects.filter(short_name='1626')[0]
+
# Translation of the free-text "Marking" column into the code stored on an
# entrance. The "" key is kept from the original table, but empty cells are
# skipped before the lookup, so it is never consulted.
_MARKING_CODES = {
    "Paint": "P",
    "Paint (?)": "P?",
    "Tag": "T",
    "Tag (?)": "T?",
    "Retagged": "R",
    "Retag": "R",
    "Spit": "S",
    "Spit (?)": "S?",
    "Unmarked": "U",
    "": "?",
}

# Translation of the "Findability" column; the same remark applies to "".
_FINDABILITY_CODES = {
    "Surveyed": "S",
    "Lost": "L",
    "Refindable": "R",
    "": "?",
    "?": "?",
}

# (CSV column, Cave keyword argument) pairs, applied in this order.
# NOTE(review): "notes" appears twice, so a non-empty Notes cell replaces the
# value taken from Comment. Kept as in the original; confirm it is intended.
_CAVE_FIELDS = (
    (KatasterNumber, "kataster_number"),
    (KatStatusCode, "kataster_code"),
    (UnofficialNumber, "unofficial_number"),
    (Name, "official_name"),
    (Comment, "notes"),
    (Explorers, "explorers"),
    (UndergroundDescription, "underground_description"),
    (Equipment, "equipment"),
    (KatasterStatus, "kataster_status"),
    (References, "references"),
    (UndergroundCentreLine, "underground_centre_line"),
    (UndergroundDrawnSurvey, "survey"),
    (Length, "length"),
    (Depth, "depth"),
    (Extent, "extent"),
    (SurvexFile, "survex_file"),
    (Notes, "notes"),
)


def _add_to_args(args, line, CSVname, modelName):
    """Store column CSVname of line in args[modelName] if the cell is non-empty.

    The cell is passed through html_to_wiki before being stored.
    """
    if line[CSVname]:
        args[modelName] = html_to_wiki(line[CSVname])


def _add_to_args_via_dict(args, line, CSVname, modelName, dictionary):
    """Like _add_to_args, but store dictionary[<converted cell>] instead.

    Raises KeyError when a non-empty cell holds a value the dictionary does
    not know.
    """
    if line[CSVname]:
        args[modelName] = dictionary[html_to_wiki(line[CSVname])]


def _add_to_args_survey_station(args, line, CSVname, modelName):
    """Create and save a SurveyStation named after a non-empty cell.

    The saved station object itself is stored in args[modelName].
    """
    if line[CSVname]:
        surveyPoint = models.SurveyStation(name=line[CSVname])
        save(surveyPoint)
        # The original routed the station through html_to_wiki, which hands
        # non-string values back untouched, so it is stored directly.
        args[modelName] = surveyPoint


# Walk the remaining CSV rows. A row describes a cave, an entrance, or both,
# depending on its MultipleEntrances cell:
#   'yes'                        -> cave only (entrance rows follow)
#   ''                           -> cave with its single entrance
#   'entrance' / 'last entrance' -> entrance of the most recently created cave
# The file is closed once the rows are exhausted, or if the import fails.
try:
    for line in caveReader:
        if line[Area] == 'nonexistent':
            continue

        if line[MultipleEntrances] in ('yes', ''):
            args = {}
            for CSVname, modelName in _CAVE_FIELDS:
                _add_to_args(args, line, CSVname, modelName)

            # Saved once before touching the area relation and once after.
            newCave = models.Cave(**args)
            save(newCave)

            if line[Area]:
                if line[Area] == "1626":
                    newCave.area.add(area1626)
                else:
                    # Any other area name becomes a sub-area of 1623,
                    # created on first use.
                    area = models.Area.objects.filter(short_name=line[Area])
                    if area:
                        newArea = area[0]
                    else:
                        newArea = models.Area(short_name=line[Area],
                                              parent=area1623)
                        save(newArea)
                    newCave.area.add(newArea)
            else:
                newCave.area.add(area1623)

            save(newCave)

            if line[UnofficialName]:
                newUnofficialName = models.OtherCaveName(
                    cave=newCave, name=line[UnofficialName])
                save(newUnofficialName)

        if line[MultipleEntrances] in ('', 'entrance', 'last entrance'):
            args = {}
            _add_to_args(args, line, EntranceName, 'name')
            _add_to_args(args, line, Explorers, 'explorers')
            _add_to_args(args, line, Map, 'map_description')
            _add_to_args(args, line, Location, 'location_description')
            _add_to_args(args, line, Approach, 'approach')
            _add_to_args(args, line, EntranceDescription,
                         'entrance_description')
            _add_to_args(args, line, UndergroundDescription,
                         'underground_description')
            _add_to_args(args, line, PhotoOfLocation, 'photo')
            _add_to_args_via_dict(args, line, Marking, 'marking',
                                  _MARKING_CODES)
            _add_to_args(args, line, MarkingComment, 'marking_comment')
            _add_to_args_via_dict(args, line, Findability, 'findability',
                                  _FINDABILITY_CODES)
            _add_to_args(args, line, FindabilityComment,
                         'findability_description')
            _add_to_args(args, line, Easting, 'easting')
            _add_to_args(args, line, Northing, 'northing')
            _add_to_args(args, line, Altitude, 'alt')
            _add_to_args(args, line, DescriptionOfOtherPoint,
                         'other_description')
            _add_to_args_survey_station(args, line, TagPoint, 'tag_station')
            _add_to_args_survey_station(args, line, ExactEntrance,
                                        'exact_station')
            _add_to_args_survey_station(args, line, OtherPoint,
                                        'other_station')
            # NOTE(review): this replaces the 'other_description' taken from
            # DescriptionOfOtherPoint with the OtherPoint cell itself. Kept
            # as in the original; it looks like a copy-paste slip - confirm.
            _add_to_args(args, line, OtherPoint, 'other_description')
            # A GPS fix, when present, takes over the "other" station and its
            # description; the post-SA fix wins if both cells are filled.
            # Stations saved earlier for the same key stay in the database.
            if line[GPSpreSA]:
                _add_to_args_survey_station(args, line, GPSpreSA,
                                            'other_station')
                args['other_description'] = 'pre selective availability GPS'
            if line[GPSpostSA]:
                _add_to_args_survey_station(args, line, GPSpostSA,
                                            'other_station')
                args['other_description'] = 'post selective availability GPS'
            _add_to_args(args, line, Bearings, 'bearings')

            newEntrance = models.Entrance(**args)
            save(newEntrance)

            if line[Entrances]:
                entrance_letter = line[Entrances]
            else:
                entrance_letter = ''

            # newCave is the cave created by this row or, for 'entrance' and
            # 'last entrance' rows, by the closest preceding cave row.
            newCaveAndEntrance = models.CaveAndEntrance(
                cave=newCave, entrance=newEntrance,
                entrance_letter=entrance_letter)
            save(newCaveAndEntrance)
finally:
    cavetab.close()
\ No newline at end of file |