summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPhilip Sargent <philip.sargent@gmail.com>2025-01-09 21:59:27 +0000
committerPhilip Sargent <philip.sargent@gmail.com>2025-01-09 21:59:27 +0000
commit219b8b792e2a6e1fb72b9e658b06395a50292e59 (patch)
treea9ba93662e00af43afff3c4cb5ba6ad386e4ca8b
parent5b97cd83dd92ff506a40ac784d816a0be4bcc4eb (diff)
downloadtroggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.tar.gz
troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.tar.bz2
troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.zip
AI comments on regexes
-rw-r--r--parsers/logbooks.py12
-rw-r--r--parsers/survex.py54
2 files changed, 66 insertions, 0 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 2ede83f..3d96b3b 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -139,6 +139,18 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
# print(f'# {tid}')
# print(f" - {tid} '{trippeople}' ")
+ """
+ re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople)
+
+ , : The comma character
+ \+ : The plus sign (+); escaped to treat as a literal character
+ &amp; : The literal string "&amp;" (HTML-encoded ampersand)
+ &(?!\w+;) : An ampersand (&) that is not immediately followed by one or more word characters (\w+) and then a semicolon (;)
+ : Uses a negative lookahead assertion (?!...) so that the "&" which starts an HTML entity such as "&nbsp;" is not treated as a delimiter
+ and : The literal string " and " (with spaces before and after)
+
+ This will split the 'trippeople' string at any of these delimiters.
+ """
for tripperson in re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople):
tripperson = tripperson.strip()
# author_u = re.match(r"(?i)<u>(.*?)</u>$", tripperson)
diff --git a/parsers/survex.py b/parsers/survex.py
index 5338148..5f601e1 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -298,7 +298,61 @@ class LoadingSurvex:
rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)")
rx_quotedtitle = re.compile(r'(?i)^"(.*)"$')
+ """
+ Regular expression explanation for rx_starref (generated by Microsoft Copilot)
+
+ (?i) : Case-insensitive flag for the regex
+ ^ : Asserts the position at the start of a line
+ \s* : Matches zero or more whitespace characters
+ \*ref : Matches the literal string "*ref"
+ [\s.:]* : Matches zero or more whitespace characters, periods, or colons
+
+ ((?:19[6789]\d)|(?:20[0123]\d))
+ : Capturing group that matches a year in the 1960s-1990s or 2000s-2030s
+ : (?:...) is a non-capturing group
+ : 19[6789]\d matches years from 1960 to 1999
+ : 20[0123]\d matches years from 2000 to 2039
+
+ \s* : Matches zero or more whitespace characters
+ #? : Matches zero or one "#" character
+ \s* : Matches zero or more whitespace characters
+
+ (X)? : Capturing group that optionally matches the character "X"
+ \s* : Matches zero or more whitespace characters
+
+ (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
+ : .*? matches any character (except newline), as few times as possible (non-greedy)
+ : \d+ matches one or more digits
+ : .*? matches any character (except newline), as few times as possible (non-greedy)
+
+ $ : Asserts the position at the end of a line
+ Regular expression explanation for rx_argsref
+
+ (?i) : Case-insensitive flag for the regex
+ ^ : Asserts the position at the start of a line
+ [\s.:]* : Matches zero or more whitespace characters, periods, or colons
+
+ ((?:19[6789]\d)|(?:20[012345]\d))
+ : Capturing group that matches a year in the 1960s-1990s or 2000s-2050s
+ : (?:...) is a non-capturing group
+ : 19[6789]\d matches years from 1960 to 1999
+ : 20[012345]\d matches years from 2000 to 2059
+
+ \s* : Matches zero or more whitespace characters
+ #? : Matches zero or one "#" character
+ \s* : Matches zero or more whitespace characters
+
+ (X)? : Capturing group that optionally matches the character "X"
+ \s* : Matches zero or more whitespace characters
+
+ (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
+ : .*? matches any character (except newline), as few times as possible (non-greedy)
+ : \d+ matches one or more digits
+ : .*? matches any character (except newline), as few times as possible (non-greedy)
+
+ $ : Asserts the position at the end of a line
+ """
# This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g.