AI comments on regexes

author: Philip Sargent <philip.sargent@gmail.com> 2025-01-09 21:59:27 +0000
committer: Philip Sargent <philip.sargent@gmail.com> 2025-01-09 21:59:27 +0000
commit: 219b8b792e2a6e1fb72b9e658b06395a50292e59 (patch)
tree: a9ba93662e00af43afff3c4cb5ba6ad386e4ca8b
parent: 5b97cd83dd92ff506a40ac784d816a0be4bcc4eb (diff)
download: troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.tar.gz
troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.tar.bz2
troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.zip
2 files changed, 66 insertions, 0 deletions
diff --git a/parsers/logbooks.py b/parsers/logbooks.py
index 2ede83f..3d96b3b 100644
--- a/parsers/logbooks.py
+++ b/parsers/logbooks.py
@@ -139,6 +139,18 @@ def GetTripPersons(trippeople, expedition, logtime_underground, tid=None):
     # print(f'# {tid}')
     # print(f" -  {tid} '{trippeople}'  ")
 
+    """
+    re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople)
+
+    ,             : The comma character
+    \+            : The plus sign (+); escaped to treat as a literal character
+    &amp;         : The literal string "&amp;" (HTML-encoded ampersand)
+    &(?!\w+;)     : An ampersand (&) not followed by one or more word characters (\w+) and a semicolon (;)
+                    : Uses negative lookahead assertion (?!...) to ensure it's not part of an HTML entity like "&nbsp;"
+     and          : The literal string " and " (with spaces before and after)
+
+    This will split the 'trippeople' string at any of these delimiters.
+    """
     for tripperson in re.split(r",|\+|&amp;|&(?!\w+;)| and ", trippeople):
         tripperson = tripperson.strip()
         # author_u = re.match(r"(?i)<u>(.*?)</u>$", tripperson)
diff --git a/parsers/survex.py b/parsers/survex.py
index 5338148..5f601e1 100644
--- a/parsers/survex.py
+++ b/parsers/survex.py
@@ -298,7 +298,61 @@ class LoadingSurvex:
     rx_commteam = re.compile(r"(?i)\s*(Messteam|Zeichner)\s*[:]?(.*)")
     rx_quotedtitle = re.compile(r'(?i)^"(.*)"$')
     
+    """
+    Regular expression explanation for rx_starref (MS CoPilot)
+
+    (?i)        : Case-insensitive flag for the regex
+    ^           : Asserts the position at the start of a line
+    \s*         : Matches zero or more whitespace characters
+    \*ref       : Matches the literal string "*ref"
+    [\s.:]*     : Matches zero or more whitespace characters, periods, or colons
+
+    ((?:19[6789]\d)|(?:20[0123]\d))
+                 : Capturing group that matches a year in the 1960s-1990s or 2000s-2030s
+                 : (?:...) is a non-capturing group
+                 : 19[6789]\d matches years from 1960 to 1999
+                 : 20[0123]\d matches years from 2000 to 2039
+
+    \s*         : Matches zero or more whitespace characters
+    #?          : Matches zero or one "#" character
+    \s*         : Matches zero or more whitespace characters
+
+    (X)?        : Capturing group that optionally matches the character "X"
+    \s*         : Matches zero or more whitespace characters
+
+    (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+                 : \d+ matches one or more digits
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+
+    $           : Asserts the position at the end of a line
 
+    Regular expression explanation for rx_argsref
+
+    (?i)        : Case-insensitive flag for the regex
+    ^           : Asserts the position at the start of a line
+    [\s.:]*     : Matches zero or more whitespace characters, periods, or colons
+
+    ((?:19[6789]\d)|(?:20[012345]\d))
+                 : Capturing group that matches a year in the 1960s-1990s or 2000s-2050s
+                 : (?:...) is a non-capturing group
+                 : 19[6789]\d matches years from 1960 to 1999
+                 : 20[012345]\d matches years from 2000 to 2059
+
+    \s*         : Matches zero or more whitespace characters
+    #?          : Matches zero or one "#" character
+    \s*         : Matches zero or more whitespace characters
+
+    (X)?        : Capturing group that optionally matches the character "X"
+    \s*         : Matches zero or more whitespace characters
+
+    (.*?\d+.*?) : Capturing group that matches any character sequence containing at least one digit
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+                 : \d+ matches one or more digits
+                 : .*? matches any character (except newline), as few times as possible (non-greedy)
+
+    $           : Asserts the position at the end of a line
+    """
     
     
     # This interprets the survex "*data normal" command which sets out the order of the fields in the data, e.g.
author	Philip Sargent <philip.sargent@gmail.com>	2025-01-09 21:59:27 +0000
committer	Philip Sargent <philip.sargent@gmail.com>	2025-01-09 21:59:27 +0000
commit	219b8b792e2a6e1fb72b9e658b06395a50292e59 (patch)
tree	a9ba93662e00af43afff3c4cb5ba6ad386e4ca8b
parent	5b97cd83dd92ff506a40ac784d816a0be4bcc4eb (diff)
download	troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.tar.gz troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.tar.bz2 troggle-219b8b792e2a6e1fb72b9e658b06395a50292e59.zip