66https://github.com/flywire/caption
77Copyright (c) 2020-2023 flywire
88Copyright (c) 2023 sanzoghenzo
9+ Copyright (c) 2023 Hendrik Polczynski
910forked from yafg - https://git.sr.ht/~ferruck/yafg
1011Copyright (c) 2019-2020 Philipp Trommler
1112
1516from markdown .treeprocessors import Treeprocessor
1617from markdown .extensions import Extension
1718from xml .etree import ElementTree
19+ import re
1820
1921
2022class CaptionTreeprocessor (Treeprocessor ):
@@ -28,22 +30,32 @@ def __init__(
2830 self ,
2931 md = None ,
3032 caption_prefix = "" ,
33+ caption_match_re = "" ,
34+ caption_skip_empty = False ,
3135 numbering = True ,
36+ numbering_preserve = False ,
3237 caption_prefix_class = None ,
3338 caption_class = None ,
3439 content_class = None ,
3540 link_process = None ,
3641 caption_top = True ,
3742 ):
3843 self .caption_prefix = caption_prefix
44+ self .caption_match_re = caption_match_re
45+ self ._match_re = re .compile (self .caption_match_re )
46+ self .caption_skip_empty = caption_skip_empty
3947 self .numbering = numbering
48+ self .numbering_preserve = numbering_preserve
4049 self .number = 0
4150 self .caption_prefix_class = caption_prefix_class
4251 self .caption_class = caption_class
4352 self .content_class = content_class
4453 self .link_process = link_process
4554 self .caption_top = caption_top
4655
56+ self ._match_title = ""
57+ self ._match_number = None
58+
4759 def build_content_element (self , par , caption , replace = True ):
4860 """Format the content element containing the caption"""
4961 attrib = par .attrib
@@ -59,7 +71,7 @@ def build_content_element(self, par, caption, replace=True):
5971 else :
6072 par .set ("class" , self .content_class )
6173 if "id" not in attrib :
62- par .set ("id" , "_{}-{}" .format (self .name , self .number ))
74+ par .set ("id" , "_{}-{}" .format (self .name , self .get_number () ))
6375
6476 if replace :
6577 par .text = "\n "
@@ -84,12 +96,12 @@ def build_caption_element(self, title):
8496 caption_prefix_span = ElementTree .SubElement (caption , "span" )
8597 if title :
8698 caption_prefix_span .text = "{} {}:" .format (
87- self .caption_prefix , self .number
99+ self .caption_prefix , self .get_number ()
88100 )
89101 caption_prefix_span .tail = " {}" .format (title )
90102 else :
91103 caption_prefix_span .text = "{} {}" .format (
92- self .caption_prefix , self .number
104+ self .caption_prefix , self .get_number ()
93105 )
94106 caption_prefix_span .tail = ""
95107 if self .caption_prefix_class :
@@ -99,22 +111,62 @@ def build_caption_element(self, title):
def matches(self, par):
    """
    Whether the element tree part matches the object to be captioned.

    The paragraph text is tested against the compiled caption pattern
    (``self._match_re``). On a hit, the ``title`` and ``number`` named
    groups are passed to ``match_valid``, which remembers them and decides
    whether the caption is kept.

    This can be overridden by the subclasses.

    :param par: element whose ``text`` is tested against the pattern.
    :return: ``False`` when the element has no text, the pattern does not
        match, or the pattern defines no ``title`` group; otherwise the
        result of ``match_valid``.
    """
    # Forget the previous paragraph's data first, so a failed match never
    # leaves an old title/number behind for get_title()/get_number().
    self.reset_match()
    if not par.text:
        return False
    found = self._match_re.match(par.text)
    if found is None:
        return False
    # groupdict() instead of group(name): a configured pattern without the
    # named groups (such as the empty default) must not raise IndexError.
    groups = found.groupdict()
    if "title" not in groups:
        # Without a title group there is nothing to caption.
        return False
    # The number group is optional; a missing group is passed on as None.
    return self.match_valid(groups["title"], groups.get("number"))
123+
def reset_match(self):
    """Forget the title and number remembered from the last caption match."""
    self._match_title, self._match_number = "", None
128+
def match_valid(self, title="", number=None):
    """
    Remember the determined title and number of the current match.

    Also decides whether the current match should be kept or skipped.

    :param title: matched caption title; ``None`` is treated as empty.
        Surrounding whitespace is stripped before it is stored.
    :param number: matched caption number, anything ``int()`` accepts.
        Values that cannot be converted (``None``, ``""``, non-digits)
        are stored as ``None``.
    :return: ``True`` when the caption should be created. ``False`` when
        the title is empty and ``caption_skip_empty`` is set; the stored
        match data is reset in that case.
    """
    title = title.strip() if title is not None else ""
    if not title and self.caption_skip_empty:
        self.reset_match()
        return False

    self._match_title = title
    try:
        self._match_number = int(number)
    except (TypeError, ValueError):
        # Clear explicitly so a number stored by an earlier call cannot
        # leak into this match when the caller did not reset first.
        self._match_number = None
    return True
106152
107- def get_title (self , par ):
108- """Title of the element. This will be overriden by the subclasses."""
109- raise NotImplementedError
def get_title(self):
    """
    Return the caption title remembered from the last match.

    Subclasses may override this.
    """
    remembered_title = self._match_title
    return remembered_title
156+
def get_number(self):
    """
    Return the number to show for the matched figure.

    The number remembered from the match is used only when
    ``numbering_preserve`` is enabled and a number was actually stored;
    in every other case the running counter ``self.number`` is returned.
    Subclasses may override this.
    """
    preserved = self._match_number
    if preserved is None or not self.numbering_preserve:
        return self.number
    return preserved
110162
def run(self, root):
    """
    Find and format all captions.

    Every ``<p>`` that is a direct child of ``root`` and passes
    ``matches`` bumps the running counter and is then turned into a
    captioned element.
    """
    for paragraph in root.findall("./p"):
        if self.matches(paragraph):
            # Count first: the caption built below reads the counter.
            self.number += 1
            caption_el = self.build_caption_element(self.get_title())
            self.build_content_element(paragraph, caption_el)
            self.add_caption_to_content(paragraph, caption_el)
@@ -124,11 +176,8 @@ class ListingCaptionTreeProcessor(CaptionTreeprocessor):
124176 name = "listing"
125177 content_tag = "div"
126178
127- def matches (self , par ):
128- return par .text and par .text .startswith ("Listing: " )
129-
130- def get_title (self , par ):
131- return par .text [9 :]
def __init__(self, *args, **kwargs):
    """Pass every argument through unchanged to the parent constructor."""
    super().__init__(*args, **kwargs)
132181
133182
134183class CaptionExtension (Extension ):
@@ -141,7 +190,21 @@ def __init__(self, **kwargs):
141190 "Listing" ,
142191 "The text to show in front of the listing caption." ,
143192 ],
193+ "caption_match_re" : [
194+ r"^Listing\s*?(?P<number>\d*)\:\s*(?P<title>.*)" ,
195+ "The regexp used to match captions."
196+ "The group(number) can match a optional number."
197+ "The group(title) needs to match the title." ,
198+ ],
199+ "caption_skip_empty" : [
200+ False ,
201+ "Dont create captions for empty titles."
202+ ],
144203 "numbering" : [True , "Add the caption number to the prefix." ],
204+ "numbering_preserve" : [
205+ False ,
206+ "Preserve matched numbers from caption match."
207+ ],
145208 "caption_prefix_class" : [
146209 "" ,
147210 "CSS class to add to the caption prefix <span /> element." ,
0 commit comments