Skip to content

Commit f6d6909

Browse files
authored
Merge pull request #10 from hendrikp/new-config-options-match-skip
New config options / features Close #10
2 parents 1cfe3ab + 6522a82 commit f6d6909

10 files changed

Lines changed: 313 additions & 49 deletions

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,11 +145,31 @@ Currently supported options are listed below:
145145
is inserted between the content of `caption_prefix` and the actual figure
146146
number.
147147

148+
* `caption_match_re`:
149+
150+
The regexp used to match captions from the markdown text. It can be used
151+
to match captions from multiple languages at once.
152+
The `group(number)` can match an optional `number`, see `numbering_preserve`.
153+
The `group(title)` needs to match the `title`.
154+
155+
* `caption_skip_empty`:
156+
157+
Whether empty captions should be skipped. This can be used for example to
158+
skip images that are used as icons (captions which have an empty `title`).
159+
148160
* `numbering`:
149161

150162
Adds a caption number like "Figure 1:" in front of the caption. It's
151163
wrapped in a `<span />` for easier styling.
152164

165+
* `numbering_preserve`:
166+
167+
This preserves a number captured by `group(number)` of the
168+
`caption_match_re` option. If no number is present it falls back to
169+
the number generated by the `numbering` option. It is not
170+
recommended to use preserved (manual) and automatic numbering at
171+
the same time in the markdown text, because the numbers may conflict.
172+
153173
* `content_class`:
154174

155175
The CSS class to add to the generated `<content />` element.
@@ -171,7 +191,10 @@ The default values for each type of content is synthesised in the following tabl
171191
| Config | Image | Table | Other |
172192
|------------------------|---------|---------|-----------|
173193
| `caption_prefix` | "Image" | "Table" | "Listing" |
194+
| `caption_match_re` | - (not supported) | `^Table\s*?(?P<number>\d*)\:\s*(?P<title>.*)` | `^Listing\s*?(?P<number>\d*)\:\s*(?P<title>.*)` |
195+
| `caption_skip_empty` | False | False | False |
174196
| `numbering` | False | False | False |
197+
| `numbering_preserve` | False | False | False |
175198
| `content_class` | - | - | listing |
176199
| `caption_class` | - | - | - |
177200
| `caption_prefix_class` | - | - | - |

caption/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# Copyright (c) 2020-2023 flywire
44
# Copyright (c) 2023 sanzoghenzo
5+
# Copyright (c) 2023 Hendrik Polczynski
56
#
67
# SPDX-License-Identifier: GPL-3.0-or-later
78

caption/caption.py

Lines changed: 78 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
https://github.com/flywire/caption
77
Copyright (c) 2020-2023 flywire
88
Copyright (c) 2023 sanzoghenzo
9+
Copyright (c) 2023 Hendrik Polczynski
910
forked from yafg - https://git.sr.ht/~ferruck/yafg
1011
Copyright (c) 2019-2020 Philipp Trommler
1112
@@ -15,6 +16,7 @@
1516
from markdown.treeprocessors import Treeprocessor
1617
from markdown.extensions import Extension
1718
from xml.etree import ElementTree
19+
import re
1820

1921

2022
class CaptionTreeprocessor(Treeprocessor):
@@ -28,22 +30,32 @@ def __init__(
2830
self,
2931
md=None,
3032
caption_prefix="",
33+
caption_match_re="",
34+
caption_skip_empty=False,
3135
numbering=True,
36+
numbering_preserve=False,
3237
caption_prefix_class=None,
3338
caption_class=None,
3439
content_class=None,
3540
link_process=None,
3641
caption_top=True,
3742
):
3843
self.caption_prefix = caption_prefix
44+
self.caption_match_re = caption_match_re
45+
self._match_re = re.compile(self.caption_match_re)
46+
self.caption_skip_empty = caption_skip_empty
3947
self.numbering = numbering
48+
self.numbering_preserve = numbering_preserve
4049
self.number = 0
4150
self.caption_prefix_class = caption_prefix_class
4251
self.caption_class = caption_class
4352
self.content_class = content_class
4453
self.link_process = link_process
4554
self.caption_top = caption_top
4655

56+
self._match_title = ""
57+
self._match_number = None
58+
4759
def build_content_element(self, par, caption, replace=True):
4860
"""Format the content element containing the caption"""
4961
attrib = par.attrib
@@ -59,7 +71,7 @@ def build_content_element(self, par, caption, replace=True):
5971
else:
6072
par.set("class", self.content_class)
6173
if "id" not in attrib:
62-
par.set("id", "_{}-{}".format(self.name, self.number))
74+
par.set("id", "_{}-{}".format(self.name, self.get_number()))
6375

6476
if replace:
6577
par.text = "\n"
@@ -84,12 +96,12 @@ def build_caption_element(self, title):
8496
caption_prefix_span = ElementTree.SubElement(caption, "span")
8597
if title:
8698
caption_prefix_span.text = "{}&nbsp;{}:".format(
87-
self.caption_prefix, self.number
99+
self.caption_prefix, self.get_number()
88100
)
89101
caption_prefix_span.tail = " {}".format(title)
90102
else:
91103
caption_prefix_span.text = "{}&nbsp;{}".format(
92-
self.caption_prefix, self.number
104+
self.caption_prefix, self.get_number()
93105
)
94106
caption_prefix_span.tail = ""
95107
if self.caption_prefix_class:
@@ -99,22 +111,62 @@ def build_caption_element(self, title):
99111
def matches(self, par):
100112
"""
101113
Whether the element tree part matches the object to be captioned.
102-
103-
This will be overriden by the subclasses.
114+
This can be overriden by the subclasses.
115+
"""
116+
self.reset_match()
117+
if par.text:
118+
match_caption = self._match_re.match(par.text)
119+
if match_caption is not None:
120+
return self.match_valid(match_caption.group("title"),
121+
match_caption.group("number"))
122+
return False
123+
124+
def reset_match(self):
125+
"""Resets the last found caption match data."""
126+
self._match_title = ""
127+
self._match_number = None
128+
129+
def match_valid(self, title="", number=None):
104130
"""
105-
raise NotImplementedError
131+
Remember the determined number, title.
132+
Determines if the current match should not be skipped.
133+
"""
134+
valid = True
135+
if title is None:
136+
valid = False
137+
else:
138+
title = title.strip()
139+
valid &= len(title) > 0
140+
valid |= not self.caption_skip_empty
141+
if valid:
142+
self._match_title = title or ""
143+
try:
144+
self._match_number = int(number)
145+
except TypeError:
146+
pass
147+
except ValueError:
148+
pass
149+
else:
150+
self.reset_match()
151+
return valid
106152

107-
def get_title(self, par):
108-
"""Title of the element. This will be overriden by the subclasses."""
109-
raise NotImplementedError
153+
def get_title(self):
154+
"""Title of the matched figure. This can be overriden by the subclasses."""
155+
return self._match_title
156+
157+
def get_number(self):
158+
"""Number of the matched figure. This can be overriden by the subclasses."""
159+
if self.numbering_preserve and self._match_number is not None:
160+
return self._match_number
161+
return self.number
110162

111163
def run(self, root):
112164
"""Find and format all captions."""
113165
for par in root.findall("./p"):
114166
if not self.matches(par):
115167
continue
116168
self.number += 1
117-
title = self.get_title(par)
169+
title = self.get_title()
118170
caption = self.build_caption_element(title)
119171
self.build_content_element(par, caption)
120172
self.add_caption_to_content(par, caption)
@@ -124,11 +176,8 @@ class ListingCaptionTreeProcessor(CaptionTreeprocessor):
124176
name = "listing"
125177
content_tag = "div"
126178

127-
def matches(self, par):
128-
return par.text and par.text.startswith("Listing: ")
129-
130-
def get_title(self, par):
131-
return par.text[9:]
179+
def __init__(self, *args, **kwargs):
180+
super(ListingCaptionTreeProcessor, self).__init__(*args, **kwargs)
132181

133182

134183
class CaptionExtension(Extension):
@@ -141,7 +190,21 @@ def __init__(self, **kwargs):
141190
"Listing",
142191
"The text to show in front of the listing caption.",
143192
],
193+
"caption_match_re": [
194+
r"^Listing\s*?(?P<number>\d*)\:\s*(?P<title>.*)",
195+
"The regexp used to match captions."
196+
"The group(number) can match a optional number."
197+
"The group(title) needs to match the title.",
198+
],
199+
"caption_skip_empty": [
200+
False,
201+
"Dont create captions for empty titles."
202+
],
144203
"numbering": [True, "Add the caption number to the prefix."],
204+
"numbering_preserve": [
205+
False,
206+
"Preserve matched numbers from caption match."
207+
],
145208
"caption_prefix_class": [
146209
"",
147210
"CSS class to add to the caption prefix <span /> element.",

caption/image_caption.py

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
https://github.com/flywire/caption
77
Copyright (c) 2020-2023 flywire
88
Copyright (c) 2023 sanzoghenzo
9+
Copyright (c) 2023 Hendrik Polczynski
910
forked from yafg - https://git.sr.ht/~ferruck/yafg
1011
Copyright (c) 2019-2020 Philipp Trommler
1112
@@ -20,40 +21,26 @@ class ImageCaptionTreeProcessor(CaptionTreeprocessor):
2021
name = "figure"
2122
content_tag = "figure"
2223

23-
def __init__(
24-
self,
25-
md=None,
26-
caption_prefix="",
27-
numbering=True,
28-
caption_prefix_class=None,
29-
caption_class=None,
30-
content_class=None,
31-
strip_title=True,
32-
caption_top=False,
33-
):
34-
super(ImageCaptionTreeProcessor, self).__init__(
35-
md=md,
36-
caption_prefix=caption_prefix,
37-
numbering=numbering,
38-
caption_prefix_class=caption_prefix_class,
39-
caption_class=caption_class,
40-
content_class=content_class,
41-
caption_top=caption_top,
42-
)
43-
self.strip_title = strip_title
24+
def __init__(self, *args, **kwargs):
25+
self.strip_title = kwargs.pop("strip_title", True)
26+
super(ImageCaptionTreeProcessor, self).__init__(*args, **kwargs)
4427

45-
def matches(self, par):
28+
def reset_match(self):
29+
super(ImageCaptionTreeProcessor, self).reset_match()
4630
self._a = None
31+
self._img = None
32+
33+
def matches(self, par):
34+
self.reset_match()
4735
self._img = par.find("./img")
4836
if self._img is None:
4937
self._a = par.find("./a")
50-
if self._a is None:
51-
return False
52-
self._img = self._a.find("./img")
53-
return self._img is not None
38+
if self._a is not None:
39+
self._img = self._a.find("./img")
5440

55-
def get_title(self, par):
56-
return self._img.get("title")
41+
if self._img is not None:
42+
return self.match_valid(self._img.get("title"))
43+
return False
5744

5845
def build_content_element(self, par, caption, replace=True):
5946
super(ImageCaptionTreeProcessor, self).build_content_element(par, caption, replace=replace)
@@ -81,6 +68,10 @@ def __init__(self, **kwargs):
8168
"Figure",
8269
"The text to show in front of the image caption.",
8370
],
71+
"caption_skip_empty": [
72+
False,
73+
"Dont create captions for empty titles."
74+
],
8475
"numbering": [True, "Add the caption number to the prefix."],
8576
"caption_prefix_class": [
8677
"",

caption/table_caption.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# Copyright (c) 2020-2023 flywire
44
# Copyright (c) 2023 sanzoghenzo
5+
# Copyright (c) 2023 Hendrik Polczynski
56
# forked from yafg - https://git.sr.ht/~ferruck/yafg
67
# Copyright (c) 2019 Philipp Trommler
78
#
@@ -17,11 +18,8 @@ class TableCaptionTreeProcessor(CaptionTreeprocessor):
1718
content_tag = "table"
1819
caption_tag = "caption"
1920

20-
def matches(self, par):
21-
return par.text and par.text.startswith("Table: ")
22-
23-
def get_title(self, par):
24-
return par.text[7:]
21+
def __init__(self, *args, **kwargs):
22+
super(TableCaptionTreeProcessor, self).__init__(*args, **kwargs)
2523

2624
def add_caption_to_content(self, content, caption):
2725
if not self.caption_top:
@@ -38,7 +36,7 @@ def run(self, root):
3836
if next_item.tag != self.content_tag:
3937
continue
4038
self.number += 1
41-
title = self.get_title(child)
39+
title = self.get_title()
4240
root.remove(child)
4341
caption = self.build_caption_element(title)
4442

@@ -66,7 +64,21 @@ def __init__(self, **kwargs):
6664
"Table",
6765
"The text to show in front of the table caption.",
6866
],
67+
"caption_match_re": [
68+
r"^Table\s*?(?P<number>\d*)\:\s*(?P<title>.*)",
69+
"The regexp used to match captions."
70+
"The group(number) can match a optional number."
71+
"The group(title) needs to match the title.",
72+
],
73+
"caption_skip_empty": [
74+
False,
75+
"Dont create captions for empty titles."
76+
],
6977
"numbering": [True, "Add the caption number to the prefix."],
78+
"numbering_preserve": [
79+
False,
80+
"Preserve matched numbers from caption match."
81+
],
7082
"caption_prefix_class": [
7183
"",
7284
"CSS class to add to the caption prefix <span /> element.",

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# Copyright (c) 2020-2023 flywire
44
# Copyright (c) 2023 sanzoghenzo
5+
# Copyright (c) 2023 Hendrik Polczynski
56
# forked from yafg - https://git.sr.ht/~ferruck/yafg
67
# Copyright (c) 2019 Philipp Trommler
78
#

test/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,6 @@
22
#
33
# Copyright (c) 2020-2023 flywire
44
# Copyright (c) 2023 sanzoghenzo
5+
# Copyright (c) 2023 Hendrik Polczynski
56
#
67
# SPDX-License-Identifier: GPL-3.0-or-later

0 commit comments

Comments
 (0)