diff --git a/docs/api/enum/XlChartType.rst b/docs/api/enum/XlChartType.rst index 5326ad6bb..75de42148 100644 --- a/docs/api/enum/XlChartType.rst +++ b/docs/api/enum/XlChartType.rst @@ -236,19 +236,19 @@ WATERFALL Waterfall (ChartEx). Office 2016+. Write + round-trip supported. TREEMAP - Treemap (ChartEx). Office 2016+. Round-trip preservation only. + Treemap (ChartEx). Office 2016+. Write + round-trip supported. SUNBURST - Sunburst (ChartEx). Office 2016+. Round-trip preservation only. + Sunburst (ChartEx). Office 2016+. Write + round-trip supported. FUNNEL - Funnel (ChartEx). Office 2016+. Round-trip preservation only. + Funnel (ChartEx). Office 2016+. Write + round-trip supported. BOX_WHISKER - Box & Whisker (ChartEx). Office 2016+. Round-trip preservation only. + Box & Whisker (ChartEx). Office 2016+. Write + round-trip supported. HISTOGRAM - Histogram (ChartEx). Office 2016+. Round-trip preservation only. + Histogram (ChartEx). Office 2016+. Write + round-trip supported. PARETO - Pareto (ChartEx). Office 2016+. Round-trip preservation only. + Pareto (ChartEx). Office 2016+. Write + round-trip supported. diff --git a/docs/user/charts.rst b/docs/user/charts.rst index e5d11fc01..dfb5672d8 100644 --- a/docs/user/charts.rst +++ b/docs/user/charts.rst @@ -274,16 +274,11 @@ namespace (``cx:``, the *chart extensions* or "chartEx" part) rather than the classic ``c:`` chart tree. 
|pp| supports this family with two distinct capability levels: -================== ============================ ========================= -Capability Chart types What you can do -================== ============================ ========================= -**Write** ``WATERFALL`` Author a brand-new chart -**Round-trip** ``WATERFALL``, ``TREEMAP``, Open a deck that already -only ``SUNBURST``, ``FUNNEL``, contains the chart, edit - ``BOX_WHISKER``, unrelated slides, and - ``HISTOGRAM``, ``PARETO`` save without corrupting - the chartEx part -================== ============================ ========================= +As of Phase C (issue #14) **all** ChartEx types are write-capable: +``WATERFALL``, ``TREEMAP``, ``SUNBURST``, ``FUNNEL``, ``BOX_WHISKER``, +``HISTOGRAM``, and ``PARETO`` can each be authored with ``add_chart`` and +also round-trip (open a deck that already contains the chart, edit unrelated +slides, save without corrupting the chartEx part). Authoring a waterfall chart uses the dedicated :class:`~pptx.chart.data.WaterfallChartData` container:: @@ -303,11 +298,39 @@ The returned |GraphicFrame| reports ``graphic_frame.has_chartex == True`` and its :attr:`~pptx.shapes.graphfrm.GraphicFrame.chartex` property returns a ChartEx proxy. (Classic charts continue to use ``.has_chart`` / ``.chart``.) -The remaining ``cx:`` types currently have **round-trip preservation only** — -``add_chart`` raises ``NotImplementedError`` for them, but a deck authored in -PowerPoint that already contains a treemap, sunburst, etc. will read, modify, -and save without damaging the existing chart. Writer support for those types -is tracked as a follow-up to issue #14. +The other ChartEx types use purpose-built data containers from +``pptx.chart.data``: + +- ``TreemapChartData`` / ``SunburstChartData`` — hierarchical; call + ``add_level(labels)`` outermost-first, then ``add_series(name, values)`` + for the leaf values. 
+- ``FunnelChartData`` / ``BoxWhiskerChartData`` — ``categories`` plus + ``add_series(name, values)``. +- ``HistogramChartData`` / ``ParetoChartData`` — raw values with optional + binning: ``add_series(name, values, bin_count=N)`` (or ``bin_size=...``). + +For example, a treemap:: + + from pptx.chart.data import TreemapChartData + + chart_data = TreemapChartData() + chart_data.add_level(['Tech', 'Tech', 'Retail', 'Retail']) + chart_data.add_level(['Phones', 'Laptops', 'Apparel', 'Food']) + chart_data.add_series('Revenue', (50, 30, 20, 15)) + + slide.shapes.add_chart( + XL_CHART_TYPE.TREEMAP, x, y, cx, cy, chart_data + ) + +Updating the data of an existing ChartEx chart (any type) uses +:meth:`~pptx.chart.chartex.ChartEx.replace_data`, parallel to the classic +``Chart.replace_data``:: + + graphic_frame.chartex.replace_data(new_chart_data) + +``replace_data`` rewrites the chart data and embedded workbook in place — the +chartEx part and its slide relationship are unchanged — and raises +``ValueError`` if the new data's chart type doesn't match the existing chart. The full set of ``cx:`` enum members is documented under :ref:`XlChartType`. 
diff --git a/features/cht-chartex-phasec.feature b/features/cht-chartex-phasec.feature new file mode 100644 index 000000000..d924b443d --- /dev/null +++ b/features/cht-chartex-phasec.feature @@ -0,0 +1,45 @@ +Feature: ChartEx Phase-C writers and replace_data + In order to author every Office-2016 modern chart type + As a developer using python-pptx + I need each ChartEx type to write, round-trip, and support replace_data + + + Scenario Outline: Each ChartEx type writes and round-trips + Given a blank slide + When I add a ChartEx <member-name> chart + Then the slide has a ChartEx graphic frame + And the saved package contains a ChartEx part + And the ChartEx round-trips preserving its part + + Examples: ChartEx writable types + | member-name | + | WATERFALL | + | TREEMAP | + | SUNBURST | + | FUNNEL | + | BOX_WHISKER | + | HISTOGRAM | + | PARETO | + + + Scenario Outline: replace_data updates each ChartEx type and round-trips + Given a blank slide + When I add a ChartEx <member-name> chart + And I replace the ChartEx <member-name> data with a smaller dataset + Then the reopened ChartEx reflects the replaced data + And the ChartEx round-trips preserving its part + + Examples: replace_data types + | member-name | + | WATERFALL | + | TREEMAP | + | SUNBURST | + | FUNNEL | + | HISTOGRAM | + | PARETO | + + + Scenario: replace_data rejects a chart-type mismatch + Given a blank slide + When I attempt to replace a FUNNEL ChartEx with HISTOGRAM data + Then a chart-type mismatch error is raised diff --git a/features/cht-chartex-types.feature b/features/cht-chartex-types.feature index 72f0b7b38..5f876aca1 100644 --- a/features/cht-chartex-types.feature +++ b/features/cht-chartex-types.feature @@ -1,17 +1,18 @@ Feature: ChartEx chart type members In order to use the ChartEx chart type enumeration safely As a developer using python-pptx - I need deferred members to fail explicitly and modern members to exist in a private range + I need every modern member to exist in a private range and be writable - Scenario Outline: 
Writer-deferred ChartEx types fail through add_chart + Scenario Outline: Every ChartEx type is writable via add_chart (Phase C) Given a blank slide - And ChartEx waterfall data case q4-total - When I attempt to add deferred ChartEx type <member-name> - Then adding deferred ChartEx type <member-name> raises NotImplementedError + When I add a ChartEx <member-name> chart + Then the slide has a ChartEx graphic frame + And the saved package contains a ChartEx part - Examples: writer-deferred ChartEx members + Examples: ChartEx writable members | member-name | + | WATERFALL | | TREEMAP | | SUNBURST | | FUNNEL | diff --git a/features/steps/chartex_phasec.py b/features/steps/chartex_phasec.py new file mode 100644 index 000000000..636f83a29 --- /dev/null +++ b/features/steps/chartex_phasec.py @@ -0,0 +1,159 @@ +"""Gherkin step implementations for ChartEx Phase-C features (issue #14): +writers for Treemap/Sunburst/Funnel/BoxWhisker/Histogram/Pareto + replace_data. +""" + +from __future__ import annotations + +import io +import zipfile + +from behave import then, when + +from pptx import Presentation +from pptx.chart.data import ( + BoxWhiskerChartData, + FunnelChartData, + HistogramChartData, + ParetoChartData, + SunburstChartData, + TreemapChartData, + WaterfallChartData, +) +from pptx.enum.chart import XL_CHART_TYPE +from pptx.util import Inches + + +def _data_for(member_name): + m = member_name.strip() + if m == "WATERFALL": + cd = WaterfallChartData() + cd.categories = ["Q1", "Q2", "Total"] + cd.add_series("R", [10, 20, 30], subtotals=[2]) + return XL_CHART_TYPE.WATERFALL, cd + if m in ("TREEMAP", "SUNBURST"): + cls = TreemapChartData if m == "TREEMAP" else SunburstChartData + cd = cls() + cd.add_level(["A", "A", "B", "B"]) + cd.add_level(["a1", "a2", "b1", "b2"]) + cd.add_series("Rev", [40, 30, 20, 10]) + return getattr(XL_CHART_TYPE, m), cd + if m in ("FUNNEL", "BOX_WHISKER"): + cls = FunnelChartData if m == "FUNNEL" else BoxWhiskerChartData + cd = cls() + cd.categories = ["Leads", "Qualified", "Won"] + 
cd.add_series("Pipe", [100, 60, 25]) + return getattr(XL_CHART_TYPE, m), cd + if m == "HISTOGRAM": + cd = HistogramChartData() + cd.add_series("Scores", [55, 62, 71, 73, 88, 91, 64, 78], bin_count=4) + return XL_CHART_TYPE.HISTOGRAM, cd + if m == "PARETO": + # PowerPoint Pareto is categorical (ground truth, issue #14). + cd = ParetoChartData() + cd.categories = ["Defect A", "Defect B", "Defect C", "Defect D"] + cd.add_series("Count", [45, 30, 15, 10]) + return XL_CHART_TYPE.PARETO, cd + raise KeyError(m) + + +def _cx_parts(blob): + z = zipfile.ZipFile(io.BytesIO(blob)) + return [n for n in z.namelist() if "chartEx" in n and n.endswith(".xml")] + + +# when ==================================================== + + +@when("I add a ChartEx {member_name} chart") +def when_i_add_a_chartex_member_chart(context, member_name): + ct, cd = _data_for(member_name) + context.cx_member = member_name.strip() + context.cx_data = cd + context.cx_frame = context.slide.shapes.add_chart( + ct, Inches(1), Inches(1), Inches(6), Inches(4), cd + ) + + +@when("I replace the ChartEx {member_name} data with a smaller dataset") +def when_i_replace_chartex_data(context, member_name): + _, new_cd = _data_for(member_name) + # shrink it so the change is observable + if hasattr(new_cd, "levels"): + nd = type(new_cd)() + nd.add_level(["Z", "Z"]) + nd.add_level(["z1", "z2"]) + nd.add_series("New", [7, 3]) + elif hasattr(new_cd, "categories"): + nd = type(new_cd)() + nd.categories = ["Only"] + nd.add_series("New", [42]) + else: + nd = type(new_cd)() + nd.add_series("New", [1, 2, 3, 4], bin_count=2) + context.cx_replacement = nd + context.cx_frame.chartex.replace_data(nd) + + +@when("I attempt to replace a {a_type} ChartEx with {b_type} data") +def when_attempt_mismatch_replace(context, a_type, b_type): + ct, cd = _data_for(a_type) + frame = context.slide.shapes.add_chart(ct, Inches(1), Inches(1), Inches(6), Inches(4), cd) + _, bad = _data_for(b_type) + context.cx_replace_error = None + try: + 
frame.chartex.replace_data(bad) + except ValueError as e: + context.cx_replace_error = e + + +# then ==================================================== + + +@then("the slide has a ChartEx graphic frame") +def then_slide_has_a_chartex_frame(context): + frames = [s for s in context.slide.shapes if getattr(s, "has_chartex", False)] + assert len(frames) >= 1, "no ChartEx graphic frame on slide" + + +@then("the saved package contains a ChartEx part") +def then_saved_package_contains_chartex_part(context): + buf = io.BytesIO() + context.prs.save(buf) + assert _cx_parts(buf.getvalue()), "no chartEx part in saved package" + + +@then("the ChartEx round-trips preserving its part") +def then_chartex_round_trips(context): + buf = io.BytesIO() + context.prs.save(buf) + before = sorted(_cx_parts(buf.getvalue())) + prs2 = Presentation(io.BytesIO(buf.getvalue())) + prs2.slides.add_slide(prs2.slide_layouts[0]) # unrelated edit (layout 0 always exists) + buf2 = io.BytesIO() + prs2.save(buf2) + after = sorted(_cx_parts(buf2.getvalue())) + assert before and before == after, f"{before!r} != {after!r}" + rt = [s for s in prs2.slides[0].shapes if getattr(s, "has_chartex", False)] + assert len(rt) == 1 + + +@then("the reopened ChartEx reflects the replaced data") +def then_reopened_reflects_replaced(context): + buf = io.BytesIO() + context.prs.save(buf) + prs2 = Presentation(io.BytesIO(buf.getvalue())) + z = zipfile.ZipFile(io.BytesIO(buf.getvalue())) + name = next( + n for n in z.namelist() if "chartEx" in n and n.endswith(".xml") and "_rels" not in n + ) + xml = z.read(name).decode() + nd = context.cx_replacement + token = "New" + assert token in xml, "replaced series name not found after reopen" + assert prs2 is not None + + +@then("a chart-type mismatch error is raised") +def then_mismatch_error_raised(context): + assert context.cx_replace_error is not None + assert "cannot change chart type" in str(context.cx_replace_error) diff --git a/src/pptx/chart/chartex.py 
b/src/pptx/chart/chartex.py index b0b67597a..51c116dc1 100644 --- a/src/pptx/chart/chartex.py +++ b/src/pptx/chart/chartex.py @@ -10,7 +10,6 @@ from pptx.util import lazyproperty if TYPE_CHECKING: - from pptx.chart.data import WaterfallChartData from pptx.oxml.chart.chartex import CT_Axis, CT_ChartSpace, CT_Series from pptx.parts.chartex import ChartExPart @@ -119,44 +118,82 @@ def axes(self) -> list[Axis]: ) return [Axis(axis, self) for axis in axis_elements] - def replace_data(self, chart_data: WaterfallChartData): + def replace_data(self, chart_data): """Replace the data for this chart with *chart_data*. - *chart_data* is a |WaterfallChartData| instance populated with the categories, - series values, and subtotal indices for the new chart data. + *chart_data* is any ChartEx data container — |WaterfallChartData|, + |TreemapChartData|, |SunburstChartData|, |FunnelChartData|, + |BoxWhiskerChartData|, |HistogramChartData|, or |ParetoChartData|. + The chartEx part name and its slide relationship are unchanged + (only `<cx:data>`, the series name, and the embedded workbook + are rewritten in place). + + Raises |ValueError| if *chart_data*'s chart type does not match the + layout of the chart currently in this part. 
""" + plotAreaRegion = self._chart.plotArea.plotAreaRegion + series_elems = plotAreaRegion.series_lst + cx_type = getattr(chart_data, "cx_chart_type", None) # None ⇒ waterfall + + # --- type-match guard (ISC-30) --- + current_layout = series_elems[0].get("layoutId") if series_elems else None + expected = { + None: "waterfall", + "treemap": "treemap", + "sunburst": "sunburst", + "funnel": "funnel", + "boxWhisker": "boxWhisker", + "histogram": "clusteredColumn", + "pareto": "clusteredColumn", + }[cx_type] + if current_layout is not None and current_layout != expected: + raise ValueError( + f"data is for a {expected!r} ChartEx but this chart is " + f"{current_layout!r}; replace_data cannot change chart type" + ) + chartData = self._chartspace.chartData - # --- rebuild the element --- + # --- rebuild the element per chart type --- for old_data in list(chartData.data_lst): chartData.remove(old_data) new_data = OxmlElement("cx:data") new_data.set("id", "0") chartData.append(new_data) - new_data.add_string_dimension("cat", chart_data.categories_ref, chart_data.categories) - new_data.add_numeric_dimension( - "val", - chart_data.values_ref, - chart_data.series_values, - chart_data.number_format, - ) - # --- update series name --- - series_elems = self._chart.plotArea.plotAreaRegion.series_lst - if series_elems: - series_elem = series_elems[0] + if cx_type in ("treemap", "sunburst"): + plotAreaRegion.add_hierarchical_string_dimension( + new_data, "cat", chart_data.categories_ref, chart_data.levels + ) + new_data.add_numeric_dimension( + "size", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + elif cx_type == "histogram": + # numeric raw values, binned — numDim only, no strDim + new_data.add_numeric_dimension( + "val", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + else: + # waterfall / funnel / boxWhisker / pareto: cat strDim + val numDim + new_data.add_string_dimension("cat", chart_data.categories_ref, 
chart_data.categories) + new_data.add_numeric_dimension( + "val", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + + # --- update series name on every series (Pareto has two) --- + for series_elem in series_elems: tx = series_elem.tx if tx is not None: txData = tx.txData if txData is not None: - f_elem = txData.f - if f_elem is not None: - f_elem.text = chart_data.series_name_ref - v_elem = txData.v - if v_elem is not None: - v_elem.text = chart_data.series_name - - # --- replace subtotals on layoutPr --- + if txData.f is not None: + txData.f.text = chart_data.series_name_ref + if txData.v is not None: + txData.v.text = chart_data.series_name + + # --- waterfall-only: rebuild subtotals + prune stale dataPt --- + if cx_type is None and series_elems: + series_elem = series_elems[0] series_elem._remove_layoutPr() if chart_data.subtotals: layoutPr = series_elem._add_layoutPr() @@ -166,8 +203,6 @@ def replace_data(self, chart_data: WaterfallChartData): idx_elem = OxmlElement("cx:idx") idx_elem.set("val", str(idx)) subtotals_elem.append(idx_elem) - - # --- remove dataPt elements that reference out-of-range indices --- num_points = len(chart_data.categories) for dataPt in list(series_elem.dataPt_lst): idx = dataPt.get("idx") diff --git a/src/pptx/chart/data.py b/src/pptx/chart/data.py index 42b48acb8..157fbcce7 100644 --- a/src/pptx/chart/data.py +++ b/src/pptx/chart/data.py @@ -966,3 +966,272 @@ def xlsx_blob(self): workbook.close() return xlsx_file.getvalue() + + +def _cx_xlsx(columns): + """Return an xlsx blob. 
`columns` is a list of (header, list-of-cells).""" + import io + + from xlsxwriter import Workbook + + xlsx_file = io.BytesIO() + workbook = Workbook(xlsx_file, {"in_memory": True}) + worksheet = workbook.add_worksheet("Sheet1") + for col, (header, cells) in enumerate(columns): + worksheet.write(0, col, header) + for row, cell in enumerate(cells): + if cell is not None and cell != "": + worksheet.write(row + 1, col, cell) + workbook.close() + return xlsx_file.getvalue() + + +class _CategoryChartExData: + """Flat categories + one value series. Base for Funnel and Box & Whisker.""" + + cx_chart_type = None # set by subclass: "funnel" | "boxWhisker" + + def __init__(self, number_format="General"): + self._categories = [] + self._series_name = None + self._series_values = [] + self._number_format = number_format + + @property + def categories(self): + """The category labels as a list of strings.""" + return self._categories + + @categories.setter + def categories(self, value): + self._categories = list(value) + + def add_series(self, name, values): + """Set the (single) series name and numeric values.""" + self._series_name = name + self._series_values = list(values) + + @property + def series_name(self): + return self._series_name + + @property + def series_values(self): + return self._series_values + + @property + def number_format(self): + return self._number_format + + @property + def categories_ref(self): + n = len(self._categories) + return "Sheet1!$A$2:$A$%d" % (n + 1) + + @property + def values_ref(self): + n = len(self._categories) + return "Sheet1!$B$2:$B$%d" % (n + 1) + + @property + def series_name_ref(self): + return "Sheet1!$B$1" + + @property + def xlsx_blob(self): + if len(self._categories) != len(self._series_values): + raise ValueError( + f"categories length ({len(self._categories)}) must equal" + f" series values length ({len(self._series_values)})" + ) + return _cx_xlsx( + [ + ("Category", self._categories), + (self._series_name or "Series 1", 
self._series_values), + ] + ) + + +class FunnelChartData(_CategoryChartExData): + """Data container for a ChartEx funnel chart (categories + values).""" + + cx_chart_type = "funnel" + + +class BoxWhiskerChartData(_CategoryChartExData): + """Data container for a ChartEx box & whisker chart (categories + values).""" + + cx_chart_type = "boxWhisker" + + +class HierarchicalChartExData: + """Multi-level categories + leaf values. Base for Treemap and Sunburst. + + `add_level(labels)` is called outermost-first; the final `add_series` + supplies the leaf-level values (aligned to the innermost level). + + Example:: + + cd = TreemapChartData() + cd.add_level(['Tech', 'Tech', 'Retail', 'Retail']) + cd.add_level(['Phones', 'Laptops', 'Apparel', 'Food']) + cd.add_series('Revenue', [50, 30, 20, 15]) + """ + + cx_chart_type = None # "treemap" | "sunburst" + + def __init__(self, number_format="General"): + self._levels = [] + self._series_name = None + self._series_values = [] + self._number_format = number_format + + def add_level(self, labels): + """Append one hierarchy level (outermost-first).""" + self._levels.append(list(labels)) + + @property + def levels(self): + return self._levels + + def add_series(self, name, values): + self._series_name = name + self._series_values = list(values) + + @property + def series_name(self): + return self._series_name + + @property + def series_values(self): + return self._series_values + + @property + def number_format(self): + return self._number_format + + @property + def _leaf_count(self): + return len(self._series_values) + + @property + def categories_ref(self): + n = self._leaf_count + last_col = chr(ord("A") + max(len(self._levels) - 1, 0)) + return "Sheet1!$A$2:$%s$%d" % (last_col, n + 1) + + @property + def values_ref(self): + n = self._leaf_count + col = chr(ord("A") + len(self._levels)) + return "Sheet1!$%s$2:$%s$%d" % (col, col, n + 1) + + @property + def series_name_ref(self): + col = chr(ord("A") + len(self._levels)) + return 
"Sheet1!$%s$1" % col + + @property + def xlsx_blob(self): + for lvl in self._levels: + if len(lvl) != self._leaf_count: + raise ValueError( + "every hierarchy level must have the same length as the" + f" series values ({self._leaf_count})" + ) + cols = [("Level %d" % (i + 1), lvl) for i, lvl in enumerate(self._levels)] + cols.append((self._series_name or "Series 1", self._series_values)) + return _cx_xlsx(cols) + + +class TreemapChartData(HierarchicalChartExData): + """Data container for a ChartEx treemap chart.""" + + cx_chart_type = "treemap" + + +class SunburstChartData(HierarchicalChartExData): + """Data container for a ChartEx sunburst chart.""" + + cx_chart_type = "sunburst" + + +class HistogramChartData: + """Raw values + bin configuration for a ChartEx histogram. + + Provide exactly one of `bin_count` / `bin_size` (or neither for + PowerPoint-automatic binning). + + Example:: + + cd = HistogramChartData() + cd.add_series('Scores', [55, 62, 71, 73, 88, 91, 64, 78], bin_count=5) + """ + + cx_chart_type = "histogram" + + def __init__(self, number_format="General"): + self._series_name = None + self._series_values = [] + self._bin_count = None + self._bin_size = None + self._number_format = number_format + + def add_series(self, name, values, bin_count=None, bin_size=None): + self._series_name = name + self._series_values = list(values) + if bin_count is not None and bin_size is not None: + raise ValueError("supply only one of bin_count / bin_size") + self._bin_count = bin_count + self._bin_size = bin_size + + @property + def series_name(self): + return self._series_name + + @property + def series_values(self): + return self._series_values + + @property + def bin_count(self): + return self._bin_count + + @property + def bin_size(self): + return self._bin_size + + @property + def number_format(self): + return self._number_format + + @property + def values_ref(self): + n = len(self._series_values) + return "Sheet1!$A$2:$A$%d" % (n + 1) + + @property + def 
series_name_ref(self): + return "Sheet1!$A$1" + + @property + def xlsx_blob(self): + return _cx_xlsx([(self._series_name or "Series 1", self._series_values)]) + + +class ParetoChartData(_CategoryChartExData): + """Data container for a ChartEx Pareto chart. + + PowerPoint's Pareto aggregates by **category** (not numeric bins) and + overlays a cumulative-percentage line — so this is category+value shaped + (like Funnel), confirmed against PowerPoint-authored ground truth + (issue #14). + + Example:: + + cd = ParetoChartData() + cd.categories = ['Defect A', 'Defect B', 'Defect C', 'Defect D'] + cd.add_series('Count', [45, 30, 15, 10]) + """ + + cx_chart_type = "pareto" diff --git a/src/pptx/enum/chart.py b/src/pptx/enum/chart.py index bc95cd71a..2a878ef41 100644 --- a/src/pptx/enum/chart.py +++ b/src/pptx/enum/chart.py @@ -300,23 +300,23 @@ class XL_CHART_TYPE(BaseEnum): WATERFALL = (1001, "Waterfall (ChartEx). Office 2016+.") """Waterfall (ChartEx). Office 2016+.""" - TREEMAP = (1002, "Treemap (ChartEx). Office 2016+. Round-trip only.") - """Treemap (ChartEx). Office 2016+. Round-trip preservation only.""" + TREEMAP = (1002, "Treemap (ChartEx). Office 2016+. Write + round-trip.") + """Treemap (ChartEx). Office 2016+. Write + round-trip supported.""" - SUNBURST = (1003, "Sunburst (ChartEx). Office 2016+. Round-trip only.") - """Sunburst (ChartEx). Office 2016+. Round-trip preservation only.""" + SUNBURST = (1003, "Sunburst (ChartEx). Office 2016+. Write + round-trip.") + """Sunburst (ChartEx). Office 2016+. Write + round-trip supported.""" - FUNNEL = (1004, "Funnel (ChartEx). Office 2016+. Round-trip only.") - """Funnel (ChartEx). Office 2016+. Round-trip preservation only.""" + FUNNEL = (1004, "Funnel (ChartEx). Office 2016+. Write + round-trip.") + """Funnel (ChartEx). Office 2016+. Write + round-trip supported.""" - BOX_WHISKER = (1005, "Box & Whisker (ChartEx). Office 2016+. Round-trip only.") - """Box & Whisker (ChartEx). Office 2016+. 
Round-trip preservation only.""" + BOX_WHISKER = (1005, "Box & Whisker (ChartEx). Office 2016+. Write + round-trip.") + """Box & Whisker (ChartEx). Office 2016+. Write + round-trip supported.""" - HISTOGRAM = (1006, "Histogram (ChartEx). Office 2016+. Round-trip only.") - """Histogram (ChartEx). Office 2016+. Round-trip preservation only.""" + HISTOGRAM = (1006, "Histogram (ChartEx). Office 2016+. Write + round-trip.") + """Histogram (ChartEx). Office 2016+. Write + round-trip supported.""" - PARETO = (1007, "Pareto / Histogram-Pareto (ChartEx). Office 2016+. Round-trip only.") - """Pareto (ChartEx). Office 2016+. Round-trip preservation only.""" + PARETO = (1007, "Pareto / Histogram-Pareto (ChartEx). Office 2016+. Write + round-trip.") + """Pareto (ChartEx). Office 2016+. Write + round-trip supported.""" class XL_DATA_LABEL_POSITION(BaseXmlEnum): diff --git a/src/pptx/oxml/chart/chartex.py b/src/pptx/oxml/chart/chartex.py index 6ff13a45e..2a2b052e7 100644 --- a/src/pptx/oxml/chart/chartex.py +++ b/src/pptx/oxml/chart/chartex.py @@ -200,6 +200,40 @@ def new(cls): plotArea.append(val_axis) return plotArea + def remove_axes(self): + """Remove every `<cx:axis>` child. + + Non-Cartesian ChartEx layouts (treemap, sunburst, regionMap) have no + category/value axes; PowerPoint flags a repair if a treemap/sunburst + declares them. Axis layouts (waterfall, clusteredColumn, boxWhisker, + paretoLine, funnel) keep the default axes. + """ + from pptx.oxml.ns import qn + + for ax in self.findall(qn("cx:axis")): + self.remove(ax) + + def add_pareto_percentage_axis(self): + """Append the 3rd axis Pareto needs — a 0–1 percentage value axis (id=2). + + Mirrors PowerPoint's authored Pareto (issue #14 ground truth): + `<cx:axis id="2"><cx:valScaling max="1" min="0"/> 
+ <cx:units unit="percentage"/><cx:tickLabels/></cx:axis>`. + """ + from lxml import etree + + from pptx.oxml.ns import qn + + axis = etree.SubElement(self, qn("cx:axis")) + axis.set("id", "2") + valScaling = etree.SubElement(axis, qn("cx:valScaling")) + valScaling.set("max", "1") + valScaling.set("min", "0") + units = etree.SubElement(axis, qn("cx:units")) + units.set("unit", "percentage") + etree.SubElement(axis, qn("cx:tickLabels")) + return axis + class CT_PlotAreaRegion(BaseOxmlElement): """ @@ -260,6 +294,189 @@ def add_waterfall_series( return series + def _new_cx_series( + self, + layout_id: str, + series_name: str, + series_name_ref: str = "Sheet1!$B$1", + data_id: int = 0, + with_data_labels: bool = True, + ): + """Append a `<cx:series>` skeleton (tx, [dataLabels], dataId) and return it. + + Shared by every ChartEx series layout. `series_name_ref` is the Excel + cell holding the series name — it MUST match the data container's + column layout (waterfall/funnel/boxwhisker put data in cols A+B so the + name is in B1; histogram/pareto put values in col A so the name is in + A1). Pointing `<cx:txData>/<cx:f>` at an empty column triggers a PowerPoint + repair (the histogram/pareto defect — confirmed via PowerPoint + ground-truth diff, issue #14). `with_data_labels=False` omits + `<cx:dataLabels>` for layouts where PowerPoint emits none (histogram, + pareto). The `<cx:layoutPr>` and any extra series are added by the + per-type method after this returns, preserving XSD `CT_Series` order. 
+ """ + import uuid + + from lxml import etree + + series = etree.SubElement(self, qn("cx:series")) + series.set("layoutId", layout_id) + series.set("uniqueId", f"{{{uuid.uuid4()}}}") + + tx = etree.SubElement(series, qn("cx:tx")) + txData = etree.SubElement(tx, qn("cx:txData")) + f_elem = etree.SubElement(txData, qn("cx:f")) + f_elem.text = series_name_ref + v_elem = etree.SubElement(txData, qn("cx:v")) + v_elem.text = series_name + + if with_data_labels: + dataLabels = etree.SubElement(series, qn("cx:dataLabels")) + dataLabels.set("pos", "outEnd") + visibility = etree.SubElement(dataLabels, qn("cx:visibility")) + visibility.set("seriesName", "0") + visibility.set("categoryName", "0") + visibility.set("value", "1") + + dataId_elem = etree.SubElement(series, qn("cx:dataId")) + dataId_elem.set("val", str(data_id)) + return series + + def add_treemap_series( + self, series_name: str, series_name_ref: str = "Sheet1!$B$1", data_id: int = 0 + ): + """Add a treemap series (`layoutId="treemap"`).""" + from lxml import etree + + series = self._new_cx_series("treemap", series_name, series_name_ref, data_id) + layoutPr = etree.SubElement(series, qn("cx:layoutPr")) + pll = etree.SubElement(layoutPr, qn("cx:parentLabelLayout")) + pll.set("val", "banner") + return series + + def add_sunburst_series( + self, series_name: str, series_name_ref: str = "Sheet1!$B$1", data_id: int = 0 + ): + """Add a sunburst series (`layoutId="sunburst"`).""" + from lxml import etree + + series = self._new_cx_series("sunburst", series_name, series_name_ref, data_id) + layoutPr = etree.SubElement(series, qn("cx:layoutPr")) + pll = etree.SubElement(layoutPr, qn("cx:parentLabelLayout")) + pll.set("val", "none") + return series + + def add_funnel_series( + self, series_name: str, series_name_ref: str = "Sheet1!$B$1", data_id: int = 0 + ): + """Add a funnel series (`layoutId="funnel"`).""" + return self._new_cx_series("funnel", series_name, series_name_ref, data_id) + + def add_box_whisker_series( + 
self, series_name: str, series_name_ref: str = "Sheet1!$B$1", data_id: int = 0 + ): + """Add a box & whisker series (`layoutId="boxWhisker"`).""" + from lxml import etree + + series = self._new_cx_series("boxWhisker", series_name, series_name_ref, data_id) + layoutPr = etree.SubElement(series, qn("cx:layoutPr")) + vis = etree.SubElement(layoutPr, qn("cx:visibility")) + vis.set("connectorLines", "1") + vis.set("meanLine", "0") + vis.set("meanMarker", "1") + vis.set("nonoutliers", "0") + vis.set("outliers", "1") + stats = etree.SubElement(layoutPr, qn("cx:statistics")) + stats.set("quartileMethod", "exclusive") + return series + + def add_histogram_series( + self, series_name: str, series_name_ref: str = "Sheet1!$A$1", data_id: int = 0 + ): + """Add a histogram series (`layoutId="clusteredColumn"` + `<cx:binning>`). + + Emits **automatic binning** — `<cx:binning intervalClosed="r"/>` with + no `binCount`/`binSize` child — which is exactly the structure + PowerPoint itself authors and accepts (verified via ground-truth diff, + issue #14). The `dml-chartex.xsd` models `binCount`/`binSize` as child + elements and our earlier emission was schema-valid, but PowerPoint's + reader rejects that form and shows a repair dialog. Until a + PowerPoint-authored sample with explicit bins exists to confirm the + accepted form, manual bin specification is intentionally not emitted — + PowerPoint computes sensible bins from the data automatically (its own + default behaviour). No `<cx:dataLabels>` either (PowerPoint emits none + for a histogram). + """ + from lxml import etree + + series = self._new_cx_series( + "clusteredColumn", series_name, series_name_ref, data_id, with_data_labels=False + ) + layoutPr = etree.SubElement(series, qn("cx:layoutPr")) + binning = etree.SubElement(layoutPr, qn("cx:binning")) + binning.set("intervalClosed", "r") + return series + + def add_pareto_pair( + self, series_name: str, series_name_ref: str = "Sheet1!$B$1", data_id: int = 0 + ): + """Add the PowerPoint Pareto pair (ground-truth structure, issue #14). 
+ + - A `clusteredColumn` series over categorical data with + `<cx:layoutPr><cx:aggregation/></cx:layoutPr>` and `<cx:axisId val="1"/>` + (no dataLabels, no binning — Pareto aggregates by category). + - A minimal `paretoLine` series: `ownerIdx="0"` + `<cx:axisId val="2"/>` + only (no tx/dataId/layoutPr) — it overlays the column series and + reads axis 2 (the percentage value axis added on the plot area). + + Returns the clusteredColumn series. + """ + from lxml import etree + + col = self._new_cx_series( + "clusteredColumn", series_name, series_name_ref, data_id, with_data_labels=False + ) + layoutPr = etree.SubElement(col, qn("cx:layoutPr")) + etree.SubElement(layoutPr, qn("cx:aggregation")) + axisId = etree.SubElement(col, qn("cx:axisId")) + axisId.set("val", "1") + + import uuid + + line = etree.SubElement(self, qn("cx:series")) + line.set("layoutId", "paretoLine") + line.set("ownerIdx", "0") + line.set("uniqueId", f"{{{uuid.uuid4()}}}") + line_axisId = etree.SubElement(line, qn("cx:axisId")) + line_axisId.set("val", "2") + return col + + def add_hierarchical_string_dimension( + self, data_elem, dim_type: str, formula: str, levels: list[list[str]] + ): + """Append a `<cx:strDim>` with one `<cx:lvl>` per hierarchy level. + + `levels` is outermost-first; each inner list is that level's labels. + Used by treemap/sunburst. `ptCount` on every `<cx:lvl>` equals its + actual point count (off-by-one is a PowerPoint-repair trigger). 
+ """ + from lxml import etree + + strDim = etree.SubElement(data_elem, qn("cx:strDim")) + strDim.set("type", dim_type) + f_elem = etree.SubElement(strDim, qn("cx:f")) + f_elem.text = formula + for labels in levels: + lvl = etree.SubElement(strDim, qn("cx:lvl")) + lvl.set("ptCount", str(len(labels))) + for idx, value in enumerate(labels): + if value is None or value == "": + continue + pt = etree.SubElement(lvl, qn("cx:pt")) + pt.set("idx", str(idx)) + pt.text = value + return strDim + class CT_Series(BaseOxmlElement): """ diff --git a/src/pptx/parts/slide.py b/src/pptx/parts/slide.py index d5f2d1436..3c69d217b 100644 --- a/src/pptx/parts/slide.py +++ b/src/pptx/parts/slide.py @@ -208,20 +208,63 @@ def add_chartex_part(self, chart_data: WaterfallChartData) -> str: part by the returned `rId`. """ chartex_part = ChartExPart.new(self._package) - # populate the series on the chart XML - plotAreaRegion = chartex_part._element.chart.plotArea.plotAreaRegion - plotAreaRegion.add_waterfall_series( - chart_data.series_name or "Series 1", - data_id=0, - subtotal_indices=chart_data.subtotals or None, - ) - # populate chart data dimensions + plotArea = chartex_part._element.chart.plotArea + plotAreaRegion = plotArea.plotAreaRegion data_elem = chartex_part._element.chartData.data_lst[0] - data_elem.add_string_dimension("cat", chart_data.categories_ref, chart_data.categories) - data_elem.add_numeric_dimension( - "val", chart_data.values_ref, chart_data.series_values, chart_data.number_format - ) - # embed the Excel workbook + name = chart_data.series_name or "Series 1" + name_ref = chart_data.series_name_ref + cx_type = getattr(chart_data, "cx_chart_type", None) + + if cx_type in ("treemap", "sunburst"): + if cx_type == "treemap": + plotAreaRegion.add_treemap_series(name, name_ref, data_id=0) + else: + plotAreaRegion.add_sunburst_series(name, name_ref, data_id=0) + # treemap/sunburst are non-Cartesian — no category/value axes + # (PowerPoint repairs a treemap/sunburst that 
declares axes). + plotArea.remove_axes() + plotAreaRegion.add_hierarchical_string_dimension( + data_elem, "cat", chart_data.categories_ref, chart_data.levels + ) + data_elem.add_numeric_dimension( + "size", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + elif cx_type in ("funnel", "boxWhisker"): + if cx_type == "funnel": + plotAreaRegion.add_funnel_series(name, name_ref, data_id=0) + else: + plotAreaRegion.add_box_whisker_series(name, name_ref, data_id=0) + data_elem.add_string_dimension("cat", chart_data.categories_ref, chart_data.categories) + data_elem.add_numeric_dimension( + "val", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + elif cx_type == "histogram": + # numeric raw values, auto-binned; no strDim, no dataLabels + # (PowerPoint ground truth, issue #14). + plotAreaRegion.add_histogram_series(name, name_ref, data_id=0) + data_elem.add_numeric_dimension( + "val", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + elif cx_type == "pareto": + # PowerPoint Pareto: categorical (strDim cat + numDim val), + # aggregation column series + minimal paretoLine over a 3rd + # percentage axis (issue #14 ground truth). + plotAreaRegion.add_pareto_pair(name, name_ref, data_id=0) + plotArea.add_pareto_percentage_axis() + data_elem.add_string_dimension("cat", chart_data.categories_ref, chart_data.categories) + data_elem.add_numeric_dimension( + "val", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + else: + # Default: Waterfall (Phase B path, unchanged). 
+ plotAreaRegion.add_waterfall_series( + name, data_id=0, subtotal_indices=chart_data.subtotals or None + ) + data_elem.add_string_dimension("cat", chart_data.categories_ref, chart_data.categories) + data_elem.add_numeric_dimension( + "val", chart_data.values_ref, chart_data.series_values, chart_data.number_format + ) + chartex_part.chartex_workbook.update_from_xlsx_blob(chart_data.xlsx_blob) return self.relate_to(chartex_part, RT.CHARTEX) diff --git a/src/pptx/shapes/shapetree.py b/src/pptx/shapes/shapetree.py index 356810941..494e96472 100644 --- a/src/pptx/shapes/shapetree.py +++ b/src/pptx/shapes/shapetree.py @@ -356,8 +356,8 @@ def add_chart( """ from pptx.enum.chart import XL_CHART_TYPE - _CHARTEX_WRITABLE = (XL_CHART_TYPE.WATERFALL,) - _CHARTEX_WRITER_DEFERRED = ( + _CHARTEX_WRITABLE = ( + XL_CHART_TYPE.WATERFALL, XL_CHART_TYPE.TREEMAP, XL_CHART_TYPE.SUNBURST, XL_CHART_TYPE.FUNNEL, @@ -365,13 +365,15 @@ def add_chart( XL_CHART_TYPE.HISTOGRAM, XL_CHART_TYPE.PARETO, ) + # Phase C (issue #14) made every ChartEx type writable; the + # writer-deferred set is now empty. + _CHARTEX_WRITER_DEFERRED = () if chart_type in _CHARTEX_WRITABLE: return self.add_chartex(chart_data, x, y, cx, cy) - if chart_type in _CHARTEX_WRITER_DEFERRED: + if chart_type in _CHARTEX_WRITER_DEFERRED: # pragma: no cover - empty post Phase C raise NotImplementedError( - f"{chart_type} is a ChartEx (Office 2016) type with round-trip " - "preservation but no writer yet; only XL_CHART_TYPE.WATERFALL is " - "currently writable. See https://github.com/MHoroszowski/python-pptx/issues/14" + f"{chart_type} is a ChartEx (Office 2016) type with no writer yet. 
" + "See https://github.com/MHoroszowski/python-pptx/issues/14" ) rId = self.part.add_chart_part(chart_type, chart_data) graphicFrame = self._add_chart_graphicFrame(rId, x, y, cx, cy) diff --git a/tests/chart/test_chartex_extended.py b/tests/chart/test_chartex_extended.py index ea6a7874a..b4e361beb 100644 --- a/tests/chart/test_chartex_extended.py +++ b/tests/chart/test_chartex_extended.py @@ -2,8 +2,8 @@ Covers the surface added on top of the GetThematic port: the `add_chart` dispatch shim, the extended `XL_CHART_TYPE` members, round-trip preservation, -content-type / relationship wiring, the `NotImplementedError` for -writer-deferred types, and the `WaterfallChartData` data API. See issue #14. +content-type / relationship wiring, formerly-deferred types now being +writable (Phase C), and the `WaterfallChartData` data API. See issue #14. """ from __future__ import annotations @@ -138,43 +138,54 @@ def it_keeps_classic_charts_on_the_c_path(self): @pytest.mark.parametrize( "name", ["TREEMAP", "SUNBURST", "FUNNEL", "BOX_WHISKER", "HISTOGRAM", "PARETO"] ) - def it_raises_NotImplementedError_for_writer_deferred_types(self, name): + def it_now_writes_formerly_deferred_types(self, name): + # Phase C (issue #14) inverted the Phase-A/B contract: these types no + # longer raise NotImplementedError — they are writable. Each is + # exercised in depth in test_chartex_phasec.py; here we just assert + # add_chart no longer raises for them. 
+ from pptx.chart.data import ( + BoxWhiskerChartData, + FunnelChartData, + HistogramChartData, + ParetoChartData, + SunburstChartData, + TreemapChartData, + ) + _, slide = _slide() - with pytest.raises(NotImplementedError, match="no writer yet"): - slide.shapes.add_chart( - getattr(XL_CHART_TYPE, name), - Inches(1), - Inches(1), - Inches(5), - Inches(3), - _waterfall_data(), - ) + if name in ("TREEMAP", "SUNBURST"): + cd = {"TREEMAP": TreemapChartData, "SUNBURST": SunburstChartData}[name]() + cd.add_level(["A", "B"]) + cd.add_level(["x", "y"]) + cd.add_series("S", [1, 2]) + elif name in ("FUNNEL", "BOX_WHISKER"): + cd = {"FUNNEL": FunnelChartData, "BOX_WHISKER": BoxWhiskerChartData}[name]() + cd.categories = ["a", "b"] + cd.add_series("S", [1, 2]) + elif name == "HISTOGRAM": + cd = HistogramChartData() + cd.add_series("S", [1, 2, 3, 4], bin_count=2) + else: # PARETO — categorical per PowerPoint ground truth + cd = ParetoChartData() + cd.categories = ["a", "b"] + cd.add_series("S", [3, 1]) + gf = slide.shapes.add_chart( + getattr(XL_CHART_TYPE, name), Inches(1), Inches(1), Inches(5), Inches(3), cd + ) + assert gf.has_chartex is True def it_can_add_via_add_chartex_directly(self): _, slide = _slide() gf = slide.shapes.add_chartex(_waterfall_data(), Inches(1), Inches(1), Inches(6), Inches(4)) assert gf.has_chartex is True - def it_leaves_the_package_unmutated_when_a_deferred_type_raises(self): - # Atomicity: NotImplementedError must fire before any part/rel is - # created, so a caught error does not leave a corrupt presentation. 
- prs, slide = _slide() - shape_count_before = len(slide.shapes._spTree) - part_count_before = len(list(prs.part.package.iter_parts())) - with pytest.raises(NotImplementedError): - slide.shapes.add_chart( - XL_CHART_TYPE.SUNBURST, - Inches(1), - Inches(1), - Inches(5), - Inches(3), - _waterfall_data(), - ) - assert len(slide.shapes._spTree) == shape_count_before - assert len(list(prs.part.package.iter_parts())) == part_count_before - blob, _ = _save_reopen(prs) - names = zipfile.ZipFile(io.BytesIO(blob)).namelist() - assert not any("chartEx" in n for n in names) + def it_has_an_empty_writer_deferred_set_after_phase_c(self): + import inspect + + from pptx.shapes.shapetree import _BaseGroupShapes + + body = inspect.getsource(_BaseGroupShapes.add_chart) + assert "_CHARTEX_WRITER_DEFERRED = ()" in body class DescribeChartExRoundTrip: diff --git a/tests/chart/test_chartex_phasec.py b/tests/chart/test_chartex_phasec.py new file mode 100644 index 000000000..29d6d3cad --- /dev/null +++ b/tests/chart/test_chartex_phasec.py @@ -0,0 +1,363 @@ +"""Phase-C tests: ChartEx writers for Treemap/Sunburst/Funnel/BoxWhisker/ +Histogram/Pareto + the generalized ``ChartEx.replace_data``. See issue #14. + +Structural assertions are derived from the normative in-repo schema +``spec/ISO-IEC-29500-4/xsd/dml-chartex.xsd`` (the schema PowerPoint conforms +to) — element ``layoutId`` + ``CT_Series`` child order, never hand-guessed. 
+""" + +from __future__ import annotations + +import io +import zipfile + +import pytest +from lxml import etree + +from pptx import Presentation +from pptx.chart.data import ( + BoxWhiskerChartData, + FunnelChartData, + HistogramChartData, + ParetoChartData, + SunburstChartData, + TreemapChartData, + WaterfallChartData, +) +from pptx.enum.chart import XL_CHART_TYPE +from pptx.util import Inches + +CX = "http://schemas.microsoft.com/office/drawing/2014/chartex" + + +def _slide(): + prs = Presentation() + return prs, prs.slides.add_slide(prs.slide_layouts[5]) + + +def _hier(cls): + cd = cls() + cd.add_level(["Tech", "Tech", "Retail", "Retail"]) + cd.add_level(["Phones", "Laptops", "Apparel", "Food"]) + cd.add_series("Revenue", [50, 30, 20, 15]) + return cd + + +def _cat(cls): + cd = cls() + cd.categories = ["Leads", "Qualified", "Proposals", "Won"] + cd.add_series("Pipeline", [1000, 600, 250, 90]) + return cd + + +def _hist(cls): + cd = cls() + cd.add_series("Scores", [55, 62, 71, 73, 88, 91, 64, 78, 82, 69], bin_count=5) + return cd + + +def _pareto(cls): + # PowerPoint Pareto is categorical (aggregate by category) per ground truth. 
+ cd = cls() + cd.categories = ["Defect A", "Defect B", "Defect C", "Defect D"] + cd.add_series("Count", [45, 30, 15, 10]) + return cd + + +_BUILDERS = { + XL_CHART_TYPE.TREEMAP: (lambda: _hier(TreemapChartData), "treemap"), + XL_CHART_TYPE.SUNBURST: (lambda: _hier(SunburstChartData), "sunburst"), + XL_CHART_TYPE.FUNNEL: (lambda: _cat(FunnelChartData), "funnel"), + XL_CHART_TYPE.BOX_WHISKER: (lambda: _cat(BoxWhiskerChartData), "boxWhisker"), + XL_CHART_TYPE.HISTOGRAM: (lambda: _hist(HistogramChartData), "clusteredColumn"), + XL_CHART_TYPE.PARETO: (lambda: _pareto(ParetoChartData), "clusteredColumn"), +} +_ALL = list(_BUILDERS) + + +def _add(slide, ct): + builder, _ = _BUILDERS[ct] + return slide.shapes.add_chart(ct, Inches(1), Inches(1), Inches(6), Inches(4), builder()) + + +def _cx_xml(prs): + buf = io.BytesIO() + prs.save(buf) + z = zipfile.ZipFile(io.BytesIO(buf.getvalue())) + name = next( + n for n in z.namelist() if "chartEx" in n and n.endswith(".xml") and "_rels" not in n + ) + return z.read(name), buf.getvalue() + + +class DescribePhaseCWriters: + @pytest.mark.parametrize("ct", _ALL) + def it_writes_each_type_via_add_chart(self, ct): + _, slide = _slide() + gf = _add(slide, ct) + assert gf.has_chartex is True + assert gf.has_chart is False + + @pytest.mark.parametrize("ct", _ALL) + def it_emits_the_correct_layoutId(self, ct): + prs, slide = _slide() + _add(slide, ct) + xml, _ = _cx_xml(prs) + _, expected_layout = _BUILDERS[ct] + assert ('layoutId="%s"' % expected_layout).encode() in xml + + @pytest.mark.parametrize("ct", _ALL) + def it_round_trips_each_type_c14n_stable(self, ct): + prs, slide = _slide() + _add(slide, ct) + xml1, blob = _cx_xml(prs) + prs2 = Presentation(io.BytesIO(blob)) + prs2.slides.add_slide(prs2.slide_layouts[6]) + xml2, _ = _cx_xml(prs2) + c14n = lambda b: etree.tostring(etree.fromstring(b), method="c14n2") # noqa: E731 + assert c14n(xml1) == c14n(xml2) + rt = [s for s in prs2.slides[0].shapes if getattr(s, "has_chartex", False)] + 
assert len(rt) == 1 + + @pytest.mark.parametrize("ct", _ALL) + def it_emits_well_formed_xml(self, ct): + prs, slide = _slide() + _add(slide, ct) + xml, _ = _cx_xml(prs) + assert etree.fromstring(xml) is not None + + @pytest.mark.parametrize("ct", _ALL) + def it_respects_CT_Series_child_order(self, ct): + # XSD CT_Series sequence: tx, (spPr), ..., dataLabels, dataId, layoutPr. + # Order bugs are the silent-corruption class — assert tx precedes + # dataId precedes layoutPr on the first series. + prs, slide = _slide() + _add(slide, ct) + xml, _ = _cx_xml(prs) + root = etree.fromstring(xml) + series = root.findall(".//{%s}series" % CX)[0] + tags = [etree.QName(c).localname for c in series] + assert tags.index("tx") < tags.index("dataId") + if "layoutPr" in tags: + assert tags.index("dataId") < tags.index("layoutPr") + + @pytest.mark.parametrize("ct", _ALL) + def it_keeps_every_dataId_referencing_an_existing_data(self, ct): + prs, slide = _slide() + _add(slide, ct) + xml, _ = _cx_xml(prs) + root = etree.fromstring(xml) + data_ids = {d.get("id") for d in root.findall(".//{%s}data" % CX)} + for di in root.findall(".//{%s}dataId" % CX): + assert di.get("val") in data_ids + + @pytest.mark.parametrize("ct", _ALL) + def it_embeds_an_xlsx_workbook(self, ct): + prs, slide = _slide() + _add(slide, ct) + buf = io.BytesIO() + prs.save(buf) + z = zipfile.ZipFile(io.BytesIO(buf.getvalue())) + assert any(n.endswith(".xlsx") for n in z.namelist()) + + def it_makes_pareto_emit_a_second_paretoLine_series(self): + prs, slide = _slide() + _add(slide, XL_CHART_TYPE.PARETO) + xml, _ = _cx_xml(prs) + assert b'layoutId="clusteredColumn"' in xml + assert b'layoutId="paretoLine"' in xml + + def it_makes_histogram_emit_binning(self): + prs, slide = _slide() + _add(slide, XL_CHART_TYPE.HISTOGRAM) + xml, _ = _cx_xml(prs) + assert b"X' in xml or b">X<" in xml + + def it_replaces_histogram_data(self): + prs, slide = _slide() + gf = _add(slide, XL_CHART_TYPE.HISTOGRAM) + nd = HistogramChartData() + 
nd.add_series("H2", [1, 2, 3, 4, 5, 6], bin_count=3) + gf.chartex.replace_data(nd) + xml, _ = _cx_xml(prs) + assert b'layoutId="clusteredColumn"' in xml + + def it_keeps_waterfall_replace_data_working(self): + prs, slide = _slide() + wd = WaterfallChartData() + wd.categories = ["a", "b"] + wd.add_series("W", [1, 2], subtotals=[1]) + gf = slide.shapes.add_chart( + XL_CHART_TYPE.WATERFALL, Inches(1), Inches(1), Inches(6), Inches(4), wd + ) + nw = WaterfallChartData() + nw.categories = ["c", "d", "e"] + nw.add_series("W", [5, 6, 7], subtotals=[2]) + gf.chartex.replace_data(nw) + xml, _ = _cx_xml(prs) + assert b">c<" in xml + assert b'' in xml + + def it_does_not_change_the_part_name_or_rel_on_replace(self): + prs, slide = _slide() + gf = _add(slide, XL_CHART_TYPE.SUNBURST) + part_before = gf.chartex.part.partname + rId_before = gf._element.chartex_rId + nd = SunburstChartData() + nd.add_level(["P", "P"]) + nd.add_level(["m", "n"]) + nd.add_series("S", [4, 5]) + gf.chartex.replace_data(nd) + assert gf.chartex.part.partname == part_before + assert gf._element.chartex_rId == rId_before + + def it_raises_on_chart_type_mismatch(self): + prs, slide = _slide() + gf = _add(slide, XL_CHART_TYPE.FUNNEL) + with pytest.raises(ValueError, match="cannot change chart type"): + gf.chartex.replace_data(_hist(HistogramChartData)) + + def it_round_trips_replaced_data(self): + prs, slide = _slide() + gf = _add(slide, XL_CHART_TYPE.TREEMAP) + nd = TreemapChartData() + nd.add_level(["Z", "Z"]) + nd.add_level(["q1", "q2"]) + nd.add_series("S", [8, 9]) + gf.chartex.replace_data(nd) + _, blob = _cx_xml(prs) + prs2 = Presentation(io.BytesIO(blob)) + xml2, _ = _cx_xml(prs2) + assert b"q1" in xml2 + + +class DescribePhaseCAntiCriteria: + def it_leaves_the_writer_deferred_set_empty(self): + from pptx.shapes.shapetree import _BaseGroupShapes + + src = _BaseGroupShapes.add_chart.__code__ + # The empty tuple literal must be present in add_chart source. 
+ import inspect + + body = inspect.getsource(_BaseGroupShapes.add_chart) + assert "_CHARTEX_WRITER_DEFERRED = ()" in body + assert src is not None + + @pytest.mark.parametrize("ct", _ALL) + def it_does_not_raise_NotImplementedError_for_any_cx_type(self, ct): + _, slide = _slide() + # Must not raise — every cx: type is writable in Phase C. + _add(slide, ct) + + @pytest.mark.parametrize("ct", _ALL) + def it_wires_the_full_packaging_path_for_each_type(self, ct): + # Advisor blind-spot closure: assert the graphicFrame→part packaging + # (not just the chartEx XML) for every Phase-C type — content-type + # declared, slide relationship present, graphicData URI is chartEx. + prs, slide = _slide() + gf = _add(slide, ct) + buf = io.BytesIO() + prs.save(buf) + z = zipfile.ZipFile(io.BytesIO(buf.getvalue())) + ctypes = z.read("[Content_Types].xml").decode() + assert "chartex+xml" in ctypes + rels = z.read("ppt/slides/_rels/slide1.xml.rels").decode() + assert "chartEx" in rels + uri = gf._element.graphic.graphicData.get("uri") + assert uri == "http://schemas.microsoft.com/office/drawing/2014/chartex" + + def it_keeps_classic_c_charts_on_the_c_path(self): + from pptx.chart.data import CategoryChartData + + _, slide = _slide() + cd = CategoryChartData() + cd.categories = ["a", "b"] + cd.add_series("S", (1, 2)) + gf = slide.shapes.add_chart( + XL_CHART_TYPE.COLUMN_CLUSTERED, Inches(1), Inches(1), Inches(4), Inches(3), cd + ) + assert gf.has_chart is True + assert gf.has_chartex is False + + def it_does_not_inject_chartex_into_a_plain_deck(self): + prs = Presentation() + prs.slides.add_slide(prs.slide_layouts[6]) + buf = io.BytesIO() + prs.save(buf) + names = zipfile.ZipFile(io.BytesIO(buf.getvalue())).namelist() + assert not any("chartEx" in n for n in names)