Skip to content

Commit 0ff743c

Browse files
committed
Add CanonicalDeterminismTest to validate deterministic PDF output and promote 1.0.0 release
- Introduced `CanonicalDeterminismTest` to ensure deterministic byte-for-byte PDF generation across JVM runs, locales, and invocations. - Verified stability of metadata, locale independence, fixed timestamps, and multi-run consistency. - Added utility method `buildSolidColorPng` for generating deterministic PNGs used in tests. - Updated project version to `1.0.0` in `pom.xml`. - Revised README with `1.0.0` highlights, including stable release status, API freeze, and deterministic design.
1 parent 449f97c commit 0ff743c

3 files changed

Lines changed: 292 additions & 7 deletions

File tree

README.md

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,40 @@ Byte-for-byte reproducible output. No bloat. No surprises.
1212

1313
---
1414

15+
## 1.0.0 Highlights
16+
17+
- **Stable release** — production-ready engine foundation.
18+
- **API frozen** for the 1.x line; public API compatibility is guaranteed.
19+
- **Deterministic by design** — identical bytes on every run, every platform.
20+
- **Unicode-aware** — UTF-16 literal support in Core; full Unicode rendering available in Pro.
21+
- **Zero dependencies, JPMS modular** — drop-in for modular and non-modular projects alike.
22+
23+
---
24+
1525
## Why PDFixa?
1626

1727
PDFixa Core is a deterministic PDF engine built for predictability, reproducibility and clean architecture.
28+
Every byte of output is fully determined by your input — no timestamps, no UUIDs, no ambient state.
29+
The same code always produces the same file, bit for bit.
1830

1931
### Key Guarantees
2032

21-
- Byte-for-byte deterministic output
33+
- **Byte-for-byte deterministic output** — object numbers, xref offsets and `/ID` are stable across runs, platforms and JVM versions.
2234
- Zero runtime dependencies (pure JDK 17+)
23-
- JPMS modular design
35+
- JPMS modular design (`io.offixa.pdfixa.core`)
2436
- No hidden timestamps or UUID pollution
2537
- Strict lifecycle (allocate → write → seal)
2638

2739
---
2840

41+
## API Stability
42+
43+
PDFixa Core 1.0.0 marks the beginning of the stable 1.x line.
44+
All public API surfaces are frozen: future 1.x releases will contain **source-compatible changes only**.
45+
You can depend on `pdfixa-core` in production without risk of breaking upgrades within the 1.x series.
46+
47+
---
48+
2949
## Determinism Example
3050

3151
```java
@@ -47,14 +67,14 @@ Object numbers, xref offsets and `/ID` remain identical across runs.
4767
<dependency>
4868
<groupId>io.offixa</groupId>
4969
<artifactId>pdfixa-core</artifactId>
50-
<version>0.8.0</version>
70+
<version>1.0.0</version>
5171
</dependency>
5272
```
5373

5474
### Gradle
5575

5676
```groovy
57-
implementation 'io.offixa:pdfixa-core:0.8.0'
77+
implementation 'io.offixa:pdfixa-core:1.0.0'
5878
```
5979

6080
### Example
@@ -111,8 +131,8 @@ try (var out = new FileOutputStream("hello.pdf")) {
111131
|:---|:---:|:---:|
112132
| Deterministic output | Yes | Yes |
113133
| Zero dependencies | Yes | Yes |
114-
| Unicode-aware API | Yes | Yes |
115-
| Full Unicode rendering || Yes |
134+
| Unicode-aware API (UTF-16 literals) | Yes | Yes |
135+
| Full Unicode rendering (CIDFont, ToUnicode) | No | Yes |
116136
| Font embedding | No | Yes |
117137
| Font subsetting | No | Yes |
118138
| Advanced layout engine | No | Yes |

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>io.offixa</groupId>
88
<artifactId>pdfixa-core</artifactId>
9-
<version>0.8.0</version>
9+
<version>1.0.0</version>
1010
<packaging>jar</packaging>
1111

1212
<name>PDFixa Core</name>
Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
package io.offixa.pdfixa.core.document;
2+
3+
import org.junit.jupiter.api.Test;
4+
5+
import java.io.ByteArrayOutputStream;
6+
import java.nio.charset.StandardCharsets;
7+
import java.security.MessageDigest;
8+
import java.util.Locale;
9+
import java.util.zip.CRC32;
10+
import java.util.zip.Deflater;
11+
12+
import static org.junit.jupiter.api.Assertions.*;
13+
14+
/**
15+
* Phase 3: Determinism Hard Proof.
16+
*
17+
* <p>Proves byte-for-byte reproducibility of PDFixa output across JVM runs,
18+
* locales, and repeated invocations. The canonical document exercises every
19+
* major feature path:
20+
* <ul>
21+
* <li>Base-14 font text ({@code showText})</li>
22+
* <li>Unicode raw text ({@code showTextUnicodeRaw})</li>
23+
* <li>JPEG image XObject</li>
24+
* <li>PNG image XObject</li>
25+
* <li>Document metadata ({@link PdfInfo})</li>
26+
* <li>Custom page size ({@link PdfPageSize})</li>
27+
* </ul>
28+
*
29+
* <p>No test depends on {@link System#currentTimeMillis()} or any other
30+
* non-deterministic system state. All metadata dates are fixed string literals.
31+
*/
32+
class CanonicalDeterminismTest {
33+
34+
private static final byte[] TINY_JPEG = {
35+
(byte) 0xFF, (byte) 0xD8,
36+
(byte) 0xFF, (byte) 0xD9
37+
};
38+
39+
private static final PdfPageSize CUSTOM_PAGE = new PdfPageSize(700, 500);
40+
41+
private static final String EXPECTED_SHA256 =
42+
"042051ed6a438ce1a99760363ab1eb5ffb4b9f87e684d26f8fefb545037742dd";
43+
44+
/**
45+
* Builds the canonical PDF exercising all major feature paths.
46+
* Every input is a compile-time constant — no runtime entropy.
47+
*/
48+
static byte[] buildCanonicalPdf() throws Exception {
49+
PdfDocument doc = new PdfDocument(CUSTOM_PAGE);
50+
51+
doc.setInfo(PdfInfo.builder()
52+
.title("Determinism Proof")
53+
.author("PDFixa CI")
54+
.subject("Phase 3 canonical document")
55+
.keywords("determinism, test, canonical")
56+
.creator("CanonicalDeterminismTest")
57+
.creationDate("D:20250101120000+00'00'")
58+
.modDate("D:20250101120000+00'00'")
59+
.build());
60+
61+
byte[] pngBytes = buildSolidColorPng(16, 16, 0xFF, 0x00, 0x00);
62+
PdfImage jpegImg = doc.addJpegImage(TINY_JPEG, 10, 8);
63+
PdfImage pngImg = doc.addPngImage(pngBytes);
64+
65+
PdfPage page = doc.addPage();
66+
67+
page.getContent()
68+
.beginText()
69+
.setFont("Helvetica-Bold", 14)
70+
.moveText(50, 450)
71+
.showText("Determinism Proof: Base14 Text")
72+
.endText()
73+
74+
.beginText()
75+
.setFont("Helvetica", 11)
76+
.moveText(50, 430)
77+
.showText("The quick brown fox jumps over the lazy dog.")
78+
.endText()
79+
80+
.beginText()
81+
.setFont("Courier", 10)
82+
.moveText(50, 410)
83+
.showText("0123456789 !@#$%^&*()")
84+
.endText()
85+
86+
.beginText()
87+
.setFont("Helvetica", 11)
88+
.moveText(50, 380)
89+
.showTextUnicodeRaw("Salom Dunyo!")
90+
.endText()
91+
92+
.beginText()
93+
.setFont("Helvetica", 11)
94+
.moveText(50, 360)
95+
.showTextUnicodeRaw("\u041F\u0440\u0438\u0432\u0435\u0442 \u043C\u0438\u0440!")
96+
.endText()
97+
98+
.beginText()
99+
.setFont("Helvetica", 11)
100+
.moveText(50, 340)
101+
.showTextUnicodeRaw("\u4F60\u597D\u4E16\u754C")
102+
.endText();
103+
104+
page.drawImage(jpegImg, 50, 250, 100, 80);
105+
page.drawImage(pngImg, 200, 250, 64, 64);
106+
107+
page.getContent()
108+
.setStrokeColor(0.2, 0.4, 0.8)
109+
.setLineWidth(1.5)
110+
.rectangle(50, 200, 200, 30)
111+
.stroke()
112+
113+
.setFillColor(0.9, 0.1, 0.1)
114+
.rectangle(300, 200, 100, 30)
115+
.fill();
116+
117+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
118+
doc.save(baos);
119+
return baos.toByteArray();
120+
}
121+
122+
static String sha256Hex(byte[] data) throws Exception {
123+
MessageDigest md = MessageDigest.getInstance("SHA-256");
124+
byte[] digest = md.digest(data);
125+
StringBuilder sb = new StringBuilder(64);
126+
for (byte b : digest) {
127+
sb.append(String.format("%02x", b & 0xFF));
128+
}
129+
return sb.toString();
130+
}
131+
132+
// ── 1. Canonical hash test ──────────────────────────────────────────────
133+
134+
@Test
135+
void canonical_pdf_matches_expected_sha256() throws Exception {
136+
byte[] pdf = buildCanonicalPdf();
137+
String hash = sha256Hex(pdf);
138+
139+
assertEquals(EXPECTED_SHA256, hash,
140+
"Canonical PDF SHA-256 mismatch — if the document builder changed "
141+
+ "intentionally, update EXPECTED_SHA256 to: " + hash);
142+
}
143+
144+
// ── 2. Multi-run stability ──────────────────────────────────────────────
145+
146+
@Test
147+
void three_generations_produce_identical_bytes() throws Exception {
148+
byte[] run1 = buildCanonicalPdf();
149+
byte[] run2 = buildCanonicalPdf();
150+
byte[] run3 = buildCanonicalPdf();
151+
152+
assertArrayEquals(run1, run2, "Run 1 vs Run 2 must be byte-identical");
153+
assertArrayEquals(run2, run3, "Run 2 vs Run 3 must be byte-identical");
154+
}
155+
156+
// ── 3. Locale stability ─────────────────────────────────────────────────
157+
158+
@Test
159+
void output_is_identical_across_locales() throws Exception {
160+
Locale original = Locale.getDefault();
161+
try {
162+
Locale.setDefault(Locale.US);
163+
byte[] usBytes = buildCanonicalPdf();
164+
165+
Locale.setDefault(Locale.GERMANY);
166+
byte[] deBytes = buildCanonicalPdf();
167+
168+
Locale.setDefault(new Locale("tr", "TR"));
169+
byte[] trBytes = buildCanonicalPdf();
170+
171+
Locale.setDefault(Locale.JAPAN);
172+
byte[] jpBytes = buildCanonicalPdf();
173+
174+
Locale.setDefault(new Locale("ar", "SA"));
175+
byte[] arBytes = buildCanonicalPdf();
176+
177+
assertArrayEquals(usBytes, deBytes, "US vs DE locale must be byte-identical");
178+
assertArrayEquals(usBytes, trBytes, "US vs TR locale must be byte-identical");
179+
assertArrayEquals(usBytes, jpBytes, "US vs JP locale must be byte-identical");
180+
assertArrayEquals(usBytes, arBytes, "US vs AR locale must be byte-identical");
181+
} finally {
182+
Locale.setDefault(original);
183+
}
184+
}
185+
186+
// ── 4. No system-time dependency ────────────────────────────────────────
187+
188+
@Test
189+
void metadata_uses_fixed_dates_not_system_time() throws Exception {
190+
byte[] pdf = buildCanonicalPdf();
191+
String text = new String(pdf, StandardCharsets.US_ASCII);
192+
193+
assertTrue(text.contains("D:20250101120000+00'00'"),
194+
"PDF must contain the fixed CreationDate literal");
195+
assertFalse(text.contains("D:20" + java.time.Year.now().getValue()),
196+
"PDF must not embed current-year timestamps");
197+
}
198+
199+
// ── PNG builder (deterministic, self-contained) ─────────────────────────
200+
201+
static byte[] buildSolidColorPng(int w, int h, int r, int g, int b) throws Exception {
202+
ByteArrayOutputStream raw = new ByteArrayOutputStream(h * (1 + w * 3));
203+
for (int y = 0; y < h; y++) {
204+
raw.write(0);
205+
for (int x = 0; x < w; x++) {
206+
raw.write(r);
207+
raw.write(g);
208+
raw.write(b);
209+
}
210+
}
211+
212+
byte[] idat = zlibCompress(raw.toByteArray());
213+
214+
ByteArrayOutputStream png = new ByteArrayOutputStream();
215+
png.write(new byte[]{(byte) 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A});
216+
ByteArrayOutputStream ihdr = new ByteArrayOutputStream(13);
217+
writeInt4(ihdr, w);
218+
writeInt4(ihdr, h);
219+
ihdr.write(8);
220+
ihdr.write(2);
221+
ihdr.write(0);
222+
ihdr.write(0);
223+
ihdr.write(0);
224+
writeChunk(png, "IHDR", ihdr.toByteArray());
225+
writeChunk(png, "IDAT", idat);
226+
writeChunk(png, "IEND", new byte[0]);
227+
return png.toByteArray();
228+
}
229+
230+
private static void writeChunk(ByteArrayOutputStream out, String type, byte[] data)
231+
throws Exception {
232+
byte[] typeBytes = type.getBytes(StandardCharsets.US_ASCII);
233+
CRC32 crc = new CRC32();
234+
crc.update(typeBytes);
235+
crc.update(data);
236+
writeInt4(out, data.length);
237+
out.write(typeBytes);
238+
out.write(data);
239+
writeInt4(out, (int) crc.getValue());
240+
}
241+
242+
private static void writeInt4(ByteArrayOutputStream out, int v) {
243+
out.write((v >>> 24) & 0xFF);
244+
out.write((v >>> 16) & 0xFF);
245+
out.write((v >>> 8) & 0xFF);
246+
out.write(v & 0xFF);
247+
}
248+
249+
private static byte[] zlibCompress(byte[] input) {
250+
Deflater d = new Deflater(Deflater.DEFAULT_COMPRESSION, false);
251+
try {
252+
d.setInput(input);
253+
d.finish();
254+
ByteArrayOutputStream baos = new ByteArrayOutputStream();
255+
byte[] buf = new byte[4096];
256+
while (!d.finished()) {
257+
int n = d.deflate(buf);
258+
baos.write(buf, 0, n);
259+
}
260+
return baos.toByteArray();
261+
} finally {
262+
d.end();
263+
}
264+
}
265+
}

0 commit comments

Comments
 (0)