|
| 1 | +package io.offixa.pdfixa.core.document; |
| 2 | + |
| 3 | +import org.junit.jupiter.api.Test; |
| 4 | + |
| 5 | +import java.io.ByteArrayOutputStream; |
| 6 | +import java.nio.charset.StandardCharsets; |
| 7 | +import java.security.MessageDigest; |
| 8 | +import java.util.Locale; |
| 9 | +import java.util.zip.CRC32; |
| 10 | +import java.util.zip.Deflater; |
| 11 | + |
| 12 | +import static org.junit.jupiter.api.Assertions.*; |
| 13 | + |
| 14 | +/** |
| 15 | + * Phase 3: Determinism Hard Proof. |
| 16 | + * |
| 17 | + * <p>Proves byte-for-byte reproducibility of PDFixa output across JVM runs, |
| 18 | + * locales, and repeated invocations. The canonical document exercises every |
| 19 | + * major feature path: |
| 20 | + * <ul> |
| 21 | + * <li>Base-14 font text ({@code showText})</li> |
| 22 | + * <li>Unicode raw text ({@code showTextUnicodeRaw})</li> |
| 23 | + * <li>JPEG image XObject</li> |
| 24 | + * <li>PNG image XObject</li> |
| 25 | + * <li>Document metadata ({@link PdfInfo})</li> |
| 26 | + * <li>Custom page size ({@link PdfPageSize})</li> |
| 27 | + * </ul> |
| 28 | + * |
| 29 | + * <p>No test depends on {@link System#currentTimeMillis()} or any other |
| 30 | + * non-deterministic system state. All metadata dates are fixed string literals. |
| 31 | + */ |
| 32 | +class CanonicalDeterminismTest { |
| 33 | + |
| 34 | + private static final byte[] TINY_JPEG = { |
| 35 | + (byte) 0xFF, (byte) 0xD8, |
| 36 | + (byte) 0xFF, (byte) 0xD9 |
| 37 | + }; |
| 38 | + |
| 39 | + private static final PdfPageSize CUSTOM_PAGE = new PdfPageSize(700, 500); |
| 40 | + |
| 41 | + private static final String EXPECTED_SHA256 = |
| 42 | + "042051ed6a438ce1a99760363ab1eb5ffb4b9f87e684d26f8fefb545037742dd"; |
| 43 | + |
| 44 | + /** |
| 45 | + * Builds the canonical PDF exercising all major feature paths. |
| 46 | + * Every input is a compile-time constant — no runtime entropy. |
| 47 | + */ |
| 48 | + static byte[] buildCanonicalPdf() throws Exception { |
| 49 | + PdfDocument doc = new PdfDocument(CUSTOM_PAGE); |
| 50 | + |
| 51 | + doc.setInfo(PdfInfo.builder() |
| 52 | + .title("Determinism Proof") |
| 53 | + .author("PDFixa CI") |
| 54 | + .subject("Phase 3 canonical document") |
| 55 | + .keywords("determinism, test, canonical") |
| 56 | + .creator("CanonicalDeterminismTest") |
| 57 | + .creationDate("D:20250101120000+00'00'") |
| 58 | + .modDate("D:20250101120000+00'00'") |
| 59 | + .build()); |
| 60 | + |
| 61 | + byte[] pngBytes = buildSolidColorPng(16, 16, 0xFF, 0x00, 0x00); |
| 62 | + PdfImage jpegImg = doc.addJpegImage(TINY_JPEG, 10, 8); |
| 63 | + PdfImage pngImg = doc.addPngImage(pngBytes); |
| 64 | + |
| 65 | + PdfPage page = doc.addPage(); |
| 66 | + |
| 67 | + page.getContent() |
| 68 | + .beginText() |
| 69 | + .setFont("Helvetica-Bold", 14) |
| 70 | + .moveText(50, 450) |
| 71 | + .showText("Determinism Proof: Base14 Text") |
| 72 | + .endText() |
| 73 | + |
| 74 | + .beginText() |
| 75 | + .setFont("Helvetica", 11) |
| 76 | + .moveText(50, 430) |
| 77 | + .showText("The quick brown fox jumps over the lazy dog.") |
| 78 | + .endText() |
| 79 | + |
| 80 | + .beginText() |
| 81 | + .setFont("Courier", 10) |
| 82 | + .moveText(50, 410) |
| 83 | + .showText("0123456789 !@#$%^&*()") |
| 84 | + .endText() |
| 85 | + |
| 86 | + .beginText() |
| 87 | + .setFont("Helvetica", 11) |
| 88 | + .moveText(50, 380) |
| 89 | + .showTextUnicodeRaw("Salom Dunyo!") |
| 90 | + .endText() |
| 91 | + |
| 92 | + .beginText() |
| 93 | + .setFont("Helvetica", 11) |
| 94 | + .moveText(50, 360) |
| 95 | + .showTextUnicodeRaw("\u041F\u0440\u0438\u0432\u0435\u0442 \u043C\u0438\u0440!") |
| 96 | + .endText() |
| 97 | + |
| 98 | + .beginText() |
| 99 | + .setFont("Helvetica", 11) |
| 100 | + .moveText(50, 340) |
| 101 | + .showTextUnicodeRaw("\u4F60\u597D\u4E16\u754C") |
| 102 | + .endText(); |
| 103 | + |
| 104 | + page.drawImage(jpegImg, 50, 250, 100, 80); |
| 105 | + page.drawImage(pngImg, 200, 250, 64, 64); |
| 106 | + |
| 107 | + page.getContent() |
| 108 | + .setStrokeColor(0.2, 0.4, 0.8) |
| 109 | + .setLineWidth(1.5) |
| 110 | + .rectangle(50, 200, 200, 30) |
| 111 | + .stroke() |
| 112 | + |
| 113 | + .setFillColor(0.9, 0.1, 0.1) |
| 114 | + .rectangle(300, 200, 100, 30) |
| 115 | + .fill(); |
| 116 | + |
| 117 | + ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| 118 | + doc.save(baos); |
| 119 | + return baos.toByteArray(); |
| 120 | + } |
| 121 | + |
| 122 | + static String sha256Hex(byte[] data) throws Exception { |
| 123 | + MessageDigest md = MessageDigest.getInstance("SHA-256"); |
| 124 | + byte[] digest = md.digest(data); |
| 125 | + StringBuilder sb = new StringBuilder(64); |
| 126 | + for (byte b : digest) { |
| 127 | + sb.append(String.format("%02x", b & 0xFF)); |
| 128 | + } |
| 129 | + return sb.toString(); |
| 130 | + } |
| 131 | + |
| 132 | + // ── 1. Canonical hash test ────────────────────────────────────────────── |
| 133 | + |
| 134 | + @Test |
| 135 | + void canonical_pdf_matches_expected_sha256() throws Exception { |
| 136 | + byte[] pdf = buildCanonicalPdf(); |
| 137 | + String hash = sha256Hex(pdf); |
| 138 | + |
| 139 | + assertEquals(EXPECTED_SHA256, hash, |
| 140 | + "Canonical PDF SHA-256 mismatch — if the document builder changed " |
| 141 | + + "intentionally, update EXPECTED_SHA256 to: " + hash); |
| 142 | + } |
| 143 | + |
| 144 | + // ── 2. Multi-run stability ────────────────────────────────────────────── |
| 145 | + |
| 146 | + @Test |
| 147 | + void three_generations_produce_identical_bytes() throws Exception { |
| 148 | + byte[] run1 = buildCanonicalPdf(); |
| 149 | + byte[] run2 = buildCanonicalPdf(); |
| 150 | + byte[] run3 = buildCanonicalPdf(); |
| 151 | + |
| 152 | + assertArrayEquals(run1, run2, "Run 1 vs Run 2 must be byte-identical"); |
| 153 | + assertArrayEquals(run2, run3, "Run 2 vs Run 3 must be byte-identical"); |
| 154 | + } |
| 155 | + |
| 156 | + // ── 3. Locale stability ───────────────────────────────────────────────── |
| 157 | + |
| 158 | + @Test |
| 159 | + void output_is_identical_across_locales() throws Exception { |
| 160 | + Locale original = Locale.getDefault(); |
| 161 | + try { |
| 162 | + Locale.setDefault(Locale.US); |
| 163 | + byte[] usBytes = buildCanonicalPdf(); |
| 164 | + |
| 165 | + Locale.setDefault(Locale.GERMANY); |
| 166 | + byte[] deBytes = buildCanonicalPdf(); |
| 167 | + |
| 168 | + Locale.setDefault(new Locale("tr", "TR")); |
| 169 | + byte[] trBytes = buildCanonicalPdf(); |
| 170 | + |
| 171 | + Locale.setDefault(Locale.JAPAN); |
| 172 | + byte[] jpBytes = buildCanonicalPdf(); |
| 173 | + |
| 174 | + Locale.setDefault(new Locale("ar", "SA")); |
| 175 | + byte[] arBytes = buildCanonicalPdf(); |
| 176 | + |
| 177 | + assertArrayEquals(usBytes, deBytes, "US vs DE locale must be byte-identical"); |
| 178 | + assertArrayEquals(usBytes, trBytes, "US vs TR locale must be byte-identical"); |
| 179 | + assertArrayEquals(usBytes, jpBytes, "US vs JP locale must be byte-identical"); |
| 180 | + assertArrayEquals(usBytes, arBytes, "US vs AR locale must be byte-identical"); |
| 181 | + } finally { |
| 182 | + Locale.setDefault(original); |
| 183 | + } |
| 184 | + } |
| 185 | + |
| 186 | + // ── 4. No system-time dependency ──────────────────────────────────────── |
| 187 | + |
| 188 | + @Test |
| 189 | + void metadata_uses_fixed_dates_not_system_time() throws Exception { |
| 190 | + byte[] pdf = buildCanonicalPdf(); |
| 191 | + String text = new String(pdf, StandardCharsets.US_ASCII); |
| 192 | + |
| 193 | + assertTrue(text.contains("D:20250101120000+00'00'"), |
| 194 | + "PDF must contain the fixed CreationDate literal"); |
| 195 | + assertFalse(text.contains("D:20" + java.time.Year.now().getValue()), |
| 196 | + "PDF must not embed current-year timestamps"); |
| 197 | + } |
| 198 | + |
| 199 | + // ── PNG builder (deterministic, self-contained) ───────────────────────── |
| 200 | + |
| 201 | + static byte[] buildSolidColorPng(int w, int h, int r, int g, int b) throws Exception { |
| 202 | + ByteArrayOutputStream raw = new ByteArrayOutputStream(h * (1 + w * 3)); |
| 203 | + for (int y = 0; y < h; y++) { |
| 204 | + raw.write(0); |
| 205 | + for (int x = 0; x < w; x++) { |
| 206 | + raw.write(r); |
| 207 | + raw.write(g); |
| 208 | + raw.write(b); |
| 209 | + } |
| 210 | + } |
| 211 | + |
| 212 | + byte[] idat = zlibCompress(raw.toByteArray()); |
| 213 | + |
| 214 | + ByteArrayOutputStream png = new ByteArrayOutputStream(); |
| 215 | + png.write(new byte[]{(byte) 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A}); |
| 216 | + ByteArrayOutputStream ihdr = new ByteArrayOutputStream(13); |
| 217 | + writeInt4(ihdr, w); |
| 218 | + writeInt4(ihdr, h); |
| 219 | + ihdr.write(8); |
| 220 | + ihdr.write(2); |
| 221 | + ihdr.write(0); |
| 222 | + ihdr.write(0); |
| 223 | + ihdr.write(0); |
| 224 | + writeChunk(png, "IHDR", ihdr.toByteArray()); |
| 225 | + writeChunk(png, "IDAT", idat); |
| 226 | + writeChunk(png, "IEND", new byte[0]); |
| 227 | + return png.toByteArray(); |
| 228 | + } |
| 229 | + |
| 230 | + private static void writeChunk(ByteArrayOutputStream out, String type, byte[] data) |
| 231 | + throws Exception { |
| 232 | + byte[] typeBytes = type.getBytes(StandardCharsets.US_ASCII); |
| 233 | + CRC32 crc = new CRC32(); |
| 234 | + crc.update(typeBytes); |
| 235 | + crc.update(data); |
| 236 | + writeInt4(out, data.length); |
| 237 | + out.write(typeBytes); |
| 238 | + out.write(data); |
| 239 | + writeInt4(out, (int) crc.getValue()); |
| 240 | + } |
| 241 | + |
| 242 | + private static void writeInt4(ByteArrayOutputStream out, int v) { |
| 243 | + out.write((v >>> 24) & 0xFF); |
| 244 | + out.write((v >>> 16) & 0xFF); |
| 245 | + out.write((v >>> 8) & 0xFF); |
| 246 | + out.write(v & 0xFF); |
| 247 | + } |
| 248 | + |
| 249 | + private static byte[] zlibCompress(byte[] input) { |
| 250 | + Deflater d = new Deflater(Deflater.DEFAULT_COMPRESSION, false); |
| 251 | + try { |
| 252 | + d.setInput(input); |
| 253 | + d.finish(); |
| 254 | + ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
| 255 | + byte[] buf = new byte[4096]; |
| 256 | + while (!d.finished()) { |
| 257 | + int n = d.deflate(buf); |
| 258 | + baos.write(buf, 0, n); |
| 259 | + } |
| 260 | + return baos.toByteArray(); |
| 261 | + } finally { |
| 262 | + d.end(); |
| 263 | + } |
| 264 | + } |
| 265 | +} |
0 commit comments