Skip to content

Commit b4f4402

Browse files
emcd and claude committed
Add deterministic boundary option for reproducible mimeogram output.
Adds the --deterministic-boundary CLI flag and the deterministic-boundary configuration setting, which generate content-based SHA-256 hashes instead of random UUIDs for MIME boundaries. This makes mimeogram output reproducible and diff-friendly for version control workflows.

Key features:
- CLI flag --deterministic-boundary overrides configuration setting
- Configuration via deterministic-boundary = true/false in [create] section
- SHA-256 hash includes all part content, metadata, and optional messages
- Comprehensive test coverage for all functionality

Closes #8

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 0006f28 commit b4f4402

7 files changed

Lines changed: 315 additions & 11 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Add deterministic boundary option for reproducible mimeogram output. When
2+
enabled via ``--deterministic-boundary`` CLI flag or ``deterministic-boundary``
3+
configuration setting, boundaries are generated from content hashes instead of
4+
random UUIDs, making output diff-friendly for version control workflows.
5+
(Feature request from @developingjames.)

data/configuration/general.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ from-clipboard = true
55

66
[create]
77
count-tokens = true
8+
deterministic-boundary = false
89
to-clipboard = true
910

1011
[prompt]

sources/mimeogram/__/imports.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,16 @@
2323
# ruff: noqa: F401
2424

2525

26-
import abc
27-
import asyncio
26+
import abc
27+
import asyncio
2828
import collections.abc as cabc
2929
import dataclasses as dcls
30-
import enum
31-
import os
32-
import re
33-
import sys
34-
import types
30+
import enum
31+
import hashlib
32+
import os
33+
import re
34+
import sys
35+
import types
3536

3637
from contextlib import AsyncExitStack as ExitsAsync
3738
from logging import getLogger as produce_scribe

sources/mimeogram/create.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,17 @@ class Command(
8686
If not specified, then the default variant is used.
8787
''' ),
8888
] = None
89+
deterministic_boundary: __.typx.Annotated[
90+
__.typx.Optional[ bool ],
91+
__.typx.Doc(
92+
''' Use deterministic boundary for reproducible output.
93+
94+
When enabled, the MIME boundary marker will be a hash of the
95+
content, making output reproducible and diff-friendly.
96+
Useful for testing, CI, and batch processing.
97+
''' ),
98+
__.tyro.conf.arg( aliases = ( '--deterministic-boundary', ) ),
99+
] = None
89100

90101
async def __call__( self, auxdata: __.Globals ) -> None:
91102
''' Executes command to create mimeogram. '''
@@ -113,6 +124,10 @@ def provide_configuration_edits( self ) -> __.DictionaryEdits:
113124
edits.append( __.SimpleDictionaryEdit( # pyright: ignore
114125
address = ( 'tokenizers', 'default' ),
115126
value = self.tokenizer ) )
127+
if None is not self.deterministic_boundary:
128+
edits.append( __.SimpleDictionaryEdit( # pyright: ignore
129+
address = ( 'create', 'deterministic-boundary' ),
130+
value = self.deterministic_boundary ) )
116131
return tuple( edits )
117132

118133

@@ -154,12 +169,18 @@ async def create(
154169
_scribe, "Could not acquire user message."
155170
): message = await editor( )
156171
else: message = None
157-
mimeogram = format_mimeogram( parts, message = message )
172+
options = auxdata.configuration.get( 'create', { } )
173+
deterministic_boundary = (
174+
command.deterministic_boundary
175+
if command.deterministic_boundary is not None
176+
else options.get( 'deterministic-boundary', False ) )
177+
mimeogram = format_mimeogram(
178+
parts, message = message,
179+
deterministic_boundary = deterministic_boundary )
158180
# TODO? Pass prompt to 'format_mimeogram'.
159181
if command.prepend_prompt:
160182
prompt = await prompter( auxdata )
161183
mimeogram = f"{prompt}\n\n{mimeogram}"
162-
options = auxdata.configuration.get( 'create', { } )
163184
if options.get( 'count-tokens', False ):
164185
with __.report_exceptions(
165186
_scribe, "Could not count mimeogram tokens."
@@ -171,7 +192,7 @@ async def create(
171192
with __.report_exceptions(
172193
_scribe, "Could not copy mimeogram to clipboard."
173194
): await clipcopier( mimeogram )
174-
else: print( mimeogram ) # TODO? Use output stream from configuration.
195+
else: print( mimeogram )
175196
raise SystemExit( 0 )
176197

177198

sources/mimeogram/formatters.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,17 @@
2828
def format_mimeogram(
2929
parts: __.cabc.Sequence[ _parts.Part ],
3030
message: __.typx.Optional[ str ] = None,
31+
deterministic_boundary: bool = False,
3132
) -> str:
3233
''' Formats parts into mimeogram. '''
3334
if not parts and message is None:
3435
from .exceptions import MimeogramFormatEmpty
3536
raise MimeogramFormatEmpty( )
36-
boundary = "====MIMEOGRAM_{uuid}====".format( uuid = __.uuid4( ).hex )
37+
if deterministic_boundary:
38+
content_hash = _compute_content_hash( parts, message )
39+
boundary = f"====MIMEOGRAM_{content_hash}===="
40+
else:
41+
boundary = "====MIMEOGRAM_{uuid}====".format( uuid = __.uuid4( ).hex )
3742
lines: list[ str ] = [ ]
3843
if message:
3944
message_part = _parts.Part(
@@ -59,3 +64,20 @@ def format_part( part: _parts.Part, boundary: str ) -> str:
5964
f"linesep={part.linesep.name}",
6065
'',
6166
part.content ) )
67+
68+
69+
def _compute_content_hash(
    parts: __.cabc.Sequence[ _parts.Part ],
    message: __.typx.Optional[ str ] = None,
) -> str:
    ''' Computes deterministic hash for mimeogram content.

        Feeds the message and then, for each part in order, its location,
        mimetype, charset, line separator name, and content into SHA-256.
        Returns the hexadecimal digest.

        Each field is preceded by its encoded byte length. Without such
        framing, adjacent fields run together: a location of ``ab``
        followed by a mimetype of ``c`` would hash the same as ``a``
        followed by ``bc``.
    '''
    hasher = __.hashlib.sha256( )

    def absorb( text: str ) -> None:
        # Fixed-width length prefix makes the field boundary unambiguous.
        data = text.encode( 'utf-8' )
        hasher.update( len( data ).to_bytes( 8, 'big' ) )
        hasher.update( data )

    # 'format_mimeogram' emits a message part only for a non-empty message,
    # so absent and empty messages are deliberately hashed alike.
    absorb( message or '' )
    for part in parts:
        for field in (
            part.location,
            part.mimetype,
            part.charset,
            part.linesep.name,
            part.content,
        ): absorb( str( field ) )
    return hasher.hexdigest( )

tests/test_000_mimeogram/test_210_formatters.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,3 +247,88 @@ def test_070_verify_boundary_uniqueness( ):
247247
assert match1 is not None
248248
assert match2 is not None
249249
assert match1.group( 1 ) != match2.group( 1 )
250+
251+
252+
def test_080_deterministic_boundary_basic( ):
    ''' Repeated formatting with hash boundary yields identical text. '''
    formatters = cache_import_module( f"{PACKAGE_NAME}.formatters" )
    sample = _create_sample_part(
        content = 'Deterministic content', location = 'deterministic.txt' )
    # Render the same part twice, each time from a fresh list.
    first, second = (
        formatters.format_mimeogram(
            [ sample ], deterministic_boundary = True )
        for _ in range( 2 ) )
    assert first == second
    found = re.search( r'--====MIMEOGRAM_([0-9a-f]{64})====', first )
    assert found is not None, 'Boundary should be a 64-character hex hash'
269+
270+
271+
def test_090_deterministic_boundary_with_message( ):
    ''' Deterministic boundary works with message.

        The same message must reproduce the mimeogram exactly, and a
        different message must change the boundary itself.
    '''
    formatters = cache_import_module( f"{PACKAGE_NAME}.formatters" )
    part = _create_sample_part( content = 'Test content' )
    message = 'Test message'
    mimeogram1 = formatters.format_mimeogram(
        [ part ], message = message, deterministic_boundary = True
    )
    mimeogram2 = formatters.format_mimeogram(
        [ part ], message = message, deterministic_boundary = True
    )
    assert mimeogram1 == mimeogram2
    mimeogram3 = formatters.format_mimeogram(
        [ part ], message = 'Different message', deterministic_boundary = True
    )
    assert mimeogram1 != mimeogram3
    # Whole-text inequality alone would hold even if the message never fed
    # the hash, so compare the boundaries directly.
    boundary_pattern = r'--====MIMEOGRAM_([0-9a-f]{64})===='
    match1 = re.search( boundary_pattern, mimeogram1 )
    match3 = re.search( boundary_pattern, mimeogram3 )
    assert match1 is not None
    assert match3 is not None
    assert match1.group( 1 ) != match3.group( 1 )
287+
288+
289+
def test_100_deterministic_boundary_different_content( ):
    ''' Distinct part contents hash to distinct boundaries. '''
    formatters = cache_import_module( f"{PACKAGE_NAME}.formatters" )
    rendered = [
        formatters.format_mimeogram(
            [ _create_sample_part( content = text ) ],
            deterministic_boundary = True )
        for text in ( 'Content 1', 'Content 2' ) ]
    assert rendered[ 0 ] != rendered[ 1 ]
    matcher = re.compile( r'--====MIMEOGRAM_([0-9a-f]{64})====' )
    hits = [ matcher.search( text ) for text in rendered ]
    assert hits[ 0 ] is not None
    assert hits[ 1 ] is not None
    # The captured digests, not just the surrounding text, must differ.
    assert hits[ 0 ].group( 1 ) != hits[ 1 ].group( 1 )
307+
308+
309+
def test_110_deterministic_boundary_order_sensitivity( ):
    ''' Deterministic boundary is sensitive to part order.

        Swapping two parts must change the boundary digest, not only the
        order in which the parts appear in the text.
    '''
    formatters = cache_import_module( f"{PACKAGE_NAME}.formatters" )
    part1 = _create_sample_part( location = 'first.txt', content = 'First' )
    part2 = _create_sample_part( location = 'second.txt', content = 'Second' )
    mimeogram1 = formatters.format_mimeogram(
        [ part1, part2 ], deterministic_boundary = True
    )
    mimeogram2 = formatters.format_mimeogram(
        [ part2, part1 ], deterministic_boundary = True
    )
    assert mimeogram1 != mimeogram2
    # Reordered parts always yield different text, so the assertion above
    # cannot detect an order-insensitive hash; check the boundaries too.
    boundary_pattern = r'--====MIMEOGRAM_([0-9a-f]{64})===='
    match1 = re.search( boundary_pattern, mimeogram1 )
    match2 = re.search( boundary_pattern, mimeogram2 )
    assert match1 is not None
    assert match2 is not None
    assert match1.group( 1 ) != match2.group( 1 )
321+
322+
323+
def test_120_deterministic_boundary_metadata_sensitivity( ):
    ''' Deterministic boundary is sensitive to part metadata.

        Two parts with the same content but different mimetypes must
        receive different boundary digests.
    '''
    formatters = cache_import_module( f"{PACKAGE_NAME}.formatters" )
    part1 = _create_sample_part( mimetype = 'text/plain', content = 'Same' )
    part2 = _create_sample_part( mimetype = 'text/html', content = 'Same' )
    mimeogram1 = formatters.format_mimeogram(
        [ part1 ], deterministic_boundary = True
    )
    mimeogram2 = formatters.format_mimeogram(
        [ part2 ], deterministic_boundary = True
    )
    assert mimeogram1 != mimeogram2
    # Whole-text inequality would hold even if metadata never fed the hash,
    # so verify that the boundaries themselves differ.
    boundary_pattern = r'--====MIMEOGRAM_([0-9a-f]{64})===='
    match1 = re.search( boundary_pattern, mimeogram1 )
    match2 = re.search( boundary_pattern, mimeogram2 )
    assert match1 is not None
    assert match2 is not None
    assert match1.group( 1 ) != match2.group( 1 )

tests/test_000_mimeogram/test_600_create.py

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,3 +341,172 @@ async def failing_clipcopier( content: str ) -> None:
341341
clipcopier = failing_clipcopier )
342342

343343
assert exc_info.value.code == 1
344+
345+
346+
@pytest.mark.asyncio
async def test_400_create_deterministic_boundary_cli( provide_tempdir ):
    ''' Command flag alone selects content-hash boundary. '''
    import re
    create = cache_import_module( f"{PACKAGE_NAME}.create" )
    captured = [ ]
    files = { "test.txt": "test content\n" }
    with create_test_files( provide_tempdir, files ):
        command = create.Command(
            sources = [ str( provide_tempdir / "test.txt" ) ],
            deterministic_boundary = True )
        with pytest.raises( SystemExit ) as exc_info: # noqa: SIM117
            with pytest.MonkeyPatch( ).context( ) as patcher:
                # Collect whatever the command prints instead of emitting it.
                patcher.setattr( 'builtins.print', captured.append )
                await create.create(
                    MagicMock( configuration = { } ), command )

    assert exc_info.value.code == 0
    assert len( captured ) == 1
    found = re.search(
        r'--====MIMEOGRAM_([0-9a-f]{64})====', captured[ 0 ] )
    assert found is not None, 'Should have deterministic boundary'
377+
378+
379+
@pytest.mark.asyncio
async def test_410_create_deterministic_boundary_config( provide_tempdir ):
    ''' Configuration setting alone selects content-hash boundary. '''
    import re
    create = cache_import_module( f"{PACKAGE_NAME}.create" )
    captured = [ ]
    files = { "test.txt": "test content\n" }
    configuration = { 'create': { 'deterministic-boundary': True } }
    with create_test_files( provide_tempdir, files ):
        # No flag on the command: only the configuration requests the hash.
        command = create.Command(
            sources = [ str( provide_tempdir / "test.txt" ) ] )
        with pytest.raises( SystemExit ) as exc_info: # noqa: SIM117
            with pytest.MonkeyPatch( ).context( ) as patcher:
                patcher.setattr( 'builtins.print', captured.append )
                await create.create(
                    MagicMock( configuration = configuration ), command )

    assert exc_info.value.code == 0
    assert len( captured ) == 1
    found = re.search(
        r'--====MIMEOGRAM_([0-9a-f]{64})====', captured[ 0 ] )
    assert found is not None, 'Should have deterministic boundary'
410+
411+
412+
@pytest.mark.asyncio
async def test_420_create_deterministic_boundary_repeatability(
    provide_tempdir
):
    ''' Two runs over the same source print the same mimeogram. '''
    create = cache_import_module( f"{PACKAGE_NAME}.create" )
    files = { "test.txt": "test content\n" }
    # One capture list per run.
    runs = ( [ ], [ ] )
    with create_test_files( provide_tempdir, files ):
        command = create.Command(
            sources = [ str( provide_tempdir / "test.txt" ) ],
            deterministic_boundary = True )
        for sink in runs:
            with pytest.raises( SystemExit ): # noqa: SIM117
                with pytest.MonkeyPatch( ).context( ) as patcher:
                    patcher.setattr( 'builtins.print', sink.append )
                    await create.create(
                        MagicMock( configuration = { } ), command )

    first, second = runs
    assert len( first ) == 1
    assert len( second ) == 1
    assert first[ 0 ] == second[ 0 ]
455+
456+
457+
@pytest.mark.asyncio
async def test_430_create_deterministic_boundary_cli_overrides_config(
    provide_tempdir
):
    ''' Explicit false on command wins over true in configuration. '''
    import re
    create = cache_import_module( f"{PACKAGE_NAME}.create" )
    captured = [ ]
    files = { "test.txt": "test content\n" }
    configuration = { 'create': { 'deterministic-boundary': True } }
    with create_test_files( provide_tempdir, files ):
        command = create.Command(
            sources = [ str( provide_tempdir / "test.txt" ) ],
            deterministic_boundary = False )
        with pytest.raises( SystemExit ) as exc_info: # noqa: SIM117
            with pytest.MonkeyPatch( ).context( ) as patcher:
                patcher.setattr( 'builtins.print', captured.append )
                await create.create(
                    MagicMock( configuration = configuration ), command )

    assert exc_info.value.code == 0
    assert len( captured ) == 1
    # Exactly 32 hex digits before the closing marker: the shape expected of
    # a random boundary. A 64-digit content hash cannot match this pattern.
    found = re.search(
        r'--====MIMEOGRAM_([0-9a-f]{32})====', captured[ 0 ] )
    assert found is not None, (
        'Should have random boundary due to CLI override' )
493+
494+
495+
def test_440_deterministic_boundary_configuration_edit( ):
    ''' Flag set on command surfaces as configuration edit. '''
    create = cache_import_module( f"{PACKAGE_NAME}.create" )
    command = create.Command(
        sources = [ 'test.txt' ], deterministic_boundary = True )
    target = ( 'create', 'deterministic-boundary' )
    # First edit addressed to the setting, or None when there is none.
    matched = next(
        ( edit for edit in command.provide_configuration_edits( )
          if edit.address == target ),
        None )
    assert matched is not None
    assert matched.value is True

0 commit comments

Comments
 (0)