diff --git a/processing/text/processor.go b/processing/text/processor.go
index f56f9dd..765896a 100644
--- a/processing/text/processor.go
+++ b/processing/text/processor.go
@@ -1,7 +1,9 @@
 package text
 
 import (
+	"bufio"
 	"fmt"
+	"html"
 	"regexp"
 	"strings"
 )
@@ -24,6 +26,7 @@ var (
 	_ Processor = RegexpRemover{}
 	_ Processor = Cutter{}
 	_ Processor = Trimmer{}
+	_ Processor = LineToParagraph{}
 )
 
 // processor is a generic implementation of Processor,
@@ -137,6 +140,58 @@ func (p Trimmer) Process(s string) (string, error) {
 	return strings.Trim(s, p.Cutset), nil
 }
 
+// LineToParagraph converts each line of text into a separate HTML <p>...</p> paragraph.
+// TrimSpace controls whether leading and trailing spaces are removed from each line before wrapping it in <p> tags.
+// Empty lines can be either skipped or rendered as empty <p></p> according to the SkipEmpty flag.
+// The zero value preserves whitespace and emits <p></p> for empty lines.
+type LineToParagraph struct {
+	// TrimSpace controls whether leading and trailing spaces are removed from each line.
+	// true → trim spaces
+	// false → preserve spaces (default, matches previous behaviour)
+	TrimSpace bool
+	// SkipEmpty controls whether completely empty lines produce <p></p> or are ignored.
+	// With TrimSpace set, lines containing only whitespace count as empty.
+	// true → skip empty lines (matches previous behaviour)
+	// false → emit <p></p> for empty lines (zero value, i.e. the default)
+	SkipEmpty bool
+}
+
+// Describe returns a human-readable description of the processor.
+func (p LineToParagraph) Describe() string {
+	return fmt.Sprintf("LineToParagraph(TrimSpace=%t, SkipEmpty=%t)", p.TrimSpace, p.SkipEmpty)
+}
+
+// Once returns true: the transformation is not idempotent (a second pass would
+// escape the generated <p> tags), so it should run only once.
+func (LineToParagraph) Once() bool { return true }
+
+// Process transforms the input text line-by-line into HTML paragraphs.
+// Each line is HTML-escaped and emitted as "<p>line</p>\n".
+func (p LineToParagraph) Process(s string) (string, error) {
+	scanner := bufio.NewScanner(strings.NewReader(s))
+	// Raise the token limit so a line longer than bufio.MaxScanTokenSize
+	// does not abort processing with bufio.ErrTooLong.
+	scanner.Buffer(nil, len(s)+1)
+	var b strings.Builder
+	b.Grow(len(s))
+	for scanner.Scan() {
+		line := scanner.Text()
+		if p.TrimSpace {
+			line = strings.TrimSpace(line)
+		}
+		if p.SkipEmpty && line == "" {
+			continue
+		}
+		b.WriteString("<p>")
+		b.WriteString(html.EscapeString(line))
+		b.WriteString("</p>\n")
+	}
+	if err := scanner.Err(); err != nil {
+		return "", err
+	}
+	return b.String(), nil
+}
+
 // TrimSpace returns a processor that removes leading and trailing spaces.
 func TrimSpace() Processor {
 	return NewProcessor("TrimSpace", false, WrapFunc(strings.TrimSpace))
@@ -162,6 +217,12 @@ func RemoveParentheses() Processor {
 	)
 }
 
+// ToParagraphs returns a processor that converts each line into a <p> paragraph.
+// If skipEmpty is true, empty lines are ignored; otherwise, they produce empty <p></p>.
+func ToParagraphs(skipEmpty bool) Processor {
+	return LineToParagraph{SkipEmpty: skipEmpty}
+}
+
 // WrapFunc wraps a simple string -> string function
 // into a function matching the Processor signature.
 func WrapFunc(fn func(string) string) func(string) (string, error) {
diff --git a/processing/text/processor_test.go b/processing/text/processor_test.go
index 1c87ac4..9986da1 100644
--- a/processing/text/processor_test.go
+++ b/processing/text/processor_test.go
@@ -60,3 +60,74 @@ func TestTrimmer(t *testing.T) {
 		}
 	}
 }
+
+func TestLineToParagraph(t *testing.T) {
+	for i, tc := range []struct {
+		proc     Processor
+		input    string
+		expected string
+	}{
+		// Zero value: SkipEmpty=false (empty lines are emitted), TrimSpace=false
+		{
+			LineToParagraph{},
+			" First line \n\n Second line\t \n\n",
+			"<p> First line </p>\n<p></p>\n<p> Second line\t </p>\n<p></p>\n",
+		},
+		// Explicitly enable trimming of leading/trailing whitespace
+		{
+			LineToParagraph{TrimSpace: true},
+			" Hello \n World \n",
+			"<p>Hello</p>\n<p>World</p>\n",
+		},
+		// Preserve empty lines (SkipEmpty = false)
+		{
+			LineToParagraph{SkipEmpty: false},
+			"Line 1\n\n\nLine 2\n",
+			"<p>Line 1</p>\n<p></p>\n<p></p>\n<p>Line 2</p>\n",
+		},
+		// Trim + preserve empty lines
+		{
+			LineToParagraph{TrimSpace: true, SkipEmpty: false},
+			" \n A \n \nB \n",
+			"<p></p>\n<p>A</p>\n<p></p>\n<p>B</p>\n",
+		},
+		// Fully literal mode: keep all original whitespace and emit every line
+		{
+			LineToParagraph{TrimSpace: false, SkipEmpty: false},
+			"\tIndented\n \n Spaces only \n\nTrailing \n",
+			"<p>\tIndented</p>\n<p> </p>\n<p> Spaces only </p>\n<p></p>\n<p>Trailing </p>\n",
+		},
+		// Empty input
+		{
+			LineToParagraph{},
+			"",
+			"",
+		},
+		// Input containing only empty lines and whitespace
+		{
+			LineToParagraph{},
+			"\n \n\t\n \n",
+			"<p></p>\n<p> </p>\n<p>\t</p>\n<p> </p>\n",
+		},
+		// HTML escaping works regardless of configuration
+		{
+			LineToParagraph{TrimSpace: true},
+			" <script>alert(1)</script> \n &copy; 2025 \n",
+			"<p>&lt;script&gt;alert(1)&lt;/script&gt;</p>\n<p>&amp;copy; 2025</p>\n",
+		},
+		{
+			LineToParagraph{TrimSpace: false},
+			" <b>bold</b> \n",
+			"<p> &lt;b&gt;bold&lt;/b&gt; </p>\n",
+		},
+	} {
+		res, err := NewTasks().Append(tc.proc).Process(tc.input)
+		if err != nil {
+			t.Errorf("case %d: unexpected error: %v", i, err)
+			continue
+		}
+		if res != tc.expected {
+			t.Errorf("case %d:\ngot:\n%q\nwant:\n%q", i, res, tc.expected)
+		}
+	}
+}
diff --git a/processing/text/task.go b/processing/text/task.go
index 278e77e..6ef1748 100644
--- a/processing/text/task.go
+++ b/processing/text/task.go
@@ -1,8 +1,6 @@
 package text
 
-import (
-	"fmt"
-)
+import "fmt"
 
 var MaxIter = 100
 