Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions src/uu/sort/src/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use uucore::error::{FromIo, UResult};
use crate::{
GlobalSettings, Output, SortError,
chunks::{self, Chunk, RecycledChunk},
compare_by, current_open_fd_count, fd_soft_limit, open,
current_open_fd_count, fd_soft_limit, merge_compare, open,
tmp_dir::TmpDirWrapper,
};

Expand Down Expand Up @@ -200,7 +200,13 @@ fn merge_without_limit<M: MergeInput + 'static, F: Iterator<Item = UResult<M>>>(
}

let reader_join_handle = thread::spawn({
let settings = settings.clone();
// The merge comparator (`merge_compare`) compares whole-line locale keys lazily
// with the ICU collator, so the reader does not need to precompute per-line sort
// keys. Disabling `fast_locale_collation` here turns `Line::create` into a no-op
// for that (whole-line, default) mode and avoids the dominant cost of merging
// already-sorted input. Other modes are unaffected (the flag is already false).
let mut settings = settings.clone();
settings.precomputed.fast_locale_collation = false;
move || {
reader(
&request_receiver,
Expand Down Expand Up @@ -332,7 +338,7 @@ impl FileMerger<'_> {
let current_line = &contents.lines[file.line_idx];
if settings.unique {
if let Some(prev) = &prev {
let cmp = compare_by(
let cmp = merge_compare(
&prev.chunk.lines()[prev.line_idx],
current_line,
settings,
Expand Down Expand Up @@ -383,7 +389,7 @@ struct FileComparator<'a> {

impl Compare<MergeableFile> for FileComparator<'_> {
fn compare(&self, a: &MergeableFile, b: &MergeableFile) -> Ordering {
let mut cmp = compare_by(
let mut cmp = merge_compare(
&a.current_chunk.lines()[a.line_idx],
&b.current_chunk.lines()[b.line_idx],
self.settings,
Expand Down
31 changes: 31 additions & 0 deletions src/uu/sort/src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2621,6 +2621,37 @@ fn sort_by<'a>(unsorted: &mut Vec<Line<'a>>, settings: &GlobalSettings, line_dat
}
}

/// Comparison used by the merge path.
///
/// This is result-identical to [`compare_by`], but for the whole-line locale-collation

@cakebaker cakebaker Jul 1, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `compare_by` link makes the Documentation/warnings job fail:

public documentation for `merge_compare` links to private item `compare_by`
    --> src/uu/sort/src/sort.rs:2626:35
     |
2626 | /// This is result-identical to [`compare_by`], but for the whole-line locale-collation
     |                                   ^^^^^^^^^^ this item is private

/// case it compares the two lines lazily with the ICU collator instead of relying on
/// precomputed collation keys. Merging only performs O(n log k) comparisons (and none at
/// all when merging a single file), so computing a full sort key for every line — as the
/// regular sort path does to amortize O(n log n) comparisons — is pure overhead here.
/// Skipping that per-line work (see `merge_without_limit`) is what makes `sort -m` of
/// already-sorted input fast.
pub fn merge_compare<'a>(
a: &Line<'a>,
b: &Line<'a>,
settings: &GlobalSettings,
a_line_data: &LineData<'a>,
b_line_data: &LineData<'a>,
) -> Ordering {
Comment on lines +2633 to +2639

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function signature looks a bit odd with `settings` in the middle; I would have expected it at the end. But I think that's something for a future PR.

#[cfg(feature = "i18n-collator")]
if settings.precomputed.fast_locale_collation {
// Mirror the `fast_locale_collation` branch of `compare_by`, but compare the line
// bytes directly rather than precomputed keys: `locale_cmp` (ICU `compare_utf8`)
// and the sort-key comparison agree on ordering by construction.
let mut cmp = locale_cmp(a.line, b.line);
if cmp == Ordering::Equal {
// Equal keys for inputs like `01` and `0_1`; fall back to (reversed) byte order.
cmp = b.line.cmp(a.line);
}
return if settings.reverse { cmp.reverse() } else { cmp };
}
compare_by(a, b, settings, a_line_data, b_line_data)
}

fn compare_by<'a>(
a: &Line<'a>,
b: &Line<'a>,
Expand Down
Loading