diff --git a/README.md b/README.md index 378e9ed..c9030ea 100644 --- a/README.md +++ b/README.md @@ -98,3 +98,18 @@ See `data/DATASETS.md` for descriptions of the included integer datasets: - `twitterjson_integers.txt` - Twitter API integers (heterogeneous distribution) - `cit_patents_citing_integers.txt.gz` - US patent numbers (7-digit, homogeneous) - `stackoverflow_unix_timestamps_integers.txt.gz` - Unix timestamps (10-digit, homogeneous) + +## Benchmark Metrics + +The benchmark reports the following metrics: + +| Metric | Description | +| -------- | ------------- | +| `ns/n` | Nanoseconds per number (integer) | +| `GHz` | CPU frequency during benchmark | +| `c/n` | CPU cycles per number | +| `i/n` | Instructions per number | +| `B/n` | Branches per number | +| `BM/n` | Branch misses per number | +| `i/d` | Instructions per output digit | +| `i/c` | Instructions per cycle (IPC) | diff --git a/benchmarks/benchmark.cpp b/benchmarks/benchmark.cpp index 549444e..39eb8b5 100644 --- a/benchmarks/benchmark.cpp +++ b/benchmarks/benchmark.cpp @@ -22,18 +22,18 @@ constexpr double Ratio_To_Sample = 0.01; constexpr double Ratio_Homogeneous = 0.95; // Homogeneous mode if > 95% of // numbers have the same digit length -void pretty_print(size_t volume, size_t bytes, const std::string &name, +void pretty_print(size_t num_integers, size_t volume, const std::string &name, event_aggregate agg) { std::print("{:<50} : ", name); - std::print(" {:5.2f} ns/d ", agg.fastest_elapsed_ns() / volume); + std::print(" {:5.2f} ns/n ", agg.fastest_elapsed_ns() / num_integers); if (collector.has_events()) { std::print(" {:5.2f} GHz ", agg.fastest_cycles() / agg.fastest_elapsed_ns()); - std::print(" {:5.2f} c/d ", agg.fastest_cycles() / volume); + std::print(" {:5.2f} c/n ", agg.fastest_cycles() / num_integers); + std::print(" {:5.2f} i/n ", agg.fastest_instructions() / num_integers); + std::print(" {:5.2f} B/n ", agg.branches() / num_integers); + std::print(" {:5.2f} BM/n ", agg.branch_misses() / num_integers); std::print(" {:5.2f} i/d ", agg.fastest_instructions() / volume); - std::print(" {:5.2f} B/d ", agg.branches() / volume); - std::print(" {:5.2f} BM/d ", agg.branch_misses() / volume); - std::print(" {:5.2f} i/B ", agg.fastest_instructions() / bytes); std::print(" {:5.2f} i/c ", agg.fastest_instructions() / agg.fastest_cycles()); } @@ -223,7 +223,7 @@ void run_benchmark(const std::vector &data, [[maybe_unused]] Variant a auto run_and_report = [&](auto&& name, auto&& func, size_t volume) { std::print("\n"); for (size_t i = 0; i < Number_Benchmark_Runs; ++i) - pretty_print(volume, data.size() * sizeof(uint64_t), name, bench(func)); + pretty_print(data.size(), volume, name, bench(func)); }; #if defined(CHAMPAGNE_LEMIRE_AVX512) && CHAMPAGNE_LEMIRE_AVX512 diff --git a/scripts/README.md b/scripts/README.md index ac1708e..317ffe6 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -169,7 +169,7 @@ Optional arguments: This will: - Auto-detect available compilers if `--compiler` not specified - Parse all `outputs/*.raw` files for each compiler -- Extract timing information (ns/d - nanoseconds per digit/character) +- Extract timing information (ns/n - nanoseconds per number) - Determine which variant was auto-selected for each dataset - Generate LaTeX table file(s) with compiler name in filename - Print a preview of the table @@ -217,7 +217,7 @@ Optional arguments: This will: - Auto-detect available compilers if `--compiler` not specified - Parse benchmark outputs for all algorithms -- Extract multiple metrics: ns/d (nanoseconds per character), i/d (instructions per character), c/d (cycles per character) +- Extract multiple metrics: ns/n (nanoseconds per number), i/n (instructions per number), c/n (cycles per number) - Calculate speedup percentages relative to AVX-512 for each metric - Compare 10 different algorithms across 5 real-world and synthetic datasets - Bold the best (lowest) value for each metric @@ -267,7 +267,7 @@ This will: - Auto-detect available compilers if `--compiler` not specified - Auto-detect CPU model from existing .raw files if not specified - Parse benchmark outputs for uniform-Ndigit-1M datasets (N=1 to 20) -- Extract ns/d metrics for all algorithms +- Extract ns/n metrics for all algorithms - Generate line plot(s) showing performance vs digit length - Save as PDF in the outputs directory with compiler name in filename @@ -316,7 +316,7 @@ Optional arguments: This will: - Auto-detect available compilers if `--compiler` not specified - Parse benchmark outputs for Twitter JSON, CIT Patents, and Natural 1-8 datasets -- Extract ns/d metrics for all (or 5 simplified) algorithms +- Extract ns/n metrics for all (or 5 simplified) algorithms - Generate combined bar chart (3 subplots) or separate figures per dataset - Save as PDF in the outputs directory with compiler name in filename diff --git a/scripts/generate_algorithm_comparison_table.py b/scripts/generate_algorithm_comparison_table.py index 9fefb4d..50a91aa 100755 --- a/scripts/generate_algorithm_comparison_table.py +++ b/scripts/generate_algorithm_comparison_table.py @@ -42,9 +42,9 @@ # Metrics to extract # Format: (pattern_suffix, column_header, latex_unit) METRICS = [ - ('ns/d', 'ns/d', 'ns/d'), # nanoseconds per digit (character) - ('i/d', 'i/d', 'ins/d'), # instructions per digit - ('c/d', 'c/d', 'cyc/d'), # cycles per digit + ('ns/n', 'ns/n', 'ns/n'), # nanoseconds per number + ('i/n', 'i/n', 'ins/n'), # instructions per number + ('c/n', 'c/n', 'cyc/n'), # cycles per number ] @@ -62,17 +62,17 @@ def parse_algorithm_metrics(file_path: Path, algorithm_pattern: str) -> Optional return None # Build regex pattern to capture the full line for this algorithm - # Example: "avx-512+champagne_lemire : 1.94 ns/d 5.48 GHz 10.64 c/d ..." + # Example: "avx-512+champagne_lemire : 1.94 ns/n 5.48 GHz 10.64 c/n ..." escaped_pattern = algorithm_pattern - pattern = rf'{escaped_pattern}\s*:\s*([\d.]+)\s*ns/d\s*[\d.]+\s*GHz\s*([\d.]+)\s*c/d\s*([\d.]+)\s*i/d' + pattern = rf'{escaped_pattern}\s*:\s*([\d.]+)\s*ns/n\s*[\d.]+\s*GHz\s*([\d.]+)\s*c/n\s*([\d.]+)\s*i/n' match = re.search(pattern, content) if not match: return None return { - 'ns/d': float(match.group(1)), - 'c/d': float(match.group(2)), - 'i/d': float(match.group(3)), + 'ns/n': float(match.group(1)), + 'c/n': float(match.group(2)), + 'i/n': float(match.group(3)), } @@ -206,14 +206,14 @@ def generate_latex_table(compiler: str, output_dir: str) -> str: # Table header lines.append(r"\begin{table}") lines.append(r" \caption{Performance comparison of integer-to-string algorithms across datasets.") - lines.append(r" Metrics: ns/d = nanoseconds, i/d = instructions, c/d = cycles per character.") + lines.append(r" Metrics: ns/n = nanoseconds, i/n = instructions, c/n = cycles per number.") lines.append(r" \textbf{Bold} indicates fastest; \% shows difference vs Champagne--Lemire (positive = slower).}%") lines.append(r" \label{tab:algorithm_comparison}") lines.append(r" \centering") lines.append(r" \small") lines.append(r" \begin{tabular}{llrrrrrr}") lines.append(r" \toprule") - lines.append(r" Algorithm & Dataset & ns/d & \% & i/d & \% & c/d & \% \\") + lines.append(r" Algorithm & Dataset & ns/n & \% & i/n & \% & c/n & \% \\") lines.append(r" \midrule") # Data rows - outer loop: algorithms, inner loop: datasets @@ -232,9 +232,9 @@ def generate_latex_table(compiler: str, output_dir: str) -> str: else: row_parts.append("") - # Check if this algorithm has the best ns/d for this dataset - value_nsd = algo_metrics.get('ns/d') - best_nsd = best_values.get((dataset_display, 'ns/d')) + # Check if this algorithm has the best ns/n for this dataset + value_nsd = algo_metrics.get('ns/n') + best_nsd = best_values.get((dataset_display, 'ns/n')) is_best_algo = (value_nsd is not None and best_nsd is not None and abs(value_nsd - best_nsd) < 0.01) diff --git a/scripts/generate_bar_chart.py b/scripts/generate_bar_chart.py index 533207a..3044bdc 100755 --- a/scripts/generate_bar_chart.py +++ b/scripts/generate_bar_chart.py @@ -65,15 +65,15 @@ def parse_algorithm_metrics(file_path: Path, algorithm_pattern: str) -> Optional return None escaped_pattern = algorithm_pattern - pattern = rf'{escaped_pattern}\s*:\s*([\d.]+)\s*ns/d\s*[\d.]+\s*GHz\s*([\d.]+)\s*c/d\s*([\d.]+)\s*i/d' + pattern = rf'{escaped_pattern}\s*:\s*([\d.]+)\s*ns/n\s*[\d.]+\s*GHz\s*([\d.]+)\s*c/n\s*([\d.]+)\s*i/n' match = re.search(pattern, content) if not match: return None return { - 'ns/d': float(match.group(1)), - 'c/d': float(match.group(2)), - 'i/d': float(match.group(3)), + 'ns/n': float(match.group(1)), + 'c/n': float(match.group(2)), + 'i/n': float(match.group(3)), } @@ -153,14 +153,14 @@ def get_available_compilers(output_dir: Path) -> List[str]: def collect_data(output_dir: Path, compiler: str, algorithms: List[Tuple[str, str]]) -> Dict[str, Dict[str, float]]: """ - Collect ns/d data for all datasets and algorithms. - Returns: {dataset_display: {algorithm_name: ns/d_value}} + Collect ns/n data for all datasets and algorithms. + Returns: {dataset_display: {algorithm_name: ns/n_value}} """ data = {} for dataset_base, dataset_display in DATASETS: results = get_algorithm_results_for_dataset(dataset_base, compiler, output_dir, algorithms) if results: - data[dataset_display] = {algo: metrics['ns/d'] for algo, metrics in results.items()} + data[dataset_display] = {algo: metrics['ns/n'] for algo, metrics in results.items()} else: print(f" WARNING: No data found for dataset {dataset_display}") return data diff --git a/scripts/generate_digit_length_figure.py b/scripts/generate_digit_length_figure.py index f6c259e..85a70d3 100755 --- a/scripts/generate_digit_length_figure.py +++ b/scripts/generate_digit_length_figure.py @@ -55,15 +55,15 @@ def parse_algorithm_metrics(file_path: Path, algorithm_pattern: str) -> Optional return None escaped_pattern = algorithm_pattern - pattern = rf'{escaped_pattern}\s*:\s*([\d.]+)\s*ns/d\s*[\d.]+\s*GHz\s*([\d.]+)\s*c/d\s*([\d.]+)\s*i/d' + pattern = rf'{escaped_pattern}\s*:\s*([\d.]+)\s*ns/n\s*[\d.]+\s*GHz\s*([\d.]+)\s*c/n\s*([\d.]+)\s*i/n' match = re.search(pattern, content) if not match: return None return { - 'ns/d': float(match.group(1)), - 'c/d': float(match.group(2)), - 'i/d': float(match.group(3)), + 'ns/n': float(match.group(1)), + 'c/n': float(match.group(2)), + 'i/n': float(match.group(3)), } @@ -130,7 +130,7 @@ def get_available_compilers(output_dir: Path) -> List[str]: def collect_data_by_digit_length(output_dir: Path, compiler: str, cpu_model: Optional[str], algorithms: List[Tuple[str, str]]) -> Dict[int, Dict[str, float]]: """ Collect performance data for all digit lengths. - Returns: {digit_length: {algorithm_name: ns/d_value}} + Returns: {digit_length: {algorithm_name: ns/n_value}} """ data = {} for digit_length in DIGIT_LENGTHS: @@ -161,8 +161,8 @@ def collect_data_by_digit_length(output_dir: Path, compiler: str, cpu_model: Opt digit_data = {} for algo_pattern, algo_display in algorithms: metrics = parse_algorithm_metrics(selected_file, algo_pattern) - if metrics and 'ns/d' in metrics: - digit_data[algo_display] = metrics['ns/d'] + if metrics and 'ns/n' in metrics: + digit_data[algo_display] = metrics['ns/n'] if digit_data: data[digit_length] = digit_data @@ -170,7 +170,7 @@ def collect_data_by_digit_length(output_dir: Path, compiler: str, cpu_model: Opt return data -def generate_figure(data: Dict[int, Dict[str, float]], output_path: Path, metric_name: str = "ns/d"): +def generate_figure(data: Dict[int, Dict[str, float]], output_path: Path, metric_name: str = "ns/n"): """Generate a line plot comparing algorithms across digit lengths.""" if not data: print("ERROR: No data to plot") diff --git a/scripts/generate_variant_comparison_table.py b/scripts/generate_variant_comparison_table.py index 2cff347..66d0db4 100755 --- a/scripts/generate_variant_comparison_table.py +++ b/scripts/generate_variant_comparison_table.py @@ -34,7 +34,7 @@ def parse_benchmark_output(file_path: Path) -> Optional[Dict[str, float]]: """ Parse a benchmark output file and extract timing information. - Returns a dict with algorithm names as keys and ns/d (ns per item) as values. + Returns a dict with algorithm names as keys and ns/n (ns per number) as values. Returns None if parsing fails. """ try: @@ -47,8 +47,8 @@ def parse_benchmark_output(file_path: Path) -> Optional[Dict[str, float]]: results = {} # Parse the AVX-512+champagne_lemire line(s) - # Format: "avx-512+champagne_lemire : X.XX ns/d ..." - pattern = r'avx-512\+champagne_lemire\s*:\s*(\d+\.\d+)\s*ns/d' + # Format: "avx-512+champagne_lemire : X.XX ns/n ..." + pattern = r'avx-512\+champagne_lemire\s*:\s*(\d+\.\d+)\s*ns/n' matches = re.findall(pattern, content) if matches: