diff --git a/.github/workflows/github-package.yml b/.github/workflows/github-package.yml new file mode 100644 index 0000000..cbcb92a --- /dev/null +++ b/.github/workflows/github-package.yml @@ -0,0 +1,44 @@ +name: Publish GitHub Package + +on: + workflow_dispatch: + push: + tags: + - "v*" + +permissions: + contents: read + packages: write + +jobs: + publish-github-package: + name: Publish to GitHub Packages + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v4 + + - name: Set up Node.js for GitHub Packages + uses: actions/setup-node@v4 + with: + node-version: "20" + registry-url: "https://npm.pkg.github.com" + scope: "@eamon2009" + cache: "npm" + cache-dependency-path: frontend/package-lock.json + + - name: Build frontend assets + run: | + npm --prefix frontend ci + npm --prefix frontend run build + + - name: Prepare GitHub Packages metadata + run: | + npm pkg set name="@eamon2009/quadtrix" + npm pkg set publishConfig.registry="https://npm.pkg.github.com" + + - name: Publish package + run: npm publish + env: + NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/README.md b/README.md index 5524b7a..2f5dea3 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,14 @@ Quadtrix.cpp is a local language model project with several execution paths: The project is designed as a technical learning implementation. The C++ path exposes the transformer internals directly: tensor operations, attention, layer normalization, cross-entropy, analytical gradients, AdamW, checkpointing, and autoregressive generation. 
-Quadtrix architecture image +## v1.1.0 +run_20260508_110726 + +--- + +run_20260430_192930 + +--- ## Contents diff --git a/benchmark.cpp b/benchmark.cpp new file mode 100644 index 0000000..041c725 --- /dev/null +++ b/benchmark.cpp @@ -0,0 +1,296 @@ +// Run: +// .\quadtrix_bench.exe data\input.txt +// .\quadtrix_bench.exe data\input.txt --tokens 100 --runs 10 --warmup 3 +// +// Flags (all optional): +// --tokens N tokens to generate per run (default: 50) +// --runs N how many timed runs per prompt (default: 5) +// --warmup N un-timed warmup runs per prompt (default: 2) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config/config.h" +#include "include/dataloader.h" +#include "include/gpt.h" + +static bool file_exists(const std::string &p) +{ + std::ifstream f(p.c_str(), std::ios::binary); + return f.good(); +} + +static double now_ms() +{ + using namespace std::chrono; + return duration( + steady_clock::now().time_since_epoch()) + .count(); +} + +static double mean(const std::vector &v) +{ + return std::accumulate(v.begin(), v.end(), 0.0) / v.size(); +} + +static double stdev(const std::vector &v, double m) +{ + double sq = 0.0; + for (double x : v) + sq += (x - m) * (x - m); + return std::sqrt(sq / v.size()); +} + +static double timed_run(GPTLanguageModel &model, + DataLoader &dl, + const std::vector &prompt_ctx, + int n_tokens) +{ + std::vector ctx = prompt_ctx; + + double t0 = now_ms(); + for (int i = 0; i < n_tokens; ++i) + { + ctx = model.generate(ctx, 1); + if ((int)ctx.size() > BLOCK_SIZE) + ctx = std::vector(ctx.end() - BLOCK_SIZE, ctx.end()); + } + return now_ms() - t0; +} + +// + +static void section(const std::string &title) +{ + ; + std::cout << " " << title << "\n"; +} + +struct PromptResult +{ + std::string label; + int prompt_tokens; + int gen_tokens; + double avg_ms; + double min_ms; + double max_ms; + double std_ms; + double avg_tps; // tokens per second +}; + +static PromptResult 
bench_prompt(GPTLanguageModel &model, + DataLoader &dl, + const std::string &prompt, + int n_tokens, + int n_runs, + int n_warmup) +{ + // encode + std::vector ctx = dl.encode(prompt); + if (ctx.empty()) + ctx = {0}; + if ((int)ctx.size() > BLOCK_SIZE) + ctx = std::vector(ctx.end() - BLOCK_SIZE, ctx.end()); + + int prompt_len = (int)ctx.size(); + + // warmup (un-timed) + for (int i = 0; i < n_warmup; ++i) + timed_run(model, dl, ctx, n_tokens); + + // timed runs + std::vector times; + times.reserve(n_runs); + for (int i = 0; i < n_runs; ++i) + times.push_back(timed_run(model, dl, ctx, n_tokens)); + + double m = mean(times); + double sd = stdev(times, m); + double mn = *std::min_element(times.begin(), times.end()); + double mx = *std::max_element(times.begin(), times.end()); + double tps = n_tokens / (m / 1000.0); + + // truncate prompt for display + std::string label = prompt.size() > 30 + ? prompt.substr(0, 27) + "..." + : prompt; + + return PromptResult{label, prompt_len, n_tokens, m, mn, mx, sd, tps}; +} + +static void print_table(const std::vector &results) +{ + section("RESULTS"); + + // header + std::cout << std::left + << std::setw(34) << "Prompt" + << std::right + << std::setw(8) << "P.Tok" + << std::setw(8) << "G.Tok" + << std::setw(10) << "Avg ms" + << std::setw(10) << "Min ms" + << std::setw(10) << "Max ms" + << std::setw(9) << "Std ms" + << std::setw(10) << "tok/s" + << "\n"; + std::cout << std::string(99, '-') << "\n"; + + std::cout << std::fixed; + for (const auto &r : results) + { + std::cout << std::left + << std::setw(34) << r.label + << std::right + << std::setw(8) << r.prompt_tokens + << std::setw(8) << r.gen_tokens + << std::setw(10) << std::setprecision(1) << r.avg_ms + << std::setw(10) << std::setprecision(1) << r.min_ms + << std::setw(10) << std::setprecision(1) << r.max_ms + << std::setw(9) << std::setprecision(1) << r.std_ms + << std::setw(10) << std::setprecision(2) << r.avg_tps + << "\n"; + } + + double total_avg_tps = 0.0; + double 
best_tps = 0.0; + for (const auto &r : results) + { + total_avg_tps += r.avg_tps; + best_tps = std::max(best_tps, r.avg_tps); + } + double overall_tps = total_avg_tps / results.size(); + + std::cout << "\n Overall avg throughput : " + << std::setprecision(2) << overall_tps << " tok/s\n"; + std::cout << " Peak throughput : " + << std::setprecision(2) << best_tps << " tok/s\n"; + std::cout << " ms per token (avg) : " + << std::setprecision(2) << 1000.0 / overall_tps << " ms\n"; +} + +static void save_csv(const std::vector &results, + const std::string &path) +{ + std::ofstream f(path); + if (!f) + { + std::cerr << "[WARN] Could not write CSV to " << path << "\n"; + return; + } + f << "prompt,prompt_tokens,gen_tokens,avg_ms,min_ms,max_ms,std_ms,tok_per_sec\n"; + for (const auto &r : results) + { + f << "\"" << r.label << "\"," + << r.prompt_tokens << "," + << r.gen_tokens << "," + << r.avg_ms << "," + << r.min_ms << "," + << r.max_ms << "," + << r.std_ms << "," + << r.avg_tps << "\n"; + } + std::cout << "\n CSV saved to: " << path << "\n"; +} + +int main(int argc, char *argv[]) +{ + + std::string data_path = DEFAULT_CLEANED_PATH; + std::string model_path = BEST_MODEL_PATH; + int n_tokens = 50; + int n_runs = 5; + int n_warmup = 2; + + for (int i = 1; i < argc; ++i) + { + std::string a = argv[i]; + if (a == "--tokens" && i + 1 < argc) + n_tokens = std::atoi(argv[++i]); + else if (a == "--runs" && i + 1 < argc) + n_runs = std::atoi(argv[++i]); + else if (a == "--warmup" && i + 1 < argc) + n_warmup = std::atoi(argv[++i]); + else + data_path = a; + } + + std::cout << " Quadtrix Inference Benchmark\n"; + std::cout << " data : " << data_path << "\n"; + std::cout << " model : " << model_path << "\n"; + std::cout << " tokens : " << n_tokens << " per run\n"; + std::cout << " runs : " << n_runs << " timed + " + << n_warmup << " warmup\n"; + + DataLoader dl; + try + { + dl.load(data_path); + } + catch (const std::exception &e) + { + std::cerr << "[ERROR] " << e.what() << "\n"; + 
return 1; + } + + if (!file_exists(model_path)) + { + std::cerr << "[ERROR] Weights not found at " << model_path << "\n"; + std::cerr << "[HINT] Train first, or set " << MODEL_PATH_ENV_VAR << "\n"; + return 1; + } + + GPTLanguageModel model(dl.vocab_size, N_EMBD, N_HEAD, N_LAYER, BLOCK_SIZE, SEED); + model.load(model_path); + + std::cout << "\n[OK] Model loaded (" << model.num_params() / 1.0e6f + << " M params)\n"; + + std::vector prompts = { + "", + "The", // 1-token prompt + "Once upon a time", // short prompt + "The quick brown fox jumps", // medium prompt + std::string(1, 'a'), // long prompt (stress-tests context window) + }; + + section("RUNNING"); + std::vector results; + results.reserve(prompts.size()); + + for (size_t i = 0; i < prompts.size(); ++i) + { + std::string display = prompts[i].empty() + ? "(empty / BOS)" + : (prompts[i].size() > 30 + ? prompts[i].substr(0, 27) + "..." + : prompts[i]); + + std::cout << " [" << (i + 1) << "/" << prompts.size() << "] \"" + << display << "\" ... 
" << std::flush; + + PromptResult r = bench_prompt(model, dl, + prompts[i], + n_tokens, n_runs, n_warmup); + results.push_back(r); + + std::cout << std::fixed << std::setprecision(2) + << r.avg_tps << " tok/s\n"; + } + + print_table(results); + save_csv(results, "benchmark_results.csv"); + + std::cout << "\n"; + + std::cout << " Done.\n"; + return 0; +} \ No newline at end of file diff --git a/benchmark_results.csv b/benchmark_results.csv new file mode 100644 index 0000000..7496a05 --- /dev/null +++ b/benchmark_results.csv @@ -0,0 +1,6 @@ +prompt,prompt_tokens,gen_tokens,avg_ms,min_ms,max_ms,std_ms,tok_per_sec +"",1,50,2027,1848.4,2314.78,168.952,24.667 +"The",3,50,2561.63,2241.3,2986.35,306.631,19.5188 +"Once upon a time",16,50,3038.74,2885.22,3225.42,126.5,16.4542 +"The quick brown fox jumps",25,50,3975,3561.3,4783.34,454.592,12.5786 +"a",1,50,1862.99,1808.71,1929.64,46.4738,26.8386 diff --git a/docs/Quadtrix_Educational_Paper.pdf b/docs/Quadtrix_Educational_Paper.pdf new file mode 100644 index 0000000..d8d2a3a Binary files /dev/null and b/docs/Quadtrix_Educational_Paper.pdf differ diff --git a/docs/run_20260430_192930.png b/docs/run_20260430_192930.png new file mode 100644 index 0000000..8548392 Binary files /dev/null and b/docs/run_20260430_192930.png differ diff --git a/docs/run_20260508_110726.png b/docs/run_20260508_110726.png new file mode 100644 index 0000000..5848440 Binary files /dev/null and b/docs/run_20260508_110726.png differ diff --git a/engine/fineweb_dataset.py b/engine/fineweb_dataset.py index 0a9f86e..9a0107f 100644 --- a/engine/fineweb_dataset.py +++ b/engine/fineweb_dataset.py @@ -36,10 +36,10 @@ def download_fineweb_sample(output_dir="engine", target_size_mb=30): current_bytes += sample_size if (i + 1) % 100 == 0: - print(f"Collected {i + 1} samples (~{current_bytes / (1024*1024):.2f} MB)") + print(f"Collected {i + 1} samples ({current_bytes / (1024*1024):.2f} MB)") print(f"\nDownloaded {len(samples)} samples ({current_bytes / 
(1024*1024):.2f} MB)") - output_file = os.path.join(output_dir, "fineweb_30mb.txt") + output_file = os.path.join(output_dir, "input.txt") with open(output_file, 'w', encoding='utf-8') as f: for sample in samples: f.write(sample['text']) @@ -54,7 +54,7 @@ def download_fineweb_sample(output_dir="engine", target_size_mb=30): if __name__ == "__main__": try: download_fineweb_sample() - print("\n✓ Download completed successfully!") + print("\nDownload completed successfully!") except Exception as e: print(f"\ Error: {e}") print("\nMake sure you have the 'datasets' library installed:") diff --git a/engine/main.py b/engine/main.py index 4553843..cffb4cb 100644 --- a/engine/main.py +++ b/engine/main.py @@ -65,7 +65,7 @@ def success(msg): log(f" ok {msg}") # CONFIGURATION -cleaned_path = Path(os.environ.get("QUADTRIX_TRAIN_DATA", SCRIPT_DIR / "input.txt")) +cleaned_path = Path(os.environ.get("QUADTRIX_TRAIN_DATA", SCRIPT_DIR / "input.txt")) train_split = 0.9 seed = 1337 diff --git a/package.json b/package.json index 613490f..a4a24c5 100644 --- a/package.json +++ b/package.json @@ -1,8 +1,17 @@ { "name": "quadtrix", - "version": "1.0.0", + "version": "1.0.2", "description": "CLI for running Quadtrix.cpp chat and local training.", "license": "MIT", + "author": "Eamon", + "repository": { + "type": "git", + "url": "https://github.com/Eamon2009/Quadtrix.cpp.git" + }, + "bugs": { + "url": "https://github.com/Eamon2009/Quadtrix.cpp/issues" + }, + "homepage": "https://github.com/Eamon2009/Quadtrix.cpp#readme", "bin": { "quadtrix": "bin/quadtrix.js" }, @@ -42,4 +51,4 @@ "engines": { "node": ">=18" } -} +} \ No newline at end of file