diff --git a/.github/workflows/github-package.yml b/.github/workflows/github-package.yml
new file mode 100644
index 0000000..cbcb92a
--- /dev/null
+++ b/.github/workflows/github-package.yml
@@ -0,0 +1,44 @@
+name: Publish GitHub Package
+
+on:
+  workflow_dispatch:
+  push:
+    tags:
+      - "v*"
+
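+# The built-in GITHUB_TOKEN is enough to publish: contents:read for checkout,
+# packages:write to push the package to GitHub Packages.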
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  publish-github-package:
+    name: Publish to GitHub Packages
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js for GitHub Packages
+        uses: actions/setup-node@v4
+        with:
+          node-version: "20"
+          registry-url: "https://npm.pkg.github.com"
+          scope: "@eamon2009"
+          cache: "npm"
+          cache-dependency-path: frontend/package-lock.json
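+        # setup-node writes an .npmrc that points the @eamon2009 scope at
+        # npm.pkg.github.com and reads credentials from NODE_AUTH_TOKEN.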
+
+      - name: Build frontend assets
+        run: |
+          npm --prefix frontend ci
+          npm --prefix frontend run build
+
+      - name: Prepare GitHub Packages metadata
+        run: |
+          npm pkg set name="@eamon2009/quadtrix"
+          npm pkg set publishConfig.registry="https://npm.pkg.github.com"
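+          # GitHub Packages only accepts package names scoped to the repo
+          # owner, hence the @eamon2009/ prefix set above.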
+
+      - name: Publish package
+        run: npm publish
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/README.md b/README.md
index 9fc6788..2f5dea3 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,14 @@ Quadtrix.cpp is a local language model project with several execution paths:
The project is designed as a technical learning implementation. The C++ path exposes the transformer internals directly: tensor operations, attention, layer normalization, cross-entropy, analytical gradients, AdamW, checkpointing, and autoregressive generation.
-
+## v1.1.0
+
## Contents
@@ -961,4 +968,4 @@ g++ -std=c++17 -O2 -I. -Iinclude -o quadtrix.exe main.cpp
## License
-This project is released under the MIT License. See `LICENSE`.
+MIT
diff --git a/benchmark.cpp b/benchmark.cpp
new file mode 100644
index 0000000..041c725
--- /dev/null
+++ b/benchmark.cpp
@@ -0,0 +1,296 @@
+// Run:
+//   .\quadtrix_bench.exe data\input.txt
+//   .\quadtrix_bench.exe data\input.txt --tokens 100 --runs 10 --warmup 3
+//
+// Flags (all optional):
+//   --tokens N   tokens to generate per run (default: 50)
+//   --runs N     how many timed runs per prompt (default: 5)
+//   --warmup N   un-timed warmup runs per prompt (default: 2)
+
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <cstdlib>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "config/config.h"
+#include "include/dataloader.h"
+#include "include/gpt.h"
+
+static bool file_exists(const std::string &p)
+{
+    std::ifstream f(p.c_str(), std::ios::binary);
+    return f.good();
+}
+
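+// Millisecond timestamp from std::chrono::steady_clock; steady_clock is
+// monotonic, so the timings are immune to system clock adjustments.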
+static double now_ms()
+{
+    using namespace std::chrono;
+    return duration<double, std::milli>(
+               steady_clock::now().time_since_epoch())
+        .count();
+}
+
+static double mean(const std::vector<double> &v)
+{
+    return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
+}
+
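+// Population standard deviation (divides by N rather than N - 1); the timed
+// runs are the entire population being described, so no sample correction.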
+static double stdev(const std::vector<double> &v, double m)
+{
+    double sq = 0.0;
+    for (double x : v)
+        sq += (x - m) * (x - m);
+    return std::sqrt(sq / v.size());
+}
+
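+// Generates n_tokens one at a time, sliding the context so it never exceeds
+// BLOCK_SIZE, and returns the elapsed wall time in milliseconds.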
+static double timed_run(GPTLanguageModel &model,
+                        DataLoader &dl,
+                        const std::vector<int> &prompt_ctx,
+                        int n_tokens)
+{
+    std::vector<int> ctx = prompt_ctx;
+
+    double t0 = now_ms();
+    for (int i = 0; i < n_tokens; ++i)
+    {
+        ctx = model.generate(ctx, 1);
+        if ((int)ctx.size() > BLOCK_SIZE)
+            ctx = std::vector<int>(ctx.end() - BLOCK_SIZE, ctx.end());
+    }
+    return now_ms() - t0;
+}
+
+// Console section header; a leading newline separates it from prior output.
+
+static void section(const std::string &title)
+{
+    std::cout << "\n " << title << "\n";
+}
+
+struct PromptResult
+{
+    std::string label;
+    int prompt_tokens;
+    int gen_tokens;
+    double avg_ms;
+    double min_ms;
+    double max_ms;
+    double std_ms;
+    double avg_tps; // tokens per second
+};
+
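+// Encodes the prompt, clips it to the context window, runs un-timed warmups,
+// then reports mean/min/max/stddev latency and average throughput.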
+static PromptResult bench_prompt(GPTLanguageModel &model,
+                                 DataLoader &dl,
+                                 const std::string &prompt,
+                                 int n_tokens,
+                                 int n_runs,
+                                 int n_warmup)
+{
+    // encode
+    std::vector<int> ctx = dl.encode(prompt);
+    if (ctx.empty())
+        ctx = {0};
+    if ((int)ctx.size() > BLOCK_SIZE)
+        ctx = std::vector<int>(ctx.end() - BLOCK_SIZE, ctx.end());
+
+    int prompt_len = (int)ctx.size();
+
+    // warmup (un-timed)
+    for (int i = 0; i < n_warmup; ++i)
+        timed_run(model, dl, ctx, n_tokens);
+
+    // timed runs
+    std::vector<double> times;
+    times.reserve(n_runs);
+    for (int i = 0; i < n_runs; ++i)
+        times.push_back(timed_run(model, dl, ctx, n_tokens));
+
+    double m = mean(times);
+    double sd = stdev(times, m);
+    double mn = *std::min_element(times.begin(), times.end());
+    double mx = *std::max_element(times.begin(), times.end());
+    double tps = n_tokens / (m / 1000.0);
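+    // e.g. 50 tokens at an average of 2000 ms per run -> 50 / 2.0 = 25 tok/s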
+
+    // truncate prompt for display
+    std::string label = prompt.size() > 30
+                            ? prompt.substr(0, 27) + "..."
+                            : prompt;
+
+    return PromptResult{label, prompt_len, n_tokens, m, mn, mx, sd, tps};
+}
+
+static void print_table(const std::vector<PromptResult> &results)
+{
+    section("RESULTS");
+
+    // header
+    std::cout << std::left
+              << std::setw(34) << "Prompt"
+              << std::right
+              << std::setw(8) << "P.Tok"
+              << std::setw(8) << "G.Tok"
+              << std::setw(10) << "Avg ms"
+              << std::setw(10) << "Min ms"
+              << std::setw(10) << "Max ms"
+              << std::setw(9) << "Std ms"
+              << std::setw(10) << "tok/s"
+              << "\n";
+    std::cout << std::string(99, '-') << "\n";
+
+    std::cout << std::fixed;
+    for (const auto &r : results)
+    {
+        std::cout << std::left
+                  << std::setw(34) << r.label
+                  << std::right
+                  << std::setw(8) << r.prompt_tokens
+                  << std::setw(8) << r.gen_tokens
+                  << std::setw(10) << std::setprecision(1) << r.avg_ms
+                  << std::setw(10) << std::setprecision(1) << r.min_ms
+                  << std::setw(10) << std::setprecision(1) << r.max_ms
+                  << std::setw(9) << std::setprecision(1) << r.std_ms
+                  << std::setw(10) << std::setprecision(2) << r.avg_tps
+                  << "\n";
+    }
+
+    double total_avg_tps = 0.0;
+    double best_tps = 0.0;
+    for (const auto &r : results)
+    {
+        total_avg_tps += r.avg_tps;
+        best_tps = std::max(best_tps, r.avg_tps);
+    }
+    double overall_tps = total_avg_tps / results.size();
+
+    std::cout << "\n Overall avg throughput : "
+              << std::setprecision(2) << overall_tps << " tok/s\n";
+    std::cout << " Peak throughput        : "
+              << std::setprecision(2) << best_tps << " tok/s\n";
+    std::cout << " ms per token (avg)     : "
+              << std::setprecision(2) << 1000.0 / overall_tps << " ms\n";
+}
+
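+// Writes one CSV row per prompt so runs can be compared across commits.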
+static void save_csv(const std::vector<PromptResult> &results,
+                     const std::string &path)
+{
+    std::ofstream f(path);
+    if (!f)
+    {
+        std::cerr << "[WARN] Could not write CSV to " << path << "\n";
+        return;
+    }
+    f << "prompt,prompt_tokens,gen_tokens,avg_ms,min_ms,max_ms,std_ms,tok_per_sec\n";
+    for (const auto &r : results)
+    {
+        f << "\"" << r.label << "\","
+          << r.prompt_tokens << ","
+          << r.gen_tokens << ","
+          << r.avg_ms << ","
+          << r.min_ms << ","
+          << r.max_ms << ","
+          << r.std_ms << ","
+          << r.avg_tps << "\n";
+    }
+    std::cout << "\n CSV saved to: " << path << "\n";
+}
+
+int main(int argc, char *argv[])
+{
+
+    std::string data_path = DEFAULT_CLEANED_PATH;
+    std::string model_path = BEST_MODEL_PATH;
+    int n_tokens = 50;
+    int n_runs = 5;
+    int n_warmup = 2;
+
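+    // Minimal flag parsing: any argument that is not a recognised flag is
+    // treated as the data path. std::atoi yields 0 on malformed numbers.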
+    for (int i = 1; i < argc; ++i)
+    {
+        std::string a = argv[i];
+        if (a == "--tokens" && i + 1 < argc)
+            n_tokens = std::atoi(argv[++i]);
+        else if (a == "--runs" && i + 1 < argc)
+            n_runs = std::atoi(argv[++i]);
+        else if (a == "--warmup" && i + 1 < argc)
+            n_warmup = std::atoi(argv[++i]);
+        else
+            data_path = a;
+    }
+
+ std::cout << " Quadtrix Inference Benchmark\n";
+ std::cout << " data : " << data_path << "\n";
+ std::cout << " model : " << model_path << "\n";
+ std::cout << " tokens : " << n_tokens << " per run\n";
+ std::cout << " runs : " << n_runs << " timed + "
+ << n_warmup << " warmup\n";
+
+    DataLoader dl;
+    try
+    {
+        dl.load(data_path);
+    }
+    catch (const std::exception &e)
+    {
+        std::cerr << "[ERROR] " << e.what() << "\n";
+        return 1;
+    }
+
+    if (!file_exists(model_path))
+    {
+        std::cerr << "[ERROR] Weights not found at " << model_path << "\n";
+        std::cerr << "[HINT] Train first, or set " << MODEL_PATH_ENV_VAR << "\n";
+        return 1;
+    }
+
+    GPTLanguageModel model(dl.vocab_size, N_EMBD, N_HEAD, N_LAYER, BLOCK_SIZE, SEED);
+    model.load(model_path);
+
+    std::cout << "\n[OK] Model loaded (" << model.num_params() / 1.0e6f
+              << " M params)\n";
+
+    std::vector<std::string> prompts = {
+        "",
+        "The",                       // 1-token prompt
+        "Once upon a time",          // short prompt
+        "The quick brown fox jumps", // medium prompt
+        std::string(1, 'a'),         // single-character prompt
+    };
+
+ section("RUNNING");
+ std::vector results;
+ results.reserve(prompts.size());
+
+    for (size_t i = 0; i < prompts.size(); ++i)
+    {
+        std::string display = prompts[i].empty()
+                                  ? "(empty / BOS)"
+                                  : (prompts[i].size() > 30
+                                         ? prompts[i].substr(0, 27) + "..."
+                                         : prompts[i]);
+
+        std::cout << " [" << (i + 1) << "/" << prompts.size() << "] \""
+                  << display << "\" ... " << std::flush;
+
+        PromptResult r = bench_prompt(model, dl,
+                                      prompts[i],
+                                      n_tokens, n_runs, n_warmup);
+        results.push_back(r);
+
+        std::cout << std::fixed << std::setprecision(2)
+                  << r.avg_tps << " tok/s\n";
+    }
+
+    print_table(results);
+    save_csv(results, "benchmark_results.csv");
+
+    std::cout << "\n Done.\n";
+    return 0;
+}
\ No newline at end of file
diff --git a/benchmark_results.csv b/benchmark_results.csv
new file mode 100644
index 0000000..7496a05
--- /dev/null
+++ b/benchmark_results.csv
@@ -0,0 +1,6 @@
+prompt,prompt_tokens,gen_tokens,avg_ms,min_ms,max_ms,std_ms,tok_per_sec
+"",1,50,2027,1848.4,2314.78,168.952,24.667
+"The",3,50,2561.63,2241.3,2986.35,306.631,19.5188
+"Once upon a time",16,50,3038.74,2885.22,3225.42,126.5,16.4542
+"The quick brown fox jumps",25,50,3975,3561.3,4783.34,454.592,12.5786
+"a",1,50,1862.99,1808.71,1929.64,46.4738,26.8386
diff --git a/docs/Quadtrix_Educational_Paper.pdf b/docs/Quadtrix_Educational_Paper.pdf
new file mode 100644
index 0000000..d8d2a3a
Binary files /dev/null and b/docs/Quadtrix_Educational_Paper.pdf differ
diff --git a/docs/run_20260430_192930.png b/docs/run_20260430_192930.png
new file mode 100644
index 0000000..8548392
Binary files /dev/null and b/docs/run_20260430_192930.png differ
diff --git a/docs/run_20260508_110726.png b/docs/run_20260508_110726.png
new file mode 100644
index 0000000..5848440
Binary files /dev/null and b/docs/run_20260508_110726.png differ
diff --git a/engine/fineweb_dataset.py b/engine/fineweb_dataset.py
index 0a9f86e..9a0107f 100644
--- a/engine/fineweb_dataset.py
+++ b/engine/fineweb_dataset.py
@@ -36,10 +36,10 @@ def download_fineweb_sample(output_dir="engine", target_size_mb=30):
         current_bytes += sample_size
         if (i + 1) % 100 == 0:
-            print(f"Collected {i + 1} samples (~{current_bytes / (1024*1024):.2f} MB)")
+            print(f"Collected {i + 1} samples ({current_bytes / (1024*1024):.2f} MB)")
     print(f"\nDownloaded {len(samples)} samples ({current_bytes / (1024*1024):.2f} MB)")
-    output_file = os.path.join(output_dir, "fineweb_30mb.txt")
+    output_file = os.path.join(output_dir, "input.txt")
     with open(output_file, 'w', encoding='utf-8') as f:
         for sample in samples:
             f.write(sample['text'])
@@ -54,7 +54,7 @@ def download_fineweb_sample(output_dir="engine", target_size_mb=30):
 if __name__ == "__main__":
     try:
         download_fineweb_sample()
-        print("\n✓ Download completed successfully!")
+        print("\nDownload completed successfully!")
     except Exception as e:
         print(f"\n✗ Error: {e}")
         print("\nMake sure you have the 'datasets' library installed:")
diff --git a/engine/main.py b/engine/main.py
index 4553843..cffb4cb 100644
--- a/engine/main.py
+++ b/engine/main.py
@@ -65,7 +65,7 @@ def success(msg): log(f" ok {msg}")
# CONFIGURATION
-cleaned_path = Path(os.environ.get("QUADTRIX_TRAIN_DATA", SCRIPT_DIR / "input.txt"))
+cleaned_path = Path(os.environ.get("data", SCRIPT_DIR / "input.txt"))
train_split = 0.9
seed = 1337
diff --git a/package.json b/package.json
index 613490f..a4a24c5 100644
--- a/package.json
+++ b/package.json
@@ -1,8 +1,17 @@
 {
   "name": "quadtrix",
-  "version": "1.0.0",
+  "version": "1.0.2",
   "description": "CLI for running Quadtrix.cpp chat and local training.",
   "license": "MIT",
+  "author": "Eamon",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/Eamon2009/Quadtrix.cpp.git"
+  },
+  "bugs": {
+    "url": "https://github.com/Eamon2009/Quadtrix.cpp/issues"
+  },
+  "homepage": "https://github.com/Eamon2009/Quadtrix.cpp#readme",
   "bin": {
     "quadtrix": "bin/quadtrix.js"
   },
@@ -42,4 +51,4 @@
"engines": {
"node": ">=18"
}
-}
+}
\ No newline at end of file