22 changes: 22 additions & 0 deletions .github/codecov.yml
@@ -0,0 +1,22 @@
# # ref: https://docs.codecov.com/docs/codecovyml-reference
# comment out coverage job for now, https://github.com/tekaratzas/RustGPT/pull/11#issuecomment-3361854174
# coverage:
# # Hold ourselves to a high bar
# range: 55..100
# round: down
# precision: 1
# status:
# # ref: https://docs.codecov.com/docs/commit-status
# project:
# default:
# # Avoid false negatives
# threshold: 1%

# # Test files aren't important for coverage
# ignore:
# - "tests"

# # Make comments less noisy
# comment:
# layout: "files"
# require_changes: yes
73 changes: 73 additions & 0 deletions .github/workflows/check.yml
@@ -0,0 +1,73 @@
permissions:
contents: read
on:
push:
branches: [main, master]
pull_request:
merge_group:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

env:
RUST_TOOLCHAIN: stable

name: Check
jobs:
fmt:
runs-on: ubuntu-latest
strategy:
fail-fast: false
name: fmt
permissions:
# Give the default GITHUB_TOKEN write permission to commit and push the
# added or changed files to the repository.
contents: write
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly #${{ env.RUST_TOOLCHAIN }}
components: rustfmt
- run: cargo fmt --check

clippy:
runs-on: ubuntu-latest
name: clippy
permissions:
contents: read
checks: write
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install ${{ env.RUST_TOOLCHAIN }}
uses: dtolnay/rust-toolchain@master # master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
components: clippy
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- run: cargo clippy --workspace --all-features --all-targets -- -D warnings

typos:
runs-on: ubuntu-latest
name: typos
permissions:
contents: read
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Check spelling
uses: crate-ci/typos@master


23 changes: 0 additions & 23 deletions .github/workflows/rust.yml

This file was deleted.

68 changes: 68 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,68 @@
permissions:
contents: read
on:
push:
branches: [main, master]
pull_request:
merge_group:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

env:
RUST_TOOLCHAIN: stable

name: Test
jobs:
required:
runs-on: ubuntu-latest
name: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install ${{ env.RUST_TOOLCHAIN }}
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: cargo generate-lockfile
if: hashFiles('Cargo.lock') == ''
run: cargo generate-lockfile
# https://twitter.com/jonhoo/status/1571290371124260865
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install nextest
uses: taiki-e/install-action@nextest
- name: cargo nextest --locked
run: cargo nextest run --locked --workspace --all-features --all-targets

# comment out coverage job for now, https://github.com/tekaratzas/RustGPT/pull/11#issuecomment-3361854174
# coverage:
# runs-on: ubuntu-latest
# name: coverage
# steps:
# - uses: actions/checkout@v4
# with:
# submodules: true
# - name: Install rust
# uses: dtolnay/rust-toolchain@master
# with:
# toolchain: ${{ env.RUST_TOOLCHAIN }}
# components: llvm-tools-preview
# - name: cargo install cargo-llvm-cov
# uses: taiki-e/install-action@cargo-llvm-cov
# - name: cargo generate-lockfile
# if: hashFiles('Cargo.lock') == ''
# run: cargo generate-lockfile
# - name: Rust Cache
# uses: Swatinem/rust-cache@v2
# - name: Install nextest
# uses: taiki-e/install-action@nextest
# - name: cargo llvm-cov
# run: cargo llvm-cov nextest --locked --workspace --all-features --all-targets --lcov --output-path lcov.info
# - name: Upload to codecov.io
# uses: codecov/codecov-action@v5
# with:
# fail_ci_if_error: true
# token: ${{ secrets.CODECOV_TOKEN }} # required
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -7,7 +7,7 @@ edition = "2024"
[dependencies]
bincode = "2.0.1"
ndarray = "0.16.1"
rand = "0.9.0"
rand = "0.9.2"
rand_distr = "0.5.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
14 changes: 14 additions & 0 deletions rustfmt.toml
@@ -0,0 +1,14 @@
edition = "2024"
style_edition = "2024"
comment_width = 120
format_code_in_doc_comments = true
format_macro_bodies = true
format_macro_matchers = true
normalize_comments = true
normalize_doc_attributes = true
imports_granularity = "Crate"
group_imports = "StdExternalCrate"
reorder_impl_items = true
reorder_imports = true
tab_spaces = 4
wrap_comments = true
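
Two of these options do most of the work in the source diffs below: imports_granularity = "Crate" merges all imports from one crate into a single use tree, and group_imports = "StdExternalCrate" splits imports into std / external-crate / local-crate groups. Both are unstable rustfmt options, which is likely why the fmt job above runs on the nightly toolchain. A minimal sketch of the effect, reusing import names from this repo (illustrative, not an exact hunk from the PR):

// Before `cargo fmt`: one flat import list in whatever order it was written.
use crate::llm::Layer;
use ndarray::Array2;
use std::fs;
use crate::adam::Adam;
use ndarray::Axis;

// After `cargo fmt` with this rustfmt.toml: blank-line-separated groups
// (std, then external crates, then this crate), one `use` tree per crate.
use std::fs;

use ndarray::{Array2, Axis};

use crate::{adam::Adam, llm::Layer};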
5 changes: 3 additions & 2 deletions src/dataset_loader.rs
@@ -1,13 +1,14 @@
use csv::ReaderBuilder;
use serde_json;
use std::fs;

use csv::ReaderBuilder;

pub struct Dataset {
pub pretraining_data: Vec<String>,
pub chat_training_data: Vec<String>,
}

#[allow(dead_code)]
#[allow(clippy::upper_case_acronyms)]
pub enum DatasetType {
JSON,
CSV,
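A note on the new allow attribute here: clippy's upper_case_acronyms lint flags variant names written as all-caps acronyms, and because check.yml runs clippy with -D warnings it would fail CI by suggesting camel-case names instead. The #[allow] keeps the existing JSON / CSV names. The rename clippy would otherwise ask for looks like this (hypothetical, not part of this PR):

// What clippy's upper_case_acronyms lint would want instead:
pub enum DatasetType {
    Json,
    Csv,
}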
3 changes: 2 additions & 1 deletion src/embeddings.rs
@@ -1,7 +1,8 @@
use crate::{EMBEDDING_DIM, MAX_SEQ_LEN, adam::Adam, llm::Layer, vocab::Vocab};
use ndarray::{Array2, s};
use rand_distr::{Distribution, Normal};

use crate::{EMBEDDING_DIM, MAX_SEQ_LEN, adam::Adam, llm::Layer, vocab::Vocab};

pub struct Embeddings {
pub token_embeddings: Array2<f32>,
pub positional_embeddings: Array2<f32>,
6 changes: 3 additions & 3 deletions src/feed_forward.rs
@@ -1,8 +1,8 @@
use crate::{adam::Adam, llm::Layer};
use ndarray::Array2;
use ndarray::Axis;
use ndarray::{Array2, Axis};
use rand_distr::{Distribution, Normal};

use crate::{adam::Adam, llm::Layer};

pub struct FeedForward {
w1: Array2<f32>,
b1: Array2<f32>,
4 changes: 2 additions & 2 deletions src/layer_norm.rs
@@ -1,7 +1,7 @@
use crate::adam::Adam;
use crate::llm::Layer;
use ndarray::{Array2, Axis};

use crate::{adam::Adam, llm::Layer};

pub struct LayerNorm {
epsilon: f32, // Small constant for stability
gamma: Array2<f32>, // Learnable scaling parameter
28 changes: 14 additions & 14 deletions src/llm.rs
@@ -1,13 +1,11 @@
use crate::EMBEDDING_DIM;
use crate::Embeddings;
use crate::HIDDEN_DIM;
use crate::MAX_SEQ_LEN;
use crate::Vocab;
use crate::output_projection::OutputProjection;
use crate::transformer::TransformerBlock;
use ndarray::{Array1, Array2, Axis};
use std::cmp::Ordering;

use ndarray::{Array1, Array2, Axis};

use crate::{
EMBEDDING_DIM, Embeddings, HIDDEN_DIM, MAX_SEQ_LEN, Vocab, output_projection::OutputProjection,
transformer::TransformerBlock,
};
pub trait Layer {
fn layer_type(&self) -> &str;

@@ -18,6 +16,7 @@ pub trait Layer {
fn parameters(&self) -> usize;
}

#[allow(clippy::upper_case_acronyms)]
pub struct LLM {
pub vocab: Vocab,
pub network: Vec<Box<dyn Layer>>,
@@ -57,7 +56,7 @@ impl LLM {
// Sum the parameters across all layers in the network
self.network
.iter()
.map(|layer: &Box<dyn Layer>| layer.parameters())
.map(|layer| layer.parameters())
.sum::<usize>()
}

@@ -126,7 +125,8 @@ impl LLM {
.to_owned()
.insert_axis(Axis(0));

// Softmax - convert activiations of each token to a probability distribution over the vocabulary
// Softmax - convert activations of each token to a probability distribution over the
// vocabulary
let probs = Self::softmax(&last_logit); // 1 x vocab_size

// Greedy Decode - Choose the highest probability token for each position
@@ -238,10 +238,10 @@ impl LLM {
}

// Add any remaining word
if !current_word.is_empty() {
if let Some(token_id) = self.vocab.encode(&current_word) {
tokens.push(token_id);
}
if !current_word.is_empty()
&& let Some(token_id) = self.vocab.encode(&current_word)
{
tokens.push(token_id);
}
}

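The tokenizer change above collapses a nested if + if let pair into a single condition using a let-chain, which is available in the crate's 2024 edition. A minimal standalone sketch of the pattern with a hypothetical helper (not code from this PR):

// Let-chains: a boolean test and an `if let` pattern match combined with
// `&&` in one `if`, avoiding an extra level of nesting.
fn push_token(tokens: &mut Vec<usize>, word: &str, encode: impl Fn(&str) -> Option<usize>) {
    if !word.is_empty()
        && let Some(token_id) = encode(word)
    {
        tokens.push(token_id);
    }
}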
10 changes: 5 additions & 5 deletions src/main.rs
@@ -1,13 +1,13 @@
use std::io::Write;

use crate::embeddings::Embeddings;
use crate::llm::LLM;
use crate::output_projection::OutputProjection;
use crate::transformer::TransformerBlock;
use crate::vocab::Vocab;
use ::llm::{EMBEDDING_DIM, HIDDEN_DIM, MAX_SEQ_LEN};
use dataset_loader::{Dataset, DatasetType};

use crate::{
embeddings::Embeddings, llm::LLM, output_projection::OutputProjection,
transformer::TransformerBlock, vocab::Vocab,
};

mod adam;
mod dataset_loader;
mod embeddings;
8 changes: 4 additions & 4 deletions src/self_attention.rs
@@ -1,9 +1,9 @@
use crate::EMBEDDING_DIM;
use crate::adam::Adam;
use crate::llm::Layer;
use std::f32;

use ndarray::Array2;
use rand_distr::{Distribution, Normal};
use std::f32;

use crate::{EMBEDDING_DIM, adam::Adam, llm::Layer};

pub struct SelfAttention {
pub embedding_dim: usize,
14 changes: 6 additions & 8 deletions src/transformer.rs
@@ -1,8 +1,8 @@
use crate::feed_forward::FeedForward;
use crate::layer_norm::LayerNorm;
use crate::llm::Layer;
use crate::self_attention::SelfAttention;
use ndarray::Array2;

use crate::{
feed_forward::FeedForward, layer_norm::LayerNorm, llm::Layer, self_attention::SelfAttention,
};
pub struct TransformerBlock {
attention: SelfAttention,
feed_forward: FeedForward,
@@ -32,9 +32,8 @@ impl Layer for TransformerBlock {
let norm1_out = self.norm1.normalize(&attention_out);

let feed_forward_out = self.feed_forward.forward(&norm1_out); // includes residual
let norm2_out = self.norm2.normalize(&feed_forward_out);

norm2_out
self.norm2.normalize(&feed_forward_out)
}

fn backward(&mut self, grads: &Array2<f32>, lr: f32) -> Array2<f32> {
@@ -48,9 +47,8 @@
let grad_norm1 = self.norm1.backward(&grad_ffn, lr);

// Backward through attention (includes residual connection)
let grad_attn = self.attention.backward(&grad_norm1, lr);

grad_attn
self.attention.backward(&grad_norm1, lr)
}

fn parameters(&self) -> usize {