22 changes: 22 additions & 0 deletions .github/codecov.yml
@@ -0,0 +1,22 @@
# # ref: https://docs.codecov.com/docs/codecovyml-reference
# comment out coverage job for now, https://github.com/tekaratzas/RustGPT/pull/11#issuecomment-3361854174
# coverage:
# # Hold ourselves to a high bar
# range: 55..100
# round: down
# precision: 1
# status:
# # ref: https://docs.codecov.com/docs/commit-status
# project:
# default:
# # Avoid false negatives
# threshold: 1%

# # Test files aren't important for coverage
# ignore:
# - "tests"

# # Make comments less noisy
# comment:
# layout: "files"
# require_changes: yes
73 changes: 73 additions & 0 deletions .github/workflows/check.yml
@@ -0,0 +1,73 @@
permissions:
contents: read
on:
push:
branches: [main, master]
pull_request:
merge_group:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

env:
RUST_TOOLCHAIN: stable

name: Check
jobs:
fmt:
runs-on: ubuntu-latest
strategy:
fail-fast: false
name: fmt
permissions:
# Give the default GITHUB_TOKEN write permission to commit and push the
# added or changed files to the repository.
contents: write
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install rust
uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly #${{ env.RUST_TOOLCHAIN }}
components: rustfmt
- run: cargo fmt --check

clippy:
runs-on: ubuntu-latest
name: clippy
permissions:
contents: read
checks: write
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install ${{ env.RUST_TOOLCHAIN }}
uses: dtolnay/rust-toolchain@master # master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
components: clippy
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- run: cargo clippy --workspace --all-features --all-targets -- -D warnings

typos:
runs-on: ubuntu-latest
name: typos
permissions:
contents: read
strategy:
fail-fast: false
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Check spelling
uses: crate-ci/typos@master


23 changes: 0 additions & 23 deletions .github/workflows/rust.yml

This file was deleted.

68 changes: 68 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,68 @@
permissions:
contents: read
on:
push:
branches: [main, master]
pull_request:
merge_group:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

env:
RUST_TOOLCHAIN: stable

name: Test
jobs:
required:
runs-on: ubuntu-latest
name: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Install ${{ env.RUST_TOOLCHAIN }}
uses: dtolnay/rust-toolchain@master
with:
toolchain: ${{ env.RUST_TOOLCHAIN }}
- name: cargo generate-lockfile
if: hashFiles('Cargo.lock') == ''
run: cargo generate-lockfile
# https://twitter.com/jonhoo/status/1571290371124260865
- name: Rust Cache
uses: Swatinem/rust-cache@v2
- name: Install nextest
uses: taiki-e/install-action@nextest
- name: cargo nextest --locked
run: cargo nextest run --locked --workspace --all-features --all-targets

# comment out coverage job for now, https://github.com/tekaratzas/RustGPT/pull/11#issuecomment-3361854174
# coverage:
# runs-on: ubuntu-latest
# name: coverage
# steps:
# - uses: actions/checkout@v4
# with:
# submodules: true
# - name: Install rust
# uses: dtolnay/rust-toolchain@master
# with:
# toolchain: ${{ env.RUST_TOOLCHAIN }}
# components: llvm-tools-preview
# - name: cargo install cargo-llvm-cov
# uses: taiki-e/install-action@cargo-llvm-cov
# - name: cargo generate-lockfile
# if: hashFiles('Cargo.lock') == ''
# run: cargo generate-lockfile
# - name: Rust Cache
# uses: Swatinem/rust-cache@v2
# - name: Install nextest
# uses: taiki-e/install-action@nextest
# - name: cargo llvm-cov
# run: cargo llvm-cov nextest --locked --workspace --all-features --all-targets --lcov --output-path lcov.info
# - name: Upload to codecov.io
# uses: codecov/codecov-action@v5
# with:
# fail_ci_if_error: true
# token: ${{ secrets.CODECOV_TOKEN }} # required
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -7,7 +7,7 @@ edition = "2024"
[dependencies]
bincode = "2.0.1"
ndarray = "0.16.1"
rand = "0.9.0"
rand = "0.9.2"
rand_distr = "0.5.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
14 changes: 14 additions & 0 deletions rustfmt.toml
@@ -0,0 +1,14 @@
edition = "2024"
style_edition = "2024"
comment_width = 120
format_code_in_doc_comments = true
format_macro_bodies = true
format_macro_matchers = true
normalize_comments = true
normalize_doc_attributes = true
imports_granularity = "Crate"
group_imports = "StdExternalCrate"
reorder_impl_items = true
reorder_imports = true
tab_spaces = 4
wrap_comments = true
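
Two of these options do most of the work in the source diffs below: imports_granularity = "Crate" merges all imports from one crate into a single use tree, and group_imports = "StdExternalCrate" splits imports into std / external-crate / local-crate groups. Both are unstable rustfmt options, which is likely why the fmt job above runs on the nightly toolchain. A minimal sketch of the effect, reusing import names from this repo (illustrative, not an exact hunk from the PR):

// Before `cargo fmt`: one flat import list in whatever order it was written.
use crate::llm::Layer;
use ndarray::Array2;
use std::fs;
use crate::adam::Adam;
use ndarray::Axis;

// After `cargo fmt` with this rustfmt.toml: blank-line-separated groups
// (std, then external crates, then this crate), one `use` tree per crate.
use std::fs;

use ndarray::{Array2, Axis};

use crate::{adam::Adam, llm::Layer};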
5 changes: 3 additions & 2 deletions src/dataset_loader.rs
@@ -1,13 +1,14 @@
use csv::ReaderBuilder;
use serde_json;
use std::fs;

use csv::ReaderBuilder;

pub struct Dataset {
pub pretraining_data: Vec<String>,
pub chat_training_data: Vec<String>,
}

#[allow(dead_code)]
#[allow(clippy::upper_case_acronyms)]
pub enum DatasetType {
JSON,
CSV,
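A note on the new allow attribute here: clippy's upper_case_acronyms lint flags variant names written as all-caps acronyms, and because check.yml runs clippy with -D warnings it would fail CI by suggesting camel-case names instead. The #[allow] keeps the existing JSON / CSV names. The rename clippy would otherwise ask for looks like this (hypothetical, not part of this PR):

// What clippy's upper_case_acronyms lint would want instead:
pub enum DatasetType {
    Json,
    Csv,
}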
3 changes: 2 additions & 1 deletion src/embeddings.rs
@@ -1,7 +1,8 @@
use crate::{EMBEDDING_DIM, MAX_SEQ_LEN, adam::Adam, llm::Layer, vocab::Vocab};
use ndarray::{Array2, s};
use rand_distr::{Distribution, Normal};

use crate::{EMBEDDING_DIM, MAX_SEQ_LEN, adam::Adam, llm::Layer, vocab::Vocab};

pub struct Embeddings {
pub token_embeddings: Array2<f32>,
pub positional_embeddings: Array2<f32>,
6 changes: 3 additions & 3 deletions src/feed_forward.rs
@@ -1,8 +1,8 @@
use crate::{adam::Adam, llm::Layer};
use ndarray::Array2;
use ndarray::Axis;
use ndarray::{Array2, Axis};
use rand_distr::{Distribution, Normal};

use crate::{adam::Adam, llm::Layer};

pub struct FeedForward {
w1: Array2<f32>,
b1: Array2<f32>,
4 changes: 2 additions & 2 deletions src/layer_norm.rs
@@ -1,7 +1,7 @@
use crate::adam::Adam;
use crate::llm::Layer;
use ndarray::{Array2, Axis};

use crate::{adam::Adam, llm::Layer};

pub struct LayerNorm {
epsilon: f32, // Small constant for stability
gamma: Array2<f32>, // Learnable scaling parameter
28 changes: 14 additions & 14 deletions src/llm.rs
@@ -1,13 +1,11 @@
use crate::EMBEDDING_DIM;
use crate::Embeddings;
use crate::HIDDEN_DIM;
use crate::MAX_SEQ_LEN;
use crate::Vocab;
use crate::output_projection::OutputProjection;
use crate::transformer::TransformerBlock;
use ndarray::{Array1, Array2, Axis};
use std::cmp::Ordering;

use ndarray::{Array1, Array2, Axis};

use crate::{
EMBEDDING_DIM, Embeddings, HIDDEN_DIM, MAX_SEQ_LEN, Vocab, output_projection::OutputProjection,
transformer::TransformerBlock,
};
pub trait Layer {
fn layer_type(&self) -> &str;

@@ -18,6 +16,7 @@ pub trait Layer {
fn parameters(&self) -> usize;
}

#[allow(clippy::upper_case_acronyms)]
pub struct LLM {
pub vocab: Vocab,
pub network: Vec<Box<dyn Layer>>,
@@ -57,7 +56,7 @@ impl LLM {
// Sum the parameters across all layers in the network
self.network
.iter()
.map(|layer: &Box<dyn Layer>| layer.parameters())
.map(|layer| layer.parameters())
.sum::<usize>()
}

@@ -126,7 +125,8 @@ impl LLM {
.to_owned()
.insert_axis(Axis(0));

// Softmax - convert activiations of each token to a probability distribution over the vocabulary
// Softmax - convert activations of each token to a probability distribution over the
// vocabulary
let probs = Self::softmax(&last_logit); // 1 x vocab_size

// Greedy Decode - Choose the highest probability token for each position
@@ -238,10 +238,10 @@ impl LLM {
}

// Add any remaining word
if !current_word.is_empty() {
if let Some(token_id) = self.vocab.encode(&current_word) {
tokens.push(token_id);
}
if !current_word.is_empty()
&& let Some(token_id) = self.vocab.encode(&current_word)
{
tokens.push(token_id);
}
}

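The tokenizer change above collapses a nested if + if let pair into a single condition using a let-chain, which is available in the crate's 2024 edition. A minimal standalone sketch of the pattern with a hypothetical helper (not code from this PR):

// Let-chains: a boolean test and an `if let` pattern match combined with
// `&&` in one `if`, avoiding an extra level of nesting.
fn push_token(tokens: &mut Vec<usize>, word: &str, encode: impl Fn(&str) -> Option<usize>) {
    if !word.is_empty()
        && let Some(token_id) = encode(word)
    {
        tokens.push(token_id);
    }
}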
10 changes: 5 additions & 5 deletions src/main.rs
@@ -1,13 +1,13 @@
use std::io::Write;

use crate::embeddings::Embeddings;
use crate::llm::LLM;
use crate::output_projection::OutputProjection;
use crate::transformer::TransformerBlock;
use crate::vocab::Vocab;
use ::llm::{EMBEDDING_DIM, HIDDEN_DIM, MAX_SEQ_LEN};
use dataset_loader::{Dataset, DatasetType};

use crate::{
embeddings::Embeddings, llm::LLM, output_projection::OutputProjection,
transformer::TransformerBlock, vocab::Vocab,
};

mod adam;
mod dataset_loader;
mod embeddings;
8 changes: 4 additions & 4 deletions src/self_attention.rs
@@ -1,9 +1,9 @@
use crate::EMBEDDING_DIM;
use crate::adam::Adam;
use crate::llm::Layer;
use std::f32;

use ndarray::Array2;
use rand_distr::{Distribution, Normal};
use std::f32;

use crate::{EMBEDDING_DIM, adam::Adam, llm::Layer};

pub struct SelfAttention {
pub embedding_dim: usize,
14 changes: 6 additions & 8 deletions src/transformer.rs
@@ -1,8 +1,8 @@
use crate::feed_forward::FeedForward;
use crate::layer_norm::LayerNorm;
use crate::llm::Layer;
use crate::self_attention::SelfAttention;
use ndarray::Array2;

use crate::{
feed_forward::FeedForward, layer_norm::LayerNorm, llm::Layer, self_attention::SelfAttention,
};
pub struct TransformerBlock {
attention: SelfAttention,
feed_forward: FeedForward,
@@ -32,9 +32,8 @@ impl Layer for TransformerBlock {
let norm1_out = self.norm1.normalize(&attention_out);

let feed_forward_out = self.feed_forward.forward(&norm1_out); // includes residual
let norm2_out = self.norm2.normalize(&feed_forward_out);

norm2_out
self.norm2.normalize(&feed_forward_out)
}

fn backward(&mut self, grads: &Array2<f32>, lr: f32) -> Array2<f32> {
@@ -48,9 +47,8 @@
let grad_norm1 = self.norm1.backward(&grad_ffn, lr);

// Backward through attention (includes residual connection)
let grad_attn = self.attention.backward(&grad_norm1, lr);

grad_attn
self.attention.backward(&grad_norm1, lr)
}

fn parameters(&self) -> usize {