diff --git a/.github/codecov.yml b/.github/codecov.yml
new file mode 100644
index 0000000..f8e75a2
--- /dev/null
+++ b/.github/codecov.yml
@@ -0,0 +1,22 @@
+# # ref: https://docs.codecov.com/docs/codecovyml-reference
+# comment out coverage job for now, https://github.com/tekaratzas/RustGPT/pull/11#issuecomment-3361854174
+# coverage:
+#   # Hold ourselves to a high bar
+#   range: 55..100
+#   round: down
+#   precision: 1
+#   status:
+#     # ref: https://docs.codecov.com/docs/commit-status
+#     project:
+#       default:
+#         # Avoid false negatives
+#         threshold: 1%
+
+# # Test files aren't important for coverage
+# ignore:
+#   - "tests"
+
+# # Make comments less noisy
+# comment:
+#   layout: "files"
+#   require_changes: yes
\ No newline at end of file
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
new file mode 100644
index 0000000..aba9556
--- /dev/null
+++ b/.github/workflows/check.yml
@@ -0,0 +1,73 @@
+permissions:
+  contents: read
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+  merge_group:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+env:
+  RUST_TOOLCHAIN: stable
+
+name: Check
+jobs:
+  fmt:
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+    name: fmt
+    permissions:
+      # Give the default GITHUB_TOKEN write permission to commit and push the
+      # added or changed files to the repository.
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Install rust
+        uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: nightly #${{ env.RUST_TOOLCHAIN }}
+          components: rustfmt
+      - run: cargo fmt --check
+
+  clippy:
+    runs-on: ubuntu-latest
+    name: clippy
+    permissions:
+      contents: read
+      checks: write
+    strategy:
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Install ${{ env.RUST_TOOLCHAIN }}
+        uses: dtolnay/rust-toolchain@master # master
+        with:
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
+          components: clippy
+      - name: Rust Cache
+        uses: Swatinem/rust-cache@v2
+      - run: cargo clippy --workspace --all-features --all-targets -- -D warnings
+
+  typos:
+    runs-on: ubuntu-latest
+    name: typos
+    permissions:
+      contents: read
+    strategy:
+      fail-fast: false
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Check spelling
+        uses: crate-ci/typos@master
+
+
\ No newline at end of file
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
deleted file mode 100644
index ead13e2..0000000
--- a/.github/workflows/rust.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-name: Rust
-
-on:
-  push:
-    branches: ["main"]
-  pull_request:
-    branches: ["main"]
-
-env:
-  CARGO_TERM_COLOR: always
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-      - uses: actions/checkout@v4
-      - name: Format Check
-        run: cargo fmt -- --check
-      - name: Build
-        run: cargo build --verbose
-      - name: Run tests
-        run: cargo test --verbose
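The new `check.yml` turns every clippy warning into a hard failure (`-D warnings`), which is why the Rust changes further down add explicit `#[allow(...)]` attributes instead of renaming public items. A minimal sketch of that trade-off, reusing the `DatasetType` enum that `src/dataset_loader.rs` annotates (the `main` function is only there so the snippet compiles on its own):

```rust
// Under `cargo clippy -- -D warnings`, the all-caps variants below would fail
// the build via `clippy::upper_case_acronyms`; the attribute opts out locally
// rather than renaming the variants to `Json`/`Csv`.
#[allow(dead_code)]
#[allow(clippy::upper_case_acronyms)]
pub enum DatasetType {
    JSON,
    CSV,
}

fn main() {
    let _kind = DatasetType::JSON;
}
```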
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..da07418
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,68 @@
+permissions:
+  contents: read
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+  merge_group:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+env:
+  RUST_TOOLCHAIN: stable
+
+name: Test
+jobs:
+  required:
+    runs-on: ubuntu-latest
+    name: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+      - name: Install ${{ env.RUST_TOOLCHAIN }}
+        uses: dtolnay/rust-toolchain@master
+        with:
+          toolchain: ${{ env.RUST_TOOLCHAIN }}
+      - name: cargo generate-lockfile
+        if: hashFiles('Cargo.lock') == ''
+        run: cargo generate-lockfile
+      # https://twitter.com/jonhoo/status/1571290371124260865
+      - name: Rust Cache
+        uses: Swatinem/rust-cache@v2
+      - name: Install nextest
+        uses: taiki-e/install-action@nextest
+      - name: cargo nextest --locked
+        run: cargo nextest run --locked --workspace --all-features --all-targets
+
+  # comment out coverage job for now, https://github.com/tekaratzas/RustGPT/pull/11#issuecomment-3361854174
+  # coverage:
+  #   runs-on: ubuntu-latest
+  #   name: coverage
+  #   steps:
+  #     - uses: actions/checkout@v4
+  #       with:
+  #         submodules: true
+  #     - name: Install rust
+  #       uses: dtolnay/rust-toolchain@master
+  #       with:
+  #         toolchain: ${{ env.RUST_TOOLCHAIN }}
+  #         components: llvm-tools-preview
+  #     - name: cargo install cargo-llvm-cov
+  #       uses: taiki-e/install-action@cargo-llvm-cov
+  #     - name: cargo generate-lockfile
+  #       if: hashFiles('Cargo.lock') == ''
+  #       run: cargo generate-lockfile
+  #     - name: Rust Cache
+  #       uses: Swatinem/rust-cache@v2
+  #     - name: Install nextest
+  #       uses: taiki-e/install-action@nextest
+  #     - name: cargo llvm-cov
+  #       run: cargo llvm-cov nextest --locked --workspace --all-features --all-targets --lcov --output-path lcov.info
+  #     - name: Upload to codecov.io
+  #       uses: codecov/codecov-action@v5
+  #       with:
+  #         fail_ci_if_error: true
+  #         token: ${{ secrets.CODECOV_TOKEN }} # required
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index 3ad229c..f88cc2c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,7 @@ edition = "2024"
 [dependencies]
 bincode = "2.0.1"
 ndarray = "0.16.1"
-rand = "0.9.0"
+rand = "0.9.2"
 rand_distr = "0.5.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..d85c165
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,14 @@
+edition = "2024"
+style_edition = "2024"
+comment_width = 120
+format_code_in_doc_comments = true
+format_macro_bodies = true
+format_macro_matchers = true
+normalize_comments = true
+normalize_doc_attributes = true
+imports_granularity = "Crate"
+group_imports = "StdExternalCrate"
+reorder_impl_items = true
+reorder_imports = true
+tab_spaces = 4
+wrap_comments = true
diff --git a/src/dataset_loader.rs b/src/dataset_loader.rs
index 0c63eb2..bb0192a 100644
--- a/src/dataset_loader.rs
+++ b/src/dataset_loader.rs
@@ -1,13 +1,14 @@
-use csv::ReaderBuilder;
-use serde_json;
 use std::fs;
 
+use csv::ReaderBuilder;
+
 pub struct Dataset {
     pub pretraining_data: Vec<String>,
     pub chat_training_data: Vec<String>,
 }
 
 #[allow(dead_code)]
+#[allow(clippy::upper_case_acronyms)]
 pub enum DatasetType {
     JSON,
     CSV,
diff --git a/src/embeddings.rs b/src/embeddings.rs
index 1d36685..72d8a6d 100644
--- a/src/embeddings.rs
+++ b/src/embeddings.rs
@@ -1,7 +1,8 @@
-use crate::{EMBEDDING_DIM, MAX_SEQ_LEN, adam::Adam, llm::Layer, vocab::Vocab};
 use ndarray::{Array2, s};
 use rand_distr::{Distribution, Normal};
 
+use crate::{EMBEDDING_DIM, MAX_SEQ_LEN, adam::Adam, llm::Layer, vocab::Vocab};
+
 pub struct Embeddings {
     pub token_embeddings: Array2<f32>,
     pub positional_embeddings: Array2<f32>,
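Most of the Rust hunks below are simply this `rustfmt.toml` being applied: `group_imports = "StdExternalCrate"` splits imports into std / external / `crate::` groups, and `imports_granularity = "Crate"` merges each crate's paths into one `use`. Both options are unstable rustfmt settings, which is presumably why the `fmt` job above pins the nightly toolchain. A self-contained sketch of the resulting layout (the `util` module and its contents are made up; `rand` is already a dependency of this crate):

```rust
// Layout produced by `group_imports = "StdExternalCrate"` plus
// `imports_granularity = "Crate"`: std first, then external crates, then
// crate-local imports, one merged `use` per crate, blank line between groups.
use std::collections::HashMap;

use rand::Rng;

use crate::util::label;

mod util {
    pub fn label() -> &'static str {
        "tokens"
    }
}

fn main() {
    let mut counts: HashMap<&str, u32> = HashMap::new();
    counts.insert(label(), rand::rng().random_range(0..10));
    println!("{counts:?}");
}
```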
diff --git a/src/feed_forward.rs b/src/feed_forward.rs
index db08438..2048f39 100644
--- a/src/feed_forward.rs
+++ b/src/feed_forward.rs
@@ -1,8 +1,8 @@
-use crate::{adam::Adam, llm::Layer};
-use ndarray::Array2;
-use ndarray::Axis;
+use ndarray::{Array2, Axis};
 use rand_distr::{Distribution, Normal};
 
+use crate::{adam::Adam, llm::Layer};
+
 pub struct FeedForward {
     w1: Array2<f32>,
     b1: Array2<f32>,
diff --git a/src/layer_norm.rs b/src/layer_norm.rs
index 895a6de..ff488d4 100644
--- a/src/layer_norm.rs
+++ b/src/layer_norm.rs
@@ -1,7 +1,7 @@
-use crate::adam::Adam;
-use crate::llm::Layer;
 use ndarray::{Array2, Axis};
 
+use crate::{adam::Adam, llm::Layer};
+
 pub struct LayerNorm {
     epsilon: f32,       // Small constant for stability
     gamma: Array2<f32>, // Learnable scaling parameter
diff --git a/src/llm.rs b/src/llm.rs
index c2a3ee9..d0d6688 100644
--- a/src/llm.rs
+++ b/src/llm.rs
@@ -1,13 +1,11 @@
-use crate::EMBEDDING_DIM;
-use crate::Embeddings;
-use crate::HIDDEN_DIM;
-use crate::MAX_SEQ_LEN;
-use crate::Vocab;
-use crate::output_projection::OutputProjection;
-use crate::transformer::TransformerBlock;
-use ndarray::{Array1, Array2, Axis};
 use std::cmp::Ordering;
 
+use ndarray::{Array1, Array2, Axis};
+
+use crate::{
+    EMBEDDING_DIM, Embeddings, HIDDEN_DIM, MAX_SEQ_LEN, Vocab, output_projection::OutputProjection,
+    transformer::TransformerBlock,
+};
+
 pub trait Layer {
     fn layer_type(&self) -> &str;
@@ -18,6 +16,7 @@ pub trait Layer {
     fn parameters(&self) -> usize;
 }
 
+#[allow(clippy::upper_case_acronyms)]
 pub struct LLM {
     pub vocab: Vocab,
     pub network: Vec<Box<dyn Layer>>,
@@ -57,7 +56,7 @@ impl LLM {
         // Sum the parameters across all layers in the network
         self.network
             .iter()
-            .map(|layer: &Box<dyn Layer>| layer.parameters())
+            .map(|layer| layer.parameters())
            .sum::<usize>()
     }
 
@@ -126,7 +125,8 @@ impl LLM {
             .to_owned()
             .insert_axis(Axis(0));
 
-        // Softmax - convert activiations of each token to a probability distribution over the vocabulary
+        // Softmax - convert activations of each token to a probability distribution over the
+        // vocabulary
         let probs = Self::softmax(&last_logit); // 1 x vocab_size
 
         // Greedy Decode - Choose the highest probability token for each position
@@ -238,10 +238,10 @@ impl LLM {
         }
 
         // Add any remaining word
-        if !current_word.is_empty() {
-            if let Some(token_id) = self.vocab.encode(&current_word) {
-                tokens.push(token_id);
-            }
+        if !current_word.is_empty()
+            && let Some(token_id) = self.vocab.encode(&current_word)
+        {
+            tokens.push(token_id);
         }
diff --git a/src/main.rs b/src/main.rs
index 0c9712b..5babf3c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,13 @@
 use std::io::Write;
 
-use crate::embeddings::Embeddings;
-use crate::llm::LLM;
-use crate::output_projection::OutputProjection;
-use crate::transformer::TransformerBlock;
-use crate::vocab::Vocab;
 use ::llm::{EMBEDDING_DIM, HIDDEN_DIM, MAX_SEQ_LEN};
 use dataset_loader::{Dataset, DatasetType};
 
+use crate::{
+    embeddings::Embeddings, llm::LLM, output_projection::OutputProjection,
+    transformer::TransformerBlock, vocab::Vocab,
+};
+
 mod adam;
 mod dataset_loader;
 mod embeddings;
diff --git a/src/self_attention.rs b/src/self_attention.rs
index 6252522..2e31324 100644
--- a/src/self_attention.rs
+++ b/src/self_attention.rs
@@ -1,9 +1,9 @@
-use crate::EMBEDDING_DIM;
-use crate::adam::Adam;
-use crate::llm::Layer;
+use std::f32;
+
 use ndarray::Array2;
 use rand_distr::{Distribution, Normal};
-use std::f32;
+
+use crate::{EMBEDDING_DIM, adam::Adam, llm::Layer};
 
 pub struct SelfAttention {
     pub embedding_dim: usize,
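One change in `src/llm.rs` that is more than import shuffling is the tokenizer tail: the nested `if` + `if let` around the last word is collapsed into a single `if ... && let ...` chain, which needs the let-chains syntax available on edition 2024 (the edition this crate already targets). A standalone sketch of the same shape, with a closure standing in for `self.vocab.encode`:

```rust
use std::collections::HashMap;

fn main() {
    // Stand-in for the Vocab lookup used in src/llm.rs.
    let vocab: HashMap<&str, usize> = HashMap::from([("hello", 0), ("world", 1)]);
    let encode = |word: &str| vocab.get(word).copied();

    let current_word = String::from("hello");
    let mut tokens: Vec<usize> = Vec::new();

    // Before: `if !current_word.is_empty() { if let Some(id) = encode(..) { .. } }`.
    // After: one let-chain with identical behavior and one less nesting level.
    if !current_word.is_empty()
        && let Some(token_id) = encode(&current_word)
    {
        tokens.push(token_id);
    }

    assert_eq!(tokens, vec![0]);
}
```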
diff --git a/src/transformer.rs b/src/transformer.rs
index 4795f25..e700c8c 100644
--- a/src/transformer.rs
+++ b/src/transformer.rs
@@ -1,8 +1,8 @@
-use crate::feed_forward::FeedForward;
-use crate::layer_norm::LayerNorm;
-use crate::llm::Layer;
-use crate::self_attention::SelfAttention;
 use ndarray::Array2;
+
+use crate::{
+    feed_forward::FeedForward, layer_norm::LayerNorm, llm::Layer, self_attention::SelfAttention,
+};
 
 pub struct TransformerBlock {
     attention: SelfAttention,
     feed_forward: FeedForward,
@@ -32,9 +32,8 @@ impl Layer for TransformerBlock {
         let norm1_out = self.norm1.normalize(&attention_out);
 
         let feed_forward_out = self.feed_forward.forward(&norm1_out); // includes residual
-        let norm2_out = self.norm2.normalize(&feed_forward_out);
 
-        norm2_out
+        self.norm2.normalize(&feed_forward_out)
     }
 
     fn backward(&mut self, grads: &Array2<f32>, lr: f32) -> Array2<f32> {
@@ -48,9 +47,8 @@
         let grad_norm1 = self.norm1.backward(&grad_ffn, lr);
 
         // Backward through attention (includes residual connection)
-        let grad_attn = self.attention.backward(&grad_norm1, lr);
 
-        grad_attn
+        self.attention.backward(&grad_norm1, lr)
     }
 
     fn parameters(&self) -> usize {
diff --git a/src/vocab.rs b/src/vocab.rs
index 448d761..ced340b 100644
--- a/src/vocab.rs
+++ b/src/vocab.rs
@@ -1,6 +1,6 @@
+use std::collections::{HashMap, HashSet};
+
 use bincode::Encode;
-use std::collections::HashMap;
-use std::collections::HashSet;
 
 #[derive(Clone, Encode)]
 pub struct Vocab {
@@ -77,10 +77,10 @@
     }
 }
 
-impl Into<String> for Vocab {
-    fn into(self) -> String {
+impl From<Vocab> for String {
+    fn from(val: Vocab) -> Self {
         String::from_iter(
-            self.words
+            val.words
                 .iter()
                 .enumerate()
                 .map(|(i, str)| format!("({i},{str}),")),
diff --git a/tests/feed_forward_test.rs b/tests/feed_forward_test.rs
index 922239c..c651fb6 100644
--- a/tests/feed_forward_test.rs
+++ b/tests/feed_forward_test.rs
@@ -1,5 +1,4 @@
-use llm::feed_forward::FeedForward;
-use llm::{EMBEDDING_DIM, HIDDEN_DIM, Layer};
+use llm::{EMBEDDING_DIM, HIDDEN_DIM, Layer, feed_forward::FeedForward};
 use ndarray::Array2;
 
 #[test]
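`src/vocab.rs` switches from implementing `Into<String>` to implementing `From<Vocab> for String`, the direction clippy's `from_over_into` lint prefers: the standard library's blanket impl then provides `Into` for free, so callers lose nothing. A self-contained sketch of the pattern with a stand-in type (`Tag` and its field are made up; the real impl builds the string from `Vocab::words` in the same way):

```rust
// Implementing `From<T> for String` also gives you `T: Into<String>` through
// the blanket `impl<T, U> Into<U> for T where U: From<T>` in core.
struct Tag {
    words: Vec<String>,
}

impl From<Tag> for String {
    fn from(val: Tag) -> Self {
        val.words
            .iter()
            .enumerate()
            .map(|(i, word)| format!("({i},{word}),"))
            .collect()
    }
}

fn main() {
    let a = Tag { words: vec!["hello".into(), "world".into()] };
    let b = Tag { words: vec!["rust".into()] };

    let from_direction: String = String::from(a);
    let into_direction: String = b.into(); // provided by the blanket impl
    println!("{from_direction} {into_direction}");
}
```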
diff --git a/tests/llm_test.rs b/tests/llm_test.rs
index 937b12b..1e2fec4 100644
--- a/tests/llm_test.rs
+++ b/tests/llm_test.rs
@@ -1,10 +1,7 @@
-use llm::EMBEDDING_DIM;
-use llm::Embeddings;
-use llm::HIDDEN_DIM;
-use llm::MAX_SEQ_LEN;
-use llm::output_projection::OutputProjection;
-use llm::transformer::TransformerBlock;
-use llm::{LLM, Layer, Vocab};
+use llm::{
+    EMBEDDING_DIM, Embeddings, HIDDEN_DIM, LLM, Layer, MAX_SEQ_LEN, Vocab,
+    output_projection::OutputProjection, transformer::TransformerBlock,
+};
 use ndarray::Array2;
 
 struct TestOutputProjectionLayer {
@@ -46,7 +43,7 @@ impl Layer for TestOutputProjectionLayer {
         let grad_input = input.dot(grads);
         self.cached_grads = Some(grad_input.clone());
 
-        return grad_input;
+        grad_input
     }
 
     fn parameters(&self) -> usize {
@@ -158,7 +155,8 @@ fn test_llm_total_parameters() {
     let param_count = llm.total_parameters();
     assert!(param_count > 0);
 
-    // Let's validate that this is equal to the expected total number of parameters. (based on our source)
+    // Let's validate that this is equal to the expected total number of parameters. (based on our
+    // source)
     let expected_embeddings_parameters = vocab_size * EMBEDDING_DIM + MAX_SEQ_LEN * EMBEDDING_DIM;
     let expected_transformer_block_parameters = (2 * EMBEDDING_DIM) + // LayerNorm
         (3 * EMBEDDING_DIM * EMBEDDING_DIM) + // SelfAttention
diff --git a/tests/output_projection_test.rs b/tests/output_projection_test.rs
index a0f14c1..5b467ad 100644
--- a/tests/output_projection_test.rs
+++ b/tests/output_projection_test.rs
@@ -1,5 +1,4 @@
-use llm::output_projection::OutputProjection;
-use llm::{EMBEDDING_DIM, Layer};
+use llm::{EMBEDDING_DIM, Layer, output_projection::OutputProjection};
 use ndarray::Array2;
 
 #[test]
diff --git a/tests/self_attention_test.rs b/tests/self_attention_test.rs
index 009c7e4..4e1e5ff 100644
--- a/tests/self_attention_test.rs
+++ b/tests/self_attention_test.rs
@@ -1,5 +1,4 @@
-use llm::self_attention::SelfAttention;
-use llm::{EMBEDDING_DIM, Layer};
+use llm::{EMBEDDING_DIM, Layer, self_attention::SelfAttention};
 use ndarray::Array2;
 
 #[test]
diff --git a/tests/transformer_test.rs b/tests/transformer_test.rs
index c198915..0fa49d1 100644
--- a/tests/transformer_test.rs
+++ b/tests/transformer_test.rs
@@ -1,5 +1,4 @@
-use llm::transformer::TransformerBlock;
-use llm::{EMBEDDING_DIM, HIDDEN_DIM, Layer};
+use llm::{EMBEDDING_DIM, HIDDEN_DIM, Layer, transformer::TransformerBlock};
 use ndarray::Array2;
 
 #[test]
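The `test_llm_total_parameters` hunk above only re-wraps a comment, but it sits next to the bookkeeping the test actually checks: every layer reports its own parameter count and `LLM::total_parameters` sums them over `Vec<Box<dyn Layer>>` (the closure annotation dropped in `src/llm.rs` was redundant). A stripped-down sketch of that sum; only the `parameters` method of the real `Layer` trait is reproduced, and the struct names and dimensions here are invented for illustration:

```rust
// Each layer knows its own parameter count; the model just sums trait objects.
trait Layer {
    fn parameters(&self) -> usize;
}

struct EmbeddingTable { vocab_size: usize, dim: usize, max_seq_len: usize }
struct Norm { dim: usize }

impl Layer for EmbeddingTable {
    fn parameters(&self) -> usize {
        // token table + positional table, mirroring expected_embeddings_parameters
        self.vocab_size * self.dim + self.max_seq_len * self.dim
    }
}

impl Layer for Norm {
    fn parameters(&self) -> usize {
        2 * self.dim // scale + shift: the `2 * EMBEDDING_DIM` term in the test
    }
}

fn main() {
    let network: Vec<Box<dyn Layer>> = vec![
        Box::new(EmbeddingTable { vocab_size: 1_000, dim: 128, max_seq_len: 80 }),
        Box::new(Norm { dim: 128 }),
    ];

    // Same shape as LLM::total_parameters after the diff: no closure annotation needed.
    let total: usize = network.iter().map(|layer| layer.parameters()).sum();
    assert_eq!(total, 1_000 * 128 + 80 * 128 + 2 * 128);
    println!("total parameters: {total}");
}
```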