From b5435fb1541bf65ddd32c4cba9e9ae0ffb9b70a5 Mon Sep 17 00:00:00 2001 From: Maksim Shylau Date: Sun, 8 Feb 2026 19:13:56 +0100 Subject: [PATCH 1/5] feat: update assignments --- Assignments_v1.md | 48 +++ README.md | 74 ++-- .../01-nodejs-fundamentals/assignment.md | 99 ++++++ .../01-nodejs-fundamentals/score.md | 43 +++ assignments-v2/02-crud-api/assignment.md | 84 +++++ assignments-v2/02-crud-api/score.md | 34 ++ .../03-fastify-rest-api/assignment.md | 176 ++++++++++ assignments-v2/03-fastify-rest-api/score.md | 33 ++ .../04-database-prisma/assignment.md | 96 ++++++ assignments-v2/04-database-prisma/score.md | 35 ++ assignments-v2/05-auth-jwt/assignment.md | 76 +++++ assignments-v2/05-auth-jwt/score.md | 28 ++ assignments-v2/06a-testing/assignment.md | 70 ++++ assignments-v2/06a-testing/score.md | 23 ++ .../06b-logging-errors/assignment.md | 80 +++++ assignments-v2/06b-logging-errors/score.md | 24 ++ assignments-v2/07-docker/assignment.md | 102 ++++++ assignments-v2/07-docker/score.md | 30 ++ assignments-v2/08-websockets/assignment.md | 319 ++++++++++++++++++ assignments-v2/08-websockets/score.md | 31 ++ .../09-ai-llm-integration/assignment.md | 204 +++++++++++ assignments-v2/09-ai-llm-integration/score.md | 34 ++ .../10-ai-rag-vectordb/assignment.md | 266 +++++++++++++++ assignments-v2/10-ai-rag-vectordb/score.md | 34 ++ 24 files changed, 2007 insertions(+), 36 deletions(-) create mode 100644 Assignments_v1.md create mode 100644 assignments-v2/01-nodejs-fundamentals/assignment.md create mode 100644 assignments-v2/01-nodejs-fundamentals/score.md create mode 100644 assignments-v2/02-crud-api/assignment.md create mode 100644 assignments-v2/02-crud-api/score.md create mode 100644 assignments-v2/03-fastify-rest-api/assignment.md create mode 100644 assignments-v2/03-fastify-rest-api/score.md create mode 100644 assignments-v2/04-database-prisma/assignment.md create mode 100644 assignments-v2/04-database-prisma/score.md create mode 100644 assignments-v2/05-auth-jwt/assignment.md create mode 100644 assignments-v2/05-auth-jwt/score.md create mode 100644 assignments-v2/06a-testing/assignment.md create mode 100644 assignments-v2/06a-testing/score.md create mode 100644 assignments-v2/06b-logging-errors/assignment.md create mode 100644 assignments-v2/06b-logging-errors/score.md create mode 100644 assignments-v2/07-docker/assignment.md create mode 100644 assignments-v2/07-docker/score.md create mode 100644 assignments-v2/08-websockets/assignment.md create mode 100644 assignments-v2/08-websockets/score.md create mode 100644 assignments-v2/09-ai-llm-integration/assignment.md create mode 100644 assignments-v2/09-ai-llm-integration/score.md create mode 100644 assignments-v2/10-ai-rag-vectordb/assignment.md create mode 100644 assignments-v2/10-ai-rag-vectordb/score.md diff --git a/Assignments_v1.md b/Assignments_v1.md new file mode 100644 index 0000000..5c46d3f --- /dev/null +++ b/Assignments_v1.md @@ -0,0 +1,48 @@ +# [Course program (for RS School students)](https://github.com/rolling-scopes-school/tasks/blob/master/node/README.md) + +# Node.js Assignments + +## Node.js basics +- [Assignment](assignments/nodejs-basics/assignment.md) +- [Scoring](assignments/nodejs-basics/score.md) +- [Cross-Check manual](assignments/nodejs-basics/cross-check-manual.md) + +## File Manager +- [Assignment](assignments/file-manager/assignment.md) +- [Scoring](assignments/file-manager/score.md) + +## Basic Testing +- [Assignment](assignments/basic-testing/assignment.md) +- [Scoring](assignments/basic-testing/score.md) + +## 
Simple CRUD API +- [Assignment](assignments/crud-api/assignment.md) +- [Scoring](assignments/crud-api/score.md) + +## Battleship backend +- [Assignment](assignments/battleship/assignment.md) +- [Scoring](assignments/battleship/score.md) + +## GraphQL service +- [Assignment](assignments/graphql-service/assignment.md) +- [Scoring](assignments/graphql-service/score.md) + +## REST Service: Basic +- [Assignment](assignments/rest-service/assignment.md) +- [Scoring](assignments/rest-service/score.md) + +## REST service: Database & ORM +- [Assignment](assignments/database-orm/assignment.md) +- [Scoring](assignments/database-orm/score.md) + +## REST service: Logging & Error handling +- [Assignment](assignments/logging-error-handling/assignment.md) +- [Scoring](assignments/logging-error-handling/score.md) + +## REST service: Authentication & Authorization +- [Assignment](assignments/authentication/assignment.md) +- [Scoring](assignments/authentication/score.md) + +## REST service: Containerization +- [Assignment](assignments/containerization/assignment.md) +- [Scoring](assignments/containerization/score.md) diff --git a/README.md b/README.md index 5c46d3f..532fa6f 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,50 @@ -# [Course program (for RS School students)](https://github.com/rolling-scopes-school/tasks/blob/master/node/README.md) +# Node.js Assignments v2 -# Node.js Assignments +## Node.js Fundamentals +- [Assignment](assignments-v2/01-nodejs-fundamentals/assignment.md) +- [Scoring](assignments-v2/01-nodejs-fundamentals/score.md) -## Node.js basics -- [Assignment](assignments/nodejs-basics/assignment.md) -- [Scoring](assignments/nodejs-basics/score.md) -- [Cross-Check manual](assignments/nodejs-basics/cross-check-manual.md) +## CRUD API +- [Assignment](assignments-v2/02-crud-api/assignment.md) +- [Scoring](assignments-v2/02-crud-api/score.md) -## File Manager -- [Assignment](assignments/file-manager/assignment.md) -- [Scoring](assignments/file-manager/score.md) +## Fastify REST API +- [Assignment](assignments-v2/03-fastify-rest-api/assignment.md) +- [Scoring](assignments-v2/03-fastify-rest-api/score.md) -## Basic Testing -- [Assignment](assignments/basic-testing/assignment.md) -- [Scoring](assignments/basic-testing/score.md) +## REST service: Database & Prisma ORM +- [Assignment](assignments-v2/04-database-prisma/assignment.md) +- [Scoring](assignments-v2/04-database-prisma/score.md) -## Simple CRUD API -- [Assignment](assignments/crud-api/assignment.md) -- [Scoring](assignments/crud-api/score.md) +## REST service: Authentication & Authorization +- [Assignment](assignments-v2/05-auth-jwt/assignment.md) +- [Scoring](assignments-v2/05-auth-jwt/score.md) -## Battleship backend -- [Assignment](assignments/battleship/assignment.md) -- [Scoring](assignments/battleship/score.md) +## REST service: Testing +- [Assignment](assignments-v2/06a-testing/assignment.md) +- [Scoring](assignments-v2/06a-testing/score.md) -## GraphQL service -- [Assignment](assignments/graphql-service/assignment.md) -- [Scoring](assignments/graphql-service/score.md) +## REST service: Logging & Error Handling +- [Assignment](assignments-v2/06b-logging-errors/assignment.md) +- [Scoring](assignments-v2/06b-logging-errors/score.md) -## REST Service: Basic -- [Assignment](assignments/rest-service/assignment.md) -- [Scoring](assignments/rest-service/score.md) +## REST service: Containerization & Docker +- [Assignment](assignments-v2/07-docker/assignment.md) +- [Scoring](assignments-v2/07-docker/score.md) -## REST service: Database & 
ORM -- [Assignment](assignments/database-orm/assignment.md) -- [Scoring](assignments/database-orm/score.md) +## WebSocket: Live Quiz Game +- [Assignment](assignments-v2/08-websockets/assignment.md) +- [Scoring](assignments-v2/08-websockets/score.md) -## REST service: Logging & Error handling -- [Assignment](assignments/logging-error-handling/assignment.md) -- [Scoring](assignments/logging-error-handling/score.md) +## AI/LLM Integration +- [Assignment](assignments-v2/09-ai-llm-integration/assignment.md) +- [Scoring](assignments-v2/09-ai-llm-integration/score.md) -## REST service: Authentication & Authorization -- [Assignment](assignments/authentication/assignment.md) -- [Scoring](assignments/authentication/score.md) +## AI RAG & Vector Database +- [Assignment](assignments-v2/10-ai-rag-vectordb/assignment.md) +- [Scoring](assignments-v2/10-ai-rag-vectordb/score.md) + +--- -## REST service: Containerization -- [Assignment](assignments/containerization/assignment.md) -- [Scoring](assignments/containerization/score.md) +## Previous version +- [Assignments v1](Assignments_v1.md) diff --git a/assignments-v2/01-nodejs-fundamentals/assignment.md b/assignments-v2/01-nodejs-fundamentals/assignment.md new file mode 100644 index 0000000..0e81d79 --- /dev/null +++ b/assignments-v2/01-nodejs-fundamentals/assignment.md @@ -0,0 +1,99 @@ +# Assignment: Node.js Fundamentals + +## Description + +Your task is to complete several tasks to learn Node.js core APIs. You will build a **Data Processing Toolkit** — a set of utilities that work with the file system, streams, CLI, hashing, compression, worker threads, and child processes. + +Fork the starter repository and implement the required functionality inside the `src/` folder. Each subtask has its own dedicated file inside a corresponding subfolder. + +## Technical requirements + +- Any external tools and libraries are prohibited +- Use 24.x.x version (24.10.0 or upper) of Node.js +- Don't change the signature of pre-written functions (e.g. don't rename them, don't make them synchronous, etc.) +- Prefer asynchronous API whenever possible + +## Subtasks + +### File System (src/fs) + +You should implement several functions in dedicated files: + +- `create.js` — implement function that creates a new JSON file `config.json` with the following content inside the `workspace` folder: + ```json + { + "name": "Data Toolkit", + "version": "1.0.0", + "features": ["fs", "streams", "cli"] + } + ``` + If the file already exists, `Error` with message `FS operation failed` must be thrown. + +- `copy.js` — implement function that recursively copies the `workspace` folder with all its content into a `workspace_backup` folder at the same level. If the `workspace` folder doesn't exist or `workspace_backup` has already been created, `Error` with message `FS operation failed` must be thrown. + +- `rename.js` — implement function that renames the file `data.csv` to `processed_data.csv`. If there's no file `data.csv` or `processed_data.csv` already exists, `Error` with message `FS operation failed` must be thrown. + +- `delete.js` — implement function that deletes the file `obsolete.txt`. If there's no file `obsolete.txt`, `Error` with message `FS operation failed` must be thrown. + +- `list.js` — implement function that prints an array of all files and folders from the `workspace` folder into the console. Each entry should be an object with `name` (string) and `sizeKB` (number, file size in kilobytes rounded to 2 decimal places) properties. For directories, `sizeKB` should be `null`. 
If the `workspace` folder doesn't exist, `Error` with message `FS operation failed` must be thrown. + +- `read.js` — implement function that prints content of the `report.txt` into the console. If there's no file `report.txt`, `Error` with message `FS operation failed` must be thrown. + +### Command Line Interface (src/cli) + +You should implement several functions in dedicated files: + +- `args.js` — implement function that parses command line arguments given in the format `--input path/to/file --output path/to/output --format json` and prints them to the console in the format `input is path/to/file, output is path/to/output, format is json` + +- `env.js` — implement function that parses environment variables with the prefix `DPT_` and prints them to the console in the format `DPT_name1=value1; DPT_name2=value2` + +### Hash (src/hash) + +You should implement a function in a dedicated file: + +- `calcHash.js` — implement function that calculates the SHA256 hash for file `data.txt` and logs it into the console as a `hex` string using Streams API + +### Streams (src/streams) + +You should implement several functions in dedicated files: + +- `read.js` — implement function that reads file `input.txt` content using Readable Stream and prints its content into `process.stdout` + +- `write.js` — implement function that writes `process.stdin` data into file `output.txt` content using Writable Stream + +- `transform.js` — implement function that reads data from `process.stdin`, converts each line to upper case using Transform Stream, and then writes it into `process.stdout` + +### Zlib (src/zip) + +You should implement several functions in dedicated files: + +- `compress.js` — implement function that compresses file `archive_me.txt` to `archive.gz` using `zlib` and Streams API + +- `decompress.js` — implement function that decompresses `archive.gz` back to the `archive_me.txt` with same content as before compression using `zlib` and Streams API + +### Worker Threads (src/wt) + +You should implement several functions in dedicated files: + +- `worker.js` — implement a function that receives a range `{ start, end }` from the main thread and finds all prime numbers within that range. The function should send the result back to the main thread. + +- `main.js` — implement function that creates a number of worker threads (equal to the number of host machine logical CPU cores) from file `worker.js` and distributes the range `[2, 10_000_000]` evenly among them. For example: on a host machine with **4** cores you should create **4** workers, each computing primes in its own subrange. After all workers finish, the function should log an array of results into the console. The results are an array of objects with 2 properties: + - `status` — `'resolved'` in case of successfully received value from `worker` or `'error'` in case of error in `worker` + - `data` — array of prime numbers from `worker` in case of success or `null` in case of error + + The results in the array must be in the same order that the workers were created. + +### Child Processes (src/cp) + +You should implement a function in a dedicated file: + +- `cp.js` — implement function `spawnChildProcess` that receives an array of arguments `args` and creates a child process from file `script.js`, passing these `args` to it. 
This function should create an IPC-channel between `stdin` and `stdout` of the master process and child process: + - child process `stdin` should receive input from master process `stdin` + - child process `stdout` should send data to master process `stdout` + +## Hints + +- Use `fs/promises` API for file system operations +- Use `crypto.createHash` with Streams for hash calculation +- Use `os.cpus().length` to get the number of CPU cores +- Use `child_process.spawn` or `child_process.fork` for child processes diff --git a/assignments-v2/01-nodejs-fundamentals/score.md b/assignments-v2/01-nodejs-fundamentals/score.md new file mode 100644 index 0000000..c6d4cf4 --- /dev/null +++ b/assignments-v2/01-nodejs-fundamentals/score.md @@ -0,0 +1,43 @@ +# Scoring: Node.js Fundamentals + +## Check + +For check simplification you have npm-scripts in `package.json`. +NB! Some scripts have predefined data (e.g. environment variables, CLI arguments). Feel free to change it during the check if necessary. + +## Basic Scope + +- File System (src/fs) + - **+6** `create.js` implemented properly + - **+10** `copy.js` implemented properly + - **+10** `rename.js` implemented properly + - **+6** `delete.js` implemented properly + - **+8** `list.js` implemented properly (including `name` and `sizeKB` properties) + - **+6** `read.js` implemented properly +- Command Line Interface (src/cli) + - **+6** `args.js` implemented properly + - **+6** `env.js` implemented properly +- Hash (src/hash) + - **+10** `calcHash.js` implemented properly +- Streams (src/streams) + - **+10** `read.js` implemented properly + - **+10** `write.js` implemented properly + - **+12** `transform.js` implemented properly +- Zlib (src/zip) + - **+10** `compress.js` implemented properly + - **+10** `decompress.js` implemented properly + +## Advanced Scope + +- Worker Threads (src/wt) + - **+10** `worker.js` implemented properly + - **+30** `main.js` implemented properly (creates correct number of workers, distributes ranges, collects results in order) +- Child Processes (src/cp) + - **+10** spawns child process + - **+10** child process `stdin` receives input from master process `stdin` + - **+10** child process `stdout` sends data to master process `stdout` + +## Forfeits + +- **-95% of total task score** Any external tools/libraries are used +- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) diff --git a/assignments-v2/02-crud-api/assignment.md b/assignments-v2/02-crud-api/assignment.md new file mode 100644 index 0000000..a8b9a56 --- /dev/null +++ b/assignments-v2/02-crud-api/assignment.md @@ -0,0 +1,84 @@ +# Assignment: CRUD API + +## Description + +Your task is to implement a simple CRUD API for a **Product Catalog** using an in-memory database underneath. You must use **only the built-in Node.js `http` module** — no frameworks are allowed. + +## Technical requirements + +- Task can be implemented in JavaScript or TypeScript +- Only `nodemon`, `dotenv`, `cross-env`, `typescript`, `ts-node`, `ts-node-dev`, `tsx`, linter and its plugins, bundler and its plugins and loaders, formatter and its plugins, `uuid`, `@types/*` as well as libraries used for testing are allowed +- Use 24.x.x version (24.10.0 or upper) of Node.js +- Prefer asynchronous API whenever possible + +## Implementation details + +1. 
Implemented endpoint `api/products`: + - **GET** `api/products` is used to get all products + - Server should answer with `status code` **200** and all product records + - **GET** `api/products/{productId}` + - Server should answer with `status code` **200** and the record with `id === productId` if it exists + - Server should answer with `status code` **400** and corresponding message if `productId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === productId` doesn't exist + - **POST** `api/products` is used to create a record about a new product and store it in the database + - Server should answer with `status code` **201** and newly created record + - Server should answer with `status code` **400** and corresponding message if request `body` does not contain **required** fields or if `price` is not a positive number + - **PUT** `api/products/{productId}` is used to update an existing product + - Server should answer with `status code` **200** and the updated record + - Server should answer with `status code` **400** and corresponding message if `productId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === productId` doesn't exist + - **DELETE** `api/products/{productId}` is used to delete an existing product from the database + - Server should answer with `status code` **204** if the record is found and deleted + - Server should answer with `status code` **400** and corresponding message if `productId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === productId` doesn't exist + +2. Products are stored as `objects` that have the following properties: + - `id` — unique identifier (`string`, `uuid`) generated on the server side + - `name` — product name (`string`, **required**) + - `description` — product description (`string`, **required**) + - `price` — product price (`number`, **required**, must be > 0) + - `category` — product category (`string`, **required**, e.g. `"electronics"`, `"books"`, `"clothing"`) + - `inStock` — whether the product is in stock (`boolean`, **required**) + +3. Requests to non-existing endpoints (e.g. `some-non/existing/resource`) should be handled (server should answer with `status code` **404** and corresponding human-friendly message) + +4. Errors on the server side that occur during the processing of a request should be handled and processed correctly (server should answer with `status code` **500** and corresponding human-friendly message) + +5. Value of `port` on which the application is running should be stored in `.env` file + +- **Important:** The `.env` file itself should not be committed to the repository as it is considered a security bad practice. Please consider adding the `.env` file to `.gitignore`. + + - Instead, create and commit an `.env.example` file that contains a list of required environment variables with reasonable default values + - Example of `.env.example` contents: + + ``` + PORT=4000 + ``` + +6. There should be 2 modes of running the application (**development** and **production**): + - The application is run in development mode using `nodemon` or `ts-node-dev` (there is an `npm` script `start:dev`) + - The application is run in production mode (there is an `npm` script `start:prod` that starts the build process and then runs the bundled file) + +7. 
There could be some tests for the API (not less than **3** scenarios). Example of a test scenario: + 1. Get all records with a `GET` `api/products` request (an empty array is expected) + 2. A new object is created by a `POST` `api/products` request (a response containing the newly created record is expected) + 3. With a `GET` `api/products/{productId}` request, we try to get the created record by its `id` (the created record is expected) + 4. We try to update the created record with a `PUT` `api/products/{productId}` request (a response is expected containing an updated object with the same `id`) + 5. With a `DELETE` `api/products/{productId}` request, we delete the created object by `id` (confirmation of successful deletion is expected) + 6. With a `GET` `api/products/{productId}` request, we are trying to get the deleted object by `id` (expected answer is that there is no such object) + +8. There could be implemented horizontal scaling for the application. There should be an `npm` script `start:multi` that starts multiple instances of your application using the Node.js `Cluster` API (equal to the number of available parallelism - 1 on the host machine, each listening on port PORT + n) with a **load balancer** that distributes requests across them (using Round-robin algorithm). For example: available parallelism is 4, `PORT` is 4000. On run `npm run start:multi` it works the following way: + +- On `localhost:4000/api` the load balancer is listening for requests +- On `localhost:4001/api`, `localhost:4002/api`, `localhost:4003/api` workers are listening for requests from the load balancer +- When user sends a request to `localhost:4000/api`, the load balancer sends this request to `localhost:4001/api`, the next user request is sent to `localhost:4002/api` and so on +- After sending a request to `localhost:4003/api`, the load balancer starts from the first worker again (sends request to `localhost:4001/api`) +- State of the db should be consistent between different workers, for example: + 1. First `POST` request addressed to `localhost:4001/api` creates a product + 2. Second `GET` request addressed to `localhost:4002/api` should return the created product + 3. Third `DELETE` request addressed to `localhost:4003/api` deletes the created product + 4. 
Fourth `GET` request addressed to `localhost:4001/api` should return **404** status code for the created product + +## Hints + +- To generate all entities `id`s use [Node.js randomUUID](https://nodejs.org/dist/latest-v24.x/docs/api/crypto.html#cryptorandomuuidoptions) diff --git a/assignments-v2/02-crud-api/score.md b/assignments-v2/02-crud-api/score.md new file mode 100644 index 0000000..1cfe6b9 --- /dev/null +++ b/assignments-v2/02-crud-api/score.md @@ -0,0 +1,34 @@ +# Scoring: CRUD API + +## Basic Scope + +- **+10** The repository with the application contains a `Readme.md` file containing detailed instructions for installing, running and using the application +- **+10** **GET** `api/products` implemented properly +- **+10** **GET** `api/products/{productId}` implemented properly +- **+10** **POST** `api/products` implemented properly +- **+10** **PUT** `api/products/{productId}` implemented properly +- **+10** **DELETE** `api/products/{productId}` implemented properly +- **+6** Products are stored in the form described in the technical requirements +- **+6** Value of `port` on which application is running is stored in `.env` file + +## Advanced Scope + +- **+30** Task implemented in TypeScript +- **+10** Processing of requests to non-existing endpoints implemented properly +- **+10** Errors on the server side that occur during the processing of a request are handled and processed properly +- **+10** Development mode: `npm` script `start:dev` implemented properly +- **+10** Production mode: `npm` script `start:prod` implemented properly + +## Hacker Scope + +- **+30** There are tests for API (not less than **3** scenarios) +- **+50** There is horizontal scaling for the application with a **load balancer** + +## Forfeits + +- **-95% of total task score** Any external tools except `nodemon`, `dotenv`, `cross-env`, `typescript`, `ts-node`, `ts-node-dev`, `tsx`, linter and its plugins, bundler and its plugins, formatter and its plugins, `uuid`, `@types/*` as well as libraries used for testing +- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) +- **-5** The `.env` file is present in the repository (should be `.env.example` instead) diff --git a/assignments-v2/03-fastify-rest-api/assignment.md b/assignments-v2/03-fastify-rest-api/assignment.md new file mode 100644 index 0000000..bd61a4d --- /dev/null +++ b/assignments-v2/03-fastify-rest-api/assignment.md @@ -0,0 +1,176 @@ +# Assignment: Fastify REST API + +## Description + +Your task is to create a REST API for a **Knowledge Hub** platform using the Fastify framework. The Knowledge Hub allows users to create, edit, and organize articles by categories and tags. + +NB! You must create a new repository from the starter template for this task. Its name must be `nodejs2025Q2-knowledge-hub`, i.e. full link to the repository must be `https://github.com/%your-github-id%/nodejs2025Q2-knowledge-hub`. 
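Before diving into the resource definitions below, it may help to picture the overall shape of the service. The following is a minimal, non-authoritative sketch of a Fastify bootstrap in TypeScript with one plugin per resource; the file names, plugin module paths, and the use of `dotenv` are illustrative assumptions, not part of the starter template.

```typescript
// src/server.ts — minimal bootstrap sketch (paths and plugin names are hypothetical)
import 'dotenv/config';
import Fastify from 'fastify';

// Each resource lives in its own plugin (assumed layout; see the plugin requirement in the details below).
import userPlugin from './plugins/user';
import articlePlugin from './plugins/article';

const app = Fastify({ logger: true });

// Register resource plugins under their route prefixes.
app.register(userPlugin, { prefix: '/user' });
app.register(articlePlugin, { prefix: '/article' });

// PORT comes from .env, defaulting to 4000 as the assignment requires.
const port = Number(process.env.PORT ?? 4000);

app.listen({ port, host: '0.0.0.0' }).catch((err) => {
  app.log.error(err);
  process.exit(1);
});
```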
+ +**Create an application that operates with the following resources:** + +- `User` (with attributes): + ```typescript + interface User { + id: string; // uuid v4 + login: string; + password: string; + role: 'admin' | 'editor' | 'viewer'; + createdAt: number; // timestamp of creation + updatedAt: number; // timestamp of last update + } + ``` + +- `Article` (with attributes): + ```typescript + interface Article { + id: string; // uuid v4 + title: string; + content: string; + status: 'draft' | 'published' | 'archived'; + authorId: string | null; // refers to User + categoryId: string | null; // refers to Category + tags: string[]; // array of tag names + createdAt: number; // timestamp of creation + updatedAt: number; // timestamp of last update + } + ``` + +- `Category` (with attributes): + ```typescript + interface Category { + id: string; // uuid v4 + name: string; + description: string; + } + ``` + +- `Comment` (with attributes): + ```typescript + interface Comment { + id: string; // uuid v4 + content: string; + articleId: string; // refers to Article + authorId: string | null; // refers to User + createdAt: number; // timestamp of creation + } + ``` + +**Details:** + +1. For `Users`, `Articles`, `Categories`, and `Comments`, REST endpoints with separate router paths should be created: + + * `Users` (`/user` route) + * `GET /user` — get all users + - Server should answer with `status code` **200** and all user records + * `GET /user/:id` — get single user by id + - Server should answer with `status code` **200** and the record with `id === userId` if it exists + - Server should answer with `status code` **400** and corresponding message if `userId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === userId` doesn't exist + * `POST /user` — create user + `CreateUserDto`: + ```typescript + interface CreateUserDto { + login: string; + password: string; + role?: 'admin' | 'editor' | 'viewer'; // defaults to 'viewer' + } + ``` + - Server should answer with `status code` **201** and newly created record if request is valid + - Server should answer with `status code` **400** and corresponding message if request `body` does not contain **required** fields + * `PUT /user/:id` — update user's password + `UpdatePasswordDto`: + ```typescript + interface UpdatePasswordDto { + oldPassword: string; + newPassword: string; + } + ``` + - Server should answer with `status code` **200** and updated record if request is valid + - Server should answer with `status code` **400** and corresponding message if `userId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === userId` doesn't exist + - Server should answer with `status code` **403** and corresponding message if `oldPassword` is wrong + * `DELETE /user/:id` — delete user + - Server should answer with `status code` **204** if the record is found and deleted + - Server should answer with `status code` **400** and corresponding message if `userId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === userId` doesn't exist + + * `Articles` (`/article` route) + * `GET /article` — get all articles + - Server should answer with `status code` **200** and all article records + - Supports optional query parameters for filtering: `status`, `categoryId`, `tag` (e.g. 
`GET /article?status=published&tag=nodejs`) + * `GET /article/:id` — get single article by id + - Server should answer with `status code` **200** and the record with `id === articleId` if it exists + - Server should answer with `status code` **400** and corresponding message if `articleId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === articleId` doesn't exist + * `POST /article` — create new article + - Server should answer with `status code` **201** and newly created record if request is valid + - Server should answer with `status code` **400** and corresponding message if request `body` does not contain **required** fields (`title`, `content`) + * `PUT /article/:id` — update article info + - Server should answer with `status code` **200** and updated record if request is valid + - Server should answer with `status code` **400** and corresponding message if `articleId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === articleId` doesn't exist + * `DELETE /article/:id` — delete article + - Server should answer with `status code` **204** if the record is found and deleted + - Server should answer with `status code` **400** and corresponding message if `articleId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === articleId` doesn't exist + + * `Categories` (`/category` route) + * `GET /category` — get all categories + - Server should answer with `status code` **200** and all category records + * `GET /category/:id` — get single category by id + - Server should answer with `status code` **200** and the record with `id === categoryId` if it exists + - Server should answer with `status code` **400** and corresponding message if `categoryId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === categoryId` doesn't exist + * `POST /category` — create new category + - Server should answer with `status code` **201** and newly created record if request is valid + - Server should answer with `status code` **400** and corresponding message if request `body` does not contain **required** fields (`name`, `description`) + * `PUT /category/:id` — update category info + - Server should answer with `status code` **200** and updated record if request is valid + - Server should answer with `status code` **400** and corresponding message if `categoryId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === categoryId` doesn't exist + * `DELETE /category/:id` — delete category + - Server should answer with `status code` **204** if the record is found and deleted + - Server should answer with `status code` **400** and corresponding message if `categoryId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === categoryId` doesn't exist + + * `Comments` (`/comment` route) + * `GET /comment?articleId={articleId}` — get all comments for an article + - Server should answer with `status code` **200** and all comment records for the given article + - `articleId` query parameter is **required** + * `POST /comment` — create new comment + - Body must contain `content` and `articleId` (both **required**) + - Server should answer with `status code` **201** and newly created record if request 
is valid + - Server should answer with `status code` **400** if required fields are missing + - Server should answer with `status code` **422** if the referenced `articleId` doesn't exist + * `DELETE /comment/:id` — delete comment + - Server should answer with `status code` **204** if the record is found and deleted + - Server should answer with `status code` **400** and corresponding message if `commentId` is invalid (not `uuid`) + - Server should answer with `status code` **404** and corresponding message if record with `id === commentId` doesn't exist + +2. For now, these endpoints should operate only with **in-memory** (hardcoded) data. In the next tasks we will use a database for it. You should organize your modules with the consideration that the data source will be changed soon. + +3. An `application/json` format should be used for request and response body. + +4. Do not put everything in one file — use Fastify plugins to organize code by domain (user plugin, article plugin, category plugin, comment plugin). Each plugin should be registered with a proper prefix. + +5. `User`'s password should be excluded from server response. + +6. When you delete a `User`, their `authorId` in corresponding `Articles` should become `null`, and their `Comments` should be deleted. When you delete a `Category`, the `categoryId` in corresponding `Articles` should become `null`. When you delete an `Article`, its `Comments` should be deleted. + +7. All request bodies should be validated using **Fastify JSON Schema validation** (the `schema` property on route options). + +8. Use **Fastify hooks** (`onRequest`, `preHandler`, etc.) for cross-cutting concerns such as request logging. + +9. Integrate `@fastify/swagger` and `@fastify/swagger-ui` to provide OpenAPI documentation accessible at `/doc`. + +10. To run the service, `npm start` command should be used. + +11. Service should listen on PORT `4000` by default, PORT value is stored in `.env` file. + +12. Incoming requests should be validated. + +**Hints:** + +* To generate all entities `id`s use [Node.js randomUUID](https://nodejs.org/dist/latest-v24.x/docs/api/crypto.html#cryptorandomuuidoptions). +* Use [Fastify plugins](https://fastify.dev/docs/latest/Reference/Plugins/) to modularize your application. +* Use [Fastify schema validation](https://fastify.dev/docs/latest/Reference/Validation-and-Serialization/) for request/response validation. 
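To make the plugin and schema-validation hints above concrete, here is a small illustrative sketch (not a prescribed implementation) of a category plugin whose create route is validated with Fastify JSON Schema. The in-memory store and the remaining routes are omitted, and all names here are assumptions.

```typescript
// category.plugin.ts — hypothetical example
import { randomUUID } from 'node:crypto';
import type { FastifyPluginAsync } from 'fastify';

// JSON Schema for POST /category — both fields are required by the assignment.
const createCategorySchema = {
  body: {
    type: 'object',
    required: ['name', 'description'],
    properties: {
      name: { type: 'string', minLength: 1 },
      description: { type: 'string' },
    },
    additionalProperties: false,
  },
};

const categoryPlugin: FastifyPluginAsync = async (app) => {
  app.post('/', { schema: createCategorySchema }, async (request, reply) => {
    // The body has already passed JSON Schema validation at this point.
    const { name, description } = request.body as { name: string; description: string };
    const category = { id: randomUUID(), name, description };
    // Saving the record to the in-memory store is omitted in this sketch.
    return reply.code(201).send(category);
  });
};

export default categoryPlugin;
```

Registering the plugin with `app.register(categoryPlugin, { prefix: '/category' })` would expose the route as `POST /category`, and invalid bodies would be rejected with **400** by Fastify's built-in validation before the handler runs.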
diff --git a/assignments-v2/03-fastify-rest-api/score.md b/assignments-v2/03-fastify-rest-api/score.md new file mode 100644 index 0000000..70423b9 --- /dev/null +++ b/assignments-v2/03-fastify-rest-api/score.md @@ -0,0 +1,33 @@ +# Scoring: Fastify REST API + +## Basic Scope + +- **+10** The repository with the application contains a `Readme.md` file containing detailed instructions for installing, running and using the application +- **+10** The application code that works with `Users` is organized as a Fastify plugin with proper separation of concerns (route handlers, business logic) +- **+10** The application code that works with `Articles` is organized as a Fastify plugin with proper separation of concerns +- **+10** The application code that works with `Categories` is organized as a Fastify plugin with proper separation of concerns +- **+10** The application code that works with `Comments` is organized as a Fastify plugin with proper separation of concerns +- **+10** For each successfully passed test + +## Advanced Scope + +- **+15** JSON Schema validation is used for all request bodies and responses (via Fastify `schema` option) +- **+10** Article filtering by `status`, `categoryId`, and `tag` query parameters works correctly +- **+15** OpenAPI documentation is generated via `@fastify/swagger` and accessible at `/doc` +- **+10** Cascading behavior on delete is implemented correctly (User delete → articles nullified + comments removed; Category delete → articles nullified; Article delete → comments removed) + +## Hacker Scope + +- **+10** Pagination is implemented for list endpoints (query params `page` and `limit`, response includes `total`, `page`, `limit`, `data`) +- **+10** Sorting is implemented for list endpoints (query params `sortBy` and `order`) +- **+10** Additional automated tests are written + +## Forfeits + +- **-670** Changes in tests +- **-30% of max task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** No separate development branch +- **-20** No Pull Request +- **-10** Pull Request description is incorrect +- **-10** Every lint error after `npm run lint` using local config (errors, not warnings) +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) diff --git a/assignments-v2/04-database-prisma/assignment.md b/assignments-v2/04-database-prisma/assignment.md new file mode 100644 index 0000000..b3f97f3 --- /dev/null +++ b/assignments-v2/04-database-prisma/assignment.md @@ -0,0 +1,96 @@ +# Assignment: Database & Prisma ORM + +## Description + +Your task is to replace the in-memory data storage in the Knowledge Hub API with a real **PostgreSQL** database, using **Prisma** as the ORM to communicate with it. + +This is a continuation of the Fastify REST API assignment (Week 3). You will work in the same `nodejs2025Q2-knowledge-hub` repository. + +## Technical requirements + +- Task should be implemented in TypeScript +- Use 24.x.x version (24.10.0 or upper) of Node.js +- PostgreSQL database should run inside a Docker container + +## Implementation details + +1. 
**Prisma Schema** + + Create a Prisma schema (`prisma/schema.prisma`) that defines the following models: + + - `User` — id, login, password, role (enum: ADMIN, EDITOR, VIEWER), createdAt, updatedAt + - `Article` — id, title, content, status (enum: DRAFT, PUBLISHED, ARCHIVED), createdAt, updatedAt + - `Category` — id, name, description + - `Comment` — id, content, createdAt + - `Tag` — id, name (unique) + +2. **Relations** + + - `User` has many `Articles` (one-to-many via `authorId`) + - `User` has many `Comments` (one-to-many via `authorId`) + - `Category` has many `Articles` (one-to-many via `categoryId`) + - `Article` has many `Comments` (one-to-many via `articleId`) + - `Article` has many `Tags` (many-to-many relation through an implicit or explicit join table) + +3. **Cascading behavior** + + Configure Prisma `onDelete` rules: + - When a `User` is deleted: their `Articles` should have `authorId` set to `null` (`SetNull`), their `Comments` should be deleted (`Cascade`) + - When a `Category` is deleted: its `Articles` should have `categoryId` set to `null` (`SetNull`) + - When an `Article` is deleted: its `Comments` should be deleted (`Cascade`), its tag relations should be removed + +4. **Migrations** + + - Use `npx prisma migrate dev` to create and apply migrations + - Migration files should be committed to the repository + +5. **Seed Script** + + - Implement a seed script (`prisma/seed.ts`) that populates the database with initial data: + - At least 2 users (one admin, one editor) + - At least 3 categories + - At least 5 tags + - At least 5 articles with different statuses, categories, and tags + - At least 3 comments + - Seed script should be runnable via `npx prisma db seed` + +6. **Database Connection** + + - Connection string should be stored in `.env` file as `DATABASE_URL` + - Example: `DATABASE_URL="postgresql://user:password@localhost:5432/knowledge_hub?schema=public"` + - The `.env` file should not be committed (add to `.gitignore`), but `.env.example` should be present + +7. **Docker for PostgreSQL** + + - Provide a `docker-compose.yml` (or a section in existing one) to run PostgreSQL: + ```yaml + services: + db: + image: postgres:16-alpine + environment: + POSTGRES_USER: user + POSTGRES_PASSWORD: password + POSTGRES_DB: knowledge_hub + ports: + - "5432:5432" + volumes: + - pgdata:/var/lib/postgresql/data + volumes: + pgdata: + ``` + +8. **Replace In-Memory Storage** + + - All in-memory operations from the previous assignment should be replaced with Prisma Client queries + - The `tags` field on articles should now work through the many-to-many `Tag` model: when creating/updating an article, tags should be connected or created (connectOrCreate pattern) + - Article filtering (`GET /article?status=...&categoryId=...&tag=...`) should use Prisma `where` clauses + +9. 
All existing endpoints and their behavior should remain the same (same routes, same status codes, same response formats) + +## Hints + +- Use `npx prisma generate` after schema changes to regenerate the Prisma Client +- Use `npx prisma studio` for a database GUI +- Use `npx prisma migrate reset` to reset the database (this triggers seeding) +- Consider using Prisma's `include` and `select` to optimize queries and avoid returning unnecessary data +- Use Prisma transactions (`prisma.$transaction`) for operations that modify multiple entities diff --git a/assignments-v2/04-database-prisma/score.md b/assignments-v2/04-database-prisma/score.md new file mode 100644 index 0000000..cabd2bf --- /dev/null +++ b/assignments-v2/04-database-prisma/score.md @@ -0,0 +1,35 @@ +# Scoring: Database & Prisma ORM + +## Basic Scope + +- **+10** Prisma schema is defined with all required models (`User`, `Article`, `Category`, `Comment`, `Tag`) +- **+10** All relations are correctly defined (one-to-many for User→Article, User→Comment, Category→Article, Article→Comment) +- **+10** Many-to-many relation between `Article` and `Tag` is implemented +- **+10** Prisma migrations are created and committed to the repository +- **+10** PostgreSQL runs inside a Docker container via `docker-compose.yml` +- **+10** `DATABASE_URL` is stored in `.env` (`.env.example` is committed) +- **+10** `GET /user` works with real database +- **+10** `GET /article` works with real database (including filtering by status, categoryId, tag) +- **+5** `GET /category` works with real database +- **+5** `GET /comment` works with real database + +## Advanced Scope + +- **+10** Seed script is implemented and runnable via `npx prisma db seed` +- **+10** Cascading delete/nullify rules are correctly configured via Prisma `onDelete` +- **+10** Prisma transactions are used for complex operations (e.g., deleting a user and updating their articles) +- **+10** Article tags use `connectOrCreate` pattern when creating/updating articles + +## Hacker Scope + +- **+10** Indexes are added for frequently queried fields (`Article.status`, `Article.categoryId`, `Tag.name`) +- **+5** Connection pooling is configured +- **+5** N+1 problem is avoided by using Prisma `include` appropriately + +## Forfeits + +- **-30% of max task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) +- **-5** The `.env` file is present in the repository (should be `.env.example` instead) diff --git a/assignments-v2/05-auth-jwt/assignment.md b/assignments-v2/05-auth-jwt/assignment.md new file mode 100644 index 0000000..fbf6c91 --- /dev/null +++ b/assignments-v2/05-auth-jwt/assignment.md @@ -0,0 +1,76 @@ +# Assignment: Authentication & Authorization + +## Description + +Your task is to implement Authentication and Authorization with JWT (Access and Refresh tokens) for the Knowledge Hub API. + +This is a continuation of the Database & Prisma assignment (Week 4). You will work in the same `nodejs2025Q2-knowledge-hub` repository. + +## Technical requirements + +- Task should be implemented in TypeScript +- Use 24.x.x version (24.10.0 or upper) of Node.js + +## Implementation details + +1. 
**Endpoints** + +- `Signup` (`/auth/signup` route) + - `POST /auth/signup` — send `login` and `password` to create a new `user` + - Server should answer with `status code` **201** and corresponding message if dto is valid + - Server should answer with `status code` **400** and corresponding message if dto is invalid (no `login` or `password`, or they are not `strings`, or `login` is already taken) + +- `Login` (`/auth/login` route) + - `POST /auth/login` — send `login` and `password` to get Access token and Refresh token + - Server should answer with `status code` **200** and tokens in the response body: `{ accessToken: string, refreshToken: string }` + - Server should answer with `status code` **400** and corresponding message if dto is invalid (no `login` or `password`, or they are not `strings`) + - Server should answer with `status code` **403** and corresponding message if authentication failed (no user with such `login`, `password` doesn't match actual one, etc.) + +- `Refresh` (`/auth/refresh` route) + - `POST /auth/refresh` — send refresh token in body as `{ refreshToken }` to get a new pair of Access token and Refresh token + - Server should answer with `status code` **200** and new tokens in body if dto is valid + - Server should answer with `status code` **401** and corresponding message if dto is invalid (no `refreshToken` in body) + - Server should answer with `status code` **403** and corresponding message if authentication failed (Refresh token is invalid or expired) + +2. Once `POST /auth/signup` accepts `password` property, it is replaced with a **hash** (using [bcrypt](https://www.npmjs.com/package/bcrypt) or [bcryptjs](https://www.npmjs.com/package/bcryptjs) package) for password encryption. No raw passwords should be stored in the database. + + NB! Password should remain hashed after any operation with the service. + +3. **JWT Access token** should contain `userId`, `login`, and `role` in its **payload** and has a short expiration time (e.g. 15 minutes). **JWT Refresh token** should have a longer expiration time (e.g. 7 days). + +4. The **JWT Access token** should be added in the HTTP `Authorization` header to all requests that require authentication. Proxy all the requests (except `/auth/signup`, `/auth/login`, `/auth/refresh`, `/doc`, `/`) and check that the HTTP `Authorization` header has the correct value of the JWT Access token. + + HTTP authentication must follow the `Bearer` scheme: + ``` + Authorization: Bearer + ``` + +5. In case the HTTP `Authorization` header in the request is absent or invalid or doesn't follow the `Bearer` scheme or the Access token has expired, further route handler execution should be stopped and lead to a response with HTTP **401** code and corresponding error message. + +6. **Role-Based Access Control (RBAC)**: + + - `viewer` — can only perform `GET` requests (read-only access to all resources) + - `editor` — can perform `GET` requests and can `POST` (create) / `PUT` (update) their own articles and comments. Cannot delete other users' content or manage categories. + - `admin` — full access to all operations on all resources + + If a user attempts an operation they are not authorized for, server should respond with `status code` **403** and a corresponding message. + +7. Secrets used for signing the tokens should be stored in `.env` file: + ``` + JWT_SECRET=your_access_token_secret + JWT_REFRESH_SECRET=your_refresh_token_secret + JWT_ACCESS_TTL=15m + JWT_REFRESH_TTL=7d + ``` + +8. 
New users created via `/auth/signup` should have the `viewer` role by default. Only admins can change user roles. + +### `bcrypt` installation issues: + +#### If you see an error that starts with: + +```console +gyp ERR! stack Error: "pre" versions of node cannot be installed, use the --nodedir flag instead +``` + +Please check [compatibility between Node.JS and Bcrypt versions](https://www.npmjs.com/package/bcrypt#version-compatibility). Alternatively, use `bcryptjs` which is a pure JavaScript implementation and doesn't require native compilation. diff --git a/assignments-v2/05-auth-jwt/score.md b/assignments-v2/05-auth-jwt/score.md new file mode 100644 index 0000000..c1da711 --- /dev/null +++ b/assignments-v2/05-auth-jwt/score.md @@ -0,0 +1,28 @@ +# Scoring: Authentication & Authorization + +## Basic Scope + +- **+20** Route `POST /auth/signup` implemented correctly (creates user, validates input, returns 201/400) +- **+20** Route `POST /auth/login` implemented correctly (validates credentials, returns tokens, returns 200/400/403) +- **+10** User `password` is saved into the database as a hash (bcrypt) +- **+20** Access Token is implemented, JWT payload contains `userId`, `login`, and `role`, secret key is saved in `.env` +- **+30** Authentication is required for access to all routes except `/auth/signup`, `/auth/login`, `/auth/refresh`, `/doc`, and `/`. Implemented via Fastify `onRequest` hook or `preHandler`. + +## Advanced Scope + +- **+25** Route `POST /auth/refresh` implemented correctly (validates refresh token, returns new token pair, returns 200/401/403) +- **+15** Correct handling of expired/invalid tokens (returns 401 with descriptive message) +- **+10** RBAC is implemented (viewer: read-only, editor: own content, admin: full access). Returns 403 for unauthorized operations. + +## Hacker Scope + +- **+10** Rate limiting for auth endpoints (`/auth/signup`, `/auth/login`) — limits requests per IP per time window +- **+10** Logout endpoint (`POST /auth/logout`) that invalidates the refresh token (e.g., via a blacklist or by deleting from DB) + +## Forfeits + +- **-10** for each failing test with `npm run test:auth` (this forfeit applied once if coincides with same forfeit in different assignments) +- **-30% of max task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) diff --git a/assignments-v2/06a-testing/assignment.md b/assignments-v2/06a-testing/assignment.md new file mode 100644 index 0000000..b162390 --- /dev/null +++ b/assignments-v2/06a-testing/assignment.md @@ -0,0 +1,70 @@ +# Assignment: Testing + +## Description + +Your task is to write unit and integration tests for the Knowledge Hub API using [Vitest](https://vitest.dev/) testing framework. + +This is a continuation of the Authentication & Authorization assignment (Week 5). You will work in the same `nodejs2025Q2-knowledge-hub` repository. + +## Technical requirements + +- Use [Vitest](https://vitest.dev/) as the testing framework +- Use Fastify's built-in [`inject()`](https://fastify.dev/docs/latest/Guides/Testing/) method (powered by `light-my-request`) for integration tests +- Use 24.x.x version (24.10.0 or upper) of Node.js + +## Implementation details + +1. 
**Unit Tests** + + Write unit tests for the service/business logic layer: + - User service: validation of signup data, password hashing verification, role assignment + - Article service: validation of article creation data, status transitions (e.g. draft → published → archived), tag management logic + - Auth logic: JWT token generation, token verification, RBAC permission checks + +2. **Integration Tests** + + Write integration tests for API endpoints using Fastify's `inject()` method. Implement **at least 3** complete test scenarios: + + **Scenario 1: Full Article lifecycle** + 1. Signup a new user via `POST /auth/signup` + 2. Login via `POST /auth/login` to get an access token + 3. Create a category via `POST /category` + 4. Create an article with tags via `POST /article` (expect **201** with the created record) + 5. Get the created article via `GET /article/:id` (expect the created record) + 6. Update the article via `PUT /article/:id` (expect the updated record with same `id`) + 7. Delete the article via `DELETE /article/:id` (expect **204**) + 8. Try to get the deleted article via `GET /article/:id` (expect **404**) + + **Scenario 2: Authentication & Authorization flow** + 1. Try to access `GET /article` without a token (expect **401**) + 2. Signup a new user via `POST /auth/signup` + 3. Login via `POST /auth/login` to get tokens + 4. Access `GET /article` with a valid token (expect **200**) + 5. Refresh tokens via `POST /auth/refresh` (expect new token pair) + 6. Access `GET /article` with the new access token (expect **200**) + 7. Try to access `GET /article` with the old (now invalid) refresh token for refresh (expect **403**) + + **Scenario 3: Cascading operations & filtering** + 1. Login as admin + 2. Create a category via `POST /category` + 3. Create multiple articles with different statuses and tags + 4. Filter articles by status via `GET /article?status=published` (expect only published articles) + 5. Filter articles by tag via `GET /article?tag=nodejs` (expect only articles with that tag) + 6. Delete the category via `DELETE /category/:id` (expect **204**) + 7. Get the articles that were in that category (expect `categoryId` to be `null`) + +3. **Mocking** + + - For unit tests, mock database calls (Prisma Client) to isolate business logic + - Use Vitest's built-in mocking capabilities (`vi.mock`, `vi.fn`, `vi.spyOn`) + +4. **NPM Scripts** + + - `npm run test` — runs all tests + - `npm run test:coverage` — runs all tests with coverage report + - Target coverage: **> 60%** for lines and branches + +5. 
**Test Configuration** + + - Create a `vitest.config.ts` (or configure in `vite.config.ts`) with proper settings + - Tests should be placed in `__tests__/` directories or in files with `.test.ts` / `.spec.ts` suffix diff --git a/assignments-v2/06a-testing/score.md b/assignments-v2/06a-testing/score.md new file mode 100644 index 0000000..b35bd7f --- /dev/null +++ b/assignments-v2/06a-testing/score.md @@ -0,0 +1,23 @@ +# Scoring: Testing + +## Basic Scope + +- **+10** Unit tests for user service are implemented (signup validation, role assignment) +- **+10** Unit tests for article service are implemented (creation validation, status transitions) +- **+10** Integration test Scenario 1 (Full Article lifecycle) is implemented and passes +- **+10** Integration test Scenario 2 (Authentication & Authorization flow) is implemented and passes +- **+10** Integration test Scenario 3 (Cascading operations & filtering) is implemented and passes +- **+10** NPM scripts `npm run test` and `npm run test:coverage` are configured and work + +## Advanced Scope + +- **+15** Test coverage is above 60% (lines and branches) +- **+15** Prisma Client is properly mocked in unit tests (database calls are isolated) +- **+10** Edge cases are tested (invalid UUIDs, missing required fields, duplicate logins, expired tokens, unauthorized role operations) + +## Forfeits + +- **-30% of max task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) diff --git a/assignments-v2/06b-logging-errors/assignment.md b/assignments-v2/06b-logging-errors/assignment.md new file mode 100644 index 0000000..fbb0645 --- /dev/null +++ b/assignments-v2/06b-logging-errors/assignment.md @@ -0,0 +1,80 @@ +# Assignment: Logging & Error Handling + +## Description + +Your task is to implement production-ready logging and error handling for the Knowledge Hub API. + +This is a continuation of the previous assignments. You will work in the same `nodejs2025Q2-knowledge-hub` repository. + +## Technical requirements + +- Only Fastify's built-in Pino logger should be used for logging — no additional logging libraries are allowed +- Use 24.x.x version (24.10.0 or upper) of Node.js + +## Implementation details + +1. **Pino Logger Configuration** + + Configure Fastify's built-in Pino logger with the following settings: + - Log level should be configurable via the `LOG_LEVEL` environment variable (default: `info`) + - Supported levels: `trace`, `debug`, `info`, `warn`, `error`, `fatal` + - In development mode, use `pino-pretty` for human-readable output + - In production mode, use structured JSON format + +2. **Request/Response Logging** + + Log all incoming requests and outgoing responses: + - Incoming requests: HTTP method, URL, query parameters, request body + - Outgoing responses: status code, response time + - **Important**: Sanitize sensitive data in logs — passwords and tokens must never appear in log output. If the request body contains a `password` field, it should be replaced with `"[REDACTED]"` in the log. + +3. 
**Custom Error Handler** + + Implement a custom error handler using Fastify's `setErrorHandler`: + - Catch all unhandled errors during request processing + - Log the error with full stack trace at `error` level + - Return a proper HTTP response with the appropriate status code and a JSON body: + ```json + { + "statusCode": 500, + "error": "Internal Server Error", + "message": "An unexpected error occurred" + } + ``` + - For known errors, return the correct status code (400, 401, 403, 404, etc.) + +4. **Custom Error Classes** + + Create custom error classes that extend the base `Error` class: + - `NotFoundError` — results in HTTP **404** response + - `ValidationError` — results in HTTP **400** response + - `UnauthorizedError` — results in HTTP **401** response + - `ForbiddenError` — results in HTTP **403** response + + Each custom error class should have: + - A `statusCode` property + - A descriptive `message` + + The error handler should check if the thrown error is an instance of a custom error class and use its `statusCode`. Otherwise, default to **500**. + +5. **Process Error Handling** + + Add listeners for unhandled errors at the process level: + - `uncaughtException` — log the error at `fatal` level and perform graceful shutdown (close the server, close database connections, then exit with code 1) + - `unhandledRejection` — log the error at `error` level and perform graceful shutdown + +6. **Log File Rotation** + + - Write logs to a file in addition to (or instead of) stdout + - Implement log file rotation based on file size + - The maximum file size should be configurable via the `LOG_MAX_FILE_SIZE` environment variable (in kilobytes, default: `1024` = 1MB) + - When the log file exceeds the maximum size, it should be renamed with a timestamp suffix (e.g., `app.log` → `app-2025-06-15T10-30-00.log`) and a new `app.log` should be created + - Use `pino.destination` or `pino.transport` with a file target + +7. 
**Environment Variables** + + Add the following to `.env.example`: + ``` + LOG_LEVEL=info + LOG_MAX_FILE_SIZE=1024 + ``` diff --git a/assignments-v2/06b-logging-errors/score.md b/assignments-v2/06b-logging-errors/score.md new file mode 100644 index 0000000..e430937 --- /dev/null +++ b/assignments-v2/06b-logging-errors/score.md @@ -0,0 +1,24 @@ +# Scoring: Logging & Error Handling + +## Basic Scope + +- **+10** Fastify Pino logger is configured with configurable log level via `LOG_LEVEL` env variable +- **+10** Incoming requests are logged (method, URL, query parameters, body) +- **+10** Outgoing responses are logged (status code, response time) +- **+10** Custom error handler is implemented via `setErrorHandler` — catches errors, logs them, returns proper HTTP responses +- **+10** Custom error classes are implemented (`NotFoundError`, `ValidationError`, `UnauthorizedError`, `ForbiddenError`) with `statusCode` property +- **+10** Custom error classes are used in route handlers instead of manually setting status codes + +## Advanced Scope + +- **+10** Log file rotation is implemented with configurable max file size via `LOG_MAX_FILE_SIZE` env variable +- **+10** Sensitive data (passwords, tokens) is sanitized in logs — replaced with `"[REDACTED]"` +- **+10** `uncaughtException` listener is added with fatal-level logging and graceful shutdown +- **+10** `unhandledRejection` listener is added with error-level logging and graceful shutdown (server close, DB disconnect, process exit) + +## Forfeits + +- **-30% of max task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) diff --git a/assignments-v2/07-docker/assignment.md b/assignments-v2/07-docker/assignment.md new file mode 100644 index 0000000..487e091 --- /dev/null +++ b/assignments-v2/07-docker/assignment.md @@ -0,0 +1,102 @@ +# Assignment: Containerization & Docker + +## Description + +Your task is to build and run the Knowledge Hub API as a multi-container application using Docker. + +This is a continuation of the previous assignments. You will work in the same `nodejs2025Q2-knowledge-hub` repository. + +## Prerequisites + +1. Install [Docker](https://docs.docker.com/engine/install/) +2. Create a [Docker Hub](https://hub.docker.com/) account + +## Implementation details + +1. **`.dockerignore`** + + Create a `.dockerignore` file and list all files/folders that should be ignored by Docker: + - `node_modules` + - `.git` + - `logs/` + - `*.log` + - `.env` + - `dist/` + - `.vscode/` + - `.idea/` + +2. **Application Dockerfile** + + Create a `Dockerfile` for building the application image: + + - Use a **multi-stage build**: + - **Stage 1 (build)**: Install all dependencies, compile TypeScript, generate Prisma Client + - **Stage 2 (production)**: Copy only the compiled output and production dependencies, use a minimal base image (`node:24-alpine`) + - Install only production dependencies in the final stage (`npm ci --omit=dev`) + - Set the `NODE_ENV=production` environment variable + - Expose the application port + - Use a non-root user for running the application + - The `CMD` should start the application + +3. 
**`docker-compose.yml`** + + Create a `docker-compose.yml` file that defines the following services: + + - **`app`** (Knowledge Hub API): + - Built from the application `Dockerfile` + - Depends on the `db` service + - Maps the application port (e.g. `4000:4000`) + - Uses environment variables from `.env` file + - Has a health check (e.g. `curl -f http://localhost:4000/ || exit 1`) + - Restart policy: `on-failure` + + - **`db`** (PostgreSQL): + - Uses the official `postgres:16-alpine` image + - Environment variables: `POSTGRES_USER`, `POSTGRES_PASSWORD`, `POSTGRES_DB` + - Persists data using a named volume + - Has a health check (`pg_isready`) + - Restart policy: `unless-stopped` + + - **Network**: Define a custom bridge network (e.g. `knowledge-hub-network`) for communication between services + + - **Volumes**: Define a named volume for PostgreSQL data persistence + +4. **Application Startup** + + The application should fully work when started with: + ```bash + docker-compose up --build + ``` + + This includes: + - Building the application image + - Starting the PostgreSQL database + - Running Prisma migrations (`npx prisma migrate deploy`) + - Starting the Fastify server + + Consider using a startup script or Docker Compose `entrypoint` to ensure the database is ready before running migrations and starting the app. + +5. **Security Scanning** + + Scan the built application image for vulnerabilities using one of: + - `docker scout cves <image_name>` (Docker Scout) + - [Trivy](https://github.com/aquasecurity/trivy) (`trivy image <image_name>`) + + Document the results (or absence of critical vulnerabilities) in the PR description. + +6. **Docker Hub** + + - Push the built application image to your personal Docker Hub repository + - Add the Docker Hub image link to the `Readme.md` + +## Hints + +- Use `depends_on` with `condition: service_healthy` to ensure proper startup order +- For running Prisma migrations before app start, you can use an entrypoint script: + ```bash + #!/bin/sh + npx prisma migrate deploy + node dist/main.js + ``` +- To check image size: `docker images <image_name>` +- To scan with Docker Scout: `docker scout cves <image_name>` diff --git a/assignments-v2/07-docker/score.md b/assignments-v2/07-docker/score.md new file mode 100644 index 0000000..ab9b215 --- /dev/null +++ b/assignments-v2/07-docker/score.md @@ -0,0 +1,30 @@ +# Scoring: Containerization & Docker + +## Basic Scope + +- **+10** `.dockerignore` file is created with appropriate entries +- **+20** `Dockerfile` for the application is created and builds successfully +- **+20** `docker-compose.yml` is created with `app` and `db` services +- **+10** Application and database communicate over a custom Docker network +- **+20** Application fully works when started via `docker-compose up --build` (API responds, database operations work) + +## Advanced Scope + +- **+15** Multi-stage build is used in the Dockerfile (separate build and production stages) +- **+10** Health checks are configured for both `app` and `db` services +- **+10** Named volume is used for PostgreSQL data persistence +- **+10** Custom bridge network is defined for inter-service communication +- **+5** Environment variables are properly configured via `.env` file + +## Hacker Scope + +- **+10** Security scan is performed and results are documented (no critical vulnerabilities or they are addressed) +- **+5** Final application image size is under 200MB +- **+5** Application image is pushed to Docker Hub and the link is in `Readme.md` + +## Forfeits + +- **-30% of max task score** Commits after 
deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) diff --git a/assignments-v2/08-websockets/assignment.md b/assignments-v2/08-websockets/assignment.md new file mode 100644 index 0000000..84a4bd9 --- /dev/null +++ b/assignments-v2/08-websockets/assignment.md @@ -0,0 +1,319 @@ +# Assignment: WebSocket — Live Quiz Game + +## Description + +Your task is to implement a backend for a real-time **Live Quiz Game** using WebSocket. A host creates a quiz with questions, players join the game and answer questions in real time. + +## Technical requirements + +- Task can be implemented in JavaScript or TypeScript +- Use 24.x.x version (24.10.0 or upper) of Node.js +- Only [ws](https://www.npmjs.com/package/ws), `cross-env`, `typescript`, `tsx`, `ts-node`, `ts-node-dev`, `nodemon`, `dotenv`, linter and formatter and their plugins, bundler and its plugins, `@types/*` and testing tools are allowed +- The program is started by npm script `start`: + ```bash + npm run start + ``` +- After starting, the program displays the WebSocket server address and port +- All requests and responses must be sent as JSON strings + +## Game Flow + +1. Players and hosts **register** or **login** with a name and password +2. A host **creates a game** by submitting a list of questions (each with 4 answer options, a correct answer index, and a time limit) +3. The server generates a **6-character room code** for the game +4. Players **join the game** using the room code +5. The host **starts the game** — the first question is broadcast to all players +6. Players **submit answers** within the time limit +7. When the timer expires (or all players have answered), the server broadcasts the **correct answer** and **updated scores** +8. The **next question** is sent, and the process repeats +9. 
After the last question, the server broadcasts the **final scoreboard** with ranks + +## Scoring Rules + +- Correct answer: `basePoints * (timeRemaining / timeLimit)` — faster answers earn more points (maximum 1000 points per question) +- Wrong answer or no answer: 0 points +- `basePoints` = 1000 + +## Data Structures (in-memory) + +```typescript +interface Player { + name: string; + index: number | string; // unique player id + score: number; +} + +interface Question { + text: string; + options: string[]; // exactly 4 options + correctIndex: number; // index of the correct option (0-3) + timeLimitSec: number; // time limit for the question in seconds +} + +interface Game { + id: string; + code: string; // 6-character alphanumeric code + hostId: number | string; + questions: Question[]; + players: Player[]; + currentQuestion: number; // index of current question (-1 before start) + status: 'waiting' | 'in_progress' | 'finished'; +} +``` + +## WebSocket Commands + +### Note: `data` value should be a **JSON string**, `id` should always be `0` + +### Player Commands + +- **Register / Login** + + `<-` (from client) + ```json + { + "type": "reg", + "data": { + "name": "", + "password": "" + }, + "id": 0 + } + ``` + `->` (from server, personal response) + ```json + { + "type": "reg", + "data": { + "name": "", + "index": "", + "error": false, + "errorText": "" + }, + "id": 0 + } + ``` + +### Game Management Commands + +- **Create Game** (host sends questions) + + `<-` + ```json + { + "type": "create_game", + "data": { + "questions": [ + { + "text": "", + "options": ["", "", "", ""], + "correctIndex": "", + "timeLimitSec": "" + } + ] + }, + "id": 0 + } + ``` + `->` (personal response to host) + ```json + { + "type": "game_created", + "data": { + "gameId": "", + "code": "" + }, + "id": 0 + } + ``` + +- **Join Game** (player joins by code) + + `<-` + ```json + { + "type": "join_game", + "data": { + "code": "" + }, + "id": 0 + } + ``` + `->` (personal response to joining player) + ```json + { + "type": "game_joined", + "data": { + "gameId": "" + }, + "id": 0 + } + ``` + `->` (broadcast to all players in the game) + ```json + { + "type": "player_joined", + "data": { + "playerName": "", + "playerCount": "" + }, + "id": 0 + } + ``` + +- **Update Players** (broadcast when player list changes) + + `->` (broadcast to all in game) + ```json + { + "type": "update_players", + "data": [ + { + "name": "", + "index": "", + "score": "" + } + ], + "id": 0 + } + ``` + +### Game Play Commands + +- **Start Game** (host only) + + `<-` + ```json + { + "type": "start_game", + "data": { + "gameId": "" + }, + "id": 0 + } + ``` + `->` (broadcast — first question, options only, no correct answer) + ```json + { + "type": "question", + "data": { + "questionNumber": "", + "totalQuestions": "", + "text": "", + "options": ["", "", "", ""], + "timeLimitSec": "" + }, + "id": 0 + } + ``` + +- **Submit Answer** (player) + + `<-` + ```json + { + "type": "answer", + "data": { + "gameId": "", + "questionIndex": "", + "answerIndex": "" + }, + "id": 0 + } + ``` + `->` (personal response) + ```json + { + "type": "answer_accepted", + "data": { + "questionIndex": "" + }, + "id": 0 + } + ``` + +- **Question Result** (broadcast after timer expires or all answered) + + `->` (broadcast) + ```json + { + "type": "question_result", + "data": { + "questionIndex": "", + "correctIndex": "", + "playerResults": [ + { + "name": "", + "answered": "", + "correct": "", + "pointsEarned": "", + "totalScore": "" + } + ] + }, + "id": 0 + } + ``` + +- **Next 
Question / Game Finished** + + If there are more questions: + `->` (broadcast — next question, same format as `question` above) + + If it was the last question: + `->` (broadcast) + ```json + { + "type": "game_finished", + "data": { + "scoreboard": [ + { + "name": "", + "score": "", + "rank": "" + } + ] + }, + "id": 0 + } + ``` + +## Command Sequence Diagram + +``` + Host Server Player1 Player2 + reg --> + <-- reg + <-- reg + reg --> + <-- reg + reg --> +create_game --> + <-- game_created + <-- join_game + <-- player_joined --> + <-- update_players --> + <-- join_game + <-- player_joined --> + <-- update_players --> +start_game --> + <-- question --> --> + <-- answer + <-- answer_accepted + <-- answer + answer_accepted --> + (timer expires) + <-- question_result --> --> + <-- question --> --> + ... + <-- game_finished --> --> +``` + +## Requirements Summary + +- WebSocket server using `ws` library +- In-memory storage for players and games +- Server-side timer for each question +- Score calculation based on speed of correct answer +- Proper handling of player disconnects (remove from game, update player list) +- All communication via JSON strings diff --git a/assignments-v2/08-websockets/score.md b/assignments-v2/08-websockets/score.md new file mode 100644 index 0000000..5f4f8f5 --- /dev/null +++ b/assignments-v2/08-websockets/score.md @@ -0,0 +1,31 @@ +# Scoring: WebSocket — Live Quiz Game + +## Basic Scope + +- **+10** WebSocket server starts and accepts connections +- **+15** Player registration/login (`reg`) works correctly — stores player data, handles duplicate names, returns error on wrong password +- **+15** Game creation (`create_game`) works correctly — validates questions, generates 6-character code, stores game +- **+15** Joining a game (`join_game`) works correctly — validates code, adds player, broadcasts `player_joined` and `update_players` +- **+15** Starting a game (`start_game`) works correctly — only host can start, sends first question to all players (without correct answer) +- **+15** Submitting answers (`answer`) works correctly — validates answer, stores it, sends `answer_accepted` +- **+15** Question results (`question_result`) are broadcast after timer expires — includes correct answer and per-player results + +## Advanced Scope + +- **+25** Full game flow works end-to-end: create → join → start → answer all questions → `game_finished` with final scoreboard and ranks +- **+15** Scoring with speed bonus is implemented correctly (`basePoints * timeRemaining / timeLimit`) +- **+15** Server-side timer is implemented — question results are sent after `timeLimitSec` even if not all players answered +- **+15** Disconnects are handled correctly — player is removed from the game, `update_players` is broadcast, game continues + +## Hacker Scope + +- **+15** Bot player is implemented for single play — automatically joins and answers questions (randomly or with a strategy) +- **+15** Pause/resume game functionality — host can pause the game (timer stops, no answers accepted) and resume it + +## Forfeits + +- **-95% of total task score** Any external tools/libraries beyond those listed in technical requirements +- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) 
+- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) diff --git a/assignments-v2/09-ai-llm-integration/assignment.md b/assignments-v2/09-ai-llm-integration/assignment.md new file mode 100644 index 0000000..61faadc --- /dev/null +++ b/assignments-v2/09-ai-llm-integration/assignment.md @@ -0,0 +1,204 @@ +# Assignment: AI/LLM Integration — AI Content Assistant + +## Description + +Your task is to create an **AI Content Assistant API** — a Fastify server that provides a set of useful AI-powered endpoints by making requests to an OpenAI-compatible LLM. + +The server acts as an intermediary between the client and the LLM, adding prompt engineering, validation, rate limiting, caching, and usage tracking on top of raw API calls. + +## Technical requirements + +- Task should be implemented in TypeScript +- Use 24.x.x version (24.10.0 or upper) of Node.js +- Use [Fastify](https://fastify.dev/) as the web framework +- Use the official [OpenAI Node.js SDK](https://www.npmjs.com/package/openai) (`openai` npm package) +- Only `fastify`, `@fastify/*` plugins, `openai`, `dotenv`, `cross-env`, `typescript`, `tsx`, `ts-node`, `ts-node-dev`, `nodemon`, linter/formatter and their plugins, bundler and its plugins, `@types/*`, `uuid`, and testing tools are allowed + +## Implementation details + +### Endpoints + +1. **Summarize Text** — `POST /api/summarize` + + Accepts a text and returns a concise summary. + + Request body: + ```typescript + interface SummarizeRequest { + text: string; // required, the text to summarize + maxLength?: 'short' | 'medium' | 'detailed'; // optional, defaults to 'medium' + } + ``` + + Response body: + ```typescript + interface SummarizeResponse { + summary: string; + originalLength: number; // character count of original text + summaryLength: number; // character count of summary + } + ``` + + - Server should answer with `status code` **200** and the summary + - Server should answer with `status code` **400** if `text` is missing or empty + +2. **Translate Text** — `POST /api/translate` + + Accepts a text and a target language, returns the translated text. + + Request body: + ```typescript + interface TranslateRequest { + text: string; // required + targetLanguage: string; // required (e.g. "Spanish", "French", "Japanese") + sourceLanguage?: string; // optional (auto-detect if not provided) + } + ``` + + Response body: + ```typescript + interface TranslateResponse { + translatedText: string; + detectedLanguage: string; // the detected source language + } + ``` + + - Server should answer with `status code` **200** and the translation + - Server should answer with `status code` **400** if `text` or `targetLanguage` is missing + +3. **Analyze Code** — `POST /api/analyze-code` + + Accepts source code and returns an analysis (code review, bug detection, optimization suggestions, or explanation). + + Request body: + ```typescript + interface AnalyzeCodeRequest { + code: string; // required + language: string; // required (e.g. 
"typescript", "python", "go") + task?: 'review' | 'bugs' | 'optimize' | 'explain'; // optional, defaults to 'review' + } + ``` + + Response body: + ```typescript + interface AnalyzeCodeResponse { + analysis: string; // the main analysis text + suggestions: string[]; // actionable suggestions as a list + severity: 'info' | 'warning' | 'error'; // overall severity assessment + } + ``` + + - Server should answer with `status code` **200** and the analysis + - Server should answer with `status code` **400** if `code` or `language` is missing + +4. **Chat** — `POST /api/chat` + + A conversational endpoint that maintains context across messages within a session. + + Request body: + ```typescript + interface ChatRequest { + message: string; // required + sessionId?: string; // optional — if provided, continues existing conversation + } + ``` + + Response: **Server-Sent Events (SSE) stream** + + The response should use `Content-Type: text/event-stream` and stream tokens as they arrive from the LLM: + ``` + data: {"token": "Hello"} + + data: {"token": " there"} + + data: {"token": "!"} + + data: {"done": true, "sessionId": "abc-123", "usage": {"promptTokens": 50, "completionTokens": 12}} + + ``` + + - Each SSE event contains either a `token` (partial response) or a `done` flag with metadata + - The server stores conversation history per `sessionId` (in-memory, up to the last 20 messages) + - If no `sessionId` is provided, a new session is created and the `sessionId` is returned in the final event + - Server should answer with `status code` **400** if `message` is missing + +5. **Usage Statistics** — `GET /api/usage` + + Returns aggregated usage statistics for the server since startup. + + Response body: + ```typescript + interface UsageResponse { + totalRequests: number; + totalTokens: { + prompt: number; + completion: number; + }; + estimatedCost: number; // in USD, based on model pricing + requestsByEndpoint: { + summarize: number; + translate: number; + analyzeCode: number; + chat: number; + }; + } + ``` + +### Cross-Cutting Requirements + +1. **Prompt Templates** + + Each endpoint should use a carefully crafted **system prompt** that instructs the LLM on its role and expected output format. Prompts should be stored in a dedicated module/directory (e.g. `src/prompts/`), not hardcoded in route handlers. + +2. **JSON Schema Validation** + + All request bodies should be validated using Fastify's built-in JSON Schema validation. Invalid requests should return `status code` **400** with a descriptive error message. + +3. **Rate Limiting** + + Implement rate limiting for AI endpoints: + - Maximum `RATE_LIMIT_RPM` requests per minute (configurable via `.env`, default: 20) + - When the limit is exceeded, respond with `status code` **429** and a `Retry-After` header + - Rate limiting should be per-client (based on IP or a simple token) + +4. **Response Caching** + + For `/api/summarize` and `/api/translate` endpoints: + - Cache responses based on a hash of the input (text + parameters) + - Use an in-memory cache with a TTL (configurable via `CACHE_TTL_SEC` env variable, default: 300 seconds) + - If a cached response exists and is not expired, return it without calling the LLM + - Cached responses should include a `X-Cache: HIT` header; non-cached responses should include `X-Cache: MISS` + +5. 
**Token Tracking & Cost Estimation** + + After each LLM request: + - Extract token usage from the OpenAI response (`usage.prompt_tokens`, `usage.completion_tokens`) + - Accumulate totals in memory + - Calculate estimated cost based on the model used (e.g., `gpt-4o-mini`: $0.15 / 1M input tokens, $0.60 / 1M output tokens) + +6. **Error Handling** + + Handle OpenAI API errors gracefully: + - Rate limit errors (429) from OpenAI — retry with exponential backoff (up to 3 retries) or return 503 + - Invalid API key — return 500 with a message indicating configuration error (do not expose the key) + - Network timeouts — return 503 with a message indicating the AI service is temporarily unavailable + - All errors should be logged + +### Environment Variables + +Add to `.env.example`: +``` +OPENAI_API_KEY=sk-your-api-key-here +OPENAI_MODEL=gpt-4o-mini +PORT=4000 +RATE_LIMIT_RPM=20 +CACHE_TTL_SEC=300 +``` + +## Hints + +- Use OpenAI's `stream: true` option for the chat endpoint to get streaming responses +- For SSE, set the response headers manually: `Content-Type: text/event-stream`, `Cache-Control: no-cache`, `Connection: keep-alive` +- Use `crypto.createHash('sha256').update(input).digest('hex')` for cache key generation +- Consider using `@fastify/rate-limit` plugin or implement a simple sliding window counter +- The `openai` package returns usage information in the response object: `response.usage.prompt_tokens`, `response.usage.completion_tokens` diff --git a/assignments-v2/09-ai-llm-integration/score.md b/assignments-v2/09-ai-llm-integration/score.md new file mode 100644 index 0000000..6ae9a7d --- /dev/null +++ b/assignments-v2/09-ai-llm-integration/score.md @@ -0,0 +1,34 @@ +# Scoring: AI/LLM Integration — AI Content Assistant + +## Basic Scope + +- **+15** `POST /api/summarize` endpoint works correctly (sends text to LLM, returns summary with originalLength and summaryLength) +- **+15** `POST /api/translate` endpoint works correctly (sends text to LLM, returns translation with detectedLanguage) +- **+15** `POST /api/analyze-code` endpoint works correctly (sends code to LLM, returns analysis, suggestions array, and severity) +- **+15** `POST /api/chat` endpoint works correctly (sends message to LLM, returns response) +- **+15** OpenAI SDK is properly integrated (API key from `.env`, model configurable) +- **+10** Prompt templates are stored in a dedicated module (not hardcoded in route handlers) +- **+15** JSON Schema validation is used for all request bodies (returns 400 for invalid input) + +## Advanced Scope + +- **+20** SSE streaming is implemented for `/api/chat` — tokens are streamed as `text/event-stream` events with proper format +- **+15** Token tracking and cost estimation: `GET /api/usage` returns correct totals (totalRequests, totalTokens, estimatedCost, requestsByEndpoint) +- **+15** Rate limiting is implemented (configurable RPM via `.env`, returns 429 with `Retry-After` header when exceeded) +- **+10** OpenAI API errors are handled gracefully (rate limits, invalid key, timeouts — appropriate HTTP status codes and logged) +- **+10** Conversation context management: `/api/chat` maintains session history per `sessionId` (up to last 20 messages) + +## Hacker Scope + +- **+15** Response caching for `/api/summarize` and `/api/translate` (in-memory with TTL, `X-Cache: HIT/MISS` header, cache key from input hash) +- **+10** Graceful degradation when AI API is unavailable (returns 503 with descriptive message, retries with exponential backoff) +- **+5** Model selection is configurable 
per-request (optional `model` field in request body, falls back to `.env` default) + +## Forfeits + +- **-95% of total task score** Any external tools/libraries beyond those listed in technical requirements +- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) +- **-5** The `.env` file with actual API key is present in the repository (should be `.env.example` instead) diff --git a/assignments-v2/10-ai-rag-vectordb/assignment.md b/assignments-v2/10-ai-rag-vectordb/assignment.md new file mode 100644 index 0000000..759407d --- /dev/null +++ b/assignments-v2/10-ai-rag-vectordb/assignment.md @@ -0,0 +1,266 @@ +# Assignment: AI RAG & Vector Database — Smart Knowledge Base + +## Description + +Your task is to create a **Smart Knowledge Base** — an AI chatbot that answers questions based on documents you upload. The system uses **Retrieval-Augmented Generation (RAG)**: when a user asks a question, the server finds the most relevant parts of the uploaded documents and uses them as context for the LLM to generate an accurate, grounded answer. + +## Technical requirements + +- Task should be implemented in TypeScript +- Use 24.x.x version (24.10.0 or upper) of Node.js +- Use [Fastify](https://fastify.dev/) as the web framework +- Use the official [OpenAI Node.js SDK](https://www.npmjs.com/package/openai) (`openai` npm package) for both chat completions and embeddings +- Only `fastify`, `@fastify/*` plugins, `openai`, `dotenv`, `cross-env`, `typescript`, `tsx`, `ts-node`, `ts-node-dev`, `nodemon`, `uuid`, linter/formatter and their plugins, bundler and its plugins, `@types/*`, and testing tools are allowed +- For the **Basic Scope**, implement vector storage **in-memory** (no external vector database required) +- For the **Advanced Scope**, optionally use [ChromaDB](https://www.trychroma.com/) via Docker + +## Core Concepts + +### What is RAG? + +Retrieval-Augmented Generation (RAG) is a pattern that enhances LLM responses by: +1. **Retrieving** relevant information from a knowledge base +2. **Augmenting** the LLM prompt with this information +3. **Generating** a response that is grounded in the retrieved data + +### The RAG Pipeline + +``` +User Question + │ + ▼ +┌─────────────┐ +│ Embed the │ ← OpenAI Embeddings API +│ question │ +└──────┬──────┘ + │ + ▼ +┌─────────────┐ +│ Search for │ ← Cosine similarity against +│ similar │ document chunk embeddings +│ chunks │ +└──────┬──────┘ + │ + ▼ +┌─────────────┐ +│ Build prompt │ ← System prompt + relevant chunks +│ with context │ + user question +└──────┬──────┘ + │ + ▼ +┌─────────────┐ +│ Generate │ ← OpenAI Chat Completions API +│ answer │ +└──────┬──────┘ + │ + ▼ + Answer with + source attribution +``` + +## Implementation details + +### Endpoints + +1. **Upload Document** — `POST /api/documents` + + Upload a text document to the knowledge base. + + Request body: + ```typescript + interface UploadDocumentRequest { + title: string; // required + content: string; // required, plain text + metadata?: Record; // optional key-value metadata + } + ``` + + Processing steps: + 1. Split `content` into chunks of ~500-1000 characters with ~100-character overlap between consecutive chunks + 2. 
Generate embeddings for each chunk using OpenAI Embeddings API (`text-embedding-3-small` model) + 3. Store the document, its chunks, and their embedding vectors + + Response body: + ```typescript + interface UploadDocumentResponse { + id: string; // document id (uuid) + title: string; + chunksCount: number; // number of chunks created + createdAt: string; // ISO timestamp + } + ``` + + - Server should answer with `status code` **201** and the document info + - Server should answer with `status code` **400** if `title` or `content` is missing + +2. **List Documents** — `GET /api/documents` + + Response body: + ```typescript + interface DocumentListResponse { + documents: Array<{ + id: string; + title: string; + chunksCount: number; + metadata: Record; + createdAt: string; + }>; + } + ``` + + - Server should answer with `status code` **200** + +3. **Delete Document** — `DELETE /api/documents/:id` + + Removes the document and all its associated chunks and embeddings from storage. + + - Server should answer with `status code` **204** if the document was found and deleted + - Server should answer with `status code` **404** if the document doesn't exist + +4. **Semantic Search** — `POST /api/search` + + Find the most relevant chunks across all documents. + + Request body: + ```typescript + interface SearchRequest { + query: string; // required + limit?: number; // optional, default 5, max 20 + } + ``` + + Processing steps: + 1. Generate an embedding for the query + 2. Calculate cosine similarity between the query embedding and all stored chunk embeddings + 3. Return the top-K most similar chunks + + Response body: + ```typescript + interface SearchResponse { + results: Array<{ + documentId: string; + documentTitle: string; + chunk: string; // the text of the matched chunk + similarity: number; // cosine similarity score (0 to 1) + }>; + } + ``` + + - Server should answer with `status code` **200** + - Server should answer with `status code` **400** if `query` is missing + +5. **Chat with Knowledge Base** — `POST /api/chat` + + Ask a question and get an AI-generated answer based on the uploaded documents. + + Request body: + ```typescript + interface ChatRequest { + question: string; // required + conversationId?: string; // optional, to continue a conversation + } + ``` + + Processing steps: + 1. Generate an embedding for the question + 2. Find the top 3-5 most similar chunks from the vector store + 3. Build a prompt that includes: + - A system message instructing the AI to answer based on the provided context + - The relevant chunks as context + - Previous conversation messages (if `conversationId` is provided) + - The user's question + 4. Send the prompt to the LLM and return the answer + + Response body: + ```typescript + interface ChatResponse { + answer: string; + sources: Array<{ + documentId: string; + documentTitle: string; + relevantChunk: string; // the chunk that was used as context + }>; + conversationId: string; // new or existing conversation id + } + ``` + + - Server should answer with `status code` **200** + - Server should answer with `status code` **400** if `question` is missing + - If no documents are uploaded, the AI should respond saying it has no knowledge base to answer from + +6. 
**Conversation History** — `GET /api/chat/:conversationId/history` + + Response body: + ```typescript + interface ConversationHistoryResponse { + conversationId: string; + messages: Array<{ + role: 'user' | 'assistant'; + content: string; + timestamp: string; + }>; + } + ``` + + - Server should answer with `status code` **200** + - Server should answer with `status code` **404** if the conversation doesn't exist + +### Vector Storage (In-Memory) + +For the Basic Scope, implement vector storage in-memory: + +```typescript +interface StoredChunk { + id: string; + documentId: string; + text: string; + embedding: number[]; // vector of floats from OpenAI Embeddings API +} +``` + +**Cosine Similarity** formula: +``` +similarity(A, B) = (A · B) / (|A| * |B|) +``` + +Where: +- `A · B` = sum of (A[i] * B[i]) for all i +- `|A|` = sqrt(sum of A[i]^2 for all i) + +### Document Chunking + +Split documents into chunks using the following strategy: +- **Chunk size**: ~800 characters (configurable via `CHUNK_SIZE` env variable) +- **Overlap**: ~200 characters (configurable via `CHUNK_OVERLAP` env variable) +- Split on paragraph boundaries when possible (split on `\n\n`), falling back to sentence boundaries (`.`), then word boundaries + +### Conversation Memory + +- Store conversation messages per `conversationId` in memory +- Keep the last `CONVERSATION_MAX_MESSAGES` messages (configurable, default: 20) +- Include conversation history in the prompt when `conversationId` is provided + +### Environment Variables + +Add to `.env.example`: +``` +OPENAI_API_KEY=sk-your-api-key-here +OPENAI_MODEL=gpt-4o-mini +EMBEDDING_MODEL=text-embedding-3-small +PORT=4000 +CHUNK_SIZE=800 +CHUNK_OVERLAP=200 +CONVERSATION_MAX_MESSAGES=20 +``` + +## Hints + +- Use `openai.embeddings.create({ model: "text-embedding-3-small", input: text })` to generate embeddings +- The `text-embedding-3-small` model returns 1536-dimensional vectors +- For cosine similarity, you can normalize vectors to unit length first, then dot product equals cosine similarity +- The system prompt for the chat endpoint should instruct the AI to: + - Only answer based on the provided context + - If the context doesn't contain relevant information, say so + - Cite which documents the answer is based on +- For chunking, consider edge cases: very short documents (single chunk), very long paragraphs, documents with no paragraph breaks +- Store the `sources` based on which chunks were actually included in the prompt, not all chunks in the store diff --git a/assignments-v2/10-ai-rag-vectordb/score.md b/assignments-v2/10-ai-rag-vectordb/score.md new file mode 100644 index 0000000..6c06026 --- /dev/null +++ b/assignments-v2/10-ai-rag-vectordb/score.md @@ -0,0 +1,34 @@ +# Scoring: AI RAG & Vector Database — Smart Knowledge Base + +## Basic Scope + +- **+20** `POST /api/documents` works correctly: accepts title and content, splits into chunks, generates embeddings, stores in memory, returns document info with chunksCount +- **+10** `GET /api/documents` returns the list of all uploaded documents +- **+10** `DELETE /api/documents/:id` removes the document and all associated chunks/embeddings +- **+20** In-memory vector storage is implemented with cosine similarity search +- **+25** `POST /api/chat` implements the full RAG pipeline: embed question → find similar chunks → inject into prompt → generate answer with source attribution +- **+15** Source attribution in chat responses correctly identifies which documents/chunks were used + +## Advanced Scope + +- **+15** `POST 
/api/search` (semantic search) endpoint works correctly — returns top-K chunks ranked by similarity +- **+15** Conversation memory is implemented — `conversationId` tracks multi-turn dialogues, history is included in prompts +- **+10** `DELETE /api/documents/:id` correctly removes all associated embeddings from the vector store (search results no longer include deleted document's chunks) +- **+10** Document metadata filtering: `POST /api/search` accepts optional `metadata` filter to narrow results +- **+10** Chunk size and overlap are configurable via `.env` variables (`CHUNK_SIZE`, `CHUNK_OVERLAP`) +- **+10** ChromaDB is used via Docker instead of in-memory vector storage + +## Hacker Scope + +- **+10** Hybrid search: combines keyword matching (simple text search) with semantic similarity, merging and re-ranking results +- **+10** Re-ranking: after initial retrieval, re-rank chunks using a secondary scoring method (e.g., ask the LLM to rate relevance, or use reciprocal rank fusion) +- **+10** Large document handling: documents larger than 50KB are processed via streaming chunking without loading the entire content into memory at once + +## Forfeits + +- **-95% of total task score** Any external tools/libraries beyond those listed in technical requirements +- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) +- **-5** The `.env` file with actual API key is present in the repository (should be `.env.example` instead) From 8cdfb1abacc9bad3dbff981436683e21b6d7a424 Mon Sep 17 00:00:00 2001 From: Maksim Shylau Date: Mon, 9 Feb 2026 17:24:43 +0100 Subject: [PATCH 2/5] feat: update 1st assignment --- README.md | 10 +- .../01-nodejs-fundamentals/assignment.md | 99 ------ .../01-nodejs-fundamentals/score.md | 43 --- .../01a-nodejs-basics/assignment.md | 120 ++++++++ assignments-v2/01a-nodejs-basics/score.md | 43 +++ .../01b-data-processing-cli/assignment.md | 286 ++++++++++++++++++ .../01b-data-processing-cli/score.md | 46 +++ 7 files changed, 502 insertions(+), 145 deletions(-) delete mode 100644 assignments-v2/01-nodejs-fundamentals/assignment.md delete mode 100644 assignments-v2/01-nodejs-fundamentals/score.md create mode 100644 assignments-v2/01a-nodejs-basics/assignment.md create mode 100644 assignments-v2/01a-nodejs-basics/score.md create mode 100644 assignments-v2/01b-data-processing-cli/assignment.md create mode 100644 assignments-v2/01b-data-processing-cli/score.md diff --git a/README.md b/README.md index 532fa6f..a2e79ea 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ # Node.js Assignments v2 -## Node.js Fundamentals -- [Assignment](assignments-v2/01-nodejs-fundamentals/assignment.md) -- [Scoring](assignments-v2/01-nodejs-fundamentals/score.md) +## Node.js Basics +- [Assignment](assignments-v2/01a-nodejs-basics/assignment.md) +- [Scoring](assignments-v2/01a-nodejs-basics/score.md) + +## Data Processing CLI +- [Assignment](assignments-v2/01b-data-processing-cli/assignment.md) +- [Scoring](assignments-v2/01b-data-processing-cli/score.md) ## CRUD API - [Assignment](assignments-v2/02-crud-api/assignment.md) diff --git a/assignments-v2/01-nodejs-fundamentals/assignment.md b/assignments-v2/01-nodejs-fundamentals/assignment.md deleted file 
mode 100644 index 0e81d79..0000000 --- a/assignments-v2/01-nodejs-fundamentals/assignment.md +++ /dev/null @@ -1,99 +0,0 @@ -# Assignment: Node.js Fundamentals - -## Description - -Your task is to complete several tasks to learn Node.js core APIs. You will build a **Data Processing Toolkit** — a set of utilities that work with the file system, streams, CLI, hashing, compression, worker threads, and child processes. - -Fork the starter repository and implement the required functionality inside the `src/` folder. Each subtask has its own dedicated file inside a corresponding subfolder. - -## Technical requirements - -- Any external tools and libraries are prohibited -- Use 24.x.x version (24.10.0 or upper) of Node.js -- Don't change the signature of pre-written functions (e.g. don't rename them, don't make them synchronous, etc.) -- Prefer asynchronous API whenever possible - -## Subtasks - -### File System (src/fs) - -You should implement several functions in dedicated files: - -- `create.js` — implement function that creates a new JSON file `config.json` with the following content inside the `workspace` folder: - ```json - { - "name": "Data Toolkit", - "version": "1.0.0", - "features": ["fs", "streams", "cli"] - } - ``` - If the file already exists, `Error` with message `FS operation failed` must be thrown. - -- `copy.js` — implement function that recursively copies the `workspace` folder with all its content into a `workspace_backup` folder at the same level. If the `workspace` folder doesn't exist or `workspace_backup` has already been created, `Error` with message `FS operation failed` must be thrown. - -- `rename.js` — implement function that renames the file `data.csv` to `processed_data.csv`. If there's no file `data.csv` or `processed_data.csv` already exists, `Error` with message `FS operation failed` must be thrown. - -- `delete.js` — implement function that deletes the file `obsolete.txt`. If there's no file `obsolete.txt`, `Error` with message `FS operation failed` must be thrown. - -- `list.js` — implement function that prints an array of all files and folders from the `workspace` folder into the console. Each entry should be an object with `name` (string) and `sizeKB` (number, file size in kilobytes rounded to 2 decimal places) properties. For directories, `sizeKB` should be `null`. If the `workspace` folder doesn't exist, `Error` with message `FS operation failed` must be thrown. - -- `read.js` — implement function that prints content of the `report.txt` into the console. If there's no file `report.txt`, `Error` with message `FS operation failed` must be thrown. 
- -### Command Line Interface (src/cli) - -You should implement several functions in dedicated files: - -- `args.js` — implement function that parses command line arguments given in the format `--input path/to/file --output path/to/output --format json` and prints them to the console in the format `input is path/to/file, output is path/to/output, format is json` - -- `env.js` — implement function that parses environment variables with the prefix `DPT_` and prints them to the console in the format `DPT_name1=value1; DPT_name2=value2` - -### Hash (src/hash) - -You should implement a function in a dedicated file: - -- `calcHash.js` — implement function that calculates the SHA256 hash for file `data.txt` and logs it into the console as a `hex` string using Streams API - -### Streams (src/streams) - -You should implement several functions in dedicated files: - -- `read.js` — implement function that reads file `input.txt` content using Readable Stream and prints its content into `process.stdout` - -- `write.js` — implement function that writes `process.stdin` data into file `output.txt` content using Writable Stream - -- `transform.js` — implement function that reads data from `process.stdin`, converts each line to upper case using Transform Stream, and then writes it into `process.stdout` - -### Zlib (src/zip) - -You should implement several functions in dedicated files: - -- `compress.js` — implement function that compresses file `archive_me.txt` to `archive.gz` using `zlib` and Streams API - -- `decompress.js` — implement function that decompresses `archive.gz` back to the `archive_me.txt` with same content as before compression using `zlib` and Streams API - -### Worker Threads (src/wt) - -You should implement several functions in dedicated files: - -- `worker.js` — implement a function that receives a range `{ start, end }` from the main thread and finds all prime numbers within that range. The function should send the result back to the main thread. - -- `main.js` — implement function that creates a number of worker threads (equal to the number of host machine logical CPU cores) from file `worker.js` and distributes the range `[2, 10_000_000]` evenly among them. For example: on a host machine with **4** cores you should create **4** workers, each computing primes in its own subrange. After all workers finish, the function should log an array of results into the console. The results are an array of objects with 2 properties: - - `status` — `'resolved'` in case of successfully received value from `worker` or `'error'` in case of error in `worker` - - `data` — array of prime numbers from `worker` in case of success or `null` in case of error - - The results in the array must be in the same order that the workers were created. - -### Child Processes (src/cp) - -You should implement a function in a dedicated file: - -- `cp.js` — implement function `spawnChildProcess` that receives an array of arguments `args` and creates a child process from file `script.js`, passing these `args` to it. 
This function should create an IPC-channel between `stdin` and `stdout` of the master process and child process: - - child process `stdin` should receive input from master process `stdin` - - child process `stdout` should send data to master process `stdout` - -## Hints - -- Use `fs/promises` API for file system operations -- Use `crypto.createHash` with Streams for hash calculation -- Use `os.cpus().length` to get the number of CPU cores -- Use `child_process.spawn` or `child_process.fork` for child processes diff --git a/assignments-v2/01-nodejs-fundamentals/score.md b/assignments-v2/01-nodejs-fundamentals/score.md deleted file mode 100644 index c6d4cf4..0000000 --- a/assignments-v2/01-nodejs-fundamentals/score.md +++ /dev/null @@ -1,43 +0,0 @@ -# Scoring: Node.js Fundamentals - -## Check - -For check simplification you have npm-scripts in `package.json`. -NB! Some scripts have predefined data (e.g. environment variables, CLI arguments). Feel free to change it during the check if necessary. - -## Basic Scope - -- File System (src/fs) - - **+6** `create.js` implemented properly - - **+10** `copy.js` implemented properly - - **+10** `rename.js` implemented properly - - **+6** `delete.js` implemented properly - - **+8** `list.js` implemented properly (including `name` and `sizeKB` properties) - - **+6** `read.js` implemented properly -- Command Line Interface (src/cli) - - **+6** `args.js` implemented properly - - **+6** `env.js` implemented properly -- Hash (src/hash) - - **+10** `calcHash.js` implemented properly -- Streams (src/streams) - - **+10** `read.js` implemented properly - - **+10** `write.js` implemented properly - - **+12** `transform.js` implemented properly -- Zlib (src/zip) - - **+10** `compress.js` implemented properly - - **+10** `decompress.js` implemented properly - -## Advanced Scope - -- Worker Threads (src/wt) - - **+10** `worker.js` implemented properly - - **+30** `main.js` implemented properly (creates correct number of workers, distributes ranges, collects results in order) -- Child Processes (src/cp) - - **+10** spawns child process - - **+10** child process `stdin` receives input from master process `stdin` - - **+10** child process `stdout` sends data to master process `stdout` - -## Forfeits - -- **-95% of total task score** Any external tools/libraries are used -- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) diff --git a/assignments-v2/01a-nodejs-basics/assignment.md b/assignments-v2/01a-nodejs-basics/assignment.md new file mode 100644 index 0000000..09fd227 --- /dev/null +++ b/assignments-v2/01a-nodejs-basics/assignment.md @@ -0,0 +1,120 @@ +# Assignment: Node.js Basics + +## Description + +Your task is to complete several tasks to learn Node.js core APIs. Each subtask is a standalone exercise in a dedicated file inside the corresponding subfolder of `src/`. + +Fork the starter repository and implement the required functionality. + +## Technical requirements + +- Any external tools and libraries are prohibited +- Use 24.x.x version (24.10.0 or upper) of Node.js +- Don't change the signature of pre-written functions (e.g. don't rename them, don't make them synchronous, etc.) +- Prefer asynchronous API whenever possible + +## Subtasks + +### File System (src/fs) + +You should implement several functions in dedicated files: + +- `snapshot.js` — implement function that recursively scans the `workspace` directory and writes a `snapshot.json` file next to it. 
The JSON file should contain a flat array of all entries with file contents: + ```json + [ + { "path": "file1.txt", "type": "file", "size": 1024, "content": "file contents as base64 string" }, + { "path": "subdir", "type": "directory" }, + { "path": "subdir/nested.txt", "type": "file", "size": 512, "content": "nested file contents as base64 string" } + ] + ``` + Paths should be relative to `workspace`. Size is in bytes (only for files). File contents should be stored as base64-encoded strings. If `workspace` doesn't exist, `Error` with message `FS operation failed` must be thrown. + +- `restore.js` — implement function that reads `snapshot.json` and recreates the directory/file structure described in it inside a `workspace_restored` folder. Directories should be created, files should be recreated with their original content (decoded from base64). If `snapshot.json` doesn't exist, `Error` with message `FS operation failed` must be thrown. If `workspace_restored` already exists, `Error` with message `FS operation failed` must be thrown. + +- `findByExt.js` — implement function that recursively finds all files with a specific extension inside the `workspace` directory and prints their relative paths sorted alphabetically, one per line. The extension is provided as a CLI argument `--ext ` (e.g. `--ext txt` or `--ext js`). If the `--ext` argument is not provided, default to `.txt`. If `workspace` doesn't exist, `Error` with message `FS operation failed` must be thrown. + +- `merge.js` — implement function that reads all `.txt` files from the `workspace/parts` folder in alphabetical order by filename, concatenates their content (separated by newline), and writes the result to `workspace/merged.txt`. If the `parts` folder doesn't exist or contains no `.txt` files, `Error` with message `FS operation failed` must be thrown. + +### CLI (src/cli) + +You should implement several functions in dedicated files: + +- `interactive.js` — implement a simple interactive command-line interface using the `readline` module. The program should: + - Display a prompt `> ` and wait for user input + - Support the following commands: + - `uptime` — prints process uptime in seconds (e.g. `Uptime: 12.34s`) + - `cwd` — prints the current working directory + - `date` — prints the current date and time in ISO format + - `exit` — prints `Goodbye!` and terminates the process + - On unknown command, print `Unknown command` + - On `Ctrl+C` or end of input, print `Goodbye!` and exit + +- `progress.js` — implement a function that simulates a progress bar in the terminal. The bar should go from 0% to 100% over approximately 5 seconds, updating in place (using `\r`) every 100ms. The output format should be: `[████████████████████ ] 67%` (30-character bar). When complete, print `Done!` on a new line. + +### Modules (src/modules) + +You should implement a function in a dedicated file: + +- `dynamic.js` — implement a function that accepts a plugin name as a command line argument and dynamically imports the corresponding module from the `plugins/` subdirectory. Each plugin module exports a `run()` function that returns a string. After importing, call `run()` and print the result. Three plugins are pre-created: `uppercase.js`, `reverse.js`, `repeat.js`. If the plugin doesn't exist, print `Plugin not found` and exit with code 1. 
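A minimal sketch of the dynamic import approach for `dynamic.js` is shown below. It assumes an ESM project and that the plugin name is passed as the first CLI argument (e.g. `node src/modules/dynamic.js uppercase`); the exact invocation and any pre-written function wrapper are defined by the starter repository:

```js
// dynamic.js — illustrative sketch only, assuming ESM and the plugin name as the first CLI argument
const pluginName = process.argv[2];

// Resolve the plugin relative to this file so the script works from any working directory
const pluginUrl = new URL(`./plugins/${pluginName}.js`, import.meta.url);

let plugin;
try {
  plugin = await import(pluginUrl.href);
} catch {
  console.error('Plugin not found');
  process.exit(1);
}

// Each plugin is expected to export a run() function that returns a string
console.log(plugin.run());
```
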
+ +### Hash (src/hash) + +You should implement a function in a dedicated file: + +- `verify.js` — implement function that reads a `checksums.json` file containing an object where keys are filenames and values are expected SHA256 hex hashes: + ```json + { + "file1.txt": "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", + "file2.txt": "486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7" + } + ``` + For each file listed, calculate its actual SHA256 hash using Streams API and print the result: + ``` + file1.txt — OK + file2.txt — FAIL + ``` + If `checksums.json` doesn't exist, `Error` with message `FS operation failed` must be thrown. + +### Streams (src/streams) + +You should implement several functions in dedicated files: + +- `lineNumberer.js` — implement function that reads data from `process.stdin`, prepends each line with its line number (starting from 1) using a Transform Stream, and writes the result to `process.stdout`. Example: input `hello\nworld` → output `1 | hello\n2 | world` + +- `filter.js` — implement function that reads data from `process.stdin`, filters only lines that contain a pattern (given as a CLI argument `--pattern `), and writes matching lines to `process.stdout` using a Transform Stream + +- `split.js` — implement function that reads file `source.txt` using a Readable Stream and splits it into chunk files: `chunk_1.txt`, `chunk_2.txt`, etc. Each chunk should contain at most N lines (N is given as a CLI argument `--lines `, default: 10). Must use Streams API. + +### Zlib (src/zip) + +You should implement several functions in dedicated files: + +- `compressDir.js` — implement function that reads all files from the `workspace/toCompress/` directory, recursively compresses the entire directory structure (preserving directory paths and file names) into a single `.br` archive file `archive.br` and saves it to `workspace/compressed/` directory (creating it if it doesn't exist). Must use Streams API. If `toCompress` doesn't exist, `Error` with message `FS operation failed` must be thrown. + +- `decompressDir.js` — implement function that reads the `archive.br` file from `workspace/compressed/`, decompresses it, and extracts the original directory structure with all files to `workspace/decompressed/` directory (creating it if it doesn't exist). The decompressed content must match the original. If `compressed` doesn't exist or `archive.br` doesn't exist, `Error` with message `FS operation failed` must be thrown. + +### Worker Threads (src/wt) + +You should implement several functions in dedicated files: + +- `worker.js` — implement a function that receives an array of numbers from the main thread, sorts them in ascending order, and sends the sorted array back to the main thread + +- `main.js` — implement function that reads a JSON file `data.json` containing an array of numbers (e.g. `[5, 3, 8, 1, 9, 2, ...]`). The function should: + 1. Split the array into N chunks (where N = number of logical CPU cores) + 2. Create N worker threads from `worker.js`, sending one chunk to each + 3. Collect sorted chunks from all workers + 4. Merge the sorted chunks into a single sorted array (using k-way merge algorithm) + 5. Log the final sorted array to the console + + The results must be collected in the same order as workers were created. + +### Child Processes (src/cp) + +You should implement a function in a dedicated file: + +- `execCommand.js` — implement function `execCommand` that takes a command string as a CLI argument (e.g. 
`node src/cp/execCommand.js "ls -la"`), spawns it as a child process using `spawn`, and: + - pipes the child's `stdout` to `process.stdout` + - pipes the child's `stderr` to `process.stderr` + - passes environment variables from the parent process to the child process + - when the child exits, the parent process exits with the same exit code + diff --git a/assignments-v2/01a-nodejs-basics/score.md b/assignments-v2/01a-nodejs-basics/score.md new file mode 100644 index 0000000..f68540a --- /dev/null +++ b/assignments-v2/01a-nodejs-basics/score.md @@ -0,0 +1,43 @@ +# Scoring: Node.js Basics + +## Check + +For check simplification you have npm-scripts in `package.json`. +NB! Some scripts have predefined data (e.g. environment variables, CLI arguments). Feel free to change it during the check if necessary. + +## Basic Scope + +- File System (src/fs) + - **+10** `snapshot.js` implemented properly (recursive scan, correct JSON structure with path/type/size) + - **+10** `restore.js` implemented properly (reads snapshot, recreates structure) + - **+6** `findByExt.js` implemented properly (recursive search, sorted output) + - **+6** `merge.js` implemented properly (reads .txt files in order, concatenates, writes result) +- CLI (src/cli) + - **+10** `interactive.js` implemented properly (readline prompt, supports uptime/cwd/date/exit commands, handles Ctrl+C) + - **+6** `progress.js` implemented properly (in-place updating progress bar, 0-100% over ~5 seconds) +- Modules (src/modules) + - **+10** `dynamic.js` implemented properly (dynamic import from plugins/, calls run(), handles missing plugin) +- Hash (src/hash) + - **+12** `verify.js` implemented properly (reads checksums.json, calculates SHA256 via Streams, prints OK/FAIL per file) +- Streams (src/streams) + - **+10** `lineNumberer.js` implemented properly (Transform stream, prepends line numbers) + - **+10** `filter.js` implemented properly (Transform stream, filters by pattern from CLI arg) + - **+10** `split.js` implemented properly (Readable stream, splits file into chunks by line count) +- Zlib (src/zip) + - **+10** `compressDir.js` implemented properly (reads all files from workspace/toCompress/, recursively compresses entire directory structure into single .br archive, saves to workspace/compressed/) + - **+10** `decompressDir.js` implemented properly (reads archive.br from workspace/compressed/, decompresses and extracts to workspace/decompressed/, result matches original) + +## Advanced Scope + +- Worker Threads (src/wt) + - **+10** `worker.js` implemented properly (receives array, returns sorted array) + - **+30** `main.js` implemented properly (reads data.json, splits by CPU count, distributes to workers, k-way merges results) +- Child Processes (src/cp) + - **+10** `execCommand.js` spawns child process from CLI argument + - **+10** child process stdout/stderr piped to parent stdout/stderr + - **+10** parent exits with the same exit code as child + +## Forfeits + +- **-95% of total task score** Any external tools/libraries are used +- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) 
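+
+For cross-check orientation, the Child Processes task above usually boils down to something close to the sketch below (simplistic argument splitting, hypothetical usage string; not a reference solution):
+
+```js
+// src/cp/execCommand.js: illustrative sketch
+import { spawn } from 'node:child_process';
+
+const input = process.argv[2];
+if (!input) {
+  console.error('Usage: node src/cp/execCommand.js "<command>"');
+  process.exit(1);
+}
+
+const [command, ...args] = input.split(' ');
+
+// the parent's environment variables are passed to the child via env: process.env
+const child = spawn(command, args, { env: process.env });
+
+child.stdout.pipe(process.stdout);
+child.stderr.pipe(process.stderr);
+
+// mirror the child's exit code (null means the child was killed by a signal)
+child.on('close', (code) => process.exit(code ?? 1));
+```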
diff --git a/assignments-v2/01b-data-processing-cli/assignment.md b/assignments-v2/01b-data-processing-cli/assignment.md new file mode 100644 index 0000000..e877708 --- /dev/null +++ b/assignments-v2/01b-data-processing-cli/assignment.md @@ -0,0 +1,286 @@ +# Assignment: Data Processing CLI + +## Description + +Your task is to build a **Data Processing Toolkit** — an interactive command-line application that performs various useful data processing operations. The tool should work as a persistent Node.js process that accepts commands. + +Unlike the Node.js Basics assignment where you practiced APIs in isolation, here you will combine them into a real, cohesive tool with interactive file system navigation and data processing capabilities. + +## Technical requirements + +- Any external tools and libraries are prohibited +- Use 24.x.x version (24.10.0 or upper) of Node.js +- All file operations must use **Streams API** for efficiency (do not read entire files into memory) +- Prefer asynchronous API whenever possible +- The program should be an interactive REPL (Read-Eval-Print Loop) +- File paths in commands can be relative or absolute + +## CLI Interface + +The program is started via npm-script `start`: + +```bash +npm run start +``` + +Which runs: + +```bash +node src/main.js +``` + +The program should: +- Display a welcome message on startup: `Welcome to Data Processing CLI!` +- Print the current working directory initially: `You are currently in /path/to/home` +- Continuously prompt the user to enter commands: `>` +- Accept commands in the format: ` [arguments]` +- Display error messages for unknown or invalid commands without crashing +- Allow users to exit with `.exit` command or `Ctrl+C` +- Display a goodbye message on exit: `Thank you for using Data Processing CLI!` +- After each successful operation, print the current working directory again +- At the start of the program, working directory should be the user's home directory + +If a command is unknown, invalid, or has missing required arguments, the program should print an error message like `Invalid input` and prompt for a new command. + +If an operation fails, the program should print `Operation failed` and prompt for a new command. + +## Commands + +### Navigation & Working Directory Commands + +#### `up` — Move up one directory level + +```bash +up +``` + +**Behavior:** +- Moves up one directory level from the current working directory +- If already in the root directory, does nothing (no error) +- After successful navigation, prints the new current working directory path + +#### `cd` — Change to a specified directory + +```bash +cd path_to_directory +``` + +- `path_to_directory` — relative or absolute path to navigate to (**required**) + +**Behavior:** +- Navigates to the specified directory +- Can accept both relative and absolute paths +- If path doesn't exist or is not a directory, prints `Operation failed` and stays in current directory +- If successful, prints the new current working directory path + +#### `ls` — List files and directories in current directory + +```bash +ls +``` + +**Output:** +- A list of all files and folders in the current directory +- Folders listed first, then files, all in alphabetical order +- Each entry shows the name (with extension for files) and type (file or folder) + +**Example:** +``` +folder1 [folder] +folder2 [folder] +file1.txt [file] +file2.md [file] +``` + +### Data Processing Commands + +#### 1. `csv-to-json` — Convert CSV to JSON + +Convert a CSV file to a JSON file using Streams. 
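+
+The full requirements follow below. As a rough orientation only, the stream pipeline this command implies could be sketched like this (simplified CSV handling with no quoted fields, hard-coded paths for brevity, helper name made up):
+
+```js
+// Sketch of a csv-to-json pipeline (assumes an ESM setup)
+import { createReadStream, createWriteStream } from 'node:fs';
+import { Transform } from 'node:stream';
+import { pipeline } from 'node:stream/promises';
+
+const csvToJson = () => {
+  let headers = null;
+  let remainder = '';
+  let first = true;
+
+  const rowToJson = (line) => {
+    const values = line.replace(/\r$/, '').split(',');
+    if (!headers) { headers = values; return ''; }
+    const obj = Object.fromEntries(headers.map((h, i) => [h, values[i] ?? '']));
+    const prefix = first ? '[\n  ' : ',\n  ';
+    first = false;
+    return prefix + JSON.stringify(obj);
+  };
+
+  return new Transform({
+    transform(chunk, _encoding, done) {
+      const lines = (remainder + chunk.toString()).split('\n');
+      remainder = lines.pop(); // keep a possibly incomplete trailing line for the next chunk
+      done(null, lines.filter((l) => l.trim()).map(rowToJson).join(''));
+    },
+    flush(done) {
+      const tail = remainder.trim() ? rowToJson(remainder) : '';
+      done(null, tail + (first ? '[]\n' : '\n]\n')); // always end with a valid JSON array
+    },
+  });
+};
+
+await pipeline(createReadStream('data.csv'), csvToJson(), createWriteStream('data.json'));
+```
+
+In the real tool the paths would come from the parsed `--input`/`--output` arguments and be resolved against the tracked working directory.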
+ +```bash +csv-to-json --input data.csv --output data.json +``` + +- `--input` — path to the input CSV file (**required**) +- `--output` — path to the output JSON file (**required**) + +**Behavior:** +- The first line of the CSV file is treated as headers +- Each subsequent line becomes a JSON object with header names as keys +- The output file should contain a JSON array of objects +- Must use Readable Stream → Transform Stream → Writable Stream pipeline +- Paths are relative to the current working directory or can be absolute +- If the input file doesn't exist, print `Operation failed` + +**Example:** + +Input `data.csv`: +``` +name,age,city +Alice,30,New York +Bob,25,London +``` + +Output `data.json`: +```json +[ + { "name": "Alice", "age": "30", "city": "New York" }, + { "name": "Bob", "age": "25", "city": "London" } +] +``` + +#### 2. `json-to-csv` — Convert JSON to CSV + +Convert a JSON file (array of objects) to a CSV file using Streams. + +```bash +json-to-csv --input data.json --output data.csv +``` + +- `--input` — path to the input JSON file (**required**) +- `--output` — path to the output CSV file (**required**) + +**Behavior:** +- Input must be a JSON array of objects +- The first line of the output is the headers (keys from the first object) +- Each object becomes a CSV row +- Paths are relative to the current working directory or can be absolute +- If the input file doesn't exist or contains invalid JSON, print `Operation failed` + +#### 3. `count` — Count lines, words, and characters in txt file + +Count lines, words, and characters in a file (similar to the `wc` command). + +```bash +count --input file.txt +``` + +- `--input` — path to the input file (**required**) + +**Output format:** +``` +Lines: 42 +Words: 350 +Characters: 2048 +``` + +**Behavior:** +- Must use Streams API to process the file (do not load the entire file into memory) +- A word is any sequence of non-whitespace characters +- Paths are relative to the current working directory or can be absolute +- If the input file doesn't exist, print `Operation failed` + +#### 4. `hash` — Calculate file hash + +Calculate a cryptographic hash of a file. + +```bash +hash --input file.txt +hash --input file.txt --algorithm md5 +``` + +- `--input` — path to the input file (**required**) +- `--algorithm` — hash algorithm to use (optional, default: `sha256`). Supported values: `sha256`, `md5`, `sha512` + +**Output format:** +``` +sha256: 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 +``` + +**Behavior:** +- Must use `crypto.createHash` with Streams API +- Paths are relative to the current working directory or can be absolute +- If the input file doesn't exist, print `Operation failed` +- If the algorithm is not supported, print `Operation failed` + +#### 5. `compress` — Compress a file + +Compress a file using gzip. + +```bash +compress --input file.txt --output file.txt.gz +``` + +- `--input` — path to the input file (**required**) +- `--output` — path to the output compressed file (**required**) + +**Behavior:** +- Must use `zlib.createGzip()` with Streams API +- Paths are relative to the current working directory or can be absolute +- If the input file doesn't exist, print `Operation failed` + +#### 6. `decompress` — Decompress a file + +Decompress a gzip file. 
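+
+The details follow below. Both `compress` and `decompress` reduce to the same three-stage pipeline; roughly (illustrative helper, error handling collapsed to the required message):
+
+```js
+// Sketch shared by compress/decompress: read, gzip or gunzip, write
+import { createReadStream, createWriteStream } from 'node:fs';
+import { createGzip, createGunzip } from 'node:zlib';
+import { pipeline } from 'node:stream/promises';
+
+const gzipFile = async (input, output, mode) => {
+  const transform = mode === 'decompress' ? createGunzip() : createGzip();
+  try {
+    await pipeline(createReadStream(input), transform, createWriteStream(output));
+  } catch {
+    console.log('Operation failed');
+  }
+};
+
+// e.g. await gzipFile('file.txt', 'file.txt.gz', 'compress');
+//      await gzipFile('file.txt.gz', 'file.txt', 'decompress');
+```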
+ +```bash +decompress --input file.txt.gz --output file.txt +``` + +- `--input` — path to the input compressed file (**required**) +- `--output` — path to the output file (**required**) + +**Behavior:** +- Must use `zlib.createGunzip()` with Streams API +- The decompressed result must match the original file content exactly +- Paths are relative to the current working directory or can be absolute +- If the input file doesn't exist, print `Operation failed` + +#### 7. `sort-large` — Sort a large file using Worker Threads + +Sort a large text file line by line in alphabetical order, using Worker Threads for parallel processing. + +```bash +sort-large --input huge.txt --output sorted.txt +``` + +- `--input` — path to the input text file (**required**) +- `--output` — path to the output sorted file (**required**) + +**Behavior:** +1. Read the input file and split it into N chunks (where N = number of CPU cores) +2. Send each chunk to a Worker Thread for sorting +3. Each Worker sorts its chunk alphabetically and returns the sorted lines +4. The main thread merges the sorted chunks using a merge-sort merge step +5. Write the final sorted result to the output file + +- Must use Worker Threads (`worker_threads` module) +- The number of workers should equal the number of logical CPU cores (`os.cpus().length`) +- Paths are relative to the current working directory or can be absolute +- If the input file doesn't exist, print `Operation failed` + +## Project Structure + +``` +src/ + main.js — entry point, sets up REPL, handles navigation state + repl.js — REPL handler, command parsing and dispatching + navigation.js — navigation commands (up, cd, ls) + commands/ + csvToJson.js — csv-to-json command handler + jsonToCsv.js — json-to-csv command handler + count.js — count command handler + hash.js — hash command handler + compress.js — compress command handler + decompress.js — decompress command handler + sortLarge.js — sort-large command handler + workers/ + sortWorker.js — worker thread for sort-large command + utils/ + pathResolver.js — resolve paths relative to current working directory + argParser.js — parse command line arguments +``` + +## Hints + +- Use `readline` module for interactive input +- Use `stream.pipeline` (from `stream/promises`) to connect streams and handle errors properly +- For CSV parsing in the Transform stream, handle the first line (headers) separately from data lines +- For `json-to-csv`, you'll need to buffer the JSON input to parse it, but write the CSV output via a stream +- For `sort-large`, use `os.cpus().length` to determine the number of workers +- For merging sorted arrays, implement a k-way merge: compare the first element of each sorted chunk, pick the smallest, advance that chunk's pointer +- Always resolve file paths relative to the current working directory before performing operations +- Use `path.resolve()` to combine current working directory with relative paths +- Use `process.cwd()` is NOT appropriate here - maintain your own current working directory variable +- Maintain the current working directory as application state throughout the session diff --git a/assignments-v2/01b-data-processing-cli/score.md b/assignments-v2/01b-data-processing-cli/score.md new file mode 100644 index 0000000..d1ac966 --- /dev/null +++ b/assignments-v2/01b-data-processing-cli/score.md @@ -0,0 +1,46 @@ +# Scoring: Data Processing CLI + +## Basic Scope + +- **+6** Application starts with npm run start and displays welcome message +- **+10** Application exits gracefully with 
`.exit` command or `Ctrl+C` and displays goodbye message +- **+5** Current working directory is printed at startup and after each successful operation +- **+10** Unknown or invalid commands display `Invalid input` and application continues running +- **+10** Operations that fail display `Operation failed` and application continues running + +### Navigation Commands +- **+8** `up` command moves up one directory level correctly +- **+8** `cd` command navigates to specified directory (both relative and absolute paths) +- **+12** `ls` command lists files and folders with proper sorting (folders first, then files, alphabetically) + +### Data Processing Commands +- **+20** `csv-to-json` command works correctly (headers parsed, rows converted to objects, output is valid JSON array, uses Streams) +- **+20** `json-to-csv` command works correctly (headers from object keys, values as rows, uses Streams) +- **+12** `count` command works correctly (lines, words, characters counted via Streams, output format matches specification) +- **+12** `hash` command works correctly (SHA256 by default, supports `md5` and `sha512` via `--algorithm` option, uses Streams) +- **+8** `compress` command works correctly (gzip compression via Streams) +- **+8** `decompress` command works correctly (gzip decompression via Streams, result matches original) + +### Path Resolution +- **+15** All file paths in commands are correctly resolved relative to current working directory +- **+10** All file operations properly handle errors (non-existent files, invalid paths, permission errors) + +## Advanced Scope + +- **+25** `sort-large` command works correctly: + - **+5** File is split into chunks equal to the number of CPU cores + - **+10** Each chunk is sorted in a separate Worker Thread + - **+10** Sorted chunks are merged correctly (k-way merge) + - **+5** Final output file contains all lines sorted alphabetically +- **+15** Project structure follows the specification (separate files for navigation, commands, utilities, worker) +- **+20** Interactive REPL implementation: + - **+10** Maintains application state (current working directory) across commands + - **+10** Properly handles readline for continuous command input + +## Forfeits + +- **-95% of total task score** Any external tools/libraries are used +- **-30% of total task score** Commits after deadline (except commits that affect only Readme.md, .gitignore, etc.) +- **-20** Missing PR or its description is incorrect +- **-20** No separate development branch +- **-20** Less than 3 commits in the development branch, not including commits that make changes only to `Readme.md` or similar files (`tsconfig.json`, `.gitignore`, `.prettierrc.json`, etc.) 
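+
+For reference when checking the advanced scope above: the k-way merge step can be as small as the sketch below (a naive scan over the chunk heads; a heap-based variant is just as acceptable, and the function name is illustrative).
+
+```js
+// Naive k-way merge of already-sorted arrays of lines
+const kWayMerge = (chunks) => {
+  const pointers = chunks.map(() => 0);
+  const merged = [];
+
+  while (true) {
+    let best = -1;
+    for (let i = 0; i < chunks.length; i += 1) {
+      if (pointers[i] >= chunks[i].length) continue; // this chunk is exhausted
+      if (best === -1 || chunks[i][pointers[i]] < chunks[best][pointers[best]]) best = i;
+    }
+    if (best === -1) break; // every chunk is exhausted
+    merged.push(chunks[best][pointers[best]]);
+    pointers[best] += 1;
+  }
+
+  return merged;
+};
+
+// kWayMerge([['a', 'c'], ['b'], ['a', 'z']]) returns ['a', 'a', 'b', 'c', 'z']
+```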
From b954c36cb9f956829b59bbbce8abc5409ae1d290 Mon Sep 17 00:00:00 2001 From: Maksim Shylau Date: Mon, 9 Feb 2026 17:42:19 +0100 Subject: [PATCH 3/5] Update data-processing-cli commands --- .../01b-data-processing-cli/assignment.md | 44 +++++++++++++------ .../01b-data-processing-cli/score.md | 4 +- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/assignments-v2/01b-data-processing-cli/assignment.md b/assignments-v2/01b-data-processing-cli/assignment.md index e877708..97510cd 100644 --- a/assignments-v2/01b-data-processing-cli/assignment.md +++ b/assignments-v2/01b-data-processing-cli/assignment.md @@ -194,38 +194,54 @@ sha256: 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 - If the input file doesn't exist, print `Operation failed` - If the algorithm is not supported, print `Operation failed` -#### 5. `compress` — Compress a file +#### 5. `encrypt` — Encrypt a file -Compress a file using gzip. +Encrypt a file using `AES-256-GCM`. ```bash -compress --input file.txt --output file.txt.gz +encrypt --input file.txt --output file.txt.enc --password mySecret ``` - `--input` — path to the input file (**required**) -- `--output` — path to the output compressed file (**required**) +- `--output` — path to the output encrypted file (**required**) +- `--password` — password used to derive the encryption key (**required**) + +**Output file format (binary):** +- First 16 bytes: `salt` +- Next 12 bytes: `iv` +- Then: `ciphertext` +- Last 16 bytes: `authTag` **Behavior:** -- Must use `zlib.createGzip()` with Streams API +- Must derive a 32-byte key from `password` and `salt` +- Must encrypt using `AES-256-GCM` +- Must use Streams API end-to-end +- You must not load the full file into memory. The only allowed in-memory buffering is: + - the header (first 28 bytes = `salt` + `iv`) + - the authentication tag (last 16 bytes) - Paths are relative to the current working directory or can be absolute - If the input file doesn't exist, print `Operation failed` -#### 6. `decompress` — Decompress a file +#### 6. `decrypt` — Decrypt a file -Decompress a gzip file. +Decrypt a file produced by `encrypt`. ```bash -decompress --input file.txt.gz --output file.txt +decrypt --input file.txt.enc --output file.txt --password mySecret ``` -- `--input` — path to the input compressed file (**required**) +- `--input` — path to the input encrypted file (**required**) - `--output` — path to the output file (**required**) +- `--password` — password used to derive the encryption key (**required**) **Behavior:** -- Must use `zlib.createGunzip()` with Streams API -- The decompressed result must match the original file content exactly +- Must parse `salt` (first 16 bytes) and `iv` (next 12 bytes) from the input +- Must parse `authTag` (last 16 bytes) from the input +- Must decrypt using `AES-256-GCM` with authentication tag verification +- Must use Streams API end-to-end +- The decrypted result must match the original file content exactly - Paths are relative to the current working directory or can be absolute -- If the input file doesn't exist, print `Operation failed` +- If the input file doesn't exist or auth fails, print `Operation failed` #### 7. 
`sort-large` — Sort a large file using Worker Threads @@ -262,8 +278,8 @@ src/ jsonToCsv.js — json-to-csv command handler count.js — count command handler hash.js — hash command handler - compress.js — compress command handler - decompress.js — decompress command handler + encrypt.js — encrypt command handler + decrypt.js — decrypt command handler sortLarge.js — sort-large command handler workers/ sortWorker.js — worker thread for sort-large command diff --git a/assignments-v2/01b-data-processing-cli/score.md b/assignments-v2/01b-data-processing-cli/score.md index d1ac966..28a6423 100644 --- a/assignments-v2/01b-data-processing-cli/score.md +++ b/assignments-v2/01b-data-processing-cli/score.md @@ -18,8 +18,8 @@ - **+20** `json-to-csv` command works correctly (headers from object keys, values as rows, uses Streams) - **+12** `count` command works correctly (lines, words, characters counted via Streams, output format matches specification) - **+12** `hash` command works correctly (SHA256 by default, supports `md5` and `sha512` via `--algorithm` option, uses Streams) -- **+8** `compress` command works correctly (gzip compression via Streams) -- **+8** `decompress` command works correctly (gzip decompression via Streams, result matches original) +- **+8** `encrypt` command works correctly (AES-256-GCM, key derivation from password+salt, Streams, output format matches spec) +- **+8** `decrypt` command works correctly (AES-256-GCM, key derivation from password+salt, Streams, authTag verified, result matches original) ### Path Resolution - **+15** All file paths in commands are correctly resolved relative to current working directory From 1311f63766868fe44910a1fc0a0fcea4a917013f Mon Sep 17 00:00:00 2001 From: Maksim Shylau Date: Mon, 9 Feb 2026 19:03:56 +0100 Subject: [PATCH 4/5] feat: replace sort-large script --- .../01b-data-processing-cli/assignment.md | 65 ++++++++++---- .../01b-data-processing-cli/score.md | 10 +-- .../scripts/generate-logs.js | 90 +++++++++++++++++++ 3 files changed, 143 insertions(+), 22 deletions(-) create mode 100644 assignments-v2/01b-data-processing-cli/scripts/generate-logs.js diff --git a/assignments-v2/01b-data-processing-cli/assignment.md b/assignments-v2/01b-data-processing-cli/assignment.md index 97510cd..c0c50b8 100644 --- a/assignments-v2/01b-data-processing-cli/assignment.md +++ b/assignments-v2/01b-data-processing-cli/assignment.md @@ -243,29 +243,60 @@ decrypt --input file.txt.enc --output file.txt --password mySecret - Paths are relative to the current working directory or can be absolute - If the input file doesn't exist or auth fails, print `Operation failed` -#### 7. `sort-large` — Sort a large file using Worker Threads +#### 7. `log-stats` — Analyze a large log file using Worker Threads -Sort a large text file line by line in alphabetical order, using Worker Threads for parallel processing. +Compute statistics for a large log file using Worker Threads for parallel processing. 
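+
+The exact line format and output shape are specified below. Conceptually, each worker reduces its slice of lines to a partial stats object and the main thread merges those partials; a sketch of the worker side (the field order is taken from the example line below, and the file name and `workerData` shape are assumptions):
+
+```js
+// src/workers/logWorker.js (assumed name): partial aggregation inside a worker thread
+import { parentPort, workerData } from 'node:worker_threads';
+
+const stats = {
+  total: 0,
+  levels: {},
+  status: { '2xx': 0, '3xx': 0, '4xx': 0, '5xx': 0 },
+  paths: {},
+  responseTimeSum: 0,
+};
+
+for (const line of workerData.lines) {
+  // assumed field order: timestamp level service statusCode responseTimeMs method path
+  const [, level, , status, responseTime, , path] = line.split(' ');
+  if (!path) continue; // skip blank or malformed lines
+  stats.total += 1;
+  stats.levels[level] = (stats.levels[level] ?? 0) + 1;
+  const statusClass = `${status[0]}xx`;
+  if (stats.status[statusClass] !== undefined) stats.status[statusClass] += 1;
+  stats.paths[path] = (stats.paths[path] ?? 0) + 1;
+  stats.responseTimeSum += Number(responseTime);
+}
+
+parentPort.postMessage(stats);
+```
+
+On the main thread the partials are merged by summing counters and path maps, `topPaths` comes from sorting the merged path counts, and `avgResponseTimeMs` is the summed response time divided by `total`.
+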
```bash -sort-large --input huge.txt --output sorted.txt +log-stats --input logs.txt --output stats.json ``` -- `--input` — path to the input text file (**required**) -- `--output` — path to the output sorted file (**required**) +- `--input` — path to the input log file (**required**) +- `--output` — path to the output JSON file (**required**) + +**Log line format (space-separated):** +``` +<timestamp> <level> <service> <statusCode> <responseTimeMs> <method> <path> +``` + +**Example line:** +``` +2026-02-01T12:34:56.789Z INFO user-service 200 123 GET /api/users +``` + +**Output format (JSON):** +``` +{ + "total": 1000, + "levels": { "INFO": 700, "WARN": 200, "ERROR": 100 }, + "status": { "2xx": 800, "3xx": 50, "4xx": 120, "5xx": 30 }, + "topPaths": [ + { "path": "/api/users", "count": 120 }, + { "path": "/api/orders", "count": 95 } + ], + "avgResponseTimeMs": 137.42 +} +``` **Behavior:** -1. Read the input file and split it into N chunks (where N = number of CPU cores) -2. Send each chunk to a Worker Thread for sorting -3. Each Worker sorts its chunk alphabetically and returns the sorted lines -4. The main thread merges the sorted chunks using a merge-sort merge step -5. Write the final sorted result to the output file - -- Must use Worker Threads (`worker_threads` module) -- The number of workers should equal the number of logical CPU cores (`os.cpus().length`) +1. Split the input file into N chunks (where N = number of CPU cores), ensuring chunks start and end on line boundaries +2. Send each chunk to a Worker Thread for parsing and partial aggregation +3. Each Worker returns partial stats: counts by level, counts by status class, path counts, total lines, response time sum +4. The main thread merges partial stats and computes final `avgResponseTimeMs` +5. Write the JSON result to the output file + +- Must use Worker Threads for parallel processing +- The number of workers should equal the number of logical CPU cores - Paths are relative to the current working directory or can be absolute - If the input file doesn't exist, print `Operation failed` +**Test data generator:** +Use the provided script to generate a large log file for testing: + +```bash +node scripts/generate-logs.js --output workspace/logs.txt --lines 500000 +``` + ## Project Structure ``` src/ main.js — entry point, sets up REPL, handles navigation state repl.js — REPL handler, command parsing and dispatching navigation.js — navigation commands (up, cd, ls) commands/ csvToJson.js — csv-to-json command handler jsonToCsv.js — json-to-csv command handler count.js — count command handler hash.js — hash command handler encrypt.js — encrypt command handler decrypt.js — decrypt command handler - sortLarge.js — sort-large command handler + logStats.js — log-stats command handler workers/ - sortWorker.js — worker thread for sort-large command + logWorker.js — worker thread for log-stats command utils/ pathResolver.js — resolve paths relative to current working directory argParser.js — parse command line arguments @@ -294,8 +325,8 @@ src/ - Use `readline` module for interactive input - Use `stream.pipeline` (from `stream/promises`) to connect streams and handle errors properly - For CSV parsing in the Transform stream, handle the first line (headers) separately from data lines - For `json-to-csv`, you'll need to buffer the JSON input to parse it, but write the CSV output via a stream -- For `sort-large`, use `os.cpus().length` to determine the number of workers -- For merging sorted arrays, implement a k-way merge: compare the first element of each sorted chunk, pick the smallest, advance that chunk's pointer +- For `log-stats`, make sure chunks start/end on line boundaries to avoid partial log lines +- For merging stats, sum counters and merge path maps before computing `topPaths` - Always resolve file paths relative to the current working directory before performing operations - Use `path.resolve()` to combine current
working directory with relative paths - Use `process.cwd()` is NOT appropriate here - maintain your own current working directory variable diff --git a/assignments-v2/01b-data-processing-cli/score.md b/assignments-v2/01b-data-processing-cli/score.md index 28a6423..e221cda 100644 --- a/assignments-v2/01b-data-processing-cli/score.md +++ b/assignments-v2/01b-data-processing-cli/score.md @@ -27,11 +27,11 @@ ## Advanced Scope -- **+25** `sort-large` command works correctly: - - **+5** File is split into chunks equal to the number of CPU cores - - **+10** Each chunk is sorted in a separate Worker Thread - - **+10** Sorted chunks are merged correctly (k-way merge) - - **+5** Final output file contains all lines sorted alphabetically +- **+25** `log-stats` command works correctly: + - **+5** File is split into chunks equal to the number of CPU cores (line boundaries preserved) + - **+10** Each chunk is processed in a separate Worker Thread + - **+5** Partial stats are merged correctly (counters, maps, totals) + - **+5** Final output JSON matches the specification - **+15** Project structure follows the specification (separate files for navigation, commands, utilities, worker) - **+20** Interactive REPL implementation: - **+10** Maintains application state (current working directory) across commands diff --git a/assignments-v2/01b-data-processing-cli/scripts/generate-logs.js b/assignments-v2/01b-data-processing-cli/scripts/generate-logs.js new file mode 100644 index 0000000..2e9d5e1 --- /dev/null +++ b/assignments-v2/01b-data-processing-cli/scripts/generate-logs.js @@ -0,0 +1,90 @@ +#!/usr/bin/env node +'use strict'; + +const fs = require('node:fs'); +const path = require('node:path'); + +const args = process.argv.slice(2); +const getArg = (name, fallback) => { + const idx = args.indexOf(name); + if (idx === -1 || idx + 1 >= args.length) return fallback; + return args[idx + 1]; +}; + +const output = getArg('--output'); +const lines = Number(getArg('--lines', '100000')); +const seed = Number(getArg('--seed', '123456')); + +if (!output || !Number.isFinite(lines) || lines <= 0) { + process.stderr.write('Usage: node scripts/generate-logs.js --output --lines [--seed ]\n'); + process.exit(1); +} + +const levels = ['INFO', 'WARN', 'ERROR']; +const services = ['user-service', 'order-service', 'payment-service', 'search-service', 'email-service']; +const methods = ['GET', 'POST', 'PUT', 'DELETE']; +const paths = [ + '/api/users', + '/api/users/:id', + '/api/orders', + '/api/orders/:id', + '/api/payments', + '/api/search', + '/api/login', + '/api/logout', + '/api/health' +]; + +let state = seed >>> 0; +const rand = () => { + // LCG: deterministic pseudo-random generator + state = (1664525 * state + 1013904223) >>> 0; + return state / 0xffffffff; +}; + +const pick = (arr) => arr[Math.floor(rand() * arr.length)]; + +const start = Date.parse('2026-01-01T00:00:00.000Z'); +let current = start; + +const outPath = path.resolve(process.cwd(), output); +fs.mkdirSync(path.dirname(outPath), { recursive: true }); + +const stream = fs.createWriteStream(outPath, { encoding: 'utf8' }); + +let written = 0; +const writeBatch = () => { + let ok = true; + while (written < lines && ok) { + const dt = Math.floor(rand() * 5000); // up to 5s + current += dt; + const iso = new Date(current).toISOString(); + const level = pick(levels); + const service = pick(services); + const method = pick(methods); + const pathVal = pick(paths); + const statusBase = level === 'ERROR' ? 500 : level === 'WARN' ? 
400 : 200; + const status = statusBase + Math.floor(rand() * 50); + const responseTime = 5 + Math.floor(rand() * 2000); + const line = `${iso} ${level} ${service} ${status} ${responseTime} ${method} ${pathVal}\n`; + ok = stream.write(line); + written += 1; + } + + if (written < lines) { + stream.once('drain', writeBatch); + } else { + stream.end(); + } +}; + +stream.on('finish', () => { + process.stdout.write(`Generated ${lines} lines at ${outPath}\n`); +}); + +stream.on('error', (err) => { + process.stderr.write(`Failed to write logs: ${err.message}\n`); + process.exit(1); +}); + +writeBatch(); From 7edd680807857a7dac693637b886592a77d2537d Mon Sep 17 00:00:00 2001 From: Maksim Shylau Date: Mon, 9 Feb 2026 19:07:20 +0100 Subject: [PATCH 5/5] refactor: use utils api for cli args parsing --- .../scripts/generate-logs.js | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/assignments-v2/01b-data-processing-cli/scripts/generate-logs.js b/assignments-v2/01b-data-processing-cli/scripts/generate-logs.js index 2e9d5e1..0bfbeaf 100644 --- a/assignments-v2/01b-data-processing-cli/scripts/generate-logs.js +++ b/assignments-v2/01b-data-processing-cli/scripts/generate-logs.js @@ -3,17 +3,19 @@ const fs = require('node:fs'); const path = require('node:path'); +const { parseArgs } = require('node:util'); -const args = process.argv.slice(2); -const getArg = (name, fallback) => { - const idx = args.indexOf(name); - if (idx === -1 || idx + 1 >= args.length) return fallback; - return args[idx + 1]; -}; +const { values } = parseArgs({ + options: { + output: { type: 'string' }, + lines: { type: 'string', default: '100000' }, + seed: { type: 'string', default: '123456' } + } +}); -const output = getArg('--output'); -const lines = Number(getArg('--lines', '100000')); -const seed = Number(getArg('--seed', '123456')); +const output = values.output; +const lines = Number(values.lines); +const seed = Number(values.seed); if (!output || !Number.isFinite(lines) || lines <= 0) { process.stderr.write('Usage: node scripts/generate-logs.js --output --lines [--seed ]\n');