Skip to content

Commit a42b603

Browse files
committed
feat: Add on-the-fly filtering
1 parent 666da13 commit a42b603

File tree

3 files changed

+81
-24
lines changed

3 files changed

+81
-24
lines changed

README.md

Lines changed: 42 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,19 @@
11
# jsonarrayfs <img src="https://img.shields.io/npm/dm/jsonarrayfs" />
22

3-
jsonarrayfs is a Node.js library designed to facilitate efficient handling of JSON array data stored in files. It offers functionality for streaming JSON array elements in batches, allowing for reduced memory consumption when working with large datasets. Additionally, jsonarrayfs provides a method for appending new data to existing JSON array files without loading the entire dataset into memory.
3+
"jsonarrayfs" is a Node.js library crafted for robust and memory-efficient management of massive JSON array files. It enables seamless handling of JSON arrays without the need to load the entire file into memory, making it perfect for efficiently managing large datasets without overwhelming system resources.
4+
5+
## Key Features
6+
7+
- **Stream Processing**: Read a JSON array in manageable chunks (e.g., 50k elements at a time) using a stream.
8+
- **On-the-Fly Filtering**: Apply a filter to the stream to fetch only the relevant data, further reducing the amount of data you handle.
9+
- **Direct Appends**: Append new elements directly to the JSON array file, avoiding unnecessary loading, modification and rewriting.
10+
11+
## Benefits
12+
13+
- **Memory Optimization**: Process JSON array files with minimal memory usage, making it ideal for resource-constrained environments.
14+
- **Handles Large Datasets**: Efficiently manage massive JSON array files without memory limitations.
15+
- **Improved Performance**: Faster processing times due to efficient streaming, filtering and appending capabilities.
16+
- **Enhanced Scalability**: Scales seamlessly with growing datasets, ensuring smooth performance.
417

518
## Installation
619

@@ -10,35 +23,54 @@ To install jsonarrayfs, use:
1023
npm install jsonarrayfs
1124
```
1225

13-
## Features
26+
## Usage
1427

15-
- Stream JSON array elements:
28+
- Stream Processing:
1629

17-
```js
30+
```ts
1831
import { createReadStream } from "jsonarrayfs";
1932

2033
// Create a streamer to read JSON array elements from a file
21-
const streamer = await createReadStream("./data.json", { encoding: 'utf-8' });
34+
const streamer = await createReadStream("./data.json", { encoding: "utf-8" });
2235

2336
// Stream JSON array elements in batches of 100
2437
for await (const chunk of streamer.stream(100)) {
2538
// Your processing logic here
2639
}
2740
```
2841

42+
- On-the-Fly Filtering:
43+
44+
```ts
45+
import { createReadStream } from "jsonarrayfs";
46+
47+
const streamer = await createReadStream<{ offer: boolean; price: number }>(
48+
"./data.json",
49+
{ encoding: "utf-8" }
50+
);
51+
52+
// Add a filter to the stream to fetch only the relevant elements
53+
for await (const chunk of streamer.stream(
54+
100,
55+
(element) => element.price < 500 || element.offer
56+
)) {
57+
// Your processing logic here
58+
}
59+
```
60+
2961
- Append data to existing JSON array:
3062

31-
```js
63+
```ts
3264
import { appendFile } from "jsonarrayfs";
3365

3466
// Simulate new data to append
3567
const newData = [
36-
{ id: 1, name: "JavaScript" },
37-
{ id: 2, name: "Go" }
68+
{ id: 1, name: "Earth", price: 1000, offer: true },
69+
{ id: 2, name: "Moon", price: 500, offer: false },
3870
];
3971

4072
// Append new data to the existing JSON array file
41-
await appendFile("./data.json", 'utf-8', ...newData);
73+
await appendFile("./data.json", "utf-8", ...newData);
4274
```
4375

4476
## Contributing
@@ -47,4 +79,4 @@ Pull requests are welcome. For major changes, please open an issue first to disc
4779

4880
## License
4981

50-
[MIT License ](https://github.com/mochatek/jsonarrayfs/blob/main/LICENSE)
82+
[MIT License](https://github.com/mochatek/jsonarrayfs/blob/main/LICENSE)

package.json

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "jsonarrayfs",
33
"version": "1.0.1",
4-
"description": "A Node.js library to stream JSON array elements from files in custom-sized chunks and effortlessly append data to existing arrays.",
4+
"description": "Efficiently handle JSON array files in Node.js with minimal memory usage. Perfect for efficiently processing large data volumes without worrying about memory limitations.",
55
"exports": {
66
".": {
77
"require": "./dist/index.js",
@@ -21,10 +21,17 @@
2121
"keywords": [
2222
"json",
2323
"stream",
24-
"json array",
2524
"chunk",
2625
"batch",
27-
"append json"
26+
"json file handling",
27+
"json array",
28+
"json stream",
29+
"stream filter",
30+
"json append",
31+
"stream processing",
32+
"on the fly filtering",
33+
"direct appends",
34+
"memory efficient"
2835
],
2936
"author": "Akash S Panickar",
3037
"license": "MIT",

src/modules/JsonArrayStreamer.ts

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ class JsonArrayStreamer<T> {
88
private rootDetected: boolean;
99
private elementDetected: boolean;
1010
private elementType: ElementType;
11-
private elementParser: ((char: string) => void) | null;
11+
private elementParser:
12+
| ((char: string, filter?: (element: T) => boolean) => void)
13+
| null;
1214
private elementEnclosureCount: number;
1315
private isCharInsideQuotes: boolean;
1416
private isCharEscaped: boolean;
@@ -39,6 +41,16 @@ class JsonArrayStreamer<T> {
3941
}
4042
}
4143

44+
private addToResult(element: T, filter?: (element: T) => boolean) {
45+
if (!filter) {
46+
this.resultBuffer.push(element);
47+
} else {
48+
try {
49+
if (filter(element)) this.resultBuffer.push(element);
50+
} catch (_) {}
51+
}
52+
}
53+
4254
private resetParser() {
4355
this.elementDetected = false;
4456
this.elementType = "others";
@@ -49,13 +61,13 @@ class JsonArrayStreamer<T> {
4961
this.chunkBuffer = "";
5062
}
5163

52-
private stringElementParser(char: string) {
64+
private stringElementParser(char: string, filter?: (element: T) => boolean) {
5365
this.chunkBuffer = `${this.chunkBuffer}${char}`;
5466

5567
if (char === CHARACTER.QUOTE) {
5668
if (this.isCharInsideQuotes && !this.isCharEscaped) {
5769
const element: T = JSON.parse(this.chunkBuffer);
58-
this.resultBuffer.push(element);
70+
this.addToResult(element, filter);
5971
this.resetParser();
6072
} else if (this.chunkBuffer === CHARACTER.QUOTE) {
6173
this.isCharInsideQuotes = true;
@@ -69,17 +81,23 @@ class JsonArrayStreamer<T> {
6981
}
7082
}
7183

72-
private primitiveElementParser(char: string) {
84+
private primitiveElementParser(
85+
char: string,
86+
filter?: (element: T) => boolean
87+
) {
7388
if ([CHARACTER.COMMA, CHARACTER.BRACKET.CLOSE].includes(char)) {
7489
const element: T = JSON.parse(this.chunkBuffer);
75-
this.resultBuffer.push(element);
90+
this.addToResult(element, filter);
7691
this.resetParser();
7792
} else {
7893
this.chunkBuffer = `${this.chunkBuffer}${char}`;
7994
}
8095
}
8196

82-
private containerElementParser(char: string) {
97+
private containerElementParser(
98+
char: string,
99+
filter?: (element: T) => boolean
100+
) {
83101
const ENCLOSURE =
84102
this.elementType === "array" ? CHARACTER.BRACKET : CHARACTER.BRACE;
85103

@@ -92,7 +110,7 @@ class JsonArrayStreamer<T> {
92110

93111
if (this.elementEnclosureCount === 0) {
94112
const element: T = JSON.parse(this.chunkBuffer);
95-
this.resultBuffer.push(element);
113+
this.addToResult(element, filter);
96114
this.resetParser();
97115
}
98116
} else if (this.chunkBuffer.length) {
@@ -108,7 +126,7 @@ class JsonArrayStreamer<T> {
108126
}
109127
}
110128

111-
public async *stream<T>(chunkSize: number) {
129+
public async *stream(chunkSize: number, filter?: (element: T) => boolean) {
112130
for await (const chunk of this.chunkGenerator()) {
113131
for (let char of chunk) {
114132
if (!this.rootDetected) {
@@ -142,7 +160,7 @@ class JsonArrayStreamer<T> {
142160
}
143161
}
144162

145-
this.elementParser(char);
163+
this.elementParser(char, filter);
146164

147165
if (this.resultBuffer.length === chunkSize) {
148166
if (!this.readStream?.closed) this.readStream?.pause();
@@ -157,8 +175,8 @@ class JsonArrayStreamer<T> {
157175
this.readStream = null;
158176

159177
if (this.chunkBuffer.length) {
160-
const element = JSON.parse(this.chunkBuffer);
161-
this.resultBuffer.push(element);
178+
const element: T = JSON.parse(this.chunkBuffer);
179+
this.addToResult(element, filter);
162180
this.resetParser();
163181
}
164182
if (this.resultBuffer.length) {

0 commit comments

Comments
 (0)