diff --git a/core/embedjs-interfaces/package.json b/core/embedjs-interfaces/package.json index dd4f63a4..e11cbe91 100644 --- a/core/embedjs-interfaces/package.json +++ b/core/embedjs-interfaces/package.json @@ -1,6 +1,6 @@ { "name": "@llm-tools/embedjs-interfaces", - "version": "0.1.26", + "version": "0.1.27", "description": "Interfaces for extending the embedjs ecosystem", "dependencies": { "@langchain/core": "^0.3.26", diff --git a/core/embedjs-interfaces/src/interfaces/base-loader.ts b/core/embedjs-interfaces/src/interfaces/base-loader.ts index 3755b334..824bf3f6 100644 --- a/core/embedjs-interfaces/src/interfaces/base-loader.ts +++ b/core/embedjs-interfaces/src/interfaces/base-loader.ts @@ -4,6 +4,7 @@ import { EventEmitter } from 'node:events'; import { BaseStore } from './base-store.js'; import { LoaderChunk, UnfilteredLoaderChunk } from '../types.js'; +import { BaseModel } from './base-model.js'; export abstract class BaseLoader< MetadataTemplate extends Record = Record, @@ -39,13 +40,16 @@ export abstract class BaseLoader< createDebugMessages('embedjs:loader:BaseLoader')(`New loader class initalized with key ${uniqueId}`); } - // eslint-disable-next-line @typescript-eslint/no-empty-function - public async init(): Promise {} - public getUniqueId(): string { return this.uniqueId; } + // eslint-disable-next-line @typescript-eslint/no-empty-function + public async init(): Promise {} + + // eslint-disable-next-line @typescript-eslint/no-empty-function, @typescript-eslint/no-unused-vars + public injectModel(_model: BaseModel) {} + private async recordLoaderInCache(chunksProcessed: number) { if (!BaseLoader.store) return; diff --git a/core/embedjs-interfaces/src/interfaces/base-model.ts b/core/embedjs-interfaces/src/interfaces/base-model.ts index 1ce8a940..84b9511b 100644 --- a/core/embedjs-interfaces/src/interfaces/base-model.ts +++ b/core/embedjs-interfaces/src/interfaces/base-model.ts @@ -135,5 +135,17 @@ export abstract class BaseModel { }; } + public async 
simpleQuery(messages: (AIMessage | SystemMessage | HumanMessage)[]) { + const response = await this.runQuery(messages); + + return { + result: response.result, + tokenUse: { + inputTokens: response.tokenUse?.inputTokens ?? 'UNKNOWN', + outputTokens: response.tokenUse?.outputTokens ?? 'UNKNOWN', + }, + }; + } + protected abstract runQuery(messages: (AIMessage | SystemMessage | HumanMessage)[]): Promise; } diff --git a/core/embedjs-utils/package.json b/core/embedjs-utils/package.json index 40466744..c4dfd376 100644 --- a/core/embedjs-utils/package.json +++ b/core/embedjs-utils/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-utils", - "version": "0.1.26", + "version": "0.1.27", "description": "Useful util functions when extending the embedjs ecosystem", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26" + "@llm-tools/embedjs-interfaces": "0.1.27" }, "type": "module", "main": "./src/index.js", diff --git a/core/embedjs-utils/src/util/stream.ts b/core/embedjs-utils/src/util/stream.ts index cbb2f079..c3cdc5a0 100644 --- a/core/embedjs-utils/src/util/stream.ts +++ b/core/embedjs-utils/src/util/stream.ts @@ -1,6 +1,6 @@ import { Stream } from 'stream'; -export async function stream2buffer(stream: Stream): Promise { +export async function streamToBuffer(stream: Stream): Promise { return new Promise((resolve, reject) => { const _buf = Array(); @@ -10,6 +10,16 @@ export async function stream2buffer(stream: Stream): Promise { }); } +export async function streamToString(stream: Stream): Promise { + return new Promise((resolve, reject) => { + const chunks = []; + + stream.on('data', (chunk) => chunks.push(Buffer.from(chunk))); + stream.on('end', () => resolve(Buffer.concat(chunks).toString('utf8'))); + stream.on('error', (err) => reject(`error converting stream - ${err}`)); + }); +} + export function contentTypeToMimeType(contentType: string) { if (!contentType) return contentType; if (contentType.includes(';')) return contentType.split(';')[0]; diff --git 
a/core/embedjs/package.json b/core/embedjs/package.json index f8ee5480..086033f2 100644 --- a/core/embedjs/package.json +++ b/core/embedjs/package.json @@ -1,12 +1,12 @@ { "type": "module", "name": "@llm-tools/embedjs", - "version": "0.1.26", + "version": "0.1.27", "description": "A NodeJS RAG framework to easily work with LLMs and custom datasets", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0", "langchain": "^0.3.8", "md5": "^2.3.0", diff --git a/core/embedjs/src/core/rag-application.ts b/core/embedjs/src/core/rag-application.ts index 5e1a78bd..9c102cf0 100644 --- a/core/embedjs/src/core/rag-application.ts +++ b/core/embedjs/src/core/rag-application.ts @@ -164,6 +164,7 @@ export class RAGApplication { private async _addLoader(loader: BaseLoader, forceReload: boolean): Promise { const uniqueId = loader.getUniqueId(); this.debug('Exploring loader', uniqueId); + if (this.model) loader.injectModel(this.model); if (this.store && (await this.store.hasLoaderMetadata(uniqueId))) { if (forceReload) { diff --git a/core/embedjs/src/util/mime.ts b/core/embedjs/src/util/mime.ts index 5ee579d3..2b223d69 100644 --- a/core/embedjs/src/util/mime.ts +++ b/core/embedjs/src/util/mime.ts @@ -99,6 +99,15 @@ export async function createLoaderFromMimeType(loaderData: string, mimeType: str createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported MarkdownLoader'); return new MarkdownLoader({ filePathOrUrl: loaderData }); } + case 'image/png': + case 'image/jpeg': { + const { ImageLoader } = await import('@llm-tools/embedjs-loader-image').catch(() => { + throw new Error('Package `@llm-tools/embedjs-loader-image` needs to be installed to load images'); + }); + createDebugMessages('embedjs:util:createLoaderFromMimeType')('Dynamically imported ImageLoader'); + return new 
ImageLoader({ filePathOrUrl: loaderData, mime: mimeType }); + } + case undefined: throw new Error(`MIME type could not be detected. Please file an issue if you think this is a bug.`); default: diff --git a/databases/embedjs-astra/package.json b/databases/embedjs-astra/package.json index eadc0786..92bcf55d 100644 --- a/databases/embedjs-astra/package.json +++ b/databases/embedjs-astra/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-astradb", - "version": "0.1.26", + "version": "0.1.27", "description": "Add AstraDB support to embedjs", "dependencies": { "@datastax/astra-db-ts": "^1.5.0", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/databases/embedjs-cosmos/package.json b/databases/embedjs-cosmos/package.json index ee96a20f..5e704e2a 100644 --- a/databases/embedjs-cosmos/package.json +++ b/databases/embedjs-cosmos/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-cosmos", - "version": "0.1.26", + "version": "0.1.27", "description": "Add CosmosDB support to embedjs", "dependencies": { "@azure/cosmos": "^4.2.0", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/databases/embedjs-hnswlib/package.json b/databases/embedjs-hnswlib/package.json index 97d22fa7..13e183d4 100644 --- a/databases/embedjs-hnswlib/package.json +++ b/databases/embedjs-hnswlib/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-hnswlib", - "version": "0.1.26", + "version": "0.1.27", "description": "Add HNSWLib support to embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "hnswlib-node": "^3.0.0" }, diff --git a/databases/embedjs-lancedb/package.json b/databases/embedjs-lancedb/package.json index bf8f33b8..2a00da48 100644 --- a/databases/embedjs-lancedb/package.json +++ 
b/databases/embedjs-lancedb/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-lancedb", - "version": "0.1.26", + "version": "0.1.27", "description": "Add LanceDb support to embedjs", "dependencies": { "@lancedb/lancedb": "^0.14.1", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "compute-cosine-similarity": "^1.1.0", "debug": "^4.4.0" }, diff --git a/databases/embedjs-libsql/package.json b/databases/embedjs-libsql/package.json index 5b3627db..57e2b8dc 100644 --- a/databases/embedjs-libsql/package.json +++ b/databases/embedjs-libsql/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-libsql", - "version": "0.1.26", + "version": "0.1.27", "description": "Add LibSQL support to embedjs", "dependencies": { "@libsql/client": "^0.14.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/databases/embedjs-libsql/src/libsql-db.ts b/databases/embedjs-libsql/src/libsql-db.ts index 83243ac3..8d4d3946 100644 --- a/databases/embedjs-libsql/src/libsql-db.ts +++ b/databases/embedjs-libsql/src/libsql-db.ts @@ -49,7 +49,7 @@ export class LibSqlDb implements BaseVectorDatabase { async similaritySearch(query: number[], k: number): Promise { const statement = `SELECT id, pageContent, uniqueLoaderId, source, metadata, - vector_distance_cos(vector, vector32('[${query.join(',')}]')) + vector_distance_cos(vector, vector32('[${query.join(',')}]')) as distance FROM ${this.tableName} ORDER BY vector_distance_cos(vector, vector32('[${query.join(',')}]')) ASC LIMIT ${k};`; @@ -63,7 +63,7 @@ export class LibSqlDb implements BaseVectorDatabase { return { metadata, pageContent: result.pageContent.toString(), - score: 1, + score: 1 - result.distance, }; }); } diff --git a/databases/embedjs-lmdb/package.json b/databases/embedjs-lmdb/package.json index d01cbaa0..334bb92e 
100644 --- a/databases/embedjs-lmdb/package.json +++ b/databases/embedjs-lmdb/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-lmdb", - "version": "0.1.26", + "version": "0.1.27", "description": "Add LMDB support to embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "lmdb": "^3.2.0" }, diff --git a/databases/embedjs-mongodb/package.json b/databases/embedjs-mongodb/package.json index e0dc21c7..3d11ec12 100644 --- a/databases/embedjs-mongodb/package.json +++ b/databases/embedjs-mongodb/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-mongodb", - "version": "0.1.26", + "version": "0.1.27", "description": "Add MongoDB support to embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "mongodb": "^6.12.0" }, diff --git a/databases/embedjs-pinecone/package.json b/databases/embedjs-pinecone/package.json index 61f577b1..3dc141a8 100644 --- a/databases/embedjs-pinecone/package.json +++ b/databases/embedjs-pinecone/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-pinecone", - "version": "0.1.26", + "version": "0.1.27", "description": "Add Pinecone support to embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "@pinecone-database/pinecone": "^4.0.0", "debug": "^4.4.0" }, diff --git a/databases/embedjs-qdrant/package.json b/databases/embedjs-qdrant/package.json index 92c1f23d..39e6f1ff 100644 --- a/databases/embedjs-qdrant/package.json +++ b/databases/embedjs-qdrant/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-qdrant", - "version": "0.1.26", + "version": "0.1.27", "description": "Add Qdrant support to embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "@qdrant/js-client-rest": "^1.12.0", "debug": "^4.4.0", "uuid": "^11.0.3" diff --git 
a/databases/embedjs-redis/package.json b/databases/embedjs-redis/package.json index 6cc8bb2a..0f110558 100644 --- a/databases/embedjs-redis/package.json +++ b/databases/embedjs-redis/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-redis", - "version": "0.1.26", + "version": "0.1.27", "description": "Add Redis support to embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "ioredis": "^5.4.2" }, "type": "module", diff --git a/databases/embedjs-weaviate/package.json b/databases/embedjs-weaviate/package.json index 5f8de287..aa18b1c2 100644 --- a/databases/embedjs-weaviate/package.json +++ b/databases/embedjs-weaviate/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-weaviate", - "version": "0.1.26", + "version": "0.1.27", "description": "Add Weaviate support to embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "compute-cosine-similarity": "^1.1.0", "debug": "^4.4.0", "weaviate-ts-client": "^2.2.0" diff --git a/loaders/embedjs-loader-confluence/package.json b/loaders/embedjs-loader-confluence/package.json index c36aa926..c4b8e435 100644 --- a/loaders/embedjs-loader-confluence/package.json +++ b/loaders/embedjs-loader-confluence/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-loader-confluence", - "version": "0.1.26", + "version": "0.1.27", "description": "Confluence loader for embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-loader-web": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-loader-web": "0.1.27", "confluence.js": "^1.7.4", "debug": "^4.4.0", "md5": "^2.3.0" diff --git a/loaders/embedjs-loader-confluence/src/confluence-loader.ts b/loaders/embedjs-loader-confluence/src/confluence-loader.ts index 7dd7345d..0e2012a7 100644 --- a/loaders/embedjs-loader-confluence/src/confluence-loader.ts +++ 
b/loaders/embedjs-loader-confluence/src/confluence-loader.ts @@ -21,7 +21,7 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, { confluenceToken, chunkSize, chunkOverlap, - options, + filterOptions, }: { spaceName: string; confluenceBaseUrl?: string; @@ -29,7 +29,7 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, { confluenceToken?: string; chunkSize?: number; chunkOverlap?: number; - options?: { + filterOptions?: { lastUpdatedFilter: Date; }; }) { @@ -37,7 +37,7 @@ export class ConfluenceLoader extends BaseLoader<{ type: 'ConfluenceLoader' }, { this.spaceName = spaceName; this.confluenceBaseUrl = confluenceBaseUrl ?? process.env.CONFLUENCE_BASE_URL; - this.lastUpdatedFilter = options?.lastUpdatedFilter ?? null; + this.lastUpdatedFilter = filterOptions?.lastUpdatedFilter ?? null; this.confluence = new ConfluenceClient({ host: this.confluenceBaseUrl, diff --git a/loaders/embedjs-loader-csv/package.json b/loaders/embedjs-loader-csv/package.json index e04d7ea8..e6db1f7f 100644 --- a/loaders/embedjs-loader-csv/package.json +++ b/loaders/embedjs-loader-csv/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-loader-csv", - "version": "0.1.26", + "version": "0.1.27", "description": "CSV loader for embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "csv-parse": "^5.6.0", "debug": "^4.4.0", "md5": "^2.3.0" diff --git a/loaders/embedjs-loader-csv/src/csv-loader.ts b/loaders/embedjs-loader-csv/src/csv-loader.ts index ea98c5f8..b9f73e9a 100644 --- a/loaders/embedjs-loader-csv/src/csv-loader.ts +++ b/loaders/embedjs-loader-csv/src/csv-loader.ts @@ -4,7 +4,7 @@ import fs from 'node:fs'; import md5 from 'md5'; import { BaseLoader } from '@llm-tools/embedjs-interfaces'; -import { cleanString, getSafe, isValidURL, stream2buffer } from '@llm-tools/embedjs-utils'; 
+import { cleanString, getSafe, isValidURL, streamToBuffer } from '@llm-tools/embedjs-utils'; export class CsvLoader extends BaseLoader<{ type: 'CsvLoader' }> { private readonly debug = createDebugMessages('embedjs:loader:CsvLoader'); @@ -33,7 +33,7 @@ export class CsvLoader extends BaseLoader<{ type: 'CsvLoader' }> { override async *getUnfilteredChunks() { const buffer = this.isUrl ? (await getSafe(this.filePathOrUrl, { format: 'buffer' })).body - : await stream2buffer(fs.createReadStream(this.filePathOrUrl)); + : await streamToBuffer(fs.createReadStream(this.filePathOrUrl)); this.debug('CsvParser stream created'); const parser = parse(buffer, this.csvParseOptions); diff --git a/loaders/embedjs-loader-image/README.md b/loaders/embedjs-loader-image/README.md new file mode 100644 index 00000000..740cf0a9 --- /dev/null +++ b/loaders/embedjs-loader-image/README.md @@ -0,0 +1,8 @@ +# embedjs-loader-image + +

+NPM Version +License +

+ +This package extends and offers additional functionality to [embedJs](https://www.npmjs.com/package/@llm-tools/embedjs). Refer to the documentation there for more details. diff --git a/loaders/embedjs-loader-image/eslint.config.js b/loaders/embedjs-loader-image/eslint.config.js new file mode 100644 index 00000000..4c3c47f6 --- /dev/null +++ b/loaders/embedjs-loader-image/eslint.config.js @@ -0,0 +1,20 @@ +import baseConfig from '../../eslint.config.js'; +import parser from '@nx/eslint-plugin'; + +export default [ + ...baseConfig, + { + files: ['**/*.json'], + rules: { + '@nx/dependency-checks': [ + 'error', + { + ignoredFiles: ['{projectRoot}/eslint.config.{js,cjs,mjs}'], + }, + ], + }, + languageOptions: { + parser, + }, + }, +]; diff --git a/loaders/embedjs-loader-image/package.json b/loaders/embedjs-loader-image/package.json new file mode 100644 index 00000000..a2f66cab --- /dev/null +++ b/loaders/embedjs-loader-image/package.json @@ -0,0 +1,43 @@ +{ + "name": "@llm-tools/embedjs-loader-image", + "version": "0.1.27", + "description": "Load images into embedjs", + "dependencies": { + "@langchain/core": "^0.3.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", + "debug": "^4.4.0", + "md5": "^2.3.0", + "mime": "^4.0.6", + "stream-mime-type": "^2.0.0" + }, + "type": "module", + "main": "./src/index.js", + "license": "Apache-2.0", + "publishConfig": { + "access": "public" + }, + "keywords": [ + "llm", + "ai", + "gpt3", + "chain", + "prompt", + "prompt engineering", + "chatgpt", + "machine learning", + "ml", + "anthropic", + "embeddings", + "vectorstores" + ], + "author": "K V Adhityan", + "bugs": { + "url": "https://github.com/llm-tools/embedjs/issues" + }, + "homepage": "https://github.com/llm-tools/embedjs#readme", + "repository": { + "type": "git", + "url": "git+https://github.com/llm-tools/embedjs.git" + } +} diff --git a/loaders/embedjs-loader-image/project.json b/loaders/embedjs-loader-image/project.json new file mode 
100644
index 00000000..b54dd212
--- /dev/null
+++ b/loaders/embedjs-loader-image/project.json
@@ -0,0 +1,29 @@
+{
+    "name": "embedjs-loader-image",
+    "$schema": "../../node_modules/nx/schemas/project-schema.json",
+    "sourceRoot": "loaders/embedjs-loader-image/src",
+    "projectType": "library",
+    "tags": [],
+    "targets": {
+        "build": {
+            "executor": "@nx/js:tsc",
+            "outputs": ["{options.outputPath}"],
+            "options": {
+                "outputPath": "dist/esm/embedjs-loader-image",
+                "main": "loaders/embedjs-loader-image/src/index.ts",
+                "tsConfig": "loaders/embedjs-loader-image/tsconfig.lib.json",
+                "assets": ["loaders/embedjs-loader-image/*.md"]
+            }
+        },
+        "build-cjs": {
+            "executor": "@nx/js:tsc",
+            "outputs": ["{options.outputPath}"],
+            "dependsOn": ["^build-cjs"],
+            "options": {
+                "outputPath": "dist/cjs/embedjs-loader-image",
+                "main": "loaders/embedjs-loader-image/src/index.ts",
+                "tsConfig": "loaders/embedjs-loader-image/tsconfig.cjs.json"
+            }
+        }
+    }
+}
diff --git a/loaders/embedjs-loader-image/src/image-loader.ts b/loaders/embedjs-loader-image/src/image-loader.ts
new file mode 100644
index 00000000..491a1b2f
--- /dev/null
+++ b/loaders/embedjs-loader-image/src/image-loader.ts
@@ -0,0 +1,117 @@
+import { HumanMessage } from '@langchain/core/messages';
+import { getMimeType } from 'stream-mime-type';
+import createDebugMessages from 'debug';
+import fs from 'node:fs';
+import md5 from 'md5';
+
+import { BaseLoader, BaseModel } from '@llm-tools/embedjs-interfaces';
+import { cleanString, contentTypeToMimeType, getSafe, isValidURL, streamToBuffer } from '@llm-tools/embedjs-utils';
+
+/**
+ * Loader that asks a chat model to describe an image and yields the description as a single chunk.
+ *
+ * The image bytes are embedded in the prompt as a base64 data URL.
+ */
+export class ImageLoader extends BaseLoader<{ type: 'ImageLoader' }> {
+    private readonly debug = createDebugMessages('embedjs:loader:ImageLoader');
+    private readonly filePathOrUrl: string;
+    private readonly isUrl: boolean;
+    private captionModel: BaseModel;
+    private mime?: string;
+
+    /**
+     * @param filePathOrUrl - Local path or URL of the image to describe.
+     * @param captionModel - Model that describes the image; may instead be supplied later via `injectModel`.
+     * @param mime - MIME type of the image; auto-detected on load when omitted.
+     */
+    constructor({
+        filePathOrUrl,
+        captionModel,
+        mime,
+    }: {
+        filePathOrUrl: string;
+        captionModel?: BaseModel;
+        mime?: string;
+    }) {
+        super(`ImageLoader_${md5(filePathOrUrl)}`, { filePathOrUrl }, 100000, 300);
+
+        this.mime = mime;
+        this.captionModel = captionModel;
+        this.filePathOrUrl = filePathOrUrl;
+        this.isUrl = isValidURL(filePathOrUrl) ? true : false;
+    }
+
+    /** Uses `model` for captioning unless a caption model was already set. */
+    public override injectModel(model: BaseModel) {
+        if (!this.captionModel) {
+            this.captionModel = model;
+        }
+    }
+
+    /**
+     * Sends the image to the caption model and yields its cleaned answer as one chunk.
+     *
+     * @throws Error if no caption model was provided or injected.
+     */
+    override async *getUnfilteredChunks() {
+        if (!this.captionModel) throw new Error('No model available to describe image');
+
+        if (!this.mime) {
+            this.debug('Mime type not provided; starting auto-detect');
+
+            if (this.isUrl) {
+                const response = await getSafe(this.filePathOrUrl, { headers: { 'Accept-Encoding': '' } });
+                const stream = response.body as unknown as NodeJS.ReadableStream;
+                this.mime = (await getMimeType(stream, { strict: true })).mime;
+
+                if (!this.mime) {
+                    // Content sniffing produced no type; fall back to the response's content-type header
+                    this.mime = contentTypeToMimeType(response.headers.get('content-type'));
+                    this.debug(`Using type '${this.mime}' from content-type header`);
+                }
+            } else {
+                const stream = fs.createReadStream(this.filePathOrUrl);
+                this.mime = (await getMimeType(stream)).mime;
+                stream.destroy();
+            }
+        }
+
+        this.debug(`Image stream detected type '${this.mime}'`);
+        // Read raw bytes: decoding an image as UTF-8 text corrupts it, and `btoa` throws on
+        // characters outside Latin-1, so the data URL must be built from a binary buffer.
+        const imageBytes = this.isUrl
+            ? (await getSafe(this.filePathOrUrl, { format: 'buffer' })).body
+            : await streamToBuffer(fs.createReadStream(this.filePathOrUrl));
+        // Buffer.from also accepts a plain Uint8Array, should the fetch helper return one
+        const imageBase64 = Buffer.from(imageBytes).toString('base64');
+
+        const message = new HumanMessage({
+            content: [
+                {
+                    type: 'text',
+                    text: 'what does this image contain?',
+                },
+                {
+                    type: 'image_url',
+                    image_url: {
+                        url: `data:${this.mime};base64,${imageBase64}`,
+                    },
+                },
+            ],
+        });
+
+        this.debug('Asking LLM to describe image');
+        const response = await this.captionModel.simpleQuery([message]);
+        this.debug('LLM describes image as', response.result);
+
+        yield {
+            pageContent: cleanString(response.result),
+            metadata: {
+                type: 'ImageLoader' as const,
+                source: this.filePathOrUrl,
+            },
+        };
+
+        this.debug(`Loaded image details for filePathOrUrl '${this.filePathOrUrl}'`);
+    }
+}
diff --git a/loaders/embedjs-loader-image/src/index.ts b/loaders/embedjs-loader-image/src/index.ts
new file mode 100644
index 00000000..39be6c94
--- /dev/null
+++ b/loaders/embedjs-loader-image/src/index.ts
@@ -0,0 +1 @@
+export { ImageLoader } from './image-loader.js';
diff --git a/loaders/embedjs-loader-image/tsconfig.cjs.json b/loaders/embedjs-loader-image/tsconfig.cjs.json
new file mode 100644
index 00000000..1be21d0d
--- /dev/null
+++ b/loaders/embedjs-loader-image/tsconfig.cjs.json
@@ -0,0 +1,7 @@
+{
+    "extends": "./tsconfig.lib.json",
+    "compilerOptions": {
+        "module": "commonjs",
+        "moduleResolution": "Node10"
+    }
+}
diff --git a/loaders/embedjs-loader-image/tsconfig.json b/loaders/embedjs-loader-image/tsconfig.json
new file mode 100644
index 00000000..eeb778bc
--- /dev/null
+++ b/loaders/embedjs-loader-image/tsconfig.json
@@ -0,0 +1,26 @@
+{
+    "extends": "../../tsconfig.base.json",
+    "compilerOptions": {
+        "target": "ES2022",
+        "lib": ["ES2022", "ES2022.Object"],
+        "module": "NodeNext",
+        "moduleResolution": "nodenext",
+        "esModuleInterop": true,
+        "declaration": true,
+        "noImplicitReturns": true,
+        "noFallthroughCasesInSwitch": true,
+        "noUnusedLocals": true,
+        "noUnusedParameters": true,
+
"useDefineForClassFields": true, + "strictPropertyInitialization": false, + "allowJs": false, + "strict": false + }, + "files": [], + "include": [], + "references": [ + { + "path": "./tsconfig.lib.json" + } + ] +} diff --git a/loaders/embedjs-loader-image/tsconfig.lib.json b/loaders/embedjs-loader-image/tsconfig.lib.json new file mode 100644 index 00000000..bdeb03cf --- /dev/null +++ b/loaders/embedjs-loader-image/tsconfig.lib.json @@ -0,0 +1,10 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "outDir": "../../dist/out-tsc", + "declaration": true, + "types": ["node"] + }, + "include": ["src/**/*.ts"], + "exclude": ["src/**/*.spec.ts", "src/**/*.test.ts"] +} diff --git a/loaders/embedjs-loader-markdown/package.json b/loaders/embedjs-loader-markdown/package.json index 4dd10d82..29e161a9 100644 --- a/loaders/embedjs-loader-markdown/package.json +++ b/loaders/embedjs-loader-markdown/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-loader-markdown", - "version": "0.1.26", + "version": "0.1.27", "description": "XML loader for embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-loader-web": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-loader-web": "0.1.27", "debug": "^4.4.0", "md5": "^2.3.0", "micromark": "^4.0.1", diff --git a/loaders/embedjs-loader-markdown/src/markdown-loader.ts b/loaders/embedjs-loader-markdown/src/markdown-loader.ts index f77b801b..99a50279 100644 --- a/loaders/embedjs-loader-markdown/src/markdown-loader.ts +++ b/loaders/embedjs-loader-markdown/src/markdown-loader.ts @@ -6,7 +6,7 @@ import fs from 'node:fs'; import md5 from 'md5'; import { BaseLoader } from '@llm-tools/embedjs-interfaces'; -import { getSafe, isValidURL, stream2buffer } from '@llm-tools/embedjs-utils'; +import { getSafe, isValidURL, streamToBuffer } from '@llm-tools/embedjs-utils'; import { WebLoader } from '@llm-tools/embedjs-loader-web'; export class MarkdownLoader extends 
BaseLoader<{ type: 'MarkdownLoader' }> { @@ -32,7 +32,7 @@ export class MarkdownLoader extends BaseLoader<{ type: 'MarkdownLoader' }> { override async *getUnfilteredChunks() { const buffer = this.isUrl ? (await getSafe(this.filePathOrUrl, { format: 'buffer' })).body - : await stream2buffer(fs.createReadStream(this.filePathOrUrl)); + : await streamToBuffer(fs.createReadStream(this.filePathOrUrl)); this.debug('MarkdownLoader stream created'); const result = micromark(buffer, { extensions: [gfm(), mdxJsx()], htmlExtensions: [gfmHtml()] }); diff --git a/loaders/embedjs-loader-msoffice/package.json b/loaders/embedjs-loader-msoffice/package.json index 0a47f24c..690eb531 100644 --- a/loaders/embedjs-loader-msoffice/package.json +++ b/loaders/embedjs-loader-msoffice/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-loader-msoffice", - "version": "0.1.26", + "version": "0.1.27", "description": "Word, PPT and Excel loader for embedjs", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "md5": "^2.3.0", "office-text-extractor": "^3.0.3" }, diff --git a/loaders/embedjs-loader-pdf/package.json b/loaders/embedjs-loader-pdf/package.json index f54cf93c..be82b4fb 100644 --- a/loaders/embedjs-loader-pdf/package.json +++ b/loaders/embedjs-loader-pdf/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-loader-pdf", - "version": "0.1.26", + "version": "0.1.27", "description": "PDF loader for embedjs", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "md5": "^2.3.0", "office-text-extractor": "^3.0.3" }, diff --git a/loaders/embedjs-loader-sitemap/package.json b/loaders/embedjs-loader-sitemap/package.json index 
d717391f..2cfc5f0a 100644 --- a/loaders/embedjs-loader-sitemap/package.json +++ b/loaders/embedjs-loader-sitemap/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-loader-sitemap", - "version": "0.1.26", + "version": "0.1.27", "description": "Sitemap recursive loader for embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-loader-web": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-loader-web": "0.1.27", "debug": "^4.4.0", "md5": "^2.3.0", "sitemapper": "^3.2.20" diff --git a/loaders/embedjs-loader-web/package.json b/loaders/embedjs-loader-web/package.json index 4532b161..957a573a 100644 --- a/loaders/embedjs-loader-web/package.json +++ b/loaders/embedjs-loader-web/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-loader-web", - "version": "0.1.26", + "version": "0.1.27", "description": "Web page loader for embedjs", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0", "html-to-text": "^9.0.5", "md5": "^2.3.0" diff --git a/loaders/embedjs-loader-xml/package.json b/loaders/embedjs-loader-xml/package.json index 6811b830..f2622633 100644 --- a/loaders/embedjs-loader-xml/package.json +++ b/loaders/embedjs-loader-xml/package.json @@ -1,9 +1,9 @@ { "name": "@llm-tools/embedjs-loader-xml", - "version": "0.1.26", + "version": "0.1.27", "description": "XML loader for embedjs", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "fast-xml-parser": "^4.5.1", "md5": "^2.3.0" diff --git a/loaders/embedjs-loader-xml/src/xml-loader.ts b/loaders/embedjs-loader-xml/src/xml-loader.ts index 49711315..b074e90d 100644 --- a/loaders/embedjs-loader-xml/src/xml-loader.ts +++ b/loaders/embedjs-loader-xml/src/xml-loader.ts @@ -4,7 +4,7 
@@ import fs from 'node:fs'; import md5 from 'md5'; import { BaseLoader } from '@llm-tools/embedjs-interfaces'; -import { cleanString, getSafe, isValidURL, stream2buffer } from '@llm-tools/embedjs-utils'; +import { cleanString, getSafe, isValidURL, streamToBuffer } from '@llm-tools/embedjs-utils'; export class XmlLoader extends BaseLoader<{ type: 'XmlLoader' }> { private readonly debug = createDebugMessages('embedjs:loader:XmlLoader'); @@ -33,7 +33,7 @@ export class XmlLoader extends BaseLoader<{ type: 'XmlLoader' }> { override async *getUnfilteredChunks() { const buffer = this.isUrl ? (await getSafe(this.filePathOrUrl, { format: 'buffer' })).body - : await stream2buffer(fs.createReadStream(this.filePathOrUrl)); + : await streamToBuffer(fs.createReadStream(this.filePathOrUrl)); this.debug('XmlLoader stream created'); const parsed = new XMLParser(this.xmlParseOptions).parse(buffer); diff --git a/loaders/embedjs-loader-youtube/package.json b/loaders/embedjs-loader-youtube/package.json index 307af4eb..89ef9a3a 100644 --- a/loaders/embedjs-loader-youtube/package.json +++ b/loaders/embedjs-loader-youtube/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-loader-youtube", - "version": "0.1.26", + "version": "0.1.27", "description": "Youtube transcript and channel recursive loader for embedjs", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0", "md5": "^2.3.0", "usetube": "^2.2.7", diff --git a/models/embedjs-anthropic/package.json b/models/embedjs-anthropic/package.json index 70fa7d56..afd4497e 100644 --- a/models/embedjs-anthropic/package.json +++ b/models/embedjs-anthropic/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-anthropic", - "version": "0.1.26", + "version": "0.1.27", "description": "Enable usage of Anthropic models with embedjs", "dependencies": 
{ "@langchain/anthropic": "^0.3.11", "@langchain/core": "^0.3.26", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/models/embedjs-cohere/package.json b/models/embedjs-cohere/package.json index eac3dcf7..bbc09a84 100644 --- a/models/embedjs-cohere/package.json +++ b/models/embedjs-cohere/package.json @@ -1,10 +1,10 @@ { "name": "@llm-tools/embedjs-cohere", - "version": "0.1.26", + "version": "0.1.27", "description": "Enable usage of Cohere models with embedjs", "dependencies": { "@langchain/cohere": "^0.3.2", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "cohere-ai": "^7.15.0" }, "type": "module", diff --git a/models/embedjs-huggingface/package.json b/models/embedjs-huggingface/package.json index f80c1833..f5fbc2c5 100644 --- a/models/embedjs-huggingface/package.json +++ b/models/embedjs-huggingface/package.json @@ -1,12 +1,12 @@ { "name": "@llm-tools/embedjs-huggingface", - "version": "0.1.26", + "version": "0.1.27", "description": "Enable usage of HuggingFace models with embedjs", "dependencies": { "@huggingface/inference": "^2.8.1", "@langchain/community": "^0.3.20", "@langchain/core": "^0.3.26", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/models/embedjs-llama-cpp/package.json b/models/embedjs-llama-cpp/package.json index cb262588..2c80c4ec 100644 --- a/models/embedjs-llama-cpp/package.json +++ b/models/embedjs-llama-cpp/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-llama-cpp", - "version": "0.1.26", + "version": "0.1.27", "description": "Enable usage of Node-Llama-Cpp with embedjs", "dependencies": { "@langchain/community": "^0.3.20", "@langchain/core": "^0.3.26", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "node-llama-cpp": "^3.3.1" }, diff --git 
a/models/embedjs-mistral/package.json b/models/embedjs-mistral/package.json index f3d784ea..a48150a3 100644 --- a/models/embedjs-mistral/package.json +++ b/models/embedjs-mistral/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-mistral", - "version": "0.1.26", + "version": "0.1.27", "description": "Enable usage of Mistral models with embedjs", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/mistralai": "^0.2.0", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/models/embedjs-ollama/package.json b/models/embedjs-ollama/package.json index 14995fca..ba51c0da 100644 --- a/models/embedjs-ollama/package.json +++ b/models/embedjs-ollama/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-ollama", - "version": "0.1.26", + "version": "0.1.27", "description": "Enable usage of Ollama with embedjs", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/ollama": "^0.1.4", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/models/embedjs-openai/package.json b/models/embedjs-openai/package.json index eb547038..0e7b427b 100644 --- a/models/embedjs-openai/package.json +++ b/models/embedjs-openai/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-openai", - "version": "0.1.26", + "version": "0.1.27", "description": "Enable usage of OpenAI models with embedjs", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/openai": "^0.3.16", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/models/embedjs-vertexai/package.json b/models/embedjs-vertexai/package.json index c5e706c9..31b3ed98 100644 --- a/models/embedjs-vertexai/package.json +++ b/models/embedjs-vertexai/package.json @@ -1,11 +1,11 @@ { "name": "@llm-tools/embedjs-vertexai", - "version": "0.1.26", + 
"version": "0.1.27", "description": "Enable usage of VertexAI models with embedjs", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/google-vertexai": "^0.1.5", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" }, "type": "module", diff --git a/package-lock.json b/package-lock.json index 5d829ee1..0407ecff 100644 --- a/package-lock.json +++ b/package-lock.json @@ -48,12 +48,12 @@ }, "core/embedjs": { "name": "@llm-tools/embedjs", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0", "langchain": "^0.3.8", "md5": "^2.3.0", @@ -68,7 +68,7 @@ }, "core/embedjs-interfaces": { "name": "@llm-tools/embedjs-interfaces", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/core": "^0.3.26", @@ -91,10 +91,10 @@ }, "core/embedjs-utils": { "name": "@llm-tools/embedjs-utils", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26" + "@llm-tools/embedjs-interfaces": "0.1.27" } }, "core/embedjs/node_modules/langchain": { @@ -190,92 +190,92 @@ }, "databases/embedjs-astra": { "name": "@llm-tools/embedjs-astradb", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@datastax/astra-db-ts": "^1.5.0", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, "databases/embedjs-cosmos": { "name": "@llm-tools/embedjs-cosmos", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@azure/cosmos": "^4.2.0", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, 
"databases/embedjs-hnswlib": { "name": "@llm-tools/embedjs-hnswlib", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "hnswlib-node": "^3.0.0" } }, "databases/embedjs-lancedb": { "name": "@llm-tools/embedjs-lancedb", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@lancedb/lancedb": "^0.14.1", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "compute-cosine-similarity": "^1.1.0", "debug": "^4.4.0" } }, "databases/embedjs-libsql": { "name": "@llm-tools/embedjs-libsql", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@libsql/client": "^0.14.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0" } }, "databases/embedjs-lmdb": { "name": "@llm-tools/embedjs-lmdb", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "lmdb": "^3.2.0" } }, "databases/embedjs-mongodb": { "name": "@llm-tools/embedjs-mongodb", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "mongodb": "^6.12.0" } }, "databases/embedjs-pinecone": { "name": "@llm-tools/embedjs-pinecone", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "@pinecone-database/pinecone": "^4.0.0", "debug": "^4.4.0" } }, "databases/embedjs-qdrant": { "name": "@llm-tools/embedjs-qdrant", - "version": "0.1.26", + 
"version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "@qdrant/js-client-rest": "^1.12.0", "debug": "^4.4.0", "uuid": "^11.0.3" @@ -295,19 +295,19 @@ }, "databases/embedjs-redis": { "name": "@llm-tools/embedjs-redis", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "ioredis": "^5.4.2" } }, "databases/embedjs-weaviate": { "name": "@llm-tools/embedjs-weaviate", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "compute-cosine-similarity": "^1.1.0", "debug": "^4.4.0", "weaviate-ts-client": "^2.2.0" @@ -315,11 +315,11 @@ }, "loaders/embedjs-loader-confluence": { "name": "@llm-tools/embedjs-loader-confluence", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-loader-web": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-loader-web": "0.1.27", "confluence.js": "^1.7.4", "debug": "^4.4.0", "md5": "^2.3.0" @@ -327,23 +327,37 @@ }, "loaders/embedjs-loader-csv": { "name": "@llm-tools/embedjs-loader-csv", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "csv-parse": "^5.6.0", "debug": "^4.4.0", "md5": "^2.3.0" } }, + "loaders/embedjs-loader-image": { + "name": "@llm-tools/embedjs-loader-image", + "version": "0.1.27", + "license": "Apache-2.0", + "dependencies": { + "@langchain/core": "^0.3.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", + 
"debug": "^4.4.0", + "md5": "^2.3.0", + "mime": "^4.0.6", + "stream-mime-type": "^2.0.0" + } + }, "loaders/embedjs-loader-markdown": { "name": "@llm-tools/embedjs-loader-markdown", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-loader-web": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-loader-web": "0.1.27", "debug": "^4.4.0", "md5": "^2.3.0", "micromark": "^4.0.1", @@ -353,35 +367,35 @@ }, "loaders/embedjs-loader-msoffice": { "name": "@llm-tools/embedjs-loader-msoffice", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "md5": "^2.3.0", "office-text-extractor": "^3.0.3" } }, "loaders/embedjs-loader-pdf": { "name": "@llm-tools/embedjs-loader-pdf", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "md5": "^2.3.0", "office-text-extractor": "^3.0.3" } }, "loaders/embedjs-loader-sitemap": { "name": "@llm-tools/embedjs-loader-sitemap", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-loader-web": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-loader-web": "0.1.27", "debug": "^4.4.0", "md5": "^2.3.0", "sitemapper": "^3.2.20" @@ -389,12 +403,12 @@ }, "loaders/embedjs-loader-web": { "name": "@llm-tools/embedjs-loader-web", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { 
"@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0", "html-to-text": "^9.0.5", "md5": "^2.3.0" @@ -405,10 +419,10 @@ }, "loaders/embedjs-loader-xml": { "name": "@llm-tools/embedjs-loader-xml", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "fast-xml-parser": "^4.5.1", "md5": "^2.3.0" @@ -416,12 +430,12 @@ }, "loaders/embedjs-loader-youtube": { "name": "@llm-tools/embedjs-loader-youtube", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/textsplitters": "^0.1.0", - "@llm-tools/embedjs-interfaces": "0.1.26", - "@llm-tools/embedjs-utils": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", + "@llm-tools/embedjs-utils": "0.1.27", "debug": "^4.4.0", "md5": "^2.3.0", "usetube": "^2.2.7", @@ -433,34 +447,34 @@ }, "models/embedjs-anthropic": { "name": "@llm-tools/embedjs-anthropic", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/anthropic": "^0.3.11", "@langchain/core": "^0.3.26", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, "models/embedjs-cohere": { "name": "@llm-tools/embedjs-cohere", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/cohere": "^0.3.2", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "cohere-ai": "^7.15.0" } }, "models/embedjs-huggingface": { "name": "@llm-tools/embedjs-huggingface", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@huggingface/inference": "^2.8.1", "@langchain/community": "^0.3.20", "@langchain/core": "^0.3.26", - 
"@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, @@ -980,12 +994,12 @@ }, "models/embedjs-llama-cpp": { "name": "@llm-tools/embedjs-llama-cpp", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/community": "^0.3.20", "@langchain/core": "^0.3.26", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0", "node-llama-cpp": "^3.3.1" } @@ -1506,45 +1520,45 @@ }, "models/embedjs-mistral": { "name": "@llm-tools/embedjs-mistral", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/mistralai": "^0.2.0", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, "models/embedjs-ollama": { "name": "@llm-tools/embedjs-ollama", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/ollama": "^0.1.4", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, "models/embedjs-openai": { "name": "@llm-tools/embedjs-openai", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/openai": "^0.3.16", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, "models/embedjs-vertexai": { "name": "@llm-tools/embedjs-vertexai", - "version": "0.1.26", + "version": "0.1.27", "license": "Apache-2.0", "dependencies": { "@langchain/core": "^0.3.26", "@langchain/google-vertexai": "^0.1.5", - "@llm-tools/embedjs-interfaces": "0.1.26", + "@llm-tools/embedjs-interfaces": "0.1.27", "debug": "^4.4.0" } }, @@ -6743,6 +6757,10 @@ "resolved": "loaders/embedjs-loader-csv", "link": true }, + "node_modules/@llm-tools/embedjs-loader-image": { + "resolved": 
"loaders/embedjs-loader-image", + "link": true + }, "node_modules/@llm-tools/embedjs-loader-markdown": { "resolved": "loaders/embedjs-loader-markdown", "link": true diff --git a/tsconfig.base.json b/tsconfig.base.json index a1be3ea2..ef1cdee6 100644 --- a/tsconfig.base.json +++ b/tsconfig.base.json @@ -29,6 +29,7 @@ "@llm-tools/embedjs-lmdb": ["databases/embedjs-lmdb/src/index.ts"], "@llm-tools/embedjs-loader-confluence": ["loaders/embedjs-loader-confluence/src/index.ts"], "@llm-tools/embedjs-loader-csv": ["loaders/embedjs-loader-csv/src/index.ts"], + "@llm-tools/embedjs-loader-image": ["loaders/embedjs-loader-image/src/index.ts"], "@llm-tools/embedjs-loader-markdown": ["loaders/embedjs-loader-markdown/src/index.ts"], "@llm-tools/embedjs-loader-msoffice": ["loaders/embedjs-loader-msoffice/src/index.ts"], "@llm-tools/embedjs-loader-pdf": ["loaders/embedjs-loader-pdf/src/index.ts"],