diff --git a/dockers/llm.rag.service/common.py b/dockers/llm.rag.service/common.py index 4994ab9..f632954 100644 --- a/dockers/llm.rag.service/common.py +++ b/dockers/llm.rag.service/common.py @@ -3,6 +3,9 @@ import logging +logging.basicConfig(level=logging.DEBUG) + + def format_context(results: List[Dict[str, Any]]) -> str: """Format search results into context for the LLM""" context_parts = [] @@ -44,7 +47,22 @@ def trim_answer(generated_answer: str, label_separator: str) -> str: def get_answer_with_settings(question, retriever, client, model_id, max_tokens, model_temperature, system_prompt): - docs = retriever.invoke(input=question) + search_params = { + "param": { + "metric_type": "L2", + "params": {"nprobe": 10}, + }, + "limit": 5, + "field_names": ["page_content", "metadata"], + "vector_field": ["dense", "sparse"], + "weights": [0.7, 0.2] # Weights for dense and sparse vectors + } + + docs = retriever.get_relevant_documents( + query=question, + search_kwargs=search_params + ) + num_of_docs = len(docs) logging.info(f"Number of relevant documents retrieved and that will be used as context for query: {num_of_docs}") diff --git a/dockers/llm.rag.service/serveragllm_milvus_local.py b/dockers/llm.rag.service/serveragllm_milvus_local.py new file mode 100644 index 0000000..8b07302 --- /dev/null +++ b/dockers/llm.rag.service/serveragllm_milvus_local.py @@ -0,0 +1,139 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "faiss-cpu", +# "fastapi", +# "langchain-community", +# "langchain-huggingface", +# "openai", +# "uvicorn", +# "weaviate", +# "langchain_milvus", +# "langchain-openai", +# "pymilvus" +# ] +# /// + +import os +import sys +import uvicorn + +from functools import partial +from typing import Union + +import click +from fastapi import FastAPI +from openai import OpenAI + +from common import get_answer_with_settings + + +SYSTEM_PROMPT="""You are a specialized support ticket assistant. Format your responses following these rules: + 1. 
Answer the provided question only using the provided context. + 2. Do not add the provided context to the generated answer. + 3. Include relevant technical details when present or provide a summary of the comments in the ticket. + 4. Include the submitter, assignee and collaborator for a ticket when this info is available. + 5. If the question cannot be answered with the given context, please say so and do not attempt to provide an answer. + 6. Do not create new questions related to the given question, instead answer only the provided question. + 7. Provide a clear, direct and factual answer.""" + + +def setup( + relevant_docs: int, + llm_server_url:str, + model_id: str, + max_tokens: int, + model_temperature: float, +): + app = FastAPI() + + # TODO: move to imports + from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever + from langchain_milvus.function import ( + BM25BuiltInFunction, + ) + from langchain.embeddings import HuggingFaceEmbeddings + from pymilvus import connections, Collection, utility, WeightedRanker + + # TODO: pass through settings or params + URI = "http://localhost:19530" + collection_name = "test_milvus_collection" + embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2" + embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name) + + connections.connect( + alias="default", + uri=URI + ) + + # Connect to the existing collection + collection = Collection(collection_name) + collection.load() + + # from langchain_openai import OpenAIEmbeddings + # dense_embedding_func = OpenAIEmbeddings() + + # Initialize the hybrid retriever with both vector fields + retriever = MilvusCollectionHybridSearchRetriever( + collection=collection, + content_field="page_content", # Field containing the document text + anns_fields=["dense", "sparse"], # Both vector fields + metadata_fields=["metadata"], # Include all metadata + field_embeddings=[embeddings, BM25BuiltInFunction()], # You might need to specify how sparse embeddings 
are handled + # Reranking configuration (optional but resolves validation) + rerank=WeightedRanker(0.5, 0.5), # or provide a reranking method if available + ) + + print("Created Vector DB retriever successfully. \n") + + print("Creating an OpenAI client to the hosted model at URL: ", llm_server_url) + try: + client = OpenAI(base_url=llm_server_url, api_key="na") + except Exception as e: + print("Error creating client:", e) + sys.exit(1) + + get_answer = partial( + get_answer_with_settings, + retriever=retriever, + client=client, + model_id=model_id, + max_tokens=max_tokens, + model_temperature=model_temperature, + system_prompt=SYSTEM_PROMPT, + ) + + @app.get("/answer/{question}") + def read_item(question: Union[str, None] = None): + print(f"Received question: {question}") + answer = get_answer(question) + return {"question": question, "answer": answer} + + return app + + +MICROSOFT_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct" +MOSAICML_MODEL_ID = "mosaicml/mpt-7b-chat" +RELEVANT_DOCS_DEFAULT = 2 +MAX_TOKENS_DEFAULT = 64 +MODEL_TEMPERATURE_DEFAULT = 0.01 + +relevant_docs = int(os.getenv("RELEVANT_DOCS", RELEVANT_DOCS_DEFAULT)) +llm_server_url = os.getenv("LLM_SERVER_URL", "http://localhost:11434/v1") +model_id = os.getenv("MODEL_ID", "llama2") +max_tokens = int(os.getenv("MAX_TOKENS", MAX_TOKENS_DEFAULT)) +model_temperature = float(os.getenv("MODEL_TEMPERATURE", MODEL_TEMPERATURE_DEFAULT)) + +app = setup(relevant_docs, llm_server_url, model_id, max_tokens, model_temperature) + + +@click.command() +@click.option("--host", default="127.0.0.1", help="Host for the FastAPI server (default: 127.0.0.1)") +@click.option("--port", type=int, default=8000, help="Port for the FastAPI server (default: 8000)") +def run(host, port): + # Serve the app using Uvicorn + uvicorn.run("serveragllm_milvus_local:app", host=host, port=port, reload=True) + + +if __name__ == "__main__": + run() \ No newline at end of file diff --git a/dockers/llm.vdb.service/.env_local_template 
b/dockers/llm.vdb.service/.env_local_template index 5f7fefd..495627f 100644 --- a/dockers/llm.vdb.service/.env_local_template +++ b/dockers/llm.vdb.service/.env_local_template @@ -5,4 +5,8 @@ OUTPUT_FILENAME=/path/to/local/output_pickled.obj # Vector DB Optional Settings # EMBEDDING_CHUNK_SIZE=1000 # EMBEDDING_CHUNK_OVERLAP=100 -# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 \ No newline at end of file +# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 + +# Milvus Vector DB Optional Settings +# MILVUS_URI="http://localhost:19530" +# MILVUS_COLLECTION_NAME="test_milvus_collection" \ No newline at end of file diff --git a/dockers/llm.vdb.service/.env_s3_template b/dockers/llm.vdb.service/.env_s3_template index 28e0d4a..d0a8542 100644 --- a/dockers/llm.vdb.service/.env_s3_template +++ b/dockers/llm.vdb.service/.env_s3_template @@ -9,4 +9,8 @@ AWS_SECRET_ACCESS_KEY=my-secret-key # Vector DB Optional Settings # EMBEDDING_CHUNK_SIZE=1000 # EMBEDDING_CHUNK_OVERLAP=100 -# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 \ No newline at end of file +# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 + +# Milvus Vector DB Optional Settings +# MILVUS_URI="http://localhost:19530" +# MILVUS_COLLECTION_NAME="test_milvus_collection" \ No newline at end of file diff --git a/dockers/llm.vdb.service/common.py b/dockers/llm.vdb.service/common.py index c0080a8..e3bcf2c 100644 --- a/dockers/llm.vdb.service/common.py +++ b/dockers/llm.vdb.service/common.py @@ -3,7 +3,9 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.vectorstores import FAISS +from langchain_core.documents import Document from langchain_huggingface import HuggingFaceEmbeddings +from langchain_milvus import BM25BuiltInFunction, Milvus def load_jsonl_files_from_directory(directory): @@ -77,3 +79,39 @@ def create_vectordb_from_data( print("Convert to FAISS vectorstore") vectorstore = FAISS.from_texts(texts, embeddings, 
metadatas=metadatas) return vectorstore + + +def create_milvus_vectordb_from_data( + data, + embedding_model_name: str, + milvus_uri: str, + collection_name: str, + chunk_size, + chunk_overlap, +): + print("Start chunking documents") + texts, metadatas = chunk_documents_with_metadata(data, chunk_size, chunk_overlap) + + docs = [] + for text, metadata in zip(texts, metadatas): + docs.append( + Document( + page_content=text, + metadata=metadata, + ) + ) + + embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name) + print("Convert to Milvus vectorstore") + + vectorstore = Milvus( + embedding_function=embeddings, + vector_field=["dense", "sparse"], + builtin_function=BM25BuiltInFunction(), + collection_name=collection_name, + connection_args={"uri": milvus_uri}, + auto_id=True + ) + + vectorstore.add_documents(documents=docs) + return vectorstore diff --git a/dockers/llm.vdb.service/config.py b/dockers/llm.vdb.service/config.py index ee67095..2e48ed8 100644 --- a/dockers/llm.vdb.service/config.py +++ b/dockers/llm.vdb.service/config.py @@ -54,6 +54,15 @@ class S3Settings(BaseSettings): description="Name of the embedding model to use" ) + milvus_uri: str = Field( + default="", + description="Milvus connection URI" + ) + milvus_collection_name: str = Field( + default="", + description="Milvus collection name" + ) + class Config: env_file = ".env" @@ -80,6 +89,15 @@ class LocalSettings(BaseSettings): description="Name of the embedding model to use" ) + milvus_uri: str = Field( + default="", + description="Milvus connection URI" + ) + milvus_collection_name: str = Field( + default="", + description="Milvus collection name" + ) + class Config: env_file = ".env" diff --git a/dockers/llm.vdb.service/createvectordb.py b/dockers/llm.vdb.service/createvectordb.py index 4b18eb5..0c938b3 100644 --- a/dockers/llm.vdb.service/createvectordb.py +++ b/dockers/llm.vdb.service/createvectordb.py @@ -2,7 +2,11 @@ import sys from config import try_load_settings -from service 
import LocalDirDbCreationService, S3VectorDbCreationService +from service import ( + LocalDirDbCreationService, + LocalDirMilvusDbCreationService, + S3VectorDbCreationService, +) @@ -11,12 +15,18 @@ def run(env_file: str): s3_settings, local_settings = try_load_settings(env_file) if s3_settings: + if s3_settings.milvus_uri and s3_settings.milvus_collection_name: + raise NotImplementedError("Milvus vector DB is not supported with S3 settings") service = S3VectorDbCreationService(s3_settings) service.create() elif local_settings: - service = LocalDirDbCreationService(local_settings) - service.create() + if local_settings.milvus_uri and local_settings.milvus_collection_name: + service = LocalDirMilvusDbCreationService(local_settings) + service.create() + else: + service = LocalDirDbCreationService(local_settings) + service.create() else: # TODO: not really needed, error will be thrown earlier diff --git a/dockers/llm.vdb.service/createvectordb_test.py b/dockers/llm.vdb.service/createvectordb_test.py index 7db0f1f..759a8c9 100644 --- a/dockers/llm.vdb.service/createvectordb_test.py +++ b/dockers/llm.vdb.service/createvectordb_test.py @@ -2,6 +2,7 @@ import os import pytest import s3fs +import subprocess from botocore.session import Session from moto.moto_server.threaded_moto_server import ThreadedMotoServer @@ -23,6 +24,26 @@ def test_create_faiss_vector_db_using_local_files(): os.remove("test_data/output/output_pickled.obj") +@pytest.fixture(scope="module") +def standalone_environment(): + # Start the standalone environment before tests + try: + subprocess.run(["bash", "standalone_embed.sh", "start"], check=True) + yield + finally: + # Stop the standalone environment after tests, even if tests fail + subprocess.run(["bash", "standalone_embed.sh", "stop"], check=True) + subprocess.run(["bash", "standalone_embed.sh", "delete"], check=True) + + +def test_create_milvus_vector_db_using_local_files(standalone_environment): + ctx = click.Context(run) + try: + ctx.forward(run, 
env_file="test_data/.env_local_milvus") + except SystemExit as e: + assert e.code == 0 + + @pytest.fixture(scope="module") def s3_base(): # writable local S3 system diff --git a/dockers/llm.vdb.service/haystack_milvus.py b/dockers/llm.vdb.service/haystack_milvus.py new file mode 100644 index 0000000..49b5812 --- /dev/null +++ b/dockers/llm.vdb.service/haystack_milvus.py @@ -0,0 +1,98 @@ +# /// script +# requires-python = ">=3.12" +# dependencies = [ +# "haystack", +# "fastembed-haystack", +# "milvus-haystack", +# "pymilvus", +# "sentence-transformers>=3.0.0", +# ] +# /// +import os + +from haystack import Document, Pipeline +from haystack.utils import Secret +from haystack.components.embedders import HuggingFaceAPIDocumentEmbedder, HuggingFaceAPITextEmbedder, OpenAIDocumentEmbedder, OpenAITextEmbedder +from haystack.components.writers import DocumentWriter +from haystack.document_stores.types import DuplicatePolicy +from haystack_integrations.components.embedders.fastembed import ( + FastembedSparseDocumentEmbedder, + FastembedSparseTextEmbedder, +) + +from milvus_haystack import MilvusDocumentStore, MilvusHybridRetriever + +token = "xxx" +os.environ["HUGGINGFACE_HUB_TOKEN"] = token + +document_store = MilvusDocumentStore( + connection_args={"uri": "./milvus.db"}, + drop_old=True, + sparse_vector_field="sparse_vector", # Specify a name of the sparse vector field to enable hybrid retrieval. 
+) + +documents = [ + Document(content="My name is Wolfgang and I live in Berlin"), + Document(content="I saw a black horse running"), + Document(content="Germany has many big cities"), + Document(content="fastembed is supported by and maintained by Milvus."), +] + +writer = DocumentWriter(document_store=document_store, policy=DuplicatePolicy.OVERWRITE) + +# dense_embedder = OpenAIDocumentEmbedder() +dense_embedder = HuggingFaceAPIDocumentEmbedder( + api_type="text_embeddings_inference", + api_params={ + "model": "sentence-transformers/all-MiniLM-L6-v2", + "url": "http://localhost:11434", + }, + token=Secret.from_token(token), +) + +indexing_pipeline = Pipeline() +indexing_pipeline.add_component("sparse_doc_embedder", FastembedSparseDocumentEmbedder()) +indexing_pipeline.add_component("dense_doc_embedder", dense_embedder) +indexing_pipeline.add_component("writer", writer) +indexing_pipeline.connect("sparse_doc_embedder", "dense_doc_embedder") +indexing_pipeline.connect("dense_doc_embedder", "writer") + +indexing_pipeline.run({"sparse_doc_embedder": {"documents": documents}}) + +querying_pipeline = Pipeline() +querying_pipeline.add_component("sparse_text_embedder", + FastembedSparseTextEmbedder(model="prithvida/Splade_PP_en_v1")) + +# dense_text_embedder = OpenAITextEmbedder() +dense_text_embedder = HuggingFaceAPITextEmbedder( + api_type="text_embeddings_inference", + api_params={ + "model": "sentence-transformers/all-MiniLM-L6-v2", + "url": "http://localhost:11434", + }, + token=Secret.from_token(token), +) + +querying_pipeline.add_component("dense_text_embedder", dense_text_embedder) +querying_pipeline.add_component( + "retriever", + MilvusHybridRetriever( + document_store=document_store, + # reranker=WeightedRanker(0.5, 0.5), # Default is RRFRanker() + ) +) + +querying_pipeline.connect("sparse_text_embedder.sparse_embedding", "retriever.query_sparse_embedding") +querying_pipeline.connect("dense_text_embedder.embedding", "retriever.query_embedding") + +question = 
"Who supports fastembed?" + +results = querying_pipeline.run( + {"dense_text_embedder": {"text": question}, + "sparse_text_embedder": {"text": question}} +) + +print(results) +print(results["retriever"]["documents"][0]) + +# Document(id=..., content: 'fastembed is supported by and maintained by Milvus.', embedding: vector of size 1536, sparse_embedding: vector with 48 non-zero elements) diff --git a/dockers/llm.vdb.service/requirements.txt b/dockers/llm.vdb.service/requirements.txt index 46a7b98..d79c0d6 100644 --- a/dockers/llm.vdb.service/requirements.txt +++ b/dockers/llm.vdb.service/requirements.txt @@ -2,5 +2,6 @@ click faiss-cpu langchain_community langchain_huggingface +langchain_milvus pydantic_settings s3fs \ No newline at end of file diff --git a/dockers/llm.vdb.service/service.py b/dockers/llm.vdb.service/service.py index ee23e3b..dde50fe 100644 --- a/dockers/llm.vdb.service/service.py +++ b/dockers/llm.vdb.service/service.py @@ -4,6 +4,7 @@ from dataclasses import dataclass from common import ( + create_milvus_vectordb_from_data, create_vectordb_from_data, load_jsonl_files_from_directory, ) @@ -61,3 +62,24 @@ def create(self): with open(self.config.output_filename, "wb") as file: file.write(pickle_byte_obj) print(f"Pickle byte object saved to {self.config.output_filename}") + + +@dataclass +class LocalDirMilvusDbCreationService: + config: LocalSettings + + def create(self): + print("Load JSON files") + data = load_jsonl_files_from_directory(self.config.local_directory) + + print("Convert to Milvus vectorstore") + create_milvus_vectordb_from_data( + data, + self.config.embedding_model_name, + self.config.milvus_uri, + self.config.milvus_collection_name, + self.config.embedding_chunk_size, + self.config.embedding_chunk_overlap, + ) + + print(f"Milvus collection saved {self.config.milvus_collection_name}") diff --git a/dockers/llm.vdb.service/standalone_embed.sh b/dockers/llm.vdb.service/standalone_embed.sh new file mode 100644 index 0000000..da0db43 --- 
/dev/null +++ b/dockers/llm.vdb.service/standalone_embed.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash + +# Licensed to the LF AI & Data foundation under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +run_embed() { + cat << EOF > embedEtcd.yaml +listen-client-urls: http://0.0.0.0:2379 +advertise-client-urls: http://0.0.0.0:2379 +quota-backend-bytes: 4294967296 +auto-compaction-mode: revision +auto-compaction-retention: '1000' +EOF + + cat << EOF > user.yaml +# Extra config to override default milvus.yaml +EOF + + sudo docker run -d \ + --name milvus-standalone \ + --security-opt seccomp:unconfined \ + -e ETCD_USE_EMBED=true \ + -e ETCD_DATA_DIR=/var/lib/milvus/etcd \ + -e ETCD_CONFIG_PATH=/milvus/configs/embedEtcd.yaml \ + -e COMMON_STORAGETYPE=local \ + -v $(pwd)/volumes/milvus:/var/lib/milvus \ + -v $(pwd)/embedEtcd.yaml:/milvus/configs/embedEtcd.yaml \ + -v $(pwd)/user.yaml:/milvus/configs/user.yaml \ + -p 19530:19530 \ + -p 9091:9091 \ + -p 2379:2379 \ + --health-cmd="curl -f http://localhost:9091/healthz" \ + --health-interval=30s \ + --health-start-period=90s \ + --health-timeout=20s \ + --health-retries=3 \ + milvusdb/milvus:v2.5.4 \ + milvus run standalone 1> /dev/null +} + +wait_for_milvus_running() { + echo "Wait for Milvus Starting..." 
+ while true + do + res=`sudo docker ps|grep milvus-standalone|grep healthy|wc -l` + if [ $res -eq 1 ] + then + echo "Start successfully." + echo "To change the default Milvus configuration, add your settings to the user.yaml file and then restart the service." + break + fi + sleep 1 + done +} + +start() { + res=`sudo docker ps|grep milvus-standalone|grep healthy|wc -l` + if [ $res -eq 1 ] + then + echo "Milvus is running." + exit 0 + fi + + res=`sudo docker ps -a|grep milvus-standalone|wc -l` + if [ $res -eq 1 ] + then + sudo docker start milvus-standalone 1> /dev/null + else + run_embed + fi + + if [ $? -ne 0 ] + then + echo "Start failed." + exit 1 + fi + + wait_for_milvus_running +} + +stop() { + sudo docker stop milvus-standalone 1> /dev/null + + if [ $? -ne 0 ] + then + echo "Stop failed." + exit 1 + fi + echo "Stop successfully." + +} + +delete_container() { + res=`sudo docker ps|grep milvus-standalone|wc -l` + if [ $res -eq 1 ] + then + echo "Please stop Milvus service before delete." + exit 1 + fi + sudo docker rm milvus-standalone 1> /dev/null + if [ $? -ne 0 ] + then + echo "Delete milvus container failed." + exit 1 + fi + echo "Delete milvus container successfully." +} + +delete() { + delete_container + sudo rm -rf $(pwd)/volumes + sudo rm -rf $(pwd)/embedEtcd.yaml + sudo rm -rf $(pwd)/user.yaml + echo "Delete successfully." +} + +upgrade() { + read -p "Please confirm if you'd like to proceed with the upgrade. The default will be to the latest version. Confirm with 'y' for yes or 'n' for no. > " check + if [ "$check" == "y" ] ||[ "$check" == "Y" ];then + res=`sudo docker ps -a|grep milvus-standalone|wc -l` + if [ $res -eq 1 ] + then + stop + delete_container + fi + + curl -sfL https://raw.githubusercontent.com/milvus-io/milvus/master/scripts/standalone_embed.sh -o standalone_embed_latest.sh && \ + bash standalone_embed_latest.sh start 1> /dev/null && \ + echo "Upgrade successfully." 
+ else + echo "Exit upgrade" + exit 0 + fi +} + +case $1 in + restart) + stop + start + ;; + start) + start + ;; + stop) + stop + ;; + upgrade) + upgrade + ;; + delete) + delete + ;; + *) + echo "please use bash standalone_embed.sh restart|start|stop|upgrade|delete" + ;; +esac diff --git a/dockers/llm.vdb.service/test_data/.env_local b/dockers/llm.vdb.service/test_data/.env_local index e625831..dd22e3e 100644 --- a/dockers/llm.vdb.service/test_data/.env_local +++ b/dockers/llm.vdb.service/test_data/.env_local @@ -5,4 +5,8 @@ OUTPUT_FILENAME=test_data/output/output_pickled.obj # Vector DB Optional Settings # EMBEDDING_CHUNK_SIZE=1000 # EMBEDDING_CHUNK_OVERLAP=100 -# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 \ No newline at end of file +# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 + +# Milvus Vector DB Optional Settings +# MILVUS_URI="http://localhost:19530" +# MILVUS_COLLECTION_NAME="test_milvus_collection" \ No newline at end of file diff --git a/dockers/llm.vdb.service/test_data/.env_local_milvus b/dockers/llm.vdb.service/test_data/.env_local_milvus new file mode 100644 index 0000000..f4a6b45 --- /dev/null +++ b/dockers/llm.vdb.service/test_data/.env_local_milvus @@ -0,0 +1,12 @@ +# Local Settings +LOCAL_DIRECTORY=test_data/input/ +OUTPUT_FILENAME=test_data/output/output_pickled.obj + +# Vector DB Optional Settings +# EMBEDDING_CHUNK_SIZE=1000 +# EMBEDDING_CHUNK_OVERLAP=100 +# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 + +# Milvus Vector DB Optional Settings +MILVUS_URI="http://localhost:19530" +MILVUS_COLLECTION_NAME="test_milvus_collection" \ No newline at end of file diff --git a/dockers/llm.vdb.service/test_data/.env_s3 b/dockers/llm.vdb.service/test_data/.env_s3 index 33b004b..d9b3406 100644 --- a/dockers/llm.vdb.service/test_data/.env_s3 +++ b/dockers/llm.vdb.service/test_data/.env_s3 @@ -10,4 +10,8 @@ AWS_REGION=us-east-2 # Vector DB Optional Settings # EMBEDDING_CHUNK_SIZE=1000 # 
EMBEDDING_CHUNK_OVERLAP=100 -# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 \ No newline at end of file +# EMBEDDING_MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2 + +# Milvus Vector DB Optional Settings +# MILVUS_URI="http://localhost:19530" +# MILVUS_COLLECTION_NAME="test_milvus_collection" \ No newline at end of file