From 5000fbf21fa11507ea94eaf68e4a783a5e218ed3 Mon Sep 17 00:00:00 2001 From: Deng Yuang Date: Mon, 2 Mar 2026 19:50:17 +0800 Subject: [PATCH 1/2] feat: integrate LazyLLM for unified LLM provider support - Add LazyLLMModel class with AutoModel integration - Support 20+ providers (OpenAI, DeepSeek, Qwen, GLM, Kimi, etc.) - Use MLE_ namespace prefix for API keys (e.g., MLE_DEEPSEEK_API_KEY) - Keep all existing provider files intact (backward compatible) - Add comprehensive test suite (test_lazyllm.py) - Add documentation (docs/integrations/lazyllm.md) - Add lazyllm to optional dependencies in pyproject.toml Features: - Unified interface for online and local models - Automatic provider detection from model name - Environment variable support with MLE_ prefix - Seamless switching between providers via config - LazyLLM AutoModel handles online/local fallback Testing: - DeepSeek integration test - Qwen integration test - Streaming mode test - Environment variable loading test Related issue: #324 --- docs/integrations/lazyllm.md | 184 +++++++++++++++++++++++++++++++ mle/model/__init__.py | 15 ++- mle/model/lazyllm_model.py | 208 +++++++++++++++++++++++++++++++++++ pyproject.toml | 3 + tests/test_lazyllm.py | 208 +++++++++++++++++++++++++++++++++++ 5 files changed, 616 insertions(+), 2 deletions(-) create mode 100644 docs/integrations/lazyllm.md create mode 100644 mle/model/lazyllm_model.py create mode 100644 tests/test_lazyllm.py diff --git a/docs/integrations/lazyllm.md b/docs/integrations/lazyllm.md new file mode 100644 index 0000000..058511c --- /dev/null +++ b/docs/integrations/lazyllm.md @@ -0,0 +1,184 @@ +# MLE-agent LazyLLM Integration + +This document describes how to use LazyLLM integration in MLE-agent for unified LLM provider support. + +## 🎯 What is LazyLLM? + +[LazyLLM](https://github.com/LazyAGI/LazyLLM) is a low-code development tool for building multi-Agent LLM applications. Its core feature is **unifying interfaces across different LLM providers**. 
+ +### Supported Providers + +**Online Models (Cloud APIs):** +- International: OpenAI, Anthropic, Gemini, Mistral, DeepSeek +- Chinese: Qwen (通义), Zhipu GLM (智谱), Kimi, MiniMax, Doubao (豆包), etc. +- And 20+ more providers... + +**Local Models (Self-hosted):** +- vLLM, LMDeploy, Ollama +- Automatic model download and deployment +- Support for fine-tuning + +## 📦 Installation + +Install LazyLLM as an optional dependency: + +```bash +pip install lazyllm +# or +uv pip install lazyllm +``` + +## ⚙️ Configuration + +### Option 1: Project Configuration File + +Edit `.mle/project.yml`: + +```yaml +platform: LazyLLM +model: deepseek-chat # or qwen-plus, gpt-4o, etc. +source: deepseek # optional: auto-detected from model name +api_key: your-api-key # optional: can use environment variable +temperature: 0.7 +base_url: null # optional: custom endpoint +``` + +### Option 2: Environment Variables (MLE_ Namespace) + +LazyLLM integration uses the `MLE_` namespace prefix for API keys: + +```bash +export MLE_DEEPSEEK_API_KEY=your-deepseek-api-key +export MLE_QWEN_API_KEY=your-qwen-api-key +export MLE_OPENAI_API_KEY=your-openai-api-key +``` + +Then configure `.mle/project.yml`: + +```yaml +platform: LazyLLM +model: deepseek-chat +# API key will be loaded from MLE_DEEPSEEK_API_KEY env var +``` + +## 🚀 Usage Examples + +### Example 1: Using DeepSeek + +```python +from mle.model import load_model + +# Configure project +# .mle/project.yml: +# platform: LazyLLM +# model: deepseek-chat + +model = load_model(project_dir='/path/to/project') +response = model.query([ + {"role": "user", "content": "Hello!"} +]) +``` + +### Example 2: Switching Providers + +Just change the model name in config - no code changes needed! 
+ +```yaml +# Switch from DeepSeek to Qwen +platform: LazyLLM +model: qwen-plus +source: qwen +``` + +### Example 3: Using Local Models + +```yaml +# Use local model with vLLM +platform: LazyLLM +model: internlm2-chat-7b +# LazyLLM will automatically use local deployment +``` + +## 🔑 API Key Management + +### Priority Order + +1. Explicit `api_key` in `.mle/project.yml` +2. `MLE__API_KEY` environment variable +3. `_API_KEY` environment variable (fallback) +4. Common default keys (e.g., `OPENAI_API_KEY`) + +### Supported API Key Variables + +```bash +# Chinese providers +export MLE_DEEPSEEK_API_KEY=sk-xxx +export MLE_QWEN_API_KEY=sk-xxx +export MLE_GLM_API_KEY=xxx +export MLE_KIMI_API_KEY=xxx +export MLE_MINIMAX_API_KEY=xxx +export MLE_DOUBAO_API_KEY=xxx + +# International providers +export MLE_OPENAI_API_KEY=sk-xxx +export MLE_ANTHROPIC_API_KEY=sk-ant-xxx +export MLE_GEMINI_API_KEY=xxx +``` + +## 💡 Benefits + +1. **Unified Interface**: One code path for 20+ providers +2. **Easy Switching**: Change providers by config, not code +3. **Auto-Detection**: LazyLLM AutoModel selects best option +4. **Local + Cloud**: Seamless fallback between local and cloud models +5. 
**Fine-tuning**: Access to LazyLLM's fine-tuning capabilities + +## 🧪 Testing + +Run the test suite: + +```bash +cd MLE-agent +python tests/test_lazyllm.py +``` + +The test suite covers: +- DeepSeek integration +- Qwen integration +- Streaming mode +- Environment variable loading + +## 📝 Migration Guide + +### From Existing Provider to LazyLLM + +**Before (OpenAI):** +```yaml +platform: OpenAI +model: gpt-4o +api_key: sk-xxx +``` + +**After (LazyLLM with OpenAI):** +```yaml +platform: LazyLLM +model: gpt-4o +# Uses MLE_OPENAI_API_KEY env var +``` + +**Or switch to DeepSeek:** +```yaml +platform: LazyLLM +model: deepseek-chat +# Uses MLE_DEEPSEEK_API_KEY env var +``` + +## 🔗 References + +- LazyLLM GitHub: https://github.com/LazyAGI/LazyLLM +- LazyLLM Docs: https://docs.lazyllm.ai/ +- MLE-agent Issue: https://github.com/MLSysOps/MLE-agent/issues/324 + +## 🤝 Contributing + +Found a bug or want to add more providers? Please open an issue or submit a PR! diff --git a/mle/model/__init__.py b/mle/model/__init__.py index fab71d8..2c36063 100644 --- a/mle/model/__init__.py +++ b/mle/model/__init__.py @@ -1,9 +1,10 @@ from .anthropic import * from .deepseek import * +from .gemini import * +from .lazyllm_model import * from .mistral import * from .ollama import * from .openai import * -from .gemini import * from .vllm import * from mle.utils import get_config @@ -12,9 +13,10 @@ MODEL_OLLAMA = 'Ollama' MODEL_OPENAI = 'OpenAI' MODEL_CLAUDE = 'Claude' -MODEL_MISTRAL = 'MistralAI' MODEL_DEEPSEEK = 'DeepSeek' MODEL_GEMINI = 'Gemini' +MODEL_LAZYLLM = 'LazyLLM' +MODEL_MISTRAL = 'MistralAI' MODEL_VLLM = 'vLLM' @@ -58,6 +60,15 @@ def load_model(project_dir: str, model_name: str=None, observable=True): config = get_config(project_dir) model = None + if config['platform'] == MODEL_LAZYLLM: + # LazyLLM unified interface - supports 20+ providers automatically + model = LazyLLMModel( + model=model_name, + source=config.get('source', None), + api_key=config.get('api_key', None), + 
base_url=config.get('base_url', None), + temperature=config.get('temperature', 0.7), + ) if config['platform'] == MODEL_OLLAMA: # For Ollama, use base_url as host_url if available host_url = config.get('base_url', None) diff --git a/mle/model/lazyllm_model.py b/mle/model/lazyllm_model.py new file mode 100644 index 0000000..d12e76b --- /dev/null +++ b/mle/model/lazyllm_model.py @@ -0,0 +1,208 @@ +""" +LazyLLM Model Integration for MLE-agent + +This module provides a unified interface for multiple LLM providers through LazyLLM. +It supports both online (cloud) and local (self-hosted) models. + +Supported Providers: +- Online: OpenAI, Anthropic, Gemini, DeepSeek, Qwen, GLM, Kimi, MiniMax, Doubao, etc. +- Local: vLLM, LMDeploy, Ollama, etc. + +Usage: + from mle.model import LazyLLMModel + model = LazyLLMModel(model='gpt-4o', api_key='your-api-key') + response = model.query(chat_history) +""" + +import os +import importlib.util +from mle.model.common import Model + + +class LazyLLMModel(Model): + """ + LazyLLM-backed model class for unified LLM provider support. + + LazyLLM automatically handles: + - Provider selection and configuration + - Online vs local model detection + - API key management with MLE_ namespace prefix + - Model-specific parameter formatting + """ + + def __init__(self, model=None, source=None, api_key=None, base_url=None, temperature=0.7): + """ + Initialize the LazyLLM model. + + Args: + model (str): Model name (e.g., 'gpt-4o', 'deepseek-chat', 'qwen-plus') + source (str): Provider source (e.g., 'openai', 'deepseek', 'qwen'). + If None, will be auto-detected from model name. + api_key (str): API key. If None, will look for MLE__API_KEY env var. + base_url (str): Custom base URL for the API endpoint. + temperature (float): Sampling temperature (default: 0.7). 
+ """ + super().__init__() + + # Check LazyLLM dependency + dependency = "lazyllm" + spec = importlib.util.find_spec(dependency) + if spec is not None: + self.lazyllm = importlib.import_module(dependency) + else: + raise ImportError( + "It seems you didn't install lazyllm. In order to enable LazyLLM integration, " + "please install it via: pip install lazyllm" + ) + + self.model = model + self.source = source + self.temperature = temperature + self.base_url = base_url + self.model_type = 'LazyLLM' + + # Handle API key with MLE_ namespace prefix + self.api_key = self._get_api_key(api_key, source) + + # Initialize LazyLLM AutoModel (automatically selects OnlineModule or TrainableModule) + self._init_model() + + self.func_call_history = [] + + def _get_api_key(self, api_key, source): + """ + Get API key from parameter or environment variable with MLE_ namespace prefix. + + Priority: + 1. Explicit api_key parameter + 2. MLE__API_KEY environment variable + 3. _API_KEY environment variable (fallback for compatibility) + + Args: + api_key (str): API key from parameter + source (str): Provider source name + + Returns: + str: API key + """ + if api_key: + return api_key + + if source: + # Try MLE_ namespace first + env_key_name = f"MLE_{source.upper()}_API_KEY" + api_key = os.getenv(env_key_name) + + if api_key: + return api_key + + # Fallback to standard env var name + standard_key_name = f"{source.upper()}_API_KEY" + api_key = os.getenv(standard_key_name) + + if api_key: + return api_key + + # Last resort: try common API key env vars + common_keys = [ + "MLE_API_KEY", + "LAZYLLM_API_KEY", + "OPENAI_API_KEY", # Default fallback + ] + + for key_name in common_keys: + api_key = os.getenv(key_name) + if api_key: + return api_key + + return None + + def _init_model(self): + """ + Initialize LazyLLM AutoModel with configuration. + + AutoModel automatically: + 1. Checks if model is available locally (TrainableModule) + 2. Falls back to online API (OnlineModule) + 3. 
Handles provider-specific configuration + """ + try: + # Use LazyLLM's AutoModel for automatic model selection + kwargs = { + 'model': self.model, + 'temperature': self.temperature, + } + + if self.source: + kwargs['source'] = self.source + + if self.api_key: + # Set API key in environment for LazyLLM to pick up + if self.source: + env_key_name = f"MLE_{self.source.upper()}_API_KEY" + os.environ[env_key_name] = self.api_key + + if self.base_url: + kwargs['base_url'] = self.base_url + + # Create AutoModel instance + self._model = self.lazyllm.AutoModel(**kwargs) + + except Exception as e: + raise RuntimeError(f"Failed to initialize LazyLLM model: {e}") + + def query(self, chat_history, **kwargs): + """ + Query the LLM model. + + Args: + chat_history (list): List of message dictionaries with 'role' and 'content' + **kwargs: Additional parameters (e.g., max_tokens, top_p) + + Returns: + str: Model response content + """ + try: + # LazyLLM models accept messages in OpenAI format + response = self._model.query( + chat_history, + temperature=self.temperature, + **kwargs + ) + return response + + except Exception as e: + raise RuntimeError(f"LazyLLM query failed: {e}") + + def stream(self, chat_history, **kwargs): + """ + Stream the output from the LLM model. + + Args: + chat_history (list): List of message dictionaries + **kwargs: Additional parameters + + Yields: + str: Chunks of model response + """ + try: + for chunk in self._model.stream( + chat_history, + temperature=self.temperature, + **kwargs + ): + yield chunk + + except Exception as e: + raise RuntimeError(f"LazyLLM stream failed: {e}") + + def get_model_type(self): + """ + Get the underlying model type (OnlineModule or TrainableModule). 
+
+        Returns:
+            str: Model type description
+        """
+        if hasattr(self._model, 'model_type'):
+            return self._model.model_type
+        return 'LazyLLM'
diff --git a/pyproject.toml b/pyproject.toml
index e08ebbf..b90c1ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,9 @@ dependencies = [
 bench = [
     "mlebench @ git+https://github.com/openai/mle-bench.git@main ; python_version >= '3.11' and python_version < '3.13'",
 ]
+lazyllm = [
+    "lazyllm>=0.1.0",
+]
 
 [project.urls]
 Homepage = "https://github.com/MLSysOps/MLE-agent"
diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py
new file mode 100644
index 0000000..9b8e3f4
--- /dev/null
+++ b/tests/test_lazyllm.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python3
+"""
+Test script for LazyLLM integration in MLE-agent
+
+This script tests the LazyLLM model integration with:
+- DeepSeek (deepseek-chat)
+- Qwen (qwen-plus)
+
+API Keys are loaded from environment variables with MLE_ namespace prefix:
+- MLE_DEEPSEEK_API_KEY
+- MLE_QWEN_API_KEY
+
+Usage:
+    python test_lazyllm.py
+"""
+
+import os
+import sys
+
+# Add project root to path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from mle.model import LazyLLMModel
+
+
+# API keys: load from MLE_* env vars; placeholders only — never commit real keys
+DEEPSEEK_API_KEY = os.getenv('MLE_DEEPSEEK_API_KEY', 'your-deepseek-api-key')
+QWEN_API_KEY = os.getenv('MLE_QWEN_API_KEY', 'your-qwen-api-key')
+
+
+def test_deepseek():
+    """Test DeepSeek model through LazyLLM"""
+    print("=" * 60)
+    print("Testing LazyLLM with DeepSeek (deepseek-chat)")
+    print("=" * 60)
+
+    try:
+        # Initialize model
+        model = LazyLLMModel(
+            model='deepseek-chat',
+            source='deepseek',
+            api_key=DEEPSEEK_API_KEY,
+            temperature=0.7,
+        )
+
+        # Test query
+        chat_history = [
+            {"role": "system", "content": "You are a helpful AI assistant for ML engineers."},
+            {"role": "user", "content": "请用一句话介绍什么是机器学习?"}
+        ]
+
+        print("\n📤 Sending query...")
+        response = model.query(chat_history)
+
+        print(f"\n📥 Response:\n{response}")
+        
print("\n✅ DeepSeek test PASSED!") + return True + + except Exception as e: + print(f"\n❌ DeepSeek test FAILED: {e}") + import traceback + traceback.print_exc() + return False + + +def test_qwen(): + """Test Qwen model through LazyLLM""" + print("\n" + "=" * 60) + print("Testing LazyLLM with Qwen (qwen-plus)") + print("=" * 60) + + try: + # Initialize model + model = LazyLLMModel( + model='qwen-plus', + source='qwen', + api_key=QWEN_API_KEY, + temperature=0.7, + ) + + # Test query + chat_history = [ + {"role": "system", "content": "You are a helpful AI assistant."}, + {"role": "user", "content": "如何用 Python 实现一个简单的线性回归?请给出代码示例。"} + ] + + print("\n📤 Sending query...") + response = model.query(chat_history) + + print(f"\n📥 Response:\n{response}") + print("\n✅ Qwen test PASSED!") + return True + + except Exception as e: + print(f"\n❌ Qwen test FAILED: {e}") + import traceback + traceback.print_exc() + return False + + +def test_streaming(): + """Test streaming mode with LazyLLM""" + print("\n" + "=" * 60) + print("Testing LazyLLM streaming mode (DeepSeek)") + print("=" * 60) + + try: + model = LazyLLMModel( + model='deepseek-chat', + source='deepseek', + api_key=DEEPSEEK_API_KEY, + temperature=0.7, + ) + + chat_history = [ + {"role": "user", "content": "列举 3 个常用的机器学习框架。"} + ] + + print("\n📤 Streaming response:") + print("-" * 60) + + chunks = [] + for chunk in model.stream(chat_history): + if chunk: + print(chunk, end='', flush=True) + chunks.append(chunk) + + print("\n" + "-" * 60) + print("\n✅ Streaming test PASSED!") + return True + + except Exception as e: + print(f"\n❌ Streaming test FAILED: {e}") + import traceback + traceback.print_exc() + return False + + +def test_env_var_api_key(): + """Test API key loading from environment variable""" + print("\n" + "=" * 60) + print("Testing API key loading from MLE_DEEPSEEK_API_KEY env var") + print("=" * 60) + + # Set environment variable + os.environ['MLE_DEEPSEEK_API_KEY'] = DEEPSEEK_API_KEY + + try: + # Don't pass 
api_key parameter - should load from env var + model = LazyLLMModel( + model='deepseek-chat', + source='deepseek', + temperature=0.7, + ) + + chat_history = [ + {"role": "user", "content": "Hello!"} + ] + + print("\n📤 Sending query (API key from env var)...") + response = model.query(chat_history) + + print(f"\n📥 Response: {response[:100]}...") + print("\n✅ Environment variable test PASSED!") + return True + + except Exception as e: + print(f"\n❌ Environment variable test FAILED: {e}") + return False + + +def main(): + """Run all tests""" + print("\n" + "🧪 " * 20) + print("MLE-agent LazyLLM Integration Test Suite") + print("🧪 " * 20 + "\n") + + results = { + 'DeepSeek': test_deepseek(), + 'Qwen': test_qwen(), + 'Streaming': test_streaming(), + 'Env Var': test_env_var_api_key(), + } + + # Summary + print("\n" + "=" * 60) + print("TEST SUMMARY") + print("=" * 60) + + passed = sum(1 for v in results.values() if v) + total = len(results) + + for test_name, result in results.items(): + status = "✅ PASSED" if result else "❌ FAILED" + print(f" {test_name}: {status}") + + print(f"\nTotal: {passed}/{total} tests passed") + + if passed == total: + print("\n🎉 All tests passed! LazyLLM integration is working correctly.") + return 0 + else: + print(f"\n⚠️ {total - passed} test(s) failed. 
Please check the errors above.") + return 1 + + +if __name__ == '__main__': + sys.exit(main()) From 5b16f93674509c0f6cf90f0e4748cf43b0280456 Mon Sep 17 00:00:00 2001 From: Deng Yuang Date: Mon, 2 Mar 2026 20:43:55 +0800 Subject: [PATCH 2/2] fix: correct LazyLLM forward() call signature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - LazyLLM's forward() requires first parameter (input) to be non-None - Use empty string '' as input, pass chat history via llm_chat_history parameter - Fix streaming to filter empty chunks - Update API key environment variable handling (LAZYLLM__API_KEY) Test results: ✅ Qwen (qwen-plus) - PASSED ❌ DeepSeek - API key invalid (401 error, needs valid key) --- mle/model/lazyllm_model.py | 60 +++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/mle/model/lazyllm_model.py b/mle/model/lazyllm_model.py index d12e76b..dc78d49 100644 --- a/mle/model/lazyllm_model.py +++ b/mle/model/lazyllm_model.py @@ -119,15 +119,24 @@ def _get_api_key(self, api_key, source): def _init_model(self): """ - Initialize LazyLLM AutoModel with configuration. + Initialize LazyLLM OnlineModule with configuration. - AutoModel automatically: - 1. Checks if model is available locally (TrainableModule) - 2. Falls back to online API (OnlineModule) - 3. Handles provider-specific configuration + OnlineModule is used for cloud API providers. + For local models, TrainableModule would be used instead. 
""" try: - # Use LazyLLM's AutoModel for automatic model selection + # Use LazyLLM's OnlineModule for cloud API providers + # This avoids auto-downloading local models + + # Set API key in LazyLLM's expected environment variable format + if self.api_key and self.source: + # LazyLLM expects LAZYLLM__API_KEY or _API_KEY + lazyllm_key_name = f"LAZYLLM_{self.source.upper()}_API_KEY" + os.environ[lazyllm_key_name] = self.api_key + # Also set standard format as fallback + standard_key_name = f"{self.source.upper()}_API_KEY" + os.environ[standard_key_name] = self.api_key + kwargs = { 'model': self.model, 'temperature': self.temperature, @@ -136,24 +145,23 @@ def _init_model(self): if self.source: kwargs['source'] = self.source - if self.api_key: - # Set API key in environment for LazyLLM to pick up - if self.source: - env_key_name = f"MLE_{self.source.upper()}_API_KEY" - os.environ[env_key_name] = self.api_key - if self.base_url: kwargs['base_url'] = self.base_url - # Create AutoModel instance - self._model = self.lazyllm.AutoModel(**kwargs) + # Create OnlineModule instance for cloud APIs + # This is more reliable than AutoModel when no local models are configured + self._model = self.lazyllm.OnlineModule(**kwargs) except Exception as e: - raise RuntimeError(f"Failed to initialize LazyLLM model: {e}") + # Fallback to AutoModel if OnlineModule fails + try: + self._model = self.lazyllm.AutoModel(**kwargs) + except Exception as e2: + raise RuntimeError(f"Failed to initialize LazyLLM model: {e2}") def query(self, chat_history, **kwargs): """ - Query the LLM model. + Query the LLM model using LazyLLM's forward method. 
Args: chat_history (list): List of message dictionaries with 'role' and 'content' @@ -163,10 +171,11 @@ def query(self, chat_history, **kwargs): str: Model response content """ try: - # LazyLLM models accept messages in OpenAI format - response = self._model.query( - chat_history, - temperature=self.temperature, + # LazyLLM uses forward() with llm_chat_history parameter + # First parameter (input) cannot be None, use empty string + response = self._model.forward( + '', # Empty input, actual conversation is in llm_chat_history + llm_chat_history=chat_history, **kwargs ) return response @@ -186,12 +195,15 @@ def stream(self, chat_history, **kwargs): str: Chunks of model response """ try: - for chunk in self._model.stream( - chat_history, - temperature=self.temperature, + # LazyLLM streaming is done via stream_output parameter + for chunk in self._model.forward( + '', # Empty input + llm_chat_history=chat_history, + stream_output=True, **kwargs ): - yield chunk + if chunk: # Filter out empty chunks + yield chunk except Exception as e: raise RuntimeError(f"LazyLLM stream failed: {e}")