RoboGen/robogen.py at main · RobotBase/RoboGen · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
RoboGen Flask Web Application
"""

import os
import json
import uuid
import base64
import mimetypes
from datetime import datetime
from flask import Flask, render_template, request, jsonify, redirect, url_for, flash, send_file
from werkzeug.utils import secure_filename
from google import genai
from google.genai import types
import logging
from PIL import Image
import io

# Configure logging: INFO level on the root logger, plus a module-level logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load settings from the configuration file


def load_config():
    """Read application settings from ``config.json``.

    The file is looked up relative to the current working directory.

    Returns:
        The parsed JSON content when the file exists and can be read;
        otherwise a dict of placeholder defaults (``google_api_key``,
        ``model_name``, ``secret_key``). A missing file is logged as a
        warning, any other failure as an error.
    """
    fallback = {
        "google_api_key": "your_google_api_key_here",
        "model_name": "gemini-2.5-flash-image-preview",
        "secret_key": "your-secret-key-here"
    }
    settings_file = 'config.json'

    try:
        # Guard clause: no file means defaults, with a warning.
        if not os.path.exists(settings_file):
            logger.warning(
                "config.json not found, using default configuration")
            return fallback

        with open(settings_file, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except Exception as e:
        # Unreadable or malformed file: fall back instead of failing startup.
        logger.error(f"Error loading config: {e}")
        return fallback


# Load the configuration once at import time
config = load_config()

app = Flask(__name__)
# NOTE(review): the fallback below is a fixed placeholder string; confirm a real
# secret is always supplied via config.json in deployed environments.
app.secret_key = config.get("secret_key", "your-secret-key-here")  # Secret key taken from the config file

# Storage folder names and the upload extension whitelist
UPLOAD_FOLDER = 'uploads'
OUTPUT_FOLDER = 'outputs'
COMPRESSED_FOLDER = 'compressed'  # Folder for compressed preview images
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'bmp'}

# Compression settings
COMPRESSION_QUALITY = 75  # JPEG quality, 1-100
MAX_DISPLAY_WIDTH = 600   # Maximum width of a display image
MAX_DISPLAY_HEIGHT = 600  # Maximum height of a display image

# Anchor the storage folders to the absolute directory of this script
script_dir = os.path.dirname(os.path.abspath(__file__))
UPLOAD_FOLDER = os.path.join(script_dir, UPLOAD_FOLDER)
OUTPUT_FOLDER = os.path.join(script_dir, OUTPUT_FOLDER)
COMPRESSED_FOLDER = os.path.join(script_dir, COMPRESSED_FOLDER)

# Make sure the folders exist
try:
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    os.makedirs(COMPRESSED_FOLDER, exist_ok=True)
    logger.info(f"Created directories: {UPLOAD_FOLDER}, {OUTPUT_FOLDER}, {COMPRESSED_FOLDER}")
except Exception as e:
    logger.error(f"Error creating directories: {e}")
    # Fall back to paths relative to the current directory
    UPLOAD_FOLDER = './uploads'
    OUTPUT_FOLDER = './outputs'
    COMPRESSED_FOLDER = './compressed'
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    os.makedirs(COMPRESSED_FOLDER, exist_ok=True)
    logger.info(f"Using fallback directories: {UPLOAD_FOLDER}, {OUTPUT_FOLDER}, {COMPRESSED_FOLDER}")

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['OUTPUT_FOLDER'] = OUTPUT_FOLDER
app.config['COMPRESSED_FOLDER'] = COMPRESSED_FOLDER
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size

# Google Gemini API settings
GOOGLE_API_KEY = config.get("google_api_key")
MODEL_NAME = config.get("model_name", "gemini-2.5-flash-image-preview")

# Load the workflow step definitions from the template file


def load_workflow_steps():
    """Load the workflow step definitions from ``templates/prompts.json``.

    The file is looked up next to this module first, so the result does not
    depend on the directory the process was started from. The path relative
    to the current working directory is kept as a fallback.

    Each top-level key is expected to look like ``step_<n>``; the integer
    ``<n>`` becomes the key of the returned mapping.

    Returns:
        A dict mapping step number (int) to a dict with the keys ``title``,
        ``description`` and ``prompt``. An empty dict is returned, and the
        error logged, when the file cannot be read or parsed, so the
        application can still start.
    """
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        prompts_path = os.path.join(module_dir, 'templates', 'prompts.json')
        if not os.path.isfile(prompts_path):
            # Legacy lookup: relative to the current working directory.
            prompts_path = os.path.join('templates', 'prompts.json')

        with open(prompts_path, 'r', encoding='utf-8') as f:
            prompts_data = json.load(f)

        # Convert to the {step_number: {...}} layout used by the rest of the app.
        workflow_steps = {}
        for key, value in prompts_data.items():
            step_num = int(key.split('_')[1])  # 'step_1' -> 1
            workflow_steps[step_num] = {
                'title': value['title'],
                'description': value['description'],
                'prompt': value['prompt']
            }

        return workflow_steps
    except Exception as e:
        logger.error(f"Error loading prompts from template: {e}")
        # Return an empty mapping on failure so the app can still run.
        return {}


# Workflow step definitions, loaded once at import time
WORKFLOW_STEPS = load_workflow_steps()


class RoboGenAPI:
    """Wrapper around the Gemini ("nanobanana") image generation API.

    Holds a ``genai.Client`` and the model name, and provides helpers that
    persist generated images to the output folder and create compressed
    JPEG previews in the compressed folder.
    """

    def __init__(self, api_key, model_name):
        """Create the API client.

        Args:
            api_key: API key passed to ``genai.Client``.
            model_name: Model identifier used for every generation request.
        """
        self.client = genai.Client(api_key=api_key)
        self.model_name = model_name

    def save_binary_file(self, file_name, data):
        """Write ``data`` to ``file_name`` inside the configured output folder.

        Args:
            file_name: Bare file name (joined onto ``OUTPUT_FOLDER``).
            data: Bytes to write.

        Returns:
            The full path of the written file, or ``None`` if writing failed
            (the error is logged).
        """
        try:
            filepath = os.path.join(app.config['OUTPUT_FOLDER'], file_name)
            with open(filepath, "wb") as f:
                f.write(data)
            logger.info(f"File saved to: {filepath}")
            return filepath
        except Exception as e:
            logger.error(f"Error saving file: {e}")
            return None

    def create_compressed_image(self, original_path):
        """Create a down-scaled JPEG preview of an image.

        The preview is named ``<stem>_compressed.jpg``, flattened onto a white
        background when the source has an alpha channel, and scaled
        proportionally to fit ``MAX_DISPLAY_WIDTH`` x ``MAX_DISPLAY_HEIGHT``.

        Args:
            original_path: Path of the source image.

        Returns:
            The path of the compressed image, or ``None`` on failure (the
            error is logged).
        """
        try:
            original_filename = os.path.basename(original_path)
            name = os.path.splitext(original_filename)[0]
            # Always JPEG for a better compression ratio.
            compressed_filename = f"{name}_compressed.jpg"
            compressed_path = os.path.join(app.config['COMPRESSED_FOLDER'], compressed_filename)

            with Image.open(original_path) as img:
                if img.mode in ('RGBA', 'LA'):
                    # JPEG has no alpha: composite onto white. The last band is
                    # the alpha channel for both RGBA and LA, so use it as the
                    # mask in both cases (previously LA lost its transparency).
                    background = Image.new('RGB', img.size, (255, 255, 255))
                    alpha = img.split()[-1]
                    background.paste(img.convert('RGB'), mask=alpha)
                    img = background
                elif img.mode != 'RGB':
                    img = img.convert('RGB')

                width, height = img.size
                if width > MAX_DISPLAY_WIDTH or height > MAX_DISPLAY_HEIGHT:
                    # Scale proportionally; clamp to 1px so extreme aspect
                    # ratios never produce a zero-sized dimension.
                    ratio = min(MAX_DISPLAY_WIDTH / width, MAX_DISPLAY_HEIGHT / height)
                    new_width = max(1, int(width * ratio))
                    new_height = max(1, int(height * ratio))
                    img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)

                img.save(compressed_path, 'JPEG', quality=COMPRESSION_QUALITY, optimize=True)

            logger.info(f"Compressed image saved to: {compressed_path}")
            return compressed_path

        except Exception as e:
            logger.error(f"Error creating compressed image: {e}")
            return None

    def _store_generated_image(self, inline_data, file_index):
        """Save one generated image part and its compressed preview.

        Args:
            inline_data: Inline data object exposing ``data`` and ``mime_type``.
            file_index: Running index of the image within the current call.

        Returns:
            ``(saved_path, compressed_path)``. Either element is ``None`` when
            that stage failed or was skipped.
        """
        mime_type = inline_data.mime_type
        # guess_extension() does not accept None; treat a missing MIME type
        # like an unrecognized one instead of aborting the whole stream.
        file_extension = mimetypes.guess_extension(mime_type) if mime_type else None
        if not file_extension:
            logger.warning(f"Skipping image part with unrecognized MIME type: {mime_type}")
            return None, None

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # The random suffix keeps names unique across requests that finish in
        # the same second (file_index restarts at 0 on every call).
        unique_suffix = uuid.uuid4().hex[:8]
        full_filename = f"generated_step_{file_index}_{timestamp}_{unique_suffix}{file_extension}"

        saved_path = self.save_binary_file(full_filename, inline_data.data)
        if not saved_path:
            return None, None
        logger.info(f"Successfully saved image: {saved_path}")

        compressed_path = self.create_compressed_image(saved_path)
        if compressed_path:
            logger.info(f"Successfully created compressed image: {compressed_path}")
        return saved_path, compressed_path

    def generate_with_image(self, prompt_text, image_path=None):
        """Generate text and/or images from a prompt and an optional image.

        Args:
            prompt_text: Prompt sent to the model.
            image_path: Optional path of an image to attach. A path that does
                not exist is logged and the request is sent without an image.

        Returns:
            On success: ``{'success': True, 'text': str, 'files': [paths],
            'compressed_files': [paths]}``. ``compressed_files`` only contains
            previews that were created successfully, so it can be shorter
            than ``files``. If the stream fails after at least one image was
            saved, the partial result is returned as a success.
            On failure: ``{'success': False, 'error': str}``.
        """
        try:
            parts = [types.Part.from_text(text=prompt_text)]

            if image_path:
                if os.path.exists(image_path):
                    with open(image_path, 'rb') as f:
                        image_data = f.read()

                    mime_type, _ = mimetypes.guess_type(image_path)
                    if not mime_type:
                        mime_type = 'image/jpeg'  # default when undetectable

                    parts.append(types.Part.from_bytes(
                        data=image_data,
                        mime_type=mime_type
                    ))
                else:
                    logger.warning(f"Image path does not exist, sending prompt without image: {image_path}")

            contents = [types.Content(role="user", parts=parts)]

            generate_content_config = types.GenerateContentConfig(
                response_modalities=["IMAGE", "TEXT"]
            )

            text_chunks = []
            generated_files = []
            compressed_files = []
            file_index = 0

            try:
                for chunk in self.client.models.generate_content_stream(
                    model=self.model_name,
                    contents=contents,
                    config=generate_content_config,
                ):
                    # Skip chunks without usable content (None or empty list).
                    candidates = chunk.candidates
                    if not candidates:
                        continue
                    content = candidates[0].content
                    if content is None or content.parts is None:
                        continue

                    for part in content.parts:
                        inline_data = getattr(part, 'inline_data', None)
                        if inline_data and getattr(inline_data, 'data', None):
                            logger.info(f"Found image data in response - MIME type: {inline_data.mime_type}")

                            saved_path, compressed_path = self._store_generated_image(
                                inline_data, file_index)
                            if saved_path:
                                generated_files.append(saved_path)
                            if compressed_path:
                                compressed_files.append(compressed_path)

                            file_index += 1

                        elif getattr(part, 'text', None):
                            text_chunks.append(part.text)
                            logger.debug(f"Added text content: {part.text[:100]}...")

            except Exception as stream_error:
                logger.error(f"Error during content generation stream: {stream_error}")
                # Keep a partial result if at least one image was produced.
                if not generated_files:
                    raise
                logger.info(f"Partial success: {len(generated_files)} files generated despite error")

            text_response = "".join(text_chunks)
            logger.info(f"Generation complete - Text length: {len(text_response)}, Files generated: {len(generated_files)}")

            return {
                'success': True,
                'text': text_response,
                'files': generated_files,
                'compressed_files': compressed_files
            }

        except Exception as e:
            logger.error(f"Error in generate_with_image: {e}")
            return {
                'success': False,
                'error': str(e)
            }


# Create the shared API wrapper instance used by the request handlers
robogen_api = RoboGenAPI(GOOGLE_API_KEY, MODEL_NAME)


def allowed_file(filename):
    """Return True when ``filename`` has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS


@app.route('/')
def index():
    """Render the landing page, passing the workflow steps to the template."""
    page_context = {'workflow_steps': WORKFLOW_STEPS}
    return render_template('index.html', **page_context)


@app.route('/upload', methods=['POST'])
def upload_file():
    """Store an uploaded image in the upload folder.

    Expects a multipart form field named ``file``.

    Returns:
        JSON ``{'success': True, 'filename': ..., 'filepath': ...}`` on
        success, or ``{'success': False, 'error': ...}`` when no file was sent
        or the extension is not allowed. Error responses use the default
        status code, matching the existing unsupported-format response.
    """
    file = request.files.get('file')
    # Redirecting back to this POST-only URL would only produce a 405, so a
    # missing file is reported as JSON like every other outcome of this route.
    if file is None or file.filename == '':
        return jsonify({
            'success': False,
            'error': '没有选择文件'
        })

    if not allowed_file(file.filename):
        return jsonify({
            'success': False,
            'error': '不支持的文件格式'
        })

    # Sanitize only the stem and re-attach the validated extension:
    # secure_filename() drops non-ASCII characters, so a name such as
    # "机器人.png" would otherwise collapse to "png" and lose its extension.
    raw_stem, raw_extension = file.filename.rsplit('.', 1)
    extension = raw_extension.lower()
    stem = secure_filename(raw_stem) or 'upload'

    # Timestamp plus random suffix avoids collisions between uploads that
    # arrive within the same second.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{timestamp}_{uuid.uuid4().hex[:8]}_{stem}.{extension}"
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    file.save(filepath)

    return jsonify({
        'success': True,
        'filename': filename,
        'filepath': filepath
    })


def _is_managed_image_path(image_path):
    """Return True when ``image_path`` resolves inside one of the app's storage folders."""
    resolved = os.path.realpath(image_path)
    for folder_key in ('UPLOAD_FOLDER', 'OUTPUT_FOLDER', 'COMPRESSED_FOLDER'):
        root = os.path.realpath(app.config[folder_key])
        try:
            if os.path.commonpath([resolved, root]) == root:
                return True
        except ValueError:
            # Raised e.g. for paths on different drives; not inside this root.
            continue
    return False


@app.route('/process_step', methods=['POST'])
def process_step():
    """Run one workflow step through the generation API.

    Expects a JSON object with:
        step: Step number (int, or a string of digits) present in WORKFLOW_STEPS.
        image_path: Optional path of an input image. It must resolve inside
            the upload, output or compressed folder.
        robot_type: Optional robot type; known keys are mapped to a display
            name that is prepended to the prompt.

    Returns:
        The JSON result of ``robogen_api.generate_with_image``, or
        ``{'success': False, 'error': ...}`` on invalid input or failure.
    """
    try:
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({
                'success': False,
                'error': '无效的请求数据'
            })

        step_number = data.get('step')
        image_path = data.get('image_path')
        robot_type = data.get('robot_type')

        # WORKFLOW_STEPS is keyed by int; accept "1" as well as 1.
        if isinstance(step_number, str) and step_number.isdigit():
            step_number = int(step_number)

        if step_number not in WORKFLOW_STEPS:
            return jsonify({
                'success': False,
                'error': '无效的步骤号'
            })

        # The path comes from the client and its content is sent to an external
        # API, so only files inside the app's own storage folders are accepted.
        if image_path and not (isinstance(image_path, str)
                               and _is_managed_image_path(image_path)):
            logger.warning(f"Rejected image path outside managed folders: {image_path!r}")
            return jsonify({
                'success': False,
                'error': '无效的图片路径'
            })

        step_config = WORKFLOW_STEPS[step_number]
        prompt = step_config['prompt']

        # When a robot type is given, prepend type-specific guidance to the prompt.
        if robot_type:
            type_mapping = {
                'quadruped': '四足机器人',
                'humanoid': '人形机器人',
                'hexapod': '六足机器人',
                'manipulator': '机械臂'
            }

            # Unknown types are passed through unchanged.
            robot_type_name = type_mapping.get(robot_type, robot_type)

            prompt = f"注意：这是一个{robot_type_name}类型的机器人。请针对{robot_type_name}的特点进行分析和设计。\n\n{prompt}"

            logger.info(f"Processing step {step_number} for robot type: {robot_type_name}")

        result = robogen_api.generate_with_image(prompt, image_path)

        return jsonify(result)

    except Exception as e:
        logger.error(f"Error in process_step: {e}")
        return jsonify({
            'success': False,
            'error': str(e)
        })


@app.route('/outputs/<filename>')
def download_file(filename):
    """Send a generated file from the output folder as an attachment.

    Args:
        filename: File name taken from the URL.

    Returns:
        The file response, or a 404 with a short message when the file cannot
        be served.
    """
    # Imported locally because the module-level flask import does not include it.
    from flask import send_from_directory

    try:
        # send_from_directory refuses names that would escape the folder,
        # unlike joining the user-supplied name onto the path by hand.
        return send_from_directory(
            app.config['OUTPUT_FOLDER'],
            filename,
            as_attachment=True
        )
    except Exception as e:
        logger.error(f"Error downloading file: {e}")
        return "文件不存在", 404


@app.route('/view_output/<filename>')
def view_output(filename):
    """Serve a generated image from the output folder for inline viewing.

    Args:
        filename: File name taken from the URL.

    Returns:
        The file response, or a 404 with a short message when the file cannot
        be served.
    """
    # Imported locally because the module-level flask import does not include it.
    from flask import send_from_directory

    try:
        # send_from_directory refuses names that would escape the folder,
        # unlike joining the user-supplied name onto the path by hand.
        return send_from_directory(app.config['OUTPUT_FOLDER'], filename)
    except Exception as e:
        logger.error(f"Error viewing file: {e}")
        return "文件不存在", 404


@app.route('/view_compressed/<filename>')
def view_compressed(filename):
    """Serve a compressed preview image from the compressed folder.

    Args:
        filename: File name taken from the URL.

    Returns:
        The file response, or a 404 with a short message when the file cannot
        be served.
    """
    # Imported locally because the module-level flask import does not include it.
    from flask import send_from_directory

    try:
        # send_from_directory refuses names that would escape the folder,
        # unlike joining the user-supplied name onto the path by hand.
        return send_from_directory(app.config['COMPRESSED_FOLDER'], filename)
    except Exception as e:
        logger.error(f"Error viewing compressed file: {e}")
        return "文件不存在", 404


if __name__ == '__main__':
    # Start the server on all interfaces, port 80, with debug mode off.
    # NOTE(review): binding to port 80 usually needs elevated privileges — confirm for the target host.
    app.run(debug=False, host='0.0.0.0', port=80)