feat(web): integrate Claude LLM streaming with markdown rendering

- Add Anthropic SDK with DeepSeek-compatible API config
- Streaming tool-use loop in WebSocket chat handler
- GitHub-style markdown rendering with markdown-it
- Tool status indicators and thinking states in chat UI
- Fix Tailwind content path and CSS border utility

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-07 03:22:15 +08:00
parent d6b18fb7dc
commit 001dbde9f4
15 changed files with 759 additions and 95 deletions
--- a/web/server/agent/index.ts
+++ b/web/server/agent/index.ts
@@ -1,4 +1,43 @@
+import Anthropic from '@anthropic-ai/sdk';
 import { tools, ToolDefinition } from './tools';
+import { getDb } from '../db';
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+const PROJECT_ROOT = path.resolve(__dirname, '..', '..', '..', '..');
+
+/** Build an Anthropic client; ANTHROPIC_AUTH_TOKEN / ANTHROPIC_BASE_URL in the `api_keys` config row override the ANTHROPIC_API_KEY env var. */
+function getAnthropicClient(): Anthropic {
+  const configRow = getDb().prepare('SELECT value FROM configs WHERE key = ?').get('api_keys') as { value: string } | undefined;
+  let apiKey = process.env.ANTHROPIC_API_KEY || '';
+  let baseURL: string | undefined;
+
+  if (configRow) {
+    try {
+      const cfg = JSON.parse(configRow.value);
+      if (cfg.ANTHROPIC_AUTH_TOKEN) apiKey = cfg.ANTHROPIC_AUTH_TOKEN;
+      if (cfg.ANTHROPIC_BASE_URL) baseURL = cfg.ANTHROPIC_BASE_URL;
+    } catch {
+      // Keep the env fallback but report it; the row itself is never logged because it holds credentials.
+      console.warn('configs.api_keys could not be read as a JSON object; using environment settings');
+    }
+  }
+  return new Anthropic({ apiKey, baseURL });
+}
+
+/** Resolve the model id: ANTHROPIC_MODEL from the `api_keys` config row, else from the environment, else 'claude-sonnet-4-6'. */
+function getModel(): string {
+  const configRow = getDb().prepare('SELECT value FROM configs WHERE key = ?').get('api_keys') as { value: string } | undefined;
+  try {
+    // `?.` covers a row whose JSON is not an object (e.g. `null`); only unparseable JSON reaches the catch.
+    const cfg = configRow ? JSON.parse(configRow.value) : undefined;
+    if (cfg?.ANTHROPIC_MODEL) return cfg.ANTHROPIC_MODEL;
+  } catch { console.warn('configs.api_keys is not valid JSON; using ANTHROPIC_MODEL or the default model'); }
+  return process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-6';
+}

 export class VideoAgent {
  private tools: ToolDefinition[];
@@ -7,11 +46,11 @@ export class VideoAgent {
    this.tools = tools;
  }

-  getToolDefinitions() {
+  getAnthropicTools(): Anthropic.Tool[] {
    return this.tools.map((t) => ({
      name: t.name,
      description: t.description,
-      parameters: t.parameters,
+      input_schema: t.input_schema,
    }));
  }

@@ -21,18 +60,60 @@ export class VideoAgent {
    return tool.execute(params);
  }

-  getSystemPrompt(accountContext?: string): string {
-    return `你是美图 Agent，帮助用户进行短视频创作。
+  /** Build the system prompt, listing the account folders currently found under PROJECT_ROOT/accounts. */
+  getSystemPrompt(): string {
+    const accountsDir = path.join(PROJECT_ROOT, 'accounts');
+    let accountList = '暂无账号';
+    if (fs.existsSync(accountsDir)) {
+      accountList = fs.readdirSync(accountsDir, { withFileTypes: true })
+        .filter((d) => d.isDirectory() && !d.name.startsWith('_') && !d.name.startsWith('.'))
+        .map((d) => {
+          const configPath = path.join(accountsDir, d.name, 'account.json');
+          try {
+            const cfg = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
+            return `- ${d.name}: ${cfg.description || '无描述'} (生图:${cfg.imageModel}, 视频:${cfg.videoModel}, 画幅:${cfg.defaultFormat})`;
+          } catch {
+            // A missing or malformed account.json must not fail every chat turn: list the folder name only.
+            return `- ${d.name}`;
+          }
+        }).join('\n') || accountList;
+    }

-可用账号：${accountContext || '暂无'}
+    return `你是美图 Agent，一个专业的短视频创作助手。你可以帮助用户完成从创意到成片的完整流程。

-你可以：
-1. 帮用户创建新账号
-2. 查看和管理已有账号
-3. 执行视频创作 pipeline（分镜→生图→生视频→TTS→成片）
-4. 管理提示词模板
+## 当前可用账号
+${accountList}

-用户想创作视频时，一步步引导他们完成流程。`;
+## 你的能力
+1. **查看账号** - 使用 list_accounts 列出所有可用账号及其配置
+2. **创建账号** - 使用 create_account 创建新的短视频账号，配置生图/视频模型、画幅等
+3. **查看账号配置** - 使用 get_account_config 获取账号详细配置
+4. **查看 Pipeline 进度** - 使用 pipeline_status 检查创作进度
+5. **执行创作阶段** - 使用 run_pipeline_phase 执行 pipeline 阶段
+
+## 视频创作流程
+1. 确认用户意图（A.幻灯片视频 / B.AI视频）
+2. 选择/创建账号
+3. 规划分镜脚本
+4. 生成图片（images 阶段）
+5. 生成视频片段（videos 阶段，仅 B 模式）
+6. 配音（tts 阶段）
+7. 成片组装（assemble 阶段）
+
+## 行为准则
+- 用中文回复，友好、专业
+- 在用户不清楚时主动询问：成片类型、账号选择、素材来源、画幅等
+- 执行 pipeline 前确认 manifest 路径
+- 如果用户只是闲聊，就闲聊。如果用户想做视频，引导完成流程
+- 不要编造账号或文件路径，使用工具获取真实信息`;
+  }
+
+  getClient(): Anthropic { // built anew on every call from the current config row / environment
+    return getAnthropicClient();
+  }
+
+  getModel(): string { // delegates to the module-level getModel(): config row, then env, then default
+    return getModel();
  }
 }

--- a/web/server/agent/tools.ts
+++ b/web/server/agent/tools.ts
@@ -1,22 +1,33 @@
 import { spawn, execSync } from 'child_process';
 import path from 'path';
 import fs from 'fs';
+import { fileURLToPath } from 'url';

+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
 const PROJECT_ROOT = path.resolve(__dirname, '..', '..', '..', '..');
 const PIPELINE_SCRIPT = path.join(PROJECT_ROOT, '.claude', 'skills', 'video-from-script', 'scripts', 'pipeline.js');

 export interface ToolDefinition {
  name: string;
  description: string;
-  parameters: Record<string, unknown>;
+  input_schema: {
+    type: 'object';
+    properties: Record<string, { type: string; description: string }>;
+    required?: string[];
+  };
  execute: (params: Record<string, unknown>) => Promise<string>;
 }

 export const tools: ToolDefinition[] = [
  {
    name: 'list_accounts',
-    description: '列出所有可用账号',
-    parameters: { type: 'object', properties: {}, required: [] },
+    description: '列出所有可用账号，返回每个账号的名称、描述、生图模型和视频模型',
+    input_schema: {
+      type: 'object',
+      properties: {},
+      required: [],
+    },
    execute: async () => {
      const accountsDir = path.join(PROJECT_ROOT, 'accounts');
      const dirs = fs.readdirSync(accountsDir, { withFileTypes: true })
@@ -25,21 +36,69 @@ export const tools: ToolDefinition[] = [
          const configPath = path.join(accountsDir, d.name, 'account.json');
          if (fs.existsSync(configPath)) {
            const cfg = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
-            return `${d.name} - ${cfg.description || '无描述'} (${cfg.imageModel}/${cfg.videoModel})`;
+            return `${d.name} - ${cfg.description || '无描述'} (生图:${cfg.imageModel} 视频:${cfg.videoModel} 画幅:${cfg.defaultFormat})`;
          }
          return d.name;
        });
-      return dirs.join('\n');
+      return dirs.join('\n') || '暂无账号';
+    },
+  },
+  {
+    name: 'create_account',
+    description: '创建新的短视频账号。需要提供账号ID、名称和描述。创建后可在 accounts/ 目录下找到配置。',
+    input_schema: {
+      type: 'object',
+      properties: {
+        id: { type: 'string', description: '账号唯一标识，英文小写，如 military-account' },
+        name: { type: 'string', description: '账号显示名称，中文，如 军事账号' },
+        desc: { type: 'string', description: '账号描述，说明视频风格和主题' },
+        imageModel: { type: 'string', description: '生图模型: gemini, mj, gpt, kling' },
+        videoModel: { type: 'string', description: '视频模型: veo3-fast, veo3-fast-frames, kling, grok' },
+        format: { type: 'string', description: '画幅: 9:16 (竖屏), 16:9 (横屏), 1:1 (方形)' },
+      },
+      required: ['id', 'name'],
+    },
+    execute: async (params) => {
+      const { id, name, desc, imageModel, videoModel, format } = params as Record<string, string>;
+      const cmd = [
+        `node "${PIPELINE_SCRIPT}" create-account`,
+        `--id "${id}"`,
+        `--name "${name}"`,
+        `--desc "${desc || ''}"`,
+        `--video-model ${videoModel || 'veo3-fast'}`,
+        imageModel ? `--image-model ${imageModel}` : '',
+        format ? `--format ${format}` : '',
+      ].filter(Boolean).join(' ');
+      const result = execSync(cmd, { cwd: PROJECT_ROOT, encoding: 'utf-8' });
+      return `账号「${name}」创建成功。\n${result}`;
+    },
+  },
+  {
+    name: 'pipeline_status',
+    description: '查看指定 manifest 的 pipeline 执行进度和各阶段状态',
+    input_schema: {
+      type: 'object',
+      properties: {
+        manifest: { type: 'string', description: 'manifest.json 的绝对路径' },
+      },
+      required: ['manifest'],
+    },
+    execute: async (params) => {
+      const { manifest } = params as { manifest: string };
+      const result = execSync(`node "${PIPELINE_SCRIPT}" status --manifest "${manifest}"`, {
+        cwd: PROJECT_ROOT, encoding: 'utf-8',
+      });
+      return result;
    },
  },
  {
    name: 'run_pipeline_phase',
-    description: '执行 pipeline 阶段 (images/upload/videos/tts/assemble)',
-    parameters: {
+    description: '执行视频创作 pipeline 的指定阶段。阶段顺序: images(生图) → upload(上传) → videos(生视频) → tts(配音) → assemble(成片组装)。执行前需确认 manifest.json 已存在。',
+    input_schema: {
      type: 'object',
      properties: {
-        manifest: { type: 'string', description: 'manifest.json 绝对路径' },
-        phase: { type: 'string', description: '阶段名: images, upload, videos, tts, assemble' },
+        manifest: { type: 'string', description: 'manifest.json 的绝对路径' },
+        phase: { type: 'string', description: '要执行的阶段: images, upload, videos, tts, assemble。多个阶段用逗号分隔如 images,upload' },
      },
      required: ['manifest', 'phase'],
    },
@@ -54,48 +113,27 @@ export const tools: ToolDefinition[] = [
        proc.stdout.on('data', (d: Buffer) => { output += d.toString(); });
        proc.stderr.on('data', (d: Buffer) => { output += d.toString(); });
        proc.on('close', (code) => {
-          code === 0 ? resolve(output) : reject(new Error(`Pipeline exit code ${code}: ${output}`));
+          code === 0 ? resolve(output || '执行成功') : reject(new Error(`Pipeline exit code ${code}: ${output.slice(-500)}`));
        });
      });
    },
  },
  {
-    name: 'pipeline_status',
-    description: '查看 pipeline 进度',
-    parameters: {
+    name: 'get_account_config',
+    description: '获取指定账号的完整配置，包括模型选择、TTS语音、字幕风格等',
+    input_schema: {
      type: 'object',
      properties: {
-        manifest: { type: 'string', description: 'manifest.json 绝对路径' },
+        accountId: { type: 'string', description: '账号ID，如 军事账号' },
      },
-      required: ['manifest'],
+      required: ['accountId'],
    },
    execute: async (params) => {
-      const { manifest } = params as { manifest: string };
-      const result = execSync(`node "${PIPELINE_SCRIPT}" status --manifest "${manifest}"`, {
-        cwd: PROJECT_ROOT, encoding: 'utf-8',
-      });
-      return result;
-    },
-  },
-  {
-    name: 'create_account',
-    description: '创建新账号',
-    parameters: {
-      type: 'object',
-      properties: {
-        id: { type: 'string', description: '账号 ID' },
-        name: { type: 'string', description: '账号名称' },
-        desc: { type: 'string', description: '账号描述' },
-      },
-      required: ['id', 'name'],
-    },
-    execute: async (params) => {
-      const { id, name, desc } = params as { id: string; name: string; desc?: string };
-      const result = execSync(
-        `node "${PIPELINE_SCRIPT}" create-account --id "${id}" --name "${name}" --desc "${desc || ''}" --video-model veo3-fast`,
-        { cwd: PROJECT_ROOT, encoding: 'utf-8' }
-      );
-      return result;
+      const { accountId } = params as { accountId: string };
+      const configPath = path.join(PROJECT_ROOT, 'accounts', accountId, 'account.json');
+      if (!fs.existsSync(configPath)) return `账号「${accountId}」不存在`;
+      const cfg = JSON.parse(fs.readFileSync(configPath, 'utf-8'));
+      return JSON.stringify(cfg, null, 2);
    },
  },
 ];
--- a/web/server/ws/chat.ts
+++ b/web/server/ws/chat.ts
@@ -1,47 +1,79 @@
 import { WebSocket } from 'ws';
 import { randomUUID } from 'crypto';
 import { getDb } from '../db';
+import { videoAgent } from '../agent';
+import type { MessageParam, ToolUseBlock, TextBlock } from '@anthropic-ai/sdk/resources/messages.mjs';
+
+interface ChatMsg { // JSON frame parsed from a client WebSocket message
+  type: string; // dispatched on 'init', 'create_conversation' and 'chat'
+  conversationId?: string; // 'init': conversation to load; a new UUID is generated when absent
+  content?: string; // 'chat': the user's text
+  title?: string; // 'create_conversation': falls back to '新对话' when empty
+  accountId?: string; // 'create_conversation': stored as account_id, null when absent
+  data?: Record<string, unknown>; // this field and the ones below are not read by the handlers shown in this file
+  conversation_id?: string;
+  role?: string;
+  tool_calls?: string;
+  created_at?: string;
+  id?: string;
+}
+
+interface DbMessage { // row of the `messages` table as returned by `SELECT *`
+  id: string;
+  conversation_id: string;
+  role: string; // 'user', 'assistant' or 'tool' in the rows this file writes
+  content: string; // plain text; for 'tool' rows a JSON string { tool_use_id, content }
+  tool_calls: string | null; // JSON of the reply's content blocks, written only when the assistant called tools
+  created_at: string; // sort key used when loading history
+}
+
+/** Map a stored chat row to an Anthropic message; 'tool' rows become user-side tool_result blocks. */
+function dbToAnthropic(msg: DbMessage): MessageParam {
+  if (msg.role === 'assistant') {
+    if (msg.tool_calls) {
+      try {
+        // tool_calls holds the JSON-serialized content blocks of the original reply.
+        const blocks: unknown = JSON.parse(msg.tool_calls);
+        // Anything but a non-empty array is not usable as block content: use the text instead.
+        if (Array.isArray(blocks) && blocks.length > 0) return { role: 'assistant', content: blocks };
+      } catch {
+        // Unparseable tool_calls: fall back to the plain text below.
+      }
+    }
+    return { role: 'assistant', content: msg.content };
+  }
+  if (msg.role === 'tool') {
+    try {
+      const { tool_use_id, content } = JSON.parse(msg.content) as { tool_use_id?: unknown; content?: unknown };
+      // A tool_result must carry the id of the tool_use it answers.
+      if (typeof tool_use_id === 'string' && typeof content === 'string') {
+        return { role: 'user', content: [{ type: 'tool_result', tool_use_id, content }] };
+      }
+    } catch {
+      // Malformed tool row: replay it as plain user text below.
+    }
+  }
+  return { role: 'user', content: msg.content }; // user rows, unknown roles, malformed tool rows
+}

 export function handleChat(ws: WebSocket) {
  let conversationId: string | null = null;

  ws.on('message', async (raw) => {
    try {
-      const msg = JSON.parse(raw.toString());
+      const msg: ChatMsg = JSON.parse(raw.toString());

+      // --- Init: load conversation history ---
      if (msg.type === 'init') {
        conversationId = msg.conversationId || randomUUID();
        const history = getDb().prepare(
          'SELECT * FROM messages WHERE conversation_id = ? ORDER BY created_at'
-        ).all(conversationId);
+        ).all(conversationId) as DbMessage[];
        ws.send(JSON.stringify({ type: 'history', data: { conversationId, messages: history } }));
        return;
      }

-      if (msg.type === 'chat') {
-        const { content } = msg;
-        const msgId = randomUUID();
-
-        getDb().prepare(
-          'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
-        ).run(msgId, conversationId, 'user', content);
-
-        ws.send(JSON.stringify({ type: 'message', data: { id: msgId, role: 'user', content } }));
-
-        // Assistant echo (placeholder until LLM integration in Task 3.3)
-        const assistantId = randomUUID();
-        const assistantContent = `收到你的消息：「${content}」。Agent 引擎正在启动中...`;
-
-        getDb().prepare(
-          'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
-        ).run(assistantId, conversationId, 'assistant', assistantContent);
-
-        ws.send(JSON.stringify({
-          type: 'message',
-          data: { id: assistantId, role: 'assistant', content: assistantContent },
-        }));
-      }
-
+      // --- Create conversation ---
      if (msg.type === 'create_conversation') {
        const { title, accountId } = msg;
        conversationId = randomUUID();
@@ -49,13 +81,194 @@ export function handleChat(ws: WebSocket) {
          'INSERT INTO conversations (id, title, account_id) VALUES (?, ?, ?)'
        ).run(conversationId, title || '新对话', accountId || null);
        ws.send(JSON.stringify({ type: 'conversation_created', data: { id: conversationId, title } }));
+        return;
+      }
+
+      // --- Chat with LLM ---
+      if (msg.type === 'chat') {
+        await handleChatMessage(ws, conversationId!, msg.content!);
      }
    } catch (e) {
+      console.error('WebSocket error:', e);
      ws.send(JSON.stringify({ type: 'error', data: { message: (e as Error).message } }));
    }
  });

-  ws.on('close', () => {
-    // cleanup if needed
-  });
+  ws.on('close', () => {});
+}
+
+/**
+ * Persist a user chat message and answer it through the streaming LLM tool-use loop.
+ *
+ * Text deltas, tool activity and failures are pushed to the client over `ws`;
+ * assistant replies and tool results are stored in the `messages` table.
+ * The model gets at most 10 request rounds per user message.
+ */
+async function handleChatMessage(ws: WebSocket, convId: string, content: string) {
+  // 1. Save user message
+  const userMsgId = randomUUID();
+  getDb().prepare(
+    'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
+  ).run(userMsgId, convId, 'user', content);
+  ws.send(JSON.stringify({ type: 'message', data: { id: userMsgId, role: 'user', content } }));
+
+  // Update conversation title if first message
+  const msgCount = getDb().prepare(
+    'SELECT COUNT(*) as count FROM messages WHERE conversation_id = ?'
+  ).get(convId) as { count: number };
+  if (msgCount.count <= 1) {
+    const title = content.slice(0, 30) + (content.length > 30 ? '...' : '');
+    getDb().prepare('UPDATE conversations SET title = ?, updated_at = datetime(\'now\') WHERE id = ?')
+      .run(title, convId);
+  }
+
+  // Update conversation timestamp
+  getDb().prepare('UPDATE conversations SET updated_at = datetime(\'now\') WHERE id = ?').run(convId);
+
+  // 2. Build message history for Anthropic
+  const history = getDb().prepare(
+    'SELECT * FROM messages WHERE conversation_id = ? AND id != ? ORDER BY created_at'
+  ).all(convId, userMsgId) as DbMessage[];
+
+  // The query skips the row inserted above, so the new turn is appended explicitly;
+  // otherwise the model would never see the message it is supposed to answer.
+  const messages: MessageParam[] = [...history.map((row) => dbToAnthropic(row)), { role: 'user', content }];
+
+  // 3. Call LLM with tool loop
+  const client = videoAgent.getClient();
+  const model = videoAgent.getModel();
+  const systemPrompt = videoAgent.getSystemPrompt();
+
+  ws.send(JSON.stringify({ type: 'status', data: { status: 'thinking' } }));
+
+  try {
+    const currentMessages = messages;
+    let maxLoops = 10;
+
+    while (maxLoops-- > 0) {
+      const stream = client.messages.stream({
+        model,
+        max_tokens: 4096,
+        system: systemPrompt,
+        tools: videoAgent.getAnthropicTools(),
+        messages: currentMessages,
+      });
+
+      const assistantMsgId = randomUUID();
+      ws.send(JSON.stringify({ type: 'message_start', data: { id: assistantMsgId } }));
+
+      // Forward text as it streams; tool calls are read from the final message below.
+      for await (const event of stream) {
+        if (event.type === 'content_block_delta' && event.delta.type === 'text_delta') {
+          ws.send(JSON.stringify({
+            type: 'text_delta',
+            data: { id: assistantMsgId, text: event.delta.text },
+          }));
+        }
+      }
+
+      const finalMsg = await stream.finalMessage();
+      ws.send(JSON.stringify({ type: 'message_end', data: { id: assistantMsgId } }));
+
+      // Extract tool uses from final message
+      const toolUses: { id: string; name: string; input: Record<string, unknown> }[] = [];
+      const textBlocks: string[] = [];
+
+      for (const block of finalMsg.content) {
+        if (block.type === 'text') {
+          textBlocks.push(block.text);
+        }
+        if (block.type === 'tool_use') {
+          toolUses.push({ id: block.id, name: block.name, input: block.input as Record<string, unknown> });
+        }
+      }
+
+      // No tool calls — save assistant message and done
+      if (toolUses.length === 0) {
+        const finalText = textBlocks.join('');
+        getDb().prepare(
+          'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
+        ).run(assistantMsgId, convId, 'assistant', finalText);
+        return;
+      }
+
+      // Has tool calls — save assistant message with tool_calls, execute tools, add results
+      getDb().prepare(
+        'INSERT INTO messages (id, conversation_id, role, content, tool_calls) VALUES (?, ?, ?, ?, ?)'
+      ).run(assistantMsgId, convId, 'assistant', textBlocks.join('') || '(调用工具)', JSON.stringify(finalMsg.content));
+
+      // Build assistant content blocks for Anthropic
+      const assistantBlocks: (TextBlock | ToolUseBlock)[] = finalMsg.content
+        .filter((b): b is TextBlock | ToolUseBlock => b.type === 'text' || b.type === 'tool_use');
+
+      currentMessages.push({ role: 'assistant', content: assistantBlocks });
+
+      // Execute tools and send results
+      const toolResults: { type: 'tool_result'; tool_use_id: string; content: string }[] = [];
+
+      for (const tool of toolUses) {
+        ws.send(JSON.stringify({
+          type: 'tool_start',
+          data: { tool: tool.name, input: tool.input },
+        }));
+
+        try {
+          const result = await videoAgent.executeTool(tool.name, tool.input);
+          toolResults.push({ type: 'tool_result', tool_use_id: tool.id, content: result });
+
+          // Save tool result to DB
+          const toolMsgId = randomUUID();
+          getDb().prepare(
+            'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
+          ).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tool.id, content: result }));
+
+          ws.send(JSON.stringify({
+            type: 'tool_result',
+            data: { tool: tool.name, result: result.slice(0, 1000) },
+          }));
+        } catch (err) {
+          const errMsg = (err as Error).message;
+          toolResults.push({ type: 'tool_result', tool_use_id: tool.id, content: `Error: ${errMsg}` });
+
+          const toolMsgId = randomUUID();
+          getDb().prepare(
+            'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
+          ).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tool.id, content: `Error: ${errMsg}` }));
+
+          ws.send(JSON.stringify({
+            type: 'tool_error',
+            data: { tool: tool.name, error: errMsg },
+          }));
+        }
+      }
+
+      // Add tool results to conversation
+      currentMessages.push({
+        role: 'user',
+        content: toolResults,
+      });
+
+      // Continue loop — LLM will process tool results and possibly call more tools or give final response
+    }
+
+    // Round budget used up while the model was still requesting tools: tell the
+    // client instead of ending the turn without any reply.
+    const limitId = randomUUID();
+    const limitText = '已达到工具调用轮数上限，本轮处理已停止。请继续发送消息以完成剩余步骤。';
+    getDb().prepare(
+      'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
+    ).run(limitId, convId, 'assistant', limitText);
+    ws.send(JSON.stringify({ type: 'message', data: { id: limitId, role: 'assistant', content: limitText } }));
+  } catch (err) {
+    const errMsg = (err as Error).message;
+    console.error('LLM error:', errMsg);
+    const errId = randomUUID();
+    getDb().prepare(
+      'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
+    ).run(errId, convId, 'assistant', `抱歉，出错了：${errMsg}`);
+    ws.send(JSON.stringify({
+      type: 'message',
+      data: { id: errId, role: 'assistant', content: `抱歉，出错了：${errMsg}` },
+    }));
+  }
 }