feat(web): 重构前端UI并支持OpenAI协议
- 添加账号管理详情页(基本信息、提示词、CapCut、参考图标签页) - 重构资产页面,按项目组分开展示图片/视频 - 聊天界面支持深度思考内容折叠展示、复制、删除消息 - 设置页面支持Agent配置(Anthropic/OpenAI协议)和工具配置 - 后端支持OpenAI兼容协议流式输出和DeepSeek思考模式 - 添加对话置顶/删除功能、数据库迁移、资产清单API - 添加账号参考图上传/删除、技能配置持久化、连接测试API
This commit is contained in:
@@ -3,6 +3,7 @@ import { randomUUID } from 'crypto';
|
||||
import { getDb } from '../db';
|
||||
import { videoAgent } from '../agent';
|
||||
import type Anthropic from '@anthropic-ai/sdk';
|
||||
import type OpenAI from 'openai';
|
||||
|
||||
type MessageParam = Anthropic.MessageParam;
|
||||
type ContentBlock = Anthropic.ContentBlock;
|
||||
@@ -91,6 +92,295 @@ export function handleChat(ws: WebSocket) {
|
||||
ws.on('close', () => {});
|
||||
}
|
||||
|
||||
// Helper: convert DB messages to OpenAI format
|
||||
function extractToolCalls(blocks: ContentBlock[]): OpenAI.ChatCompletionMessageToolCall[] {
|
||||
return blocks
|
||||
.filter((b) => b.type === 'tool_use')
|
||||
.map((b) => ({
|
||||
id: (b as { id: string }).id,
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
name: (b as { name: string }).name,
|
||||
arguments: JSON.stringify((b as { input: unknown }).input),
|
||||
},
|
||||
}));
|
||||
}
|
||||
|
||||
function dbToOpenAI(msg: DbMessage): OpenAI.ChatCompletionMessageParam {
|
||||
if (msg.role === 'user') {
|
||||
return { role: 'user', content: msg.content };
|
||||
}
|
||||
if (msg.role === 'assistant') {
|
||||
const result: Record<string, unknown> = { role: 'assistant', content: msg.content || null };
|
||||
|
||||
if (!msg.tool_calls) return result as unknown as OpenAI.ChatCompletionMessageParam;
|
||||
|
||||
let parsed: unknown;
|
||||
try { parsed = JSON.parse(msg.tool_calls); } catch { return result as unknown as OpenAI.ChatCompletionMessageParam; }
|
||||
|
||||
// Legacy format: parsed is ContentBlock[] (array)
|
||||
if (Array.isArray(parsed)) {
|
||||
const toolCalls = extractToolCalls(parsed);
|
||||
if (toolCalls.length > 0) {
|
||||
result.tool_calls = toolCalls;
|
||||
const textBlocks = parsed.filter((b) => b.type === 'text');
|
||||
result.content = textBlocks.map((b) => (b as { text: string }).text).join('') || null;
|
||||
}
|
||||
return result as unknown as OpenAI.ChatCompletionMessageParam;
|
||||
}
|
||||
|
||||
// New format: parsed is { reasoning_content?, content_blocks? }
|
||||
const meta = parsed as { reasoning_content?: string; content_blocks?: ContentBlock[] };
|
||||
if (meta.reasoning_content) {
|
||||
result.reasoning_content = meta.reasoning_content;
|
||||
}
|
||||
if (meta.content_blocks) {
|
||||
const toolCalls = extractToolCalls(meta.content_blocks);
|
||||
if (toolCalls.length > 0) {
|
||||
result.tool_calls = toolCalls;
|
||||
const textBlocks = meta.content_blocks.filter((b) => b.type === 'text');
|
||||
result.content = textBlocks.map((b) => (b as { text: string }).text).join('') || null;
|
||||
}
|
||||
}
|
||||
|
||||
return result as unknown as OpenAI.ChatCompletionMessageParam;
|
||||
}
|
||||
if (msg.role === 'tool') {
|
||||
try {
|
||||
const { tool_use_id, content } = JSON.parse(msg.content);
|
||||
return { role: 'tool', tool_call_id: tool_use_id, content };
|
||||
} catch {
|
||||
return { role: 'tool', tool_call_id: 'unknown', content: msg.content };
|
||||
}
|
||||
}
|
||||
return { role: 'user', content: msg.content };
|
||||
}
|
||||
|
||||
// --- Anthropic protocol streaming ---

/**
 * Stream one assistant turn over the Anthropic Messages protocol.
 *
 * Runs an agent loop (capped at 10 iterations): stream a completion, forward
 * text deltas to the client, persist the assistant message, execute any
 * requested tools, persist their results, feed them back to the model as a
 * user-role message, and repeat until the model stops requesting tools.
 *
 * @param ws       client WebSocket that receives the streamed protocol events
 * @param convId   conversation id used for all `messages` table inserts
 * @param messages prior history in Anthropic MessageParam form.
 *                 NOTE(review): this array is mutated in place (assistant and
 *                 tool-result turns are pushed onto it) — callers must not
 *                 reuse it afterwards.
 */
async function streamAnthropic(
  ws: WebSocket,
  convId: string,
  messages: MessageParam[],
): Promise<void> {
  const client = videoAgent.getAnthropicClient();
  const model = videoAgent.getModel();
  const systemPrompt = videoAgent.getSystemPrompt();

  // `currentMessages` aliases the caller's array; the pushes below mutate it.
  let currentMessages = messages;
  let maxLoops = 10; // hard cap on tool-call round-trips

  while (maxLoops-- > 0) {
    // maxLoops is decremented before the body runs, so `9 - maxLoops` logs 0, 1, 2, ...
    console.log(`[chat:anthropic] Loop ${9 - maxLoops}, messages: ${currentMessages.length}`);
    const stream = client.messages.stream({
      model,
      max_tokens: 4096,
      system: systemPrompt,
      tools: videoAgent.getAnthropicTools(),
      messages: currentMessages,
    });

    const assistantMsgId = randomUUID();
    ws.send(JSON.stringify({ type: 'message_start', data: { id: assistantMsgId } }));

    // Relay text deltas as they arrive; other event types are ignored here —
    // the complete message is reconstructed from finalMessage() below.
    for await (const event of stream) {
      if (event.type === 'content_block_delta') {
        if (event.delta.type === 'text_delta') {
          ws.send(JSON.stringify({
            type: 'text_delta',
            data: { id: assistantMsgId, text: event.delta.text },
          }));
        }
      }
    }

    const finalMsg = await stream.finalMessage();
    ws.send(JSON.stringify({ type: 'message_end', data: { id: assistantMsgId } }));

    const toolUses = finalMsg.content.filter((b): b is Anthropic.ToolUseBlock => b.type === 'tool_use');
    const textBlocks = finalMsg.content.filter((b): b is Anthropic.TextBlock => b.type === 'text');
    const finalText = textBlocks.map((b) => b.text).join('');

    // No tool calls — persist the plain text answer and finish the turn.
    if (toolUses.length === 0) {
      getDb().prepare(
        'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
      ).run(assistantMsgId, convId, 'assistant', finalText);
      return;
    }

    // Save assistant message with filtered content (filterContent removes
    // thinking blocks, per its use in the pre-existing chat path).
    const cleanContent = filterContent(finalMsg.content as ContentBlock[]);
    getDb().prepare(
      'INSERT INTO messages (id, conversation_id, role, content, tool_calls) VALUES (?, ?, ?, ?, ?)'
    ).run(assistantMsgId, convId, 'assistant', finalText || '(调用工具)', JSON.stringify(cleanContent));

    currentMessages.push({ role: 'assistant', content: cleanContent });

    // Execute tools sequentially; each result (or error) is persisted as a
    // 'tool' row and echoed to the client (result preview capped at 1000 chars).
    const toolResults: Anthropic.ToolResultBlockParam[] = [];
    for (const tool of toolUses) {
      ws.send(JSON.stringify({ type: 'tool_start', data: { tool: tool.name, input: tool.input } }));
      console.log(`[chat:anthropic] Executing tool: ${tool.name}`);

      try {
        const result = await videoAgent.executeTool(tool.name, tool.input as Record<string, unknown>);
        toolResults.push({ type: 'tool_result', tool_use_id: tool.id, content: result });

        const toolMsgId = randomUUID();
        getDb().prepare(
          'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
        ).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tool.id, content: result }));

        ws.send(JSON.stringify({ type: 'tool_result', data: { tool: tool.name, result: result.slice(0, 1000) } }));
      } catch (err) {
        const errMsg = (err as Error).message;
        // Report the failure back to the model as a tool_result so the loop
        // can continue instead of aborting the whole turn.
        toolResults.push({ type: 'tool_result', tool_use_id: tool.id, content: `Error: ${errMsg}` });

        const toolMsgId = randomUUID();
        getDb().prepare(
          'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
        ).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tool.id, content: `Error: ${errMsg}` }));

        ws.send(JSON.stringify({ type: 'tool_error', data: { tool: tool.name, error: errMsg } }));
      }
    }

    // Tool results are returned to the model as a user-role message
    // (Anthropic's convention), then the loop requests a follow-up completion.
    currentMessages.push({ role: 'user', content: toolResults });
  }
  // NOTE(review): if maxLoops is exhausted the function exits silently with no
  // terminal event sent to the client — confirm this is intended.
}
|
||||
|
||||
// --- OpenAI protocol streaming ---
|
||||
async function streamOpenAI(
|
||||
ws: WebSocket,
|
||||
convId: string,
|
||||
dbMessages: DbMessage[],
|
||||
): Promise<void> {
|
||||
const client = videoAgent.getOpenAIClient();
|
||||
const model = videoAgent.getModel();
|
||||
const systemPrompt = videoAgent.getSystemPrompt();
|
||||
|
||||
const openaiTools = videoAgent.getOpenAITools();
|
||||
|
||||
let currentDbMessages = [...dbMessages];
|
||||
let maxLoops = 10;
|
||||
|
||||
while (maxLoops-- > 0) {
|
||||
const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
...currentDbMessages.map(dbToOpenAI),
|
||||
];
|
||||
|
||||
console.log(`[chat:openai] Loop ${9 - maxLoops}, messages: ${openaiMessages.length}`);
|
||||
|
||||
const assistantMsgId = randomUUID();
|
||||
ws.send(JSON.stringify({ type: 'message_start', data: { id: assistantMsgId } }));
|
||||
|
||||
let fullText = '';
|
||||
let reasoningContent = '';
|
||||
let toolCallsAcc: Array<{ id: string; name: string; arguments: string }> = [];
|
||||
|
||||
const stream = await client.chat.completions.create({
|
||||
model,
|
||||
messages: openaiMessages,
|
||||
tools: openaiTools.length > 0 ? openaiTools : undefined,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const delta = chunk.choices[0]?.delta;
|
||||
if (delta?.content) {
|
||||
fullText += delta.content;
|
||||
ws.send(JSON.stringify({
|
||||
type: 'text_delta',
|
||||
data: { id: assistantMsgId, text: delta.content },
|
||||
}));
|
||||
}
|
||||
// DeepSeek thinking mode: capture reasoning_content
|
||||
if ((delta as Record<string, unknown>)?.reasoning_content) {
|
||||
const chunk = (delta as Record<string, unknown>).reasoning_content as string;
|
||||
reasoningContent += chunk;
|
||||
ws.send(JSON.stringify({
|
||||
type: 'reasoning_delta',
|
||||
data: { id: assistantMsgId, text: chunk },
|
||||
}));
|
||||
}
|
||||
if (delta?.tool_calls) {
|
||||
for (const tc of delta.tool_calls) {
|
||||
if (tc.index !== undefined) {
|
||||
while (toolCallsAcc.length <= tc.index) {
|
||||
toolCallsAcc.push({ id: '', name: '', arguments: '' });
|
||||
}
|
||||
if (tc.id) toolCallsAcc[tc.index].id = tc.id;
|
||||
if (tc.function?.name) toolCallsAcc[tc.index].name = tc.function.name;
|
||||
if (tc.function?.arguments) toolCallsAcc[tc.index].arguments += tc.function.arguments;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ws.send(JSON.stringify({ type: 'message_end', data: { id: assistantMsgId } }));
|
||||
|
||||
// Store extra metadata (reasoning_content, tool_calls) in tool_calls column as JSON
|
||||
const extraMeta: Record<string, unknown> = {};
|
||||
if (reasoningContent) extraMeta.reasoning_content = reasoningContent;
|
||||
|
||||
// No tool calls — save and done
|
||||
if (toolCallsAcc.length === 0) {
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content, tool_calls) VALUES (?, ?, ?, ?, ?)'
|
||||
).run(assistantMsgId, convId, 'assistant', fullText, Object.keys(extraMeta).length > 0 ? JSON.stringify(extraMeta) : null);
|
||||
return;
|
||||
}
|
||||
|
||||
// Save assistant with tool calls in Anthropic-compatible format for DB
|
||||
const dbToolCalls = toolCallsAcc.map((tc) => ({
|
||||
type: 'tool_use',
|
||||
id: tc.id,
|
||||
name: tc.name,
|
||||
input: JSON.parse(tc.arguments || '{}'),
|
||||
}));
|
||||
const cleanContent = fullText
|
||||
? [{ type: 'text', text: fullText }, ...dbToolCalls]
|
||||
: dbToolCalls;
|
||||
extraMeta.content_blocks = cleanContent;
|
||||
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content, tool_calls) VALUES (?, ?, ?, ?, ?)'
|
||||
).run(assistantMsgId, convId, 'assistant', fullText || '(调用工具)', JSON.stringify(extraMeta));
|
||||
|
||||
// Execute tools and collect results
|
||||
for (const tc of toolCallsAcc) {
|
||||
ws.send(JSON.stringify({ type: 'tool_start', data: { tool: tc.name, input: JSON.parse(tc.arguments || '{}') } }));
|
||||
console.log(`[chat:openai] Executing tool: ${tc.name}`);
|
||||
|
||||
try {
|
||||
const params = JSON.parse(tc.arguments || '{}');
|
||||
const result = await videoAgent.executeTool(tc.name, params);
|
||||
|
||||
const toolMsgId = randomUUID();
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
|
||||
).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tc.id, content: result }));
|
||||
|
||||
ws.send(JSON.stringify({ type: 'tool_result', data: { tool: tc.name, result: result.slice(0, 1000) } }));
|
||||
} catch (err) {
|
||||
const errMsg = (err as Error).message;
|
||||
|
||||
const toolMsgId = randomUUID();
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
|
||||
).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tc.id, content: `Error: ${errMsg}` }));
|
||||
|
||||
ws.send(JSON.stringify({ type: 'tool_error', data: { tool: tc.name, error: errMsg } }));
|
||||
}
|
||||
}
|
||||
|
||||
// Reload all messages for next loop
|
||||
currentDbMessages = getDb().prepare(
|
||||
'SELECT * FROM messages WHERE conversation_id = ? ORDER BY created_at'
|
||||
).all(convId) as DbMessage[];
|
||||
}
|
||||
}
|
||||
|
||||
async function handleChatMessage(ws: WebSocket, convId: string, content: string) {
|
||||
const userMsgId = randomUUID();
|
||||
getDb().prepare(
|
||||
@@ -112,106 +402,18 @@ async function handleChatMessage(ws: WebSocket, convId: string, content: string)
|
||||
'SELECT * FROM messages WHERE conversation_id = ? AND id != ? ORDER BY created_at'
|
||||
).all(convId, userMsgId) as DbMessage[];
|
||||
|
||||
const messages: MessageParam[] = history.map(dbToAnthropic);
|
||||
|
||||
const client = videoAgent.getClient();
|
||||
const model = videoAgent.getModel();
|
||||
const systemPrompt = videoAgent.getSystemPrompt();
|
||||
|
||||
ws.send(JSON.stringify({ type: 'status', data: { status: 'thinking' } }));
|
||||
|
||||
try {
|
||||
let currentMessages = messages;
|
||||
let maxLoops = 10;
|
||||
const protocol = videoAgent.getProtocol();
|
||||
|
||||
while (maxLoops-- > 0) {
|
||||
console.log(`[chat] Calling LLM, loop ${9 - maxLoops}, messages: ${currentMessages.length}`);
|
||||
const stream = client.messages.stream({
|
||||
model,
|
||||
max_tokens: 4096,
|
||||
system: systemPrompt,
|
||||
tools: videoAgent.getAnthropicTools(),
|
||||
messages: currentMessages,
|
||||
});
|
||||
|
||||
const assistantMsgId = randomUUID();
|
||||
ws.send(JSON.stringify({ type: 'message_start', data: { id: assistantMsgId } }));
|
||||
|
||||
for await (const event of stream) {
|
||||
if (event.type === 'content_block_delta') {
|
||||
if (event.delta.type === 'text_delta') {
|
||||
ws.send(JSON.stringify({
|
||||
type: 'text_delta',
|
||||
data: { id: assistantMsgId, text: event.delta.text },
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const finalMsg = await stream.finalMessage();
|
||||
ws.send(JSON.stringify({ type: 'message_end', data: { id: assistantMsgId } }));
|
||||
|
||||
const toolUses = finalMsg.content.filter((b): b is Anthropic.ToolUseBlock => b.type === 'tool_use');
|
||||
const textBlocks = finalMsg.content.filter((b): b is Anthropic.TextBlock => b.type === 'text');
|
||||
const finalText = textBlocks.map((b) => b.text).join('');
|
||||
|
||||
// No tool calls — save and done
|
||||
if (toolUses.length === 0) {
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
|
||||
).run(assistantMsgId, convId, 'assistant', finalText);
|
||||
console.log(`[chat] Done, response: ${finalText.slice(0, 80)}`);
|
||||
return;
|
||||
}
|
||||
|
||||
// Save assistant message with filtered content (no thinking blocks)
|
||||
const cleanContent = filterContent(finalMsg.content as ContentBlock[]);
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content, tool_calls) VALUES (?, ?, ?, ?, ?)'
|
||||
).run(assistantMsgId, convId, 'assistant', finalText || '(调用工具)', JSON.stringify(cleanContent));
|
||||
|
||||
currentMessages.push({ role: 'assistant', content: cleanContent });
|
||||
|
||||
// Execute tools
|
||||
const toolResults: Anthropic.ToolResultBlockParam[] = [];
|
||||
|
||||
for (const tool of toolUses) {
|
||||
ws.send(JSON.stringify({
|
||||
type: 'tool_start',
|
||||
data: { tool: tool.name, input: tool.input },
|
||||
}));
|
||||
console.log(`[chat] Executing tool: ${tool.name}`);
|
||||
|
||||
try {
|
||||
const result = await videoAgent.executeTool(tool.name, tool.input as Record<string, unknown>);
|
||||
toolResults.push({ type: 'tool_result', tool_use_id: tool.id, content: result });
|
||||
|
||||
const toolMsgId = randomUUID();
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
|
||||
).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tool.id, content: result }));
|
||||
|
||||
ws.send(JSON.stringify({
|
||||
type: 'tool_result',
|
||||
data: { tool: tool.name, result: result.slice(0, 1000) },
|
||||
}));
|
||||
} catch (err) {
|
||||
const errMsg = (err as Error).message;
|
||||
toolResults.push({ type: 'tool_result', tool_use_id: tool.id, content: `Error: ${errMsg}` });
|
||||
|
||||
const toolMsgId = randomUUID();
|
||||
getDb().prepare(
|
||||
'INSERT INTO messages (id, conversation_id, role, content) VALUES (?, ?, ?, ?)'
|
||||
).run(toolMsgId, convId, 'tool', JSON.stringify({ tool_use_id: tool.id, content: `Error: ${errMsg}` }));
|
||||
|
||||
ws.send(JSON.stringify({
|
||||
type: 'tool_error',
|
||||
data: { tool: tool.name, error: errMsg },
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
currentMessages.push({ role: 'user', content: toolResults });
|
||||
if (protocol === 'openai') {
|
||||
// OpenAI protocol
|
||||
await streamOpenAI(ws, convId, history);
|
||||
} else {
|
||||
// Anthropic protocol (default)
|
||||
const messages: MessageParam[] = history.map(dbToAnthropic);
|
||||
await streamAnthropic(ws, convId, messages);
|
||||
}
|
||||
} catch (err) {
|
||||
const errMsg = (err as Error).message;
|
||||
|
||||
Reference in New Issue
Block a user