集成

2025-07-24 17:22:36 +08:00 · 2025-07-24 17:22:36 +08:00 · 1901cf611e
commit 1901cf611e
parent fcdfe71646
24 changed files with 1263 additions and 0 deletions
--- a/.DS_Store
+++ b/.DS_Store
--- a/assets/.DS_Store
+++ b/assets/.DS_Store
--- a/compliance-mcp-agent/pycache/response_utils.cpython-312.pyc
+++ b/compliance-mcp-agent/pycache/response_utils.cpython-312.pyc
--- a/compliance-mcp-agent/agent_main_loop.py
+++ b/compliance-mcp-agent/agent_main_loop.py
@ -0,0 +1,253 @@
+import asyncio
+import json
+import traceback
+from typing import List, Dict, Any
+
+from mcp.client.streamable_http import streamablehttp_client
+from mcp import ClientSession
+from mcp.types import Tool, TextContent
+
+from llm.llm_service import LLMService
+
+# --- Configuration ---
+# Streamable-HTTP endpoint of each MCP server, keyed by the name main() uses to look it up.
+SERVER_ENDPOINTS = {
+    "api_caller": "http://127.0.0.1:8001/mcp",
+    "schema_validator": "http://127.0.0.1:8002/mcp",
+    "dms_provider": "http://127.0.0.1:8003/mcp",
+    "test_manager": "http://127.0.0.1:8004/mcp",
+}
+# NOTE(review): not referenced anywhere in the visible part of this file — confirm whether it is still needed.
+MAX_AGENT_LOOPS = 50
+
+def mcp_tools_to_openai_format(mcp_tools: List[Tool]) -> List[Dict[str, Any]]:
+    """
+    将MCP工具列表转换为OpenAI工具格式。
+    """
+    openai_tools = []
+    for tool in mcp_tools:
+        # tool is a mcp.types.Tool object, which has .name, .description, and .inputSchema
+        openai_tools.append({
+            "type": "function",
+            "function": {
+                "name": tool.name,
+                "description": tool.description or "",
+                "parameters": tool.inputSchema or {"type": "object", "properties": {}}
+            }
+        })
+    return openai_tools
+
+async def get_structured_response(tool_response: Any) -> Dict[str, Any]:
+    """
+    健壮地从工具调用响应中获取结构化内容。
+    能处理SDK未能自动解析JSON，而是将其放入TextContent的情况。
+    """
+    if tool_response.structuredContent:
+        # 正常情况，SDK已成功解析
+        return tool_response.structuredContent
+
+    # 异常情况：尝试从TextContent手动解析JSON
+    if tool_response.content and isinstance(tool_response.content[0], TextContent):
+        try:
+            json_text = tool_response.content[0].text
+            parsed_json = json.loads(json_text)
+            return parsed_json
+        except (json.JSONDecodeError, IndexError) as e:
+            # 如果手动解析也失败，则抛出致命错误
+            raise RuntimeError(f"Failed to manually parse JSON from TextContent: {e}. Raw text: '{json_text}'")
+
+    # 如果既没有structuredContent，也没有可解析的TextContent，则抛出致命错误
+    raise RuntimeError("Tool call returned no structuredContent and no parsable TextContent.")
+
+
+async def execute_task(task: Dict, tool_to_session_map: Dict, openai_tools: List[Dict]):
+    """
+    为一个通用的、由prompt驱动的任务执行完整的、隔离的测试生命周期。
+    """
+    llm_service = LLMService(tools=openai_tools)
+    
+    task_name = task['name']
+    prompt = task['prompt']
+
+    print(f"\n>>>> Starting Task: {task_name} <<<<")
+    llm_service.start_new_task(prompt)
+
+    # 针对当前任务的子任务循环
+    for sub_loop in range(25): # 单个任务的测试循环上限
+        print("\n" + "="*20 + f" Sub-Loop for '{task_name}' ({sub_loop+1}/25) " + "="*20)
+        
+        tool_name, tool_args, tool_call_id = llm_service.execute_completion()
+
+        if not tool_name:
+            print(f"Agent: LLM did not request a tool call for task '{task_name}'. It might be confused. Ending task.")
+            # 即使LLM困惑，我们仍然尝试记录一个失败结果，如果record_test_result可用的话
+            record_session = tool_to_session_map.get("record_test_result")
+            if record_session:
+                # 我们需要从prompt中猜测api_id，这很脆弱，但比什么都不做要好
+                import re
+                match = re.search(r"API 模型 '([^']+)'", prompt)
+                api_id_guess = match.group(1) if match else "unknown"
+                await record_session.call_tool("record_test_result", {"api_id": api_id_guess, "task_name": task_name, "status": "failed", "details": "LLM got confused and stopped calling tools."})
+            return # 结束此任务
+
+        # 核心逻辑：如果LLM调用了record_test_result，说明这个任务结束了
+        if tool_name == "record_test_result":
+            print(f"Agent: LLM is recording result for task '{task_name}'. Task is complete.")
+            record_session = tool_to_session_map.get("record_test_result")
+            if record_session:
+                    # 将任务名称加入到参数中，以便更好地跟踪
+                    tool_args['task_name'] = task_name
+                    await record_session.call_tool(tool_name, tool_args)
+            return # 核心修复：使用return退出此任务的函数
+
+        if tool_name == "error_malformed_json":
+            error_info = tool_args
+            print(f"Agent: Detected a malformed JSON from LLM for tool '{error_info['tool_name']}'. Asking for correction.")
+            correction_request = f"你上次试图调用工具 '{error_info['tool_name']}'，但提供的参数不是一个有效的JSON。错误是：{error_info['error']}。这是你提供的错误参数：'{error_info['malformed_arguments']}'。请修正这个错误，并重新调用该工具。"
+            llm_service.add_user_message(correction_request)
+            continue
+
+        if tool_name in tool_to_session_map:
+            try:
+                target_session = tool_to_session_map[tool_name]
+                result = await target_session.call_tool(tool_name, tool_args)
+                
+                structured_result = await get_structured_response(result)
+                tool_result_str = json.dumps(structured_result, ensure_ascii=False, indent=2) if structured_result else "Tool executed successfully."
+                
+                print(f"Agent: Tool '{tool_name}' executed for '{task_name}'. Result: {tool_result_str}")
+                llm_service.add_tool_call_response(tool_call_id, tool_result_str)
+            except Exception as e:
+                error_message = f"An exception occurred while calling tool {tool_name} for '{task_name}': {e}"
+                print(f"Agent: {error_message}")
+                traceback.print_exc()
+                llm_service.add_tool_call_response(tool_call_id, error_message)
+        else:
+            error_message = f"Error: LLM tried to call an unknown tool '{tool_name}' for task '{task_name}'."
+            print(f"Agent: {error_message}")
+            llm_service.add_tool_call_response(tool_call_id, error_message)
+    
+    print(f"Agent: Reached sub-loop limit for task '{task_name}'. Recording as failed and moving on.")
+    record_session = tool_to_session_map.get("record_test_result")
+    if record_session:
+        import re
+        match = re.search(r"API 模型 '([^']+)'", prompt)
+        api_id_guess = match.group(1) if match else "unknown"
+        await record_session.call_tool("record_test_result", {"api_id": api_id_guess, "task_name": task_name, "status": "failed", "details": "Reached sub-loop limit."})
+
+
+async def main():
+    print("LLM-Powered Agent starting...")
+
+    # 使用 `async with` 来确保所有会话都能被正确关闭
+    async with streamablehttp_client(SERVER_ENDPOINTS["api_caller"]) as (r1, w1, _), \
+               streamablehttp_client(SERVER_ENDPOINTS["schema_validator"]) as (r2, w2, _), \
+               streamablehttp_client(SERVER_ENDPOINTS["dms_provider"]) as (r3, w3, _), \
+               streamablehttp_client(SERVER_ENDPOINTS["test_manager"]) as (r4, w4, _):
+        
+        print("Agent: All MCP server connections established.")
+        
+        async with ClientSession(r1, w1) as s1, ClientSession(r2, w2) as s2, ClientSession(r3, w3) as s3, ClientSession(r4, w4) as s4:
+            
+            await asyncio.gather(s1.initialize(), s2.initialize(), s3.initialize(), s4.initialize())
+
+            tool_to_session_map = {tool.name: s1 for tool in (await s1.list_tools()).tools}
+            tool_to_session_map.update({tool.name: s2 for tool in (await s2.list_tools()).tools})
+            tool_to_session_map.update({tool.name: s3 for tool in (await s3.list_tools()).tools})
+            tool_to_session_map.update({tool.name: s4 for tool in (await s4.list_tools()).tools})
+            
+            all_mcp_tools = list(tool_to_session_map.keys())
+            print(f"Total tools found: {len(all_mcp_tools)}")
+
+            openai_tools = mcp_tools_to_openai_format([tool for session in [s1, s2, s3, s4] for tool in (await session.list_tools()).tools])
+            print("Agent: LLM Service tools prepared.")
+
+            # --- Agent主导的宏观测试流程 ---
+
+            # 1. 获取所有待测试的API
+            print("\n" + "="*20 + " Phase 1: Fetching APIs " + "="*20)
+            get_api_list_session = tool_to_session_map.get("get_api_list")
+            if not get_api_list_session:
+                raise RuntimeError("Critical Error: 'get_api_list' tool not found.")
+            
+            api_list_result = await get_api_list_session.call_tool("get_api_list", {})
+            api_list_structured = await get_structured_response(api_list_result)
+            response_data = api_list_structured.get("result", api_list_structured)
+            api_records = response_data.get('records', [])
+            api_ids_to_test = [record['id'] for record in api_records if 'id' in record]
+
+            if not api_ids_to_test:
+                 raise RuntimeError(f"Critical Error: DMSProviderServer returned an empty list of APIs.")
+            print(f"Agent: Found {len(api_ids_to_test)} APIs to test: {api_ids_to_test}")
+
+            # 2. 加载任务模板
+            print("\n" + "="*20 + " Phase 2: Loading Task Templates " + "="*20)
+            try:
+                with open('compliance-mcp-agent/tasks.json', 'r', encoding='utf-8') as f:
+                    task_templates = json.load(f)
+                print(f"Agent: Loaded {len(task_templates)} task templates.")
+            except FileNotFoundError:
+                raise RuntimeError("Critical Error: 'tasks.json' not found in 'compliance-mcp-agent/' directory.")
+            except json.JSONDecodeError as e:
+                raise RuntimeError(f"Critical Error: Failed to parse 'tasks.json'. Error: {e}")
+
+            # 3. 初始化测试计划
+            print("\n" + "="*20 + " Phase 3: Initializing Test Plan " + "="*20)
+            initialize_plan_session = tool_to_session_map.get("initialize_test_plan")
+            if not initialize_plan_session:
+                raise RuntimeError("Critical Error: 'initialize_test_plan' tool not found.")
+            
+            total_task_count = len(api_ids_to_test) * len(task_templates)
+            print(f"Agent: Initializing test plan for {total_task_count} total tasks ({len(api_ids_to_test)} APIs x {len(task_templates)} templates)...")
+            init_result = await initialize_plan_session.call_tool("initialize_test_plan", {"api_ids": api_ids_to_test})
+            init_structured = await get_structured_response(init_result)
+            init_response_data = init_structured.get("result", init_structured)
+            if init_response_data.get("status") != "success":
+                raise RuntimeError(f"Failed to initialize test plan. Reason: {init_response_data.get('message')}")
+            print("Agent: Test plan initialized successfully in TestManager.")
+
+            # 4. 主执行循环 (M x N)
+            print("\n" + "="*20 + " Phase 4: Main Execution Loop " + "="*20)
+            
+            execution_tasks = []
+            for api_id in api_ids_to_test:
+                for template in task_templates:
+                    # 动态生成任务
+                    final_prompt = template['prompt_template'].format(api_id=api_id)
+                    task_name_with_api = f"{template['name']} for {api_id}"
+                    
+                    task_to_run = {
+                        "name": task_name_with_api,
+                        "prompt": final_prompt
+                    }
+
+                    # 为每个任务创建一个异步执行协程
+                    execution_tasks.append(
+                        execute_task(
+                            task=task_to_run,
+                            tool_to_session_map=tool_to_session_map,
+                            openai_tools=openai_tools
+                        )
+                    )
+            
+            # 并发执行所有生成的任务
+            await asyncio.gather(*execution_tasks)
+            print("\nAll generated tasks have concluded.")
+
+            # 5. 最终总结
+            print("\n" + "="*20 + " Phase 5: Final Summary " + "="*20)
+            summary_session = tool_to_session_map.get("get_test_summary")
+            if summary_session:
+                summary_result = await summary_session.call_tool("get_test_summary", {})
+                summary_structured = await get_structured_response(summary_result)
+                summary_data = summary_structured.get("result", summary_structured)
+                print("Final Test Summary:")
+                print(json.dumps(summary_data, indent=2, ensure_ascii=False))
+
+
+# Script entry point: run the agent; any failure is printed with a traceback rather than re-raised.
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        print("\nAgent manually interrupted.")
+    except Exception as e:
+        print(f"\nAn unexpected error occurred in main: {e}")
+        traceback.print_exc() 
--- a/compliance-mcp-agent/llm/pycache/llm_service.cpython-312.pyc
+++ b/compliance-mcp-agent/llm/pycache/llm_service.cpython-312.pyc
--- a/compliance-mcp-agent/llm/llm_service.py
+++ b/compliance-mcp-agent/llm/llm_service.py
@ -0,0 +1,103 @@
+import openai
+from typing import List, Dict, Any, Tuple
+import json
+import traceback
+import os
+
+class LLMService:
+    def __init__(self, model_name="qwen-plus", tools=None):
+        if tools is None:
+            tools = []
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        if not self.api_key:
+            raise ValueError("API key not found. Please set the OPENAI_API_KEY environment variable.")
+
+        self.client = openai.OpenAI(api_key=self.api_key, base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
+        self.model_name = model_name
+        self.tools = tools
+        self.system_prompt = {"role": "system", "content": "你是一个智能API测试Agent。你的任务是根据用户的要求，通过自主、连续地调用给定的工具来完成API的自动化测试。请仔细分析每一步的结果，并决定下一步应该调用哪个工具。"}
+        self.messages: List[Dict[str, Any]] = [self.system_prompt]
+
+    def start_new_task(self, task_description: str):
+        """
+        开始一个新任务，这会重置对话历史，但保留最后的工具调用（如果有的话），
+        以提供任务切换的上下文。
+        """
+        print(f"\n{'='*25} Starting New Task Context {'='*25}")
+        print(f"Task Description: {task_description}")
+        
+        last_tool_call_response = None
+        if len(self.messages) > 1 and self.messages[-1]["role"] == "tool":
+            last_tool_call_response = self.messages[-1]
+
+        self.messages = [self.system_prompt]
+        if last_tool_call_response:
+            self.messages.append(last_tool_call_response)
+            print(f"Preserving last tool response for context: {last_tool_call_response['name']}")
+
+        self.add_user_message(task_description)
+        print(f"{'='*72}\n")
+
+    def add_user_message(self, content: str):
+        self.messages.append({"role": "user", "content": content})
+
+    def add_tool_call_response(self, tool_call_id: str, content: str):
+        self.messages.append(
+            {
+                "tool_call_id": tool_call_id,
+                "role": "tool",
+                "name": tool_call_id, # 名字可以和ID一样，重要的是ID要匹配
+                "content": content,
+            }
+        )
+
+    def get_last_assistant_message(self) -> str:
+        for msg in reversed(self.messages):
+            if msg["role"] == "assistant" and msg.get("content"):
+                return msg["content"]
+        return "No final response from assistant."
+
+    def execute_completion(self) -> Tuple[str, dict]:
+        print("\n" + "="*25 + " LLM Request " + "="*25)
+        print(json.dumps({"model": self.model_name, "messages": self.messages, "tools": self.tools}, ensure_ascii=False, indent=2))
+        print("="*71)
+
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=self.messages,
+                tools=self.tools,
+                tool_choice="auto",
+            )
+
+            print("\n" + "="*25 + " LLM Response " + "="*25)
+            print(response)
+            print("="*72)
+
+            response_message = response.choices[0].message
+            tool_calls = response_message.tool_calls
+
+            if tool_calls:
+                tool_call = tool_calls[0]
+                tool_name = tool_call.function.name
+                tool_call_id = tool_call.id
+                
+                try:
+                    tool_args = json.loads(tool_call.function.arguments)
+                    # 将成功的tool_call添加到历史记录
+                    self.messages.append(response_message.model_dump(exclude_unset=True))
+                    return tool_name, tool_args, tool_call_id
+                except json.JSONDecodeError as e:
+                    error_msg = f"LLM generated malformed JSON for tool arguments: {tool_call.function.arguments}. Error: {e}"
+                    print(error_msg)
+                    # 不将错误的assistant消息加入历史，而是返回错误信号
+                    return "error_malformed_json", {"tool_name": tool_name, "malformed_arguments": tool_call.function.arguments, "error": str(e)}, None
+            
+            # 如果没有工具调用，就将assistant的回复加入历史
+            self.messages.append(response_message.model_dump(exclude_unset=True))
+            return None, None, None
+
+        except Exception as e:
+            print(f"Error calling LLM API: {e}")
+            traceback.print_exc()
+            return None, None, None 
--- a/compliance-mcp-agent/memory-bank/activeContext.md
+++ b/compliance-mcp-agent/memory-bank/activeContext.md
@ -0,0 +1,39 @@
+# 活动上下文
+
+## 当前工作焦点
+
+我们正处于 **架构验证和问题修复的关键阶段**。在初步搭建了包含多个服务器（`APICaller`, `SchemaValidator`, `DMSProvider`, `TestManager`）的完整 MCP 架构后，我们遇到了一个**持续性的、与文件路径解析相关的核心障碍**。
+
+当前所有工作的核心焦点是 **彻底解决在子进程中运行的 MCP 服务器无法正确定位其依赖文件（如 `domain.json`）的问题**，并最终让整个测试流程成功运转起来。
+
+### 优先任务
+1.  **根源分析**: 彻底理解 `subprocess.Popen` 的工作目录（CWD）继承机制，以及它是如何与 `DMSProviderServer.py` 中 `os.path` 相关函数交互并导致错误的。
+2.  **实施健壮的解决方案**:
+    - **重构 `run_tests.py`**: 修改启动脚本，在创建服务器子进程时，为其**明确设置 `cwd` 参数**，确保每个服务器都在其脚本所在的目录中运行。
+    - **简化服务器路径**: 在 `DMSProviderServer.py` 中，将文件路径调整为基于其已被正确设置的 `cwd` 的、更简单的相对路径。
+3.  **最终验证**: 运行完整的端到端测试，确认 AI Agent 能够成功从 `DMSProviderServer` 获取 API 列表，并启动其测试循环。
+
+## 最近变更
+
+*   **多服务器架构实现**: 我们已经成功创建并集成了四个独立的 MCP 服务器，每个服务器都提供一组特定的工具。
+*   **Agent 逻辑进化**: Agent 的主循环 (`agent_main_loop.py`) 已经从硬编码逻辑演变为一个完全由 LLM 驱动的、动态的测试流程。
+*   **启动器脚本**: 创建了 `run_tests.py`，用于统一启动所有服务器和 Agent 进程。
+*   **反复的路径修复尝试**: 多次尝试修改 `DMSProviderServer.py` 中的相对路径，但均未成功，这促使我们对问题进行更深入的分析。
+
+## 活动决策和考虑
+
+### 当前决策
+1.  **接受失败并深入分析**: 我们认识到，简单的路径调整是无效的。我们决定暂停“打地鼠”式的修复，转而投入时间去理解问题的根本原因——进程的执行上下文。
+2.  **采用 `cwd` 解决方案**: 我们确定，通过在 `subprocess.Popen` 中为每个服务器子进程显式设置 `cwd`，是解决此类问题的最健壮、最可靠的方法。这将使我们的系统对执行环境的变化更具弹性。
+
+### 开放问题
+1.  **异步错误处理**: 当前的 `agent_main_loop.py` 在 `TaskGroup` 中遇到了未处理的异常。一旦路径问题解决，下一个需要关注的技术点将是如何在 `anyio` 和 `asyncio` 的环境中优雅地捕获和处理并发任务中的错误。
+2.  **LLM 的稳定性**: 尽管 Agent 的逻辑是 LLM 驱动的，但我们还未充分测试在真实、长链条的工具调用下，LLM 生成的参数和决策的稳定性。这可能是下一个潜在的问题点。
+
+## 下一步计划
+
+### 短期目标 (本次会话)
+- [x] **重构 `run_tests.py`** 以正确设置服务器的 `cwd`。 (已完成)
+- [x] **调整 `DMSProviderServer.py`** 中的文件路径以匹配新的 `cwd`。(已完成)
+- [ ] **执行最终测试**: 在您重启对话后，我们将立即运行 `run_tests.py`，并期望看到 `DMSProviderServer` 成功加载 API 列表，Agent 开始执行测试。
+- [ ] **修复 `TaskGroup` 异常**: 解决在 `agent_main_loop.py` 中出现的 `AttributeError: 'NoneType' object has no attribute 'get'`，这个错误很可能是由空的 API 列表间接触发的。 
--- a/compliance-mcp-agent/memory-bank/productContext.md
+++ b/compliance-mcp-agent/memory-bank/productContext.md
@ -0,0 +1,25 @@
+# 产品上下文
+
+## 问题陈述：传统测试框架的“天花板”
+
+随着 API 数量的增多和合规性规则日益复杂，我们现有的、基于代码的合规性测试框架正面临一个难以突破的“天花板”。
+
+1.  **僵化与脆弱**: 每当出现一个新的合规规则，我们就必须编写一个新的、硬编码的测试用例。这种紧耦合的设计使得框架越来越臃肿，修改一处就可能引发意想不到的连锁反应。
+2.  **扩展性差**: 添加一种新的测试能力（比如，集成一个新的静态分析工具）需要深入修改核心的测试编排器逻辑。这个过程不仅耗时，而且对开发人员的水平要求很高，阻碍了社区贡献和团队协作。
+3.  **可维护性噩梦**: 测试逻辑、工具调用、报告生成等所有功能都混杂在一起，使得代码难以理解和维护。排查一个简单的 bug 可能需要在多个模块之间来回跳转，心智负担极重。
+4.  **智能程度低**: 传统框架只能执行预先定义好的、线性的测试路径。它无法理解规则的“意图”，也无法在遇到预期外情况时进行动态调整或探索性测试。
+
+## 解决方案：一个“会思考”的测试平台
+
+我们提出的基于 MCP 的 AI Agent 框架，旨在从根本上解决上述问题，将我们的测试工具从一个死板的“执行器”升级为一个会思考、可扩展的“平台”。
+
+1.  **从“硬编码”到“软编排”**: 我们不再编写固定的测试流程。取而代之的是，我们给 Agent 一个**目标**（“验证这个API是否符合这条规则”），然后由 Agent **自主地、动态地**编排和调用一系列原子化的工具来达成这个目标。这种灵活性是革命性的。
+2.  **无限的扩展能力**: 想要增加一个新的测试能力？非常简单，只需开发一个独立的、符合 MCP 规范的工具 Server 即可。这个新工具会自动被 Agent 发现并使用，完全不需要修改 Host 或 Client 的核心代码。这为框架的生态发展打开了无限可能。
+3.  **清晰的关注点分离**: Host 只关心“流程”，Client 只关心“思考”，Server 只关心“执行”。这种架构上的清晰性使得每个组件都变得简单、可独立开发和测试，极大地降低了维护成本。
+4.  **涌现的智能**: Agent 不仅能执行已知的测试，未来还有可能通过推理，发现规则之间隐藏的关联，或者设计出人类工程师没有想到的测试路径，从而找到更深层次的 bug。
+
+## 用户体验目标
+
+*   **对于规则制定者**: 他们可以用更接近自然语言的方式来定义合规性规则，而无需关心具体的测试代码实现。
+*   **对于工具开发者**: 他们可以轻松地将自己的工具（如静态扫描器、安全检查器等）封装成 MCP Server，无缝集成到我们的测试生态中。
+*   **对于测试工程师**: 他们将得到一个高度自动化且结果可信、过程透明的测试伙伴，能将他们从繁琐的脚本编写中解放出来，专注于更有创造性的测试策略分析。 
--- a/compliance-mcp-agent/memory-bank/progress.md
+++ b/compliance-mcp-agent/memory-bank/progress.md
@ -0,0 +1,57 @@
+# 项目进度
+
+## 里程碑 1: 最小可行产品 (MVP) - (已完成)
+
+**目标**: 搭建并验证 MCP 架构的端到端通信。
+
+### 已完成功能
+- ✅ **项目初始化**
+  - ✅ 创建 `compliance-mcp-agent` 独立目录。
+  - ✅ 创建全新的 `memory-bank`。
+- ✅ **核心文档撰写**
+  - ✅ `projectbrief.md`, `systemPatterns.md`, `techContext.md`, `productContext.md`...
+- ✅ **搭建基础框架**
+  - ✅ 创建 `requirements.txt` 并添加依赖。
+  - ✅ 实现 `APICallerServer.py`。
+  - ✅ 实现 `run_tests.py` (Host)。
+  - ✅ 实现 `agent_main_loop.py` (Client)。
+- ✅ **"Hello World" 级测试**
+  - ✅ 成功运行了第一个端到端的单服务器测试。
+
+---
+
+## 里程碑 2: 功能完备版本 - (进行中)
+
+**目标**: 实现一个功能完备的、由 Agent 驱动的测试流程。
+
+### 已完成功能
+- ✅ **多服务器架构**
+    - ✅ 实现 `SchemaValidatorServer.py`，提供严格和灵活的 Schema 验证工具。
+    - ✅ 实现 `DMSProviderServer.py`，动态提供 API 列表和 Schema 定义。
+    - ✅ 实现 `TestManagerServer.py`，用于跟踪和管理测试进度。
+- ✅ **LLM 驱动的 Agent**
+    - ✅ 在 `agent_main_loop.py` 中集成了真实的 LLM 调用。
+    - ✅ Agent 能够自主地与所有服务器交互，获取工具并制定初步计划。
+
+### 正在进行的工作
+- 🔄 **修复核心架构障碍**:
+  - [x] **根源定位**: 已准确定位到 `run_tests.py` 启动的子进程因错误的 CWD 而无法找到数据文件。
+  - [x] **解决方案实施**: 已重构 `run_tests.py` 以强制设定子进程的 `cwd`，并同步更新了 `DMSProviderServer.py` 中的文件路径。
+  - [ ] **最终验证**: 等待下一次运行，以确认 Agent 现在可以成功获取 API 列表并开始执行测试。
+
+### 待完成工作
+- ⏳ 解决 `agent_main_loop.py` 中出现的 `TaskGroup` 异步错误。
+- ⏳ 实现完整的 CRUD (Create, Read, Update, Delete, List) 测试生命周期。
+- ⏳ 生成结构化的、可读的测试报告。
+
+---
+
+## 里程碑 3: 企业就绪版本 (未来规划)
+
+**目标**: 成为一个健壮、可靠、可用于生产环境的合规性审计平台。
+
+- ⏳ 拥有完善的错误处理、重试和超时机制。
+- ⏳ 提供清晰的日志和可观测性。
+- ⏳ 支持更复杂的测试场景，如多 Agent 协作。
+- ⏳ 具备优秀的用户文档和开发者文档。
+- ⏳ (可选) 提供 Web 界面来配置和查看测试结果。 
--- a/compliance-mcp-agent/memory-bank/projectbrief.md
+++ b/compliance-mcp-agent/memory-bank/projectbrief.md
@ -0,0 +1,23 @@
+# 项目简介：AI Agent 驱动的合规性测试框架
+
+## 项目概述
+本项目旨在从零开始，构建一个基于 **模型-上下文-协议 (Model-Context-Protocol, MCP)** 的下一代 API 合规性测试框架。我们将用一个自主决策的 **AI Agent** 来取代传统的、基于固定脚本的测试逻辑。这个 Agent 将利用一套标准化的、可扩展的 **工具集 (MCP Servers)**，动态地规划和执行测试步骤，以验证 API 是否符合指定的合规性规则。
+
+## 核心需求
+1.  **MCP 原生架构**: 系统的所有组件交互都必须严格遵循 MCP 规范，实现 Host, Client, 和 Servers 之间的清晰分离。
+2.  **AI Agent 驱动**: 测试的执行逻辑由一个核心的 LLM Agent 驱动，它能够自主进行推理、规划和调用工具。
+3.  **可扩展的工具集**: 所有的测试能力（如 API 调用、数据生成、结果断言）都必须被封装成独立的、符合 MCP 规范的 Server。
+4.  **标准化与模块化**: 彻底抛弃硬编码的集成方式，实现测试能力和测试流程的完全解耦。
+5.  **透明的可审计性**: Agent 的每一个决策步骤、每一次工具调用都必须被完整记录，形成清晰、可审计的测试日志。
+
+## 关键目标
+1.  **提升灵活性**: 使测试框架能够轻松适应新的合规规则，甚至在没有明确测试脚本的情况下，也能通过自然语言描述的规则进行测试。
+2.  **增强扩展性**: 允许任何开发者通过创建一个新的、符合 MCP 规范的工具服务器来为框架贡献新的测试能力。
+3.  **提高可维护性**: 通过将系统拆分为职责单一的独立组件，大幅降低代码的耦合度和维护成本。
+4.  **探索 Agentic Workflow**: 验证 AI Agent 在软件测试这一高度结构化领域的自主工作能力，为更复杂的 Agentic 自动化流程积累经验。
+
+## 技术栈
+- **核心协议**: Model-Context-Protocol (MCP)
+- **官方 SDK**: `model-context-protocol/python-sdk`
+- **核心语言**: Python 3.8+
+- **Agent 大脑**: 兼容 OpenAI API 的大语言模型 (LLM) 
--- a/compliance-mcp-agent/memory-bank/systemPatterns.md
+++ b/compliance-mcp-agent/memory-bank/systemPatterns.md
@ -0,0 +1,63 @@
+# 系统架构与设计模式
+
+## 核心架构：模型-上下文-协议 (MCP)
+
+本系统严格遵循 MCP 定义的 **Host-Client-Server** 架构，旨在实现组件的终极解耦和高可扩展性。
+
+```mermaid
+graph TD
+    subgraph TestRunnerApp ["测试运行程序 (MCP Host)"]
+        style TestRunnerApp fill:#e6f2ff,stroke:#b3d9ff
+        A["<b>run_tests.py</b><br/><i>(Host 实例)</i>"]
+        A -- "1. 为每个测试任务<br/>创建并管理Client会话" --> B
+        A -- "4. 汇总所有Client的<br/>结论，生成报告" --> E["最终测试报告"]
+    end
+
+    subgraph AgentSession ["独立的Agent会话 (MCP Client)"]
+        style AgentSession fill:#e6ffe6,stroke:#b3ffb3
+        B["<b>Client 实例</b><br/><i>(包含LLM的Agent核心)</i>"]
+        B -- "2. 向Host请求<br/>使用工具" --> D
+    end
+
+    subgraph Toolbelt ["MCP工具集 (MCP Servers)"]
+        style Toolbelt fill:#fff0e6,stroke:#ffccb3
+        D["<b>APICallerServer<br/>DataGenServer<br/>AssertionServer<br/>...</b>"]
+        D -- "3. 执行操作<br/>并将结果通过Host返回" --> B
+    end
+```
+
+## 组件职责详解
+
+### 1. MCP Host (测试运行程序)
+
+*   **角色**: 整个测试流程的 **总控制器**、**安全边界** 和 **环境提供者**。它如同一个“办公室”环境，为 Agent 的工作提供场地、工具和规则。
+*   **职责**:
+    *   **流程编排**: 加载 API 规范和合规规则，生成测试任务列表，并为每个任务启动一个独立的、隔离的 Client 会话。
+    *   **生命周期管理**: 负责创建、监督和销毁 Client 实例。如果某个 Agent 会话卡死或崩溃，Host 会终止它并继续下一个任务，确保整体流程的健壮性。
+    *   **安全与路由**: 作为所有通信的中间人，它接收来自 Client 的工具调用请求，验证其权限，然后将其安全地路由到指定的 Server。它也负责将 Server 的结果返回给正确的 Client。**Client 和 Server 之间永不直接通信**。
+
+### 2. MCP Client (Agent 会话)
+
+*   **角色**: 承载 **LLM（大语言模型）** 的执行实体，是 Agent 的“大脑”和“身体”的结合。
+*   **职责**:
+    *   **任务执行**: 从 Host 接收一个明确的测试目标。
+    *   **推理规划**: 内部的 LLM 负责思考和规划，决定需要执行哪些步骤、调用哪些工具来达成测试目标。
+    *   **与 Host 通信**: 将 LLM 的决策转化为对 Host 的标准 `call_tool` 请求。
+    *   **状态保持**: 在会话内部维持短期的记忆和上下文，以完成连贯的、多步骤的测试逻辑。
+
+### 3. MCP Servers (工具集)
+
+*   **角色**: 提供单一、原子化能力的 **功能模块**。每个 Server 都是一个独立的微服务。
+*   **职责**:
+    *   **提供能力**: 封装一种特定的能力，例如：
+        *   `APICallerServer`: 仅负责发起 HTTP 请求。
+        *   `DataGeneratorServer`: 仅负责根据 Schema 生成数据。
+        *   `AssertionServer`: 仅负责比较两个值是否相等。
+    *   **无状态与隔离**: Server 本身是无状态的（或会话状态由 Host 管理），并且对其他 Server 和整个测试任务一无所知。这种设计确保了工具的高度可复用性和可独立测试性。
+
+## 设计模式应用
+
+*   **单一职责原则**: 每个组件（Host, Client, Server）和每个 Server 内部的工具都有单一、明确的职责。
+*   **策略模式**: 每个合规性规则可以被看作一种“策略”，Agent 根据不同的策略（规则目标）来组织其工具调用序列。
+*   **外观模式**: Host 为 Client 提供了一个统一的、简化的接口来访问背后复杂的工具集，Client 无需关心工具的具体位置和实现。
+*   **微服务架构**: 整个工具集由一系列独立的、可独立部署的 Server 构成，体现了微服务的思想，极大地提高了系统的灵活性和可维护性。 
--- a/compliance-mcp-agent/memory-bank/techContext.md
+++ b/compliance-mcp-agent/memory-bank/techContext.md
@ -0,0 +1,54 @@
+# 技术上下文
+
+## 核心技术栈
+
+| 类别       | 技术/库                                   | 版本 | 用途                                           |
+| ---------- | ----------------------------------------- | ---- | ---------------------------------------------- |
+| 核心协议   | Model-Context-Protocol (MCP)              | v1+  | 定义系统所有组件间的通信标准。                 |
+| **官方SDK**  | **`model-context-protocol/python-sdk`**   | 最新 | **我们实现Host, Client, Server的基石。**       |
+| 核心语言   | Python                                    | 3.8+ | 主要开发语言。                                 |
+| AI模型     | 兼容OpenAI API的大语言模型 (LLM)        | -    | 作为Agent的“大脑”，负责推理和规划。            |
+| HTTP客户端 | requests                                  | 最新 | 在APICallerServer中用于执行HTTP请求。          |
+| Web框架    | (可选) FastAPI / Flask                    | -    | 或许会用于构建可通过HTTP访问的远程MCP Server。 |
+
+## 开发环境设置
+
+### 必要组件
+- Python 3.8 或更高版本
+- `uv` 或 `pip` (用于管理Python包依赖)
+- Git (版本控制)
+- 支持Python的IDE (推荐 VS Code 或 PyCharm)
+
+### 项目安装步骤 (预期)
+1.  **克隆代码仓库**:
+    ```bash
+    git clone <仓库URL>
+    cd compliance-mcp-agent
+    ```
+2.  **创建虚拟环境**:
+    ```bash
+    python -m venv .venv
+    source .venv/bin/activate
+    ```
+3.  **安装依赖**:
+    我们将创建一个 `requirements.txt` 文件，内容至少包括：
+    ```
+    mcp[cli]
+    requests
+    # 其他未来可能需要的依赖
+    ```
+    然后执行安装：
+    ```bash
+    uv pip install -r requirements.txt
+    ```
+4.  **运行项目**:
+    *   **启动所有MCP Servers**: 需要编写一个脚本来并行启动所有工具服务器。
+    *   **启动MCP Host**: 运行主程序 `run_tests.py` 来开始整个测试流程。
+
+## 关键技术决策
+
+1.  **SDK 优先**: 我们将尽可能地利用官方 Python SDK 的能力，而不是重新发明轮子。所有的 Host/Client/Server 实现都应基于该 SDK 提供的类和方法。
+2.  **Stdio 通信**: 在项目初期，为了简单起见，Host 和 Client 之间的通信将主要通过标准输入/输出 (`stdio`) 进行，这由 `stdio_client` 提供支持。这对于本地运行的 Agent 来说足够高效。
+3.  **独立的 Server 进程**: 每个 MCP Server 都将作为一个独立的 Python 进程运行。这确保了工具之间的完全隔离，并为未来将某个工具部署为网络服务（例如使用 FastAPI）提供了可能性。
+4.  **异步编程**: 官方 SDK 大量使用了 `asyncio`。因此，我们的 Host 和 Client 代码也必须是异步的，以充分利用 SDK 的性能。
+5.  **LLM 接口**: Agent 与 LLM 的交互将通过一个通用的、兼容 OpenAI 的 API 客户端进行。这允许我们未来可以轻松切换不同的后端 LLM 服务。 
--- a/compliance-mcp-agent/requirements.txt
+++ b/compliance-mcp-agent/requirements.txt
@ -0,0 +1,5 @@
+mcp[cli]
+requests
+uvicorn
+Flask
+openai 
--- a/compliance-mcp-agent/run_tests.py
+++ b/compliance-mcp-agent/run_tests.py
@ -0,0 +1,125 @@
+import subprocess
+import time
+import os
+import asyncio
+
+# Global constants
+# Agent entry script; run_agent() passes this relative path to `uv run python` without a cwd override.
+AGENT_SCRIPT = "compliance-mcp-agent/agent_main_loop.py"
+
+async def log_subprocess_output(stream, prefix):
+    """异步读取并打印子进程的输出流。"""
+    while True:
+        try:
+            line = await stream.readline()
+            if line:
+                print(f"[{prefix}] {line.decode().strip()}")
+            else:
+                # End of stream
+                break
+        except Exception as e:
+            print(f"Error reading stream for {prefix}: {e}")
+            break
+
+async def start_servers():
+    """异步启动所有MCP服务器，并捕获它们的日志。"""
+    print("="*20 + " Starting MCP Servers " + "="*20)
+    
+    server_processes = {}
+    log_tasks = []
+    
+    server_scripts = [
+        "servers/APICallerServer.py",
+        "servers/SchemaValidatorServer.py",
+        "servers/DMSProviderServer.py",
+        "servers/TestManagerServer.py",
+    ]
+
+    project_root = os.path.dirname(os.path.abspath(__file__))
+
+    for script_path_rel in server_scripts:
+        script_path_abs = os.path.join(project_root, script_path_rel)
+        server_name = os.path.splitext(os.path.basename(script_path_abs))[0]
+        server_dir = os.path.dirname(script_path_abs)
+        
+        print(f"Starting server: {script_path_rel}...")
+        
+        env = os.environ.copy()
+        
+        process = await asyncio.create_subprocess_exec(
+            "uv", "run", "python", os.path.basename(script_path_abs),
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+            env=env,
+            cwd=server_dir
+        )
+        
+        server_processes[server_name] = process
+        print(f"  -> Started {script_path_rel} with PID: {process.pid}")
+
+        log_tasks.append(asyncio.create_task(log_subprocess_output(process.stdout, server_name)))
+        log_tasks.append(asyncio.create_task(log_subprocess_output(process.stderr, f"{server_name}-ERROR")))
+
+    print(f"\nAll {len(server_scripts)} servers are running in the background.")
+    return server_processes, log_tasks
+
+async def run_agent(agent_script_path):
+    """异步运行Agent主循环并实时打印其输出。"""
+    print("\n" + "="*20 + " Running Agent " + "="*20)
+    
+    process = await asyncio.create_subprocess_exec(
+        "uv", "run", "python", agent_script_path,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE
+    )
+    
+    agent_stdout_task = asyncio.create_task(log_subprocess_output(process.stdout, "Agent"))
+    agent_stderr_task = asyncio.create_task(log_subprocess_output(process.stderr, "Agent-ERROR"))
+    
+    await process.wait()
+    
+    await asyncio.gather(agent_stdout_task, agent_stderr_task)
+        
+    print(f"\nAgent process finished with return code: {process.returncode}")
+
+
+def cleanup_servers(processes):
+    """停止所有服务器进程。"""
+    print("\n" + "="*20 + " Cleaning up Servers " + "="*20)
+    for name, process in processes.items():
+        if process.returncode is None: # 仅当进程仍在运行时才终止
+            print(f"Terminating server {name} (PID: {process.pid})...")
+            try:
+                process.terminate()
+                print(f"  -> Terminated signal sent.")
+            except ProcessLookupError:
+                print(f"  -> Process {name} (PID: {process.pid}) already gone.")
+        else:
+            print(f"  -> Server {name} (PID: {process.pid}) already finished with code {process.returncode}.")
+            
+    print("Cleanup complete.")
+
+async def main():
+    """
+    主入口点，异步运行所有测试。
+    """
+    server_processes, log_tasks = await start_servers()
+    
+    print("\nWaiting for servers to initialize...")
+    await asyncio.sleep(8)
+    
+    try:
+        await run_agent(AGENT_SCRIPT)
+    finally:
+        cleanup_servers(server_processes)
+        
+        # 取消仍在运行的日志任务
+        for task in log_tasks:
+            task.cancel()
+        await asyncio.gather(*log_tasks, return_exceptions=True)
+        print("Log watchers terminated.")
+
+# Script entry point. Ctrl+C is reported here; server cleanup runs in main()'s `finally` block.
+if __name__ == "__main__":
+    try:
+        asyncio.run(main())
+    except KeyboardInterrupt:
+        print("\nMain process interrupted by user. Cleaning up...")
--- a/compliance-mcp-agent/servers/APICallerServer.py
+++ b/compliance-mcp-agent/servers/APICallerServer.py
@ -0,0 +1,58 @@
+import requests
+import uvicorn
+from mcp.server.fastmcp.server import FastMCP
+from typing import Optional
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# 1. Create a server instance with FastMCP
+mcp = FastMCP(
+    "APICallerServer",
+    title="API Caller Server",
+    description="A server that provides a tool to call APIs.",
+    version="0.2.0"  # a new version
+)
+
+# 2. 使用 @mcp.tool() 装饰器来定义一个工具
+@mcp.tool()
+def api_caller(method: str, url: str, headers: Optional[dict] = None, params: Optional[dict] = None, json_body: Optional[dict] = None) -> dict:
+    """
+    一个通用的API调用工具，可以发送HTTP请求。
+    """
+    logging.info(f"api_caller: Received request -> method={method}, url={url}, params={params}, json_body={json_body}")
+    try:
+        response = requests.request(
+            method=method,
+            url=url,
+            headers=headers,
+            params=params,
+            json=json_body
+        )
+        response.raise_for_status()  # 如果状态码是 4xx 或 5xx，则引发HTTPError
+
+        logging.info(f"api_caller: Request to {url} successful with status code {response.status_code}")
+
+        # 尝试将响应解析为JSON，如果失败则作为纯文本返回
+        try:
+            response_body = response.json()
+        except requests.exceptions.JSONDecodeError:
+            response_body = response.text
+        
+        return {
+            "status_code": response.status_code,
+            "headers": dict(response.headers),
+            "body": response_body
+        }
+    except requests.exceptions.RequestException as e:
+        logging.error(f"api_caller: Request to {url} failed. Error: {e}", exc_info=True)
+        return {
+            "error": "APIRequestError",
+            "message": str(e)
+        }
+
+# 3. (Optional) Start the server when this file is run directly
+if __name__ == "__main__":
+    # A FastMCP object is not itself an ASGI app, but its streamable_http_app() method returns one
+    uvicorn.run(mcp.streamable_http_app(), host="127.0.0.1", port=8001) 
--- a/compliance-mcp-agent/servers/DMSProviderServer.py
+++ b/compliance-mcp-agent/servers/DMSProviderServer.py
@ -0,0 +1,119 @@
+import requests
+import uvicorn
+import logging
+from mcp.server.fastmcp import FastMCP
+from typing import List, Dict, Any
+import json
+import os
+
+# --- Configuration ---
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+# --- MCP server definition ---
+mcp = FastMCP()
+
+# Base address of the DMS server
+DMS_BASE_URL = "http://127.0.0.1:5001"
+# Endpoint that lists the available APIs
+MOCK_DMS_API_LIST_URL = f"{DMS_BASE_URL}/api/schema/manage/schema"
+# Template for a single schema; the doubled braces keep a literal {model_id}
+# placeholder that is filled in later with str.format().
+MOCK_DMS_SCHEMA_DETAIL_URL_TEMPLATE = f"{DMS_BASE_URL}/api/schema/manage/schema/{{model_id}}"
+
+@mcp.tool()
+def get_api_list() -> Dict[str, Any]:
+    """
+    通过HTTP请求从模拟的DMS服务器获取所有可用API的列表,并返回一个干净的、包含records的字典。
+    """
+    logging.info(f"DMSProviderServer: Attempting to fetch API list from {MOCK_DMS_API_LIST_URL}")
+    try:
+        response = requests.get(MOCK_DMS_API_LIST_URL, timeout=5)
+        response.raise_for_status()
+        
+        raw_data = response.json()
+        
+        # 核心修正：根据mock server的实际返回，深入到'data'键下提取'records'
+        records = raw_data.get("data", {}).get("records", [])
+        
+        logging.info(f"DMSProviderServer: Successfully parsed response. Found {len(records)} records.")
+        
+        # 返回一个干净、统一的结构，并加入版本号探针
+        return {"records": records, "version": "1.1"}
+
+    except requests.exceptions.RequestException as e:
+        error_message = f"Failed to connect to mock DMS server for API list: {e}"
+        logging.error(f"DMSProviderServer: {error_message}")
+        return {"records": [], "error": error_message}
+    except json.JSONDecodeError:
+        error_message = "Failed to decode JSON response for API list from mock DMS server."
+        logging.error(f"DMSProviderServer: {error_message}")
+        return {"records": [], "error": error_message}
+
+@mcp.tool()
+def get_schema_by_id(model_id: str) -> Dict[str, Any]:
+    """
+    根据模型ID，通过HTTP请求从模拟的DMS服务器获取其JSON Schema。
+    """
+    schema_url = MOCK_DMS_SCHEMA_DETAIL_URL_TEMPLATE.format(model_id=model_id)
+    logging.info(f"DMSProviderServer: Attempting to fetch schema for '{model_id}' from {schema_url}")
+    
+    try:
+        response = requests.get(schema_url, timeout=5)
+        response.raise_for_status()
+        
+        raw_data = response.json()
+        
+        # 核心修正：提取'data'键下的schema对象
+        schema = raw_data.get("data")
+        
+        if schema:
+            logging.info(f"DMSProviderServer: Successfully parsed schema for '{model_id}'.")
+            return {"schema": schema}
+        else:
+            error_message = f"Schema data for '{model_id}' is empty or missing in the response."
+            logging.warning(f"DMSProviderServer: {error_message}")
+            return {"error": error_message}
+
+    except requests.exceptions.RequestException as e:
+        error_message = f"Failed to connect to mock DMS server for schema '{model_id}': {e}"
+        logging.error(f"DMSProviderServer: {error_message}")
+        return {"error": error_message}
+    except json.JSONDecodeError:
+        error_message = f"Failed to decode JSON response for schema '{model_id}' from mock DMS server."
+        logging.error(f"DMSProviderServer: {error_message}")
+        return {"error": error_message}
+
+
+@mcp.tool()
+def get_dms_crud_endpoints(model_id: str) -> Dict[str, Any]:
+    """
+    根据模型ID，生成并返回其所有标准的CRUD操作端点（create, list, read, update, delete）的完整定义。
+    """
+    # 这个函数的逻辑是基于名称生成，暂时不需要对接mock服务，所以保持不变
+    base_path = model_id.split('.')[0]
+    
+    endpoints = {
+        "create": {
+            "method": "POST",
+            "url": f"{DMS_BASE_URL}/api/dms/wb_ml/v1/{base_path}"
+        },
+        "list": {
+            "method": "POST", # 根据mock server，list是POST
+            "url": f"{DMS_BASE_URL}/api/dms/wb_ml/v1/{base_path}/1.0.0" # 根据mock server，需要版本号
+        },
+        "read": {
+            "method": "GET",
+            "url": f"{DMS_BASE_URL}/api/dms/wb_ml/v1/{base_path}/1.0.0/{{id}}" # 根据mock server，需要版本号
+        },
+        "update": {
+            "method": "PUT",
+            "url": f"{DMS_BASE_URL}/api/dms/wb_ml/v1/{base_path}"
+        },
+        "delete": {
+            "method": "DELETE",
+            "url": f"{DMS_BASE_URL}/api/dms/wb_ml/v1/{base_path}"
+        }
+    }
+    return endpoints
+
+# --- Start the server ---
+if __name__ == "__main__":
+    # uvicorn is also imported at the top of this file; this local import is redundant.
+    import uvicorn
+    # Serve the ASGI app returned by streamable_http_app() on localhost:8003.
+    uvicorn.run(mcp.streamable_http_app(), host="127.0.0.1", port=8003) 
--- a/compliance-mcp-agent/servers/SchemaValidatorServer.py
+++ b/compliance-mcp-agent/servers/SchemaValidatorServer.py
@ -0,0 +1,88 @@
+from mcp.server.fastmcp.server import FastMCP
+from pydantic import BaseModel, ValidationError
+import jsonschema
+import uvicorn
+import logging
+from jsonschema import validate, ValidationError
+from mcp.server.fastmcp.server import FastMCP
+
+# 新增导入
+from response_utils import extract_data_for_validation
+
+# FastMCP server instance; the @mcp.tool() functions below are registered on it.
+mcp = FastMCP(
+    "SchemaValidatorServer",
+    title="JSON Schema Validator Server",
+    description="A server that provides a tool to validate data against a JSON Schema.",
+    version="0.1.0"
+)
+
+@mcp.tool()
+def validate_schema(data_instance: dict, schema: dict) -> dict:
+    """
+    Validates a data instance against a given JSON Schema.
+
+    Args:
+        data_instance: The data object to validate.
+        schema: The JSON Schema to validate against.
+
+    Returns:
+        A dictionary containing the validation result.
+        {"isValid": True} on success.
+        {"isValid": False, "error": "Validation error message"} on failure.
+    """
+    try:
+        jsonschema.validate(instance=data_instance, schema=schema)
+        return {"isValid": True, "error": None}
+    except ValidationError as e:
+        logging.error(f"SchemaValidator: Validation failed. Error: {e.message}", exc_info=True)
+        return {"isValid": False, "error": e.message}
+    except Exception as e:
+        # Catch other potential errors from the jsonschema library
+        return {"isValid": False, "error": str(e)}
+
+@mcp.tool()
+def validate_flexible_schema(api_response: dict, item_schema: dict) -> dict:
+    """
+    对一个可能带有标准包装（如 {code, message, data}）的API响应进行灵活的schema验证。
+    它能自动提取核心业务数据（无论是单个对象还是列表）并逐项进行验证。
+
+    Args:
+        api_response (dict): 完整的API响应体。
+        item_schema (dict): 描述核心业务数据**单个元素**的JSON Schema。
+    
+    Returns:
+        dict: 一个包含验证结果的字典, {"isValid": True} 或 {"isValid": False, "error": "..."}。
+    """
+    logging.info("SchemaValidator: Running flexible validation...")
+    try:
+        # 使用工具函数提取需要验证的数据
+        items_to_validate = extract_data_for_validation(api_response)
+
+        if not items_to_validate:
+            error_message = "Flexible validation failed: Could not extract any items to validate from the response."
+            logging.warning(error_message)
+            return {"isValid": False, "error": error_message}
+        
+        logging.info(f"Flexible validation: Extracted {len(items_to_validate)} item(s) to validate.")
+
+        # 逐个验证提取出的项
+        for i, item in enumerate(items_to_validate):
+            validate(instance=item, schema=item_schema)
+            logging.info(f"  -> Item {i+1}/{len(items_to_validate)} passed validation.")
+
+        logging.info("SchemaValidator: Flexible validation successful. All items conform to the schema.")
+        return {"isValid": True}
+
+    except ValidationError as e:
+        error_message = f"Flexible validation failed on an item. Error: {e.message}"
+        logging.error(error_message, exc_info=True)
+        return {"isValid": False, "error": error_message}
+    except Exception as e:
+        error_message = f"An unexpected error occurred during flexible validation: {e}"
+        logging.error(error_message, exc_info=True)
+        return {"isValid": False, "error": error_message}
+
+
+# --- Start the server ---
+if __name__ == "__main__":
+    # Serve the ASGI app returned by streamable_http_app() on localhost:8002.
+    uvicorn.run(mcp.streamable_http_app(), host="127.0.0.1", port=8002) 
--- a/compliance-mcp-agent/servers/TestManagerServer.py
+++ b/compliance-mcp-agent/servers/TestManagerServer.py
@ -0,0 +1,105 @@
+import uvicorn
+import logging
+from mcp.server.fastmcp import FastMCP
+from typing import List, Dict, Any
+import threading
+
+# --- Configuration ---
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+# --- MCP server definition ---
+mcp = FastMCP()
+
+# State is kept in a plain dict; the tools below access it while holding this lock
+test_state: Dict[str, Any] = {}
+state_lock = threading.Lock()
+
+def _reset_state():
+    """重置测试状态，不加锁，供内部调用"""
+    global test_state
+    test_state = {
+        "results": {}, # e.g., {"api_id_1": [{"task_name": "...", "status": "...", "details": "..."}]}
+        "apis_pending_init": [],
+    }
+
+_reset_state() # 初始化状态
+
+@mcp.tool()
+def initialize_test_plan(api_ids: List[str]) -> Dict[str, Any]:
+    """
+    根据提供的API ID列表，初始化测试计划。
+    这会重置所有测试状态，并为每个API准备好存储多次任务结果的列表。
+    """
+    with state_lock:
+        _reset_state()
+        for api_id in api_ids:
+            test_state["results"][api_id] = []
+        
+        test_state["apis_pending_init"] = list(api_ids)
+        logger.info(f"TestManager: Initialized test plan with {len(api_ids)} APIs.")
+        return {
+            "status": "success",
+            "message": f"Test plan initialized for {len(api_ids)} APIs.",
+            "apis_pending": list(api_ids)
+        }
+
+@mcp.tool()
+def record_test_result(api_id: str, task_name: str, status: str, details: str) -> Dict[str, Any]:
+    """
+    记录一个API在一个特定任务上的测试结果。
+    
+    Args:
+        api_id (str): 被测试的API的ID。
+        task_name (str): 执行的任务的名称。
+        status (str): 测试状态，例如 'passed' 或 'failed'。
+        details (str): 关于测试结果的详细描述或摘要。
+    """
+    with state_lock:
+        if api_id not in test_state["results"]:
+            # 如果由于某种原因API ID不存在，先创建它
+            test_state["results"][api_id] = []
+        
+        # 将本次任务的结果追加到列表中
+        test_state["results"][api_id].append({
+            "task_name": task_name,
+            "status": status,
+            "details": details
+        })
+        
+        logger.info(f"TestManager: Recorded result for {api_id} on task '{task_name}': {status}")
+        return {"status": "success", "message": f"Result for {api_id} on task '{task_name}' recorded."}
+
+@mcp.tool()
+def get_test_summary() -> Dict[str, Any]:
+    """
+    获取整个测试活动的最终摘要。
+    """
+    with state_lock:
+        total_apis = len(test_state["results"])
+        tasks_completed_count = sum(len(tasks) for tasks in test_state["results"].values())
+
+        summary = {
+            "total_apis": total_apis,
+            "total_tasks_completed": tasks_completed_count,
+            "results": test_state["results"]
+        }
+        logger.info("TestManager: Providing test summary.")
+        return summary
+
+# --- Start the server ---
+if __name__ == "__main__":
+    import uvicorn
+    
+    # Remove the get_next_api_to_test tool; it is no longer needed in the M*N model.
+    # try...except keeps the program from crashing if the tool does not exist
+    # or the attribute name is wrong.
+    try:
+        # Based on earlier observation, try the _tools attribute
+        # NOTE(review): `_tools` is a private attribute of the FastMCP object — confirm it exists.
+        if "get_next_api_to_test" in mcp._tools:
+            del mcp._tools["get_next_api_to_test"]
+            logger.info("Successfully removed deprecated tool: get_next_api_to_test")
+    except (AttributeError, KeyError):
+        logger.warning("Could not remove 'get_next_api_to_test' tool (it may not exist or the tools attribute name is different). Continuing...")
+        pass
+
+    uvicorn.run(mcp.streamable_http_app(), host="127.0.0.1", port=8004) 
--- a/compliance-mcp-agent/servers/pycache/response_utils.cpython-312.pyc
+++ b/compliance-mcp-agent/servers/pycache/response_utils.cpython-312.pyc
--- a/compliance-mcp-agent/servers/response_utils.py
+++ b/compliance-mcp-agent/servers/response_utils.py
@ -0,0 +1,50 @@
+from typing import Any, List, Optional
+import logging
+
+logger = logging.getLogger(__name__)
+
+def extract_data_for_validation(response_json: Any, nested_list_keywords: Optional[List[str]] = None) -> List[Any]:
+    """
+    从原始API响应JSON中智能提取需要被验证的核心业务数据列表。
+    即使只有一个对象，也返回一个单元素的列表。
+
+    策略:
+    1. 如果响应体是包含 'code' 和 'data' 的标准包装，则提取 'data' 的内容。
+    2. 对上一步的结果，遍历一个关键字列表(nested_list_keywords)，检查是否存在分页列表模式，如果存在则提取该列表。
+    3. 如果处理后的数据是列表，直接返回该列表。
+    4. 如果处理后的数据是单个对象（字典），将其包装在单元素列表中返回。
+    5. 如果数据为空或不适用，返回空列表。
+    """
+    if nested_list_keywords is None:
+        nested_list_keywords = ["list", "records", "items", "data"]
+
+    if not response_json:
+        return []
+
+    data_to_process = response_json
+
+    # 策略 1: 解开标准包装
+    if isinstance(response_json, dict) and 'code' in response_json and 'data' in response_json:
+        logger.debug("检测到标准响应包装，提取 'data' 字段内容进行处理。")
+        data_to_process = response_json['data']
+
+    # 策略 2: 提取嵌套的分页列表
+    if isinstance(data_to_process, dict):
+        for keyword in nested_list_keywords:
+            if keyword in data_to_process and isinstance(data_to_process[keyword], list):
+                logger.debug(f"检测到关键字为 '{keyword}' 的嵌套列表，提取其内容。")
+                data_to_process = data_to_process[keyword]
+                break # 找到第一个匹配的就停止
+
+    # 策略 3 & 4: 统一返回列表
+    if isinstance(data_to_process, list):
+        logger.debug(f"数据本身为列表，包含 {len(data_to_process)} 个元素，直接返回。")
+        return data_to_process
+    
+    if isinstance(data_to_process, dict):
+        logger.debug("数据为单个对象，将其包装在列表中返回。")
+        return [data_to_process]
+
+    # 策略 5: 对于其他情况（如数据为None或非对象/列表类型），返回空列表
+    logger.warning(f"待处理的数据既不是列表也不是对象，无法提取进行验证。数据: {str(data_to_process)[:100]}")
+    return [] 
--- a/compliance-mcp-agent/tasks.json
+++ b/compliance-mcp-agent/tasks.json
@ -0,0 +1,11 @@
+[
+  {
+    "name": "完整的CRUD测试",
+    "prompt_template": "你的当前唯一任务是为 API 模型 '{api_id}' 执行一个完整的、更严谨的CRUD生命周期测试。推荐遵循以下子步骤：1. **(Setup)** 调用 `get_dms_crud_endpoints` 获取该API的所有端点URL。2. **(CREATE)** 调用'create'端点创建一个新资源。**重要提示**：请求的JSON body必须遵循 `{{\"data\": [ ... ]}}` 的格式，其中 `[...]` 是一个包含一个模型对象的列表。你需要自己根据API的schema来构建这个模型对象，并把它放进列表中。创建成功后，务必从响应中提取并记住新资源的ID。3. **(READ after Create)** 使用上一步获得的ID，调用'read'端点，验证该资源可以被成功读取，且内容与创建时一致。4. **(UPDATE)** 调用'update'端点，修改该资源。和CREATE一样，请求的JSON body也必须遵循 `{{\"data\": [ ... ]}}` 的格式。5. **(READ after Update)** 再次调用'read'端点，验证资源确实已被更新。6. **(DELETE)** 调用'delete'端点，删除该资源。7. **(VERIFY DELETE BY READ)** 再次调用'read'端点，并验证它返回了预期的“未找到”或类似的错误。8. **(VERIFY DELETE BY LIST)** 调用'list'端点，并验证返回的列表中 **不包含** 你已删除的资源的ID。9. **(Record Result)** 最后，调用 `record_test_result` 来记录最终结果。你必须在 'details' 参数中提供一份详细的中文测试总结，说明执行了哪些步骤，关键的断言是什么，以及最终结果是成功还是失败。例如，成功时可以总结：“完成了完整的CRUD流程：成功创建资源（ID: xxx），读取验证一致，更新成功，删除成功，并通过再次读取和列举确认资源已不存在。测试通过。” 失败时则要说明在哪一步失败以及原因。**一旦 `record_test_result` 被调用，你对这个API的任务就彻底结束了。**"
+  },
+  {
+    "name": "API Schema一致性检查",
+    "prompt_template": "你的任务是为API模型 '{api_id}' 验证其schema的一致性。请调用 `get_schema_by_id` 获取其JSON Schema。然后，自己构造一个符合该schema的简单数据样本。最后，调用 `validate_schema` 工具，用获取的schema来验证你构造的样本数据。调用 `record_test_result` 记录结果：如果验证成功，则在details中说明“Schema一致性检查通过”，并将状态标记为'passed'；否则标记为'failed'并说明原因。"
+  }
+] 
+
--- a/1
+++ b/1
@ -0,0 +1 @@
+Subproject commit 0b1b52ba45edd5bd3bf4c85e6bf3a8d7baf2766c
--- a/memory-bank/capability_statement.md
+++ b/memory-bank/capability_statement.md
@ -0,0 +1,82 @@
+# 项目核心能力说明 (Capability Statement)
+
+## 1. 引言与愿景
+
+本文档旨在提炼并阐述当前自动化测试框架的核心能力。该框架最初为 API 合规性测试而设计，但其底层架构具备高度的灵活性和可扩展性。我们的愿景是基于这些核心能力，将此项目逐步演进为一个支持多种测试类型（如前端UI测试、性能测试、数据一致性测试等）的**通用自动化测试平台**。
+
+本文将详细介绍框架的关键设计理念与核心能力，并提供一份清晰的蓝图，展示如何将新的测试领域无缝集成到现有体系中。
+
+## 2. 核心能力详解
+
+我们的框架通过将测试流程解耦为“目标发现”、“测试执行”、“数据生成”和“结果报告”四个主要阶段，实现了高度的模块化。以下是构成这些阶段的核心能力：
+
+### 2.1. 可插拔的测试引擎架构 (Pluggable Test Engine)
+
+这是框架最具扩展性的能力。我们通过“注册表模式”和“基类继承”实现。
+
+- **测试用例注册表 (`TestCaseRegistry`)**: 系统会自动发现并注册所有继承自 `BaseTestCase` 的测试用例类。这使得添加新测试用例就像编写一个新类一样简单，无需修改任何核心代码。
+- **通用测试基类 (`BaseTestCase`)**: 它定义了测试用例的生命周期和必要接口（如 `applies_to` 判断适用性，`execute` 执行测试）。
+
+**扩展潜力**:
+此模式不局限于API测试。我们可以定义新的测试基类，如 `BaseUITestCase` (集成Selenium/Playwright) 或 `BasePerformanceTestCase` (集成JMeter/Locust)，测试用例注册表可以同样对它们进行管理和调度。
+
+### 2.2. 配置驱动的测试编排 (Configuration-Driven Orchestration)
+
+测试的执行流程由核心的**测试编排器 (`Orchestrator`)** 控制，但其所有行为都通过外部配置（如YAML文件、命令行参数）来驱动。
+
+- **解耦测试逻辑与执行策略**: 用户可以指定测试目标、筛选条件、启用的测试用例、报告输出位置等，而无需触碰代码。
+- **灵活的测试阶段 (`Stage`)**: 编排器支持自定义测试阶段（`BaseStage`），允许在测试执行前后插入自定义逻辑，如环境准备、数据清理等。
+
+**扩展潜力**:
+当引入新的测试类型时，我们可以为其创建一个新的编排器（如 `UIOrchestrator`)，复用相同的配置读取和阶段管理逻辑，仅替换核心的目标发现和测试执行部分。
+
+### 2.3. 智能化的数据与场景生成 (Intelligent Data & Scenario Generation)
+
+框架集成了**LLM服务 (`LLMService`)**，使其超越了传统的数据驱动测试。
+
+- **超越静态数据**: LLM能够根据API的Schema动态生成各种有效和无效的测试数据，极大地提升了边缘情况的测试覆盖率。
+- **场景生成**: 未来可以利用LLM生成复杂的用户操作序列（用于UI测试）或模拟真实的用户行为模式（用于性能测试）。
+
+**扩展潜力**:
+`LLMService` 是一个通用能力，可以为任何需要复杂测试数据的场景提供支持，例如为前端表单生成多样化的输入值。
+
+### 2.4. 标准化的多维报告体系 (Standardized, Multi-dimensional Reporting)
+
+测试框架的核心优势之一是其强大且独立的报告系统。
+
+- **执行与查看分离**: `run_api_tests.py` 负责执行测试并生成原始报告数据，而 `history_viewer.py` 提供一个独立的Web应用来查询和可视化所有历史报告。
+- **多种报告格式**: 自动生成机器可读的 `summary.json` 和人类可读的 `api_call_details.md`。
+- **统一数据模型**: 所有测试结果都将被格式化为一个标准的 `TestSummary` 对象。
+
+**扩展潜力**:
+这个报告系统是完全通用的。任何新的测试引擎（UI测试、性能测试等）只需将其结果构造成 `TestSummary` 格式，就可以立刻被我们的历史查看器支持，无需任何额外开发。
+
+### 2.5. 灵活的目标发现与筛选机制 (Flexible Target Discovery & Filtering)
+
+自动化测试的第一步是确定“测什么”。我们的框架将这一过程抽象化。
+
+- **输入源解析器 (`InputParser`)**: 当前系统能解析OpenAPI/Swagger文件来发现API端点。
+- **目标筛选**: 支持通过标签、路径、名称等多种方式筛选出本次需要测试的具体目标。
+
+**扩展潜力**:
+我们可以轻松添加新的解析器。例如，为前端测试添加一个 `SitemapParser` (解析 `sitemap.xml`) 或 `ComponentManifestParser` (解析组件库的清单文件)，以自动发现所有待测页面或组件。
+
+## 3. 扩展蓝图：集成前端UI自动化测试
+
+为了更具体地说明如何利用上述能力进行扩展，我们以“集成前端UI自动化测试”为例，描绘一个清晰的实施路径：
+
+1.  **定义测试目标输入**: 约定使用 `sitemap.xml` 或自定义的 `ui-targets.json` 文件来描述所有待测试的Web页面及其关键元素。
+2.  **实现新解析器**: 创建一个 `SitemapParser` 类，用于解析站点地图文件，并返回一个标准化的“待测目标”列表。
+3.  **实现UI测试基类**: 创建 `BaseUITestCase(BaseTestCase)`，它在内部初始化一个WebDriver实例（如Selenium），并提供一些基础的UI操作方法（如 `click`, `type_text`）。
+4.  **编写具体UI测试用例**:
+    - `TC-UI-001-TitleCheck(BaseUITestCase)`: 检查页面标题是否正确。
+    - `TC-UI-002-LoginForm(BaseUITestCase)`: 测试登录表单的校验逻辑。
+    - `TC-UI-003-BrokenLinks(BaseUITestCase)`: 检查页面是否存在死链。
+5.  **适配/创建编排器**: 创建 `UIOrchestrator`，它使用 `SitemapParser` 来发现目标，并调度所有适用的 `BaseUITestCase` 子类来执行测试。
+6.  **统一报告格式**: 确保 `UIOrchestrator` 在测试结束后，将其执行结果（包括截图、操作日志等）封装到标准的 `TestSummary` 对象中，并存入报告目录。
+
+完成以上步骤后，`history_viewer.py` 将能直接展示UI测试的历史结果，实现了新测试能力的无缝集成。
+
+## 4. 结论
+
+本框架通过其模块化、可插拔和配置驱动的设计，已为成为一个通用测试平台奠定了坚实的基础。其核心能力并非仅仅为API测试服务，而是构成了一套通用的自动化测试解决方案。通过遵循本文档提供的扩展蓝图，我们可以高效、低成本地将新的测试领域整合进来，逐步实现平台的宏伟愿景。 
--- a/mock_dms_server.py
+++ b/mock_dms_server.py
@ -73,6 +73,7 @@ def preload_schemas():
 def get_api_list():
    """模拟获取DMS中所有API列表的接口。"""
    logging.info("Mock服务器: 收到API列表请求。")
+    print(f"API_LIST_DATA: {API_LIST_DATA}")
    return jsonify(API_LIST_DATA)

@app.route('/api/schema/manage/schema/<string:model_id>', methods=['GET'])
@ -99,6 +100,7 @@ def create_resource(dms_instance_code, name):
    logging.info(f"Mock服务器: 收到对 '{name}' 的CREATE请求")
    request_data = request.get_json(silent=True)
    if not request_data or 'data' not in request_data or not isinstance(request_data['data'], list):
+        print(f"Mock服务器: 收到对 '{name}' 的CREATE请求, 请求体格式错误: {request_data}")
        return jsonify({"code": 400, "message": "请求体格式错误，应为 {'data': [...]}"}), 400

    if name not in IN_MEMORY_DB:
				`@ -0,0 +1 @@`
				`Subproject commit 0b1b52ba45edd5bd3bf4c85e6bf3a8d7baf2766c`