302 lines
9.3 KiB
Python
302 lines
9.3 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
测试基于LLM的智能数据生成
|
||
"""
|
||
|
||
import sys
|
||
import json
|
||
from unittest.mock import Mock
|
||
from ddms_compliance_suite.utils.data_generator import DataGenerator
|
||
|
||
def test_llm_prompt_building():
    """Check that an LLM prompt built from a described schema keeps the business rules.

    Builds a schema whose ``bsflag`` field carries a natural-language rule in its
    description, asks the generator whether the LLM path applies, and then
    inspects the prompt text produced for it.

    Returns:
        True when the generator opts into the LLM and the prompt mentions both
        ``bsflag`` and its rule text; False otherwise.
    """
    print("🧪 测试LLM提示构建")
    print("=" * 60)

    # Schema that includes a bsflag field with a business rule in its description.
    site_schema = {
        "type": "object",
        "properties": {
            "bsflag": {
                "type": "number",
                "title": "删除标识",
                "description": "逻辑删除标识,表示该条记录在用或者已经失效,1表示正常数据、-5表示废弃数据",
            },
            "siteId": {
                "type": "string",
                "title": "物探工区ID",
                "description": "物探工区ID",
            },
            "siteName": {
                "type": "string",
                "title": "物探工区名称",
                "description": "物探工区名称",
            },
            "dataRegion": {
                "type": "string",
                "title": "油田标识",
                "description": "油田标识",
            },
        },
        "required": ["bsflag", "siteId"],
    }

    data_gen = DataGenerator()

    # Ask the generator whether this schema qualifies for the LLM path.
    llm_wanted = data_gen._should_use_llm_for_schema(site_schema)
    print(f"是否应该使用LLM: {llm_wanted}")

    if not llm_wanted:
        print("❌ 未检测到应该使用LLM的条件")
        return False

    print("✅ 检测到包含描述信息的schema,应该使用LLM")

    # Build the prompt and echo it between two rulers for manual inspection.
    llm_prompt = data_gen._build_llm_prompt(site_schema, "create_payload", "CREATE_SITE")
    ruler = "-" * 40
    print("\n📝 生成的LLM提示:")
    print(ruler)
    print(llm_prompt)
    print(ruler)

    # The prompt must carry the field name and the rule text from its description.
    required_fragments = ("bsflag", "1表示正常数据、-5表示废弃数据")
    if all(fragment in llm_prompt for fragment in required_fragments):
        print("✅ 提示包含bsflag的业务规则描述")
        return True

    print("❌ 提示缺少关键的业务规则信息")
    return False
|
||
|
||
def test_mock_llm_generation():
    """Check data generation when a mocked LLM service is supplied.

    A ``Mock`` stands in for the LLM service and returns a fixed payload. The
    function then verifies that the generator produced a non-empty dict, that
    the mock was called, that any ``prompt_instruction`` keyword passed to it
    contains the bsflag rule text, and that the resulting ``bsflag`` is 1 or -5.

    Returns:
        True when every check passes; False on the first failing check.
    """
    print("\n🧪 测试模拟LLM数据生成")
    print("=" * 60)

    # Stand-in LLM service that answers with data obeying the business rule.
    llm_stub = Mock()
    llm_stub.generate_data_from_schema.return_value = {
        "bsflag": 1,  # valid business value
        "siteId": "SITE_001",
        "siteName": "大庆油田勘探工区",
        "dataRegion": "华北",
    }

    site_schema = {
        "type": "object",
        "properties": {
            "bsflag": {
                "type": "number",
                "title": "删除标识",
                "description": "1表示正常数据、-5表示废弃数据",
            },
            "siteId": {
                "type": "string",
                "title": "物探工区ID",
            },
            "siteName": {
                "type": "string",
                "title": "物探工区名称",
            },
            "dataRegion": {
                "type": "string",
                "title": "油田标识",
            },
        },
    }

    data_gen = DataGenerator()

    # Generate data with the mocked LLM service injected.
    generated_data = data_gen.generate_data_from_schema(
        site_schema,
        context_name="create_payload",
        operation_id="CREATE_SITE",
        llm_service=llm_stub,
    )
    print(f"生成的数据: {generated_data}")

    # Anything other than a non-empty dict is a malformed result.
    if not (generated_data and isinstance(generated_data, dict)):
        print(f"❌ 生成的数据格式不正确: {generated_data}")
        return False

    bsflag_value = generated_data.get('bsflag')
    site_name = generated_data.get('siteName')
    print(f"bsflag值: {bsflag_value}")
    print(f"siteName: {site_name}")

    # The generator must have delegated to the LLM service.
    if not llm_stub.generate_data_from_schema.called:
        print("❌ LLM服务未被调用")
        return False
    print("✅ LLM服务被成功调用")

    # Inspect the call arguments; the prompt check only applies when the
    # generator passed a 'prompt_instruction' keyword.
    recorded_call = llm_stub.generate_data_from_schema.call_args
    if recorded_call and 'prompt_instruction' in recorded_call.kwargs:
        instruction = recorded_call.kwargs['prompt_instruction']
        if "1表示正常数据、-5表示废弃数据" not in instruction:
            print("❌ LLM调用时缺少业务规则描述")
            return False
        print("✅ LLM调用时传递了正确的业务规则描述")

    # The generated value must be one of the two values named by the rule.
    if bsflag_value not in (1, -5):
        print(f"❌ 生成的bsflag值不符合业务规则: {bsflag_value}")
        return False

    print(f"✅ 生成的bsflag值符合业务规则: {bsflag_value}")
    return True
|
||
|
||
def test_fallback_to_traditional():
    """Check that generation still yields data when the LLM service raises.

    The mocked LLM service raises on every call. The function passes it to the
    generator and verifies that a non-empty dict containing both schema fields
    comes back anyway.

    Returns:
        True when a non-empty dict with ``bsflag`` and ``testField`` is
        produced; False otherwise.
    """
    print("\n🧪 测试回退到传统生成")
    print("=" * 60)

    # Mocked LLM service whose generate call always raises.
    failing_llm = Mock()
    failing_llm.generate_data_from_schema.side_effect = Exception("LLM服务不可用")

    rule_schema = {
        "type": "object",
        "properties": {
            "bsflag": {
                "type": "number",
                "description": "1表示正常数据、-5表示废弃数据",
            },
            "testField": {
                "type": "string",
            },
        },
    }

    data_gen = DataGenerator()

    # Generation is expected to fall back to the traditional path here.
    generated_data = data_gen.generate_data_from_schema(
        rule_schema,
        context_name="create_payload",
        operation_id="CREATE_SITE",
        llm_service=failing_llm,
    )
    print(f"回退生成的数据: {generated_data}")

    if not (generated_data and isinstance(generated_data, dict)):
        print(f"❌ 回退生成失败: {generated_data}")
        return False

    print("✅ 成功回退到传统数据生成")

    # Both schema properties must be present in the fallback output.
    if all(field in generated_data for field in ('bsflag', 'testField')):
        print("✅ 传统生成包含所有必要字段")
        return True

    print("❌ 传统生成缺少字段")
    return False
|
||
|
||
def test_no_description_schema():
    """Check that a schema without descriptions is handled without the LLM.

    Verifies that the generator declines the LLM path for a bare schema and
    that calling it without an LLM service still returns a non-empty dict.

    Returns:
        True when the LLM path is declined and a non-empty dict is generated;
        False otherwise.
    """
    print("\n🧪 测试没有描述信息的schema")
    print("=" * 60)

    # Minimal schema: no titles, no descriptions.
    plain_schema = {
        "type": "object",
        "properties": {
            "id": {"type": "string"},
            "count": {"type": "number"},
        },
    }

    data_gen = DataGenerator()

    # Ask the generator whether this schema qualifies for the LLM path.
    llm_wanted = data_gen._should_use_llm_for_schema(plain_schema)
    print(f"是否应该使用LLM: {llm_wanted}")

    if llm_wanted:
        print("❌ 错误地认为应该使用LLM")
        return False

    print("✅ 正确识别出不需要使用LLM的schema")

    # No LLM service is supplied, so only the traditional path is available.
    generated_data = data_gen.generate_data_from_schema(plain_schema)
    print(f"传统生成的数据: {generated_data}")

    if generated_data and isinstance(generated_data, dict):
        print("✅ 传统生成工作正常")
        return True

    print("❌ 传统生成失败")
    return False
|
||
|
||
def main():
    """Run the four checks in order, print a summary, and exit.

    Exits the process with status 0 when every check returns a truthy value,
    and with status 1 otherwise.
    """
    banner = "=" * 80
    print("🚀 基于LLM的智能数据生成测试")
    print(banner)

    # Checks run in this order; the total is derived from the tuple length.
    checks = (
        test_llm_prompt_building,      # 1: LLM prompt building
        test_mock_llm_generation,      # 2: generation with a mocked LLM
        test_fallback_to_traditional,  # 3: fallback mechanism
        test_no_description_schema,    # 4: schema without descriptions
    )
    total_tests = len(checks)

    success_count = 0
    for check in checks:
        if check():
            success_count += 1

    # Summary
    print("\n" + banner)
    print("📋 测试总结")
    print(banner)
    print(f"通过测试: {success_count}/{total_tests}")

    if success_count != total_tests:
        print("❌ 部分测试失败")
        sys.exit(1)
    else:
        success_report = (
            "🎉 智能数据生成测试通过!",
            "\n✅ 实现的功能:",
            "- LLM根据字段描述智能生成数据",
            "- 自动检测是否需要使用LLM",
            "- 构建包含业务规则的详细提示",
            "- 优雅的回退到传统生成方式",
            "- 支持复杂的业务规则理解",
            "\n💡 优势:",
            "- 不需要硬编码业务规则",
            "- LLM可以理解自然语言描述",
            "- 自动适应新的业务字段",
            "- 生成更真实的测试数据",
            "\n🔧 使用方法:",
            "在schema中添加详细的description字段,LLM会自动理解并生成合适的数据",
        )
        for line in success_report:
            print(line)

        sys.exit(0)
|
||
|
||
# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|