| """ |
| 测试Profile格式生成是否符合OASIS要求 |
| 验证: |
| 1. Twitter Profile生成CSV格式 |
| 2. Reddit Profile生成JSON详细格式 |
| """ |
|
|
| import os |
| import sys |
| import json |
| import csv |
| import tempfile |
|
|
| |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
| from app.services.oasis_profile_generator import OasisProfileGenerator, OasisAgentProfile |
|
|
|
|
def test_profile_formats():
    """Generate Twitter and Reddit profile files and check their required fields.

    Two sample ``OasisAgentProfile`` objects are written to a temporary
    directory with ``OasisProfileGenerator._save_twitter_csv`` and
    ``OasisProfileGenerator._save_reddit_json``. Each file is read back, a
    summary of its first record is printed, and the keys of that first record
    are compared against the required field names for the format.

    Raises:
        AssertionError: If either generated file contains no records, or if
            the first record of either file lacks a required field. Missing
            fields are reported only after both formats have been printed, so
            the full diagnostic output is always available.
    """
    print("=" * 60)
    print("OASIS Profile格式测试")
    print("=" * 60)

    # Sample data: one fully populated profile and one that leaves
    # age/gender/mbti/country unset.
    test_profiles = [
        OasisAgentProfile(
            user_id=0,
            user_name="test_user_123",
            name="Test User",
            bio="A test user for validation",
            persona="Test User is an enthusiastic participant in social discussions.",
            karma=1500,
            friend_count=100,
            follower_count=200,
            statuses_count=500,
            age=25,
            gender="male",
            mbti="INTJ",
            country="China",
            profession="Student",
            interested_topics=["Technology", "Education"],
            source_entity_uuid="test-uuid-123",
            source_entity_type="Student",
        ),
        OasisAgentProfile(
            user_id=1,
            user_name="org_official_456",
            name="Official Organization",
            bio="Official account for Organization",
            persona="This is an official institutional account that communicates official positions.",
            karma=5000,
            friend_count=50,
            follower_count=10000,
            statuses_count=200,
            profession="Organization",
            interested_topics=["Public Policy", "Announcements"],
            source_entity_uuid="test-uuid-456",
            source_entity_type="University",
        ),
    ]

    # __new__ creates the instance without running __init__; only the two
    # _save_* methods are exercised here.
    # NOTE(review): presumably done to avoid constructor dependencies - confirm.
    generator = OasisProfileGenerator.__new__(OasisProfileGenerator)

    # Field problems are collected here and raised at the end so that both
    # formats are always printed in full before the test fails.
    failures = []

    with tempfile.TemporaryDirectory() as temp_dir:
        twitter_path = os.path.join(temp_dir, "twitter_profiles.csv")
        reddit_path = os.path.join(temp_dir, "reddit_profiles.json")

        # ---- Twitter: CSV format ----
        print("\n1. 测试Twitter Profile (CSV格式)")
        print("-" * 40)
        generator._save_twitter_csv(test_profiles, twitter_path)

        # newline='' lets the csv module do its own newline handling, so
        # quoted fields that contain line breaks are parsed correctly.
        with open(twitter_path, 'r', encoding='utf-8', newline='') as f:
            rows = list(csv.DictReader(f))

        if not rows:
            raise AssertionError(
                f"Twitter CSV contains no profile rows: {twitter_path}"
            )
        first_row = rows[0]

        print(f" 文件: {twitter_path}")
        print(f" 行数: {len(rows)}")
        print(f" 表头: {list(first_row.keys())}")
        print("\n 示例数据 (第1行):")
        for key, value in first_row.items():
            print(f" {key}: {value}")

        required_twitter_fields = ['user_id', 'user_name', 'name', 'bio',
                                   'friend_count', 'follower_count',
                                   'statuses_count', 'created_at']
        missing = set(required_twitter_fields) - set(first_row.keys())
        if missing:
            print(f"\n [错误] 缺少字段: {missing}")
            failures.append(
                f"Twitter CSV is missing required fields: {sorted(missing)}"
            )
        else:
            print("\n [通过] 所有必需字段都存在")

        # ---- Reddit: detailed JSON format ----
        print("\n2. 测试Reddit Profile (JSON详细格式)")
        print("-" * 40)
        generator._save_reddit_json(test_profiles, reddit_path)

        with open(reddit_path, 'r', encoding='utf-8') as f:
            reddit_data = json.load(f)

        if not reddit_data:
            raise AssertionError(
                f"Reddit JSON contains no profile entries: {reddit_path}"
            )
        first_entry = reddit_data[0]

        print(f" 文件: {reddit_path}")
        print(f" 条目数: {len(reddit_data)}")
        print(f" 字段: {list(first_entry.keys())}")
        print("\n 示例数据 (第1条):")
        print(json.dumps(first_entry, ensure_ascii=False, indent=4))

        required_reddit_fields = ['realname', 'username', 'bio', 'persona']
        optional_reddit_fields = ['age', 'gender', 'mbti', 'country',
                                  'profession', 'interested_topics']

        missing = set(required_reddit_fields) - set(first_entry.keys())
        if missing:
            print(f"\n [错误] 缺少必需字段: {missing}")
            failures.append(
                f"Reddit JSON is missing required fields: {sorted(missing)}"
            )
        else:
            print("\n [通过] 所有必需字段都存在")

        # Optional fields are informational only; their absence is not a failure.
        present_optional = set(optional_reddit_fields) & set(first_entry.keys())
        print(f" [信息] 可选字段: {present_optional}")

    print("\n" + "=" * 60)
    print("测试完成!")
    print("=" * 60)

    # Explicit raise (not `assert`) so the check still runs under `python -O`.
    if failures:
        raise AssertionError("; ".join(failures))
|
|
|
|
def show_expected_formats():
    """Print reference examples of the profile formats OASIS expects.

    Writes to stdout a banner, a sample Twitter profile table as CSV text
    (header plus two rows), and a sample Reddit profile list rendered as
    indented JSON. Returns nothing.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 40

    print("\n" + heavy_rule)
    print("OASIS 期望的Profile格式参考")
    print(heavy_rule)

    # Twitter reference: one header line followed by two sample records.
    csv_lines = (
        "user_id,user_name,name,bio,friend_count,follower_count,statuses_count,created_at",
        "0,user0,User Zero,I am user zero with interests in technology.,100,150,500,2023-01-01",
        "1,user1,User One,Tech enthusiast and coffee lover.,200,250,1000,2023-01-02",
    )
    print("\n1. Twitter Profile (CSV格式)")
    print(light_rule)
    print("\n".join(csv_lines))

    # Reddit reference: a list holding a single detailed profile object.
    sample_user = {
        "realname": "James Miller",
        "username": "millerhospitality",
        "bio": "Passionate about hospitality & tourism.",
        "persona": "James is a seasoned professional in the Hospitality & Tourism industry...",
        "age": 40,
        "gender": "male",
        "mbti": "ESTJ",
        "country": "UK",
        "profession": "Hospitality & Tourism",
        "interested_topics": ["Economics", "Business"],
    }
    print("\n2. Reddit Profile (JSON详细格式)")
    print(light_rule)
    print(json.dumps([sample_user], ensure_ascii=False, indent=2))
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the format check first, then print the
    # reference formats, in that order.
    for entry_point in (test_profile_formats, show_expected_formats):
        entry_point()
|
|
|
|
|
|