File size: 1,386 Bytes
88aba71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from dataclasses import dataclass
from pandas import Timestamp
from pydantic import BaseModel


@dataclass
class ChatMessage:
    """Plain record describing a single chat message.

    ``@dataclass`` generates ``__init__``, ``__repr__`` and ``__eq__`` from
    the annotated fields; positional construction follows the order in which
    the fields are declared below.

    The meaning of each field is not defined in this module.  The per-field
    notes are inferred from the names only and should be confirmed against
    the code that builds these records.
    """

    # Numeric identifier of the record (presumably a row id -- confirm).
    id: int
    # NOTE(review): non-snake_case spelling kept as-is; it looks like it
    # mirrors an upstream column name -- confirm before renaming.
    MsgSvrID: int
    # Name of the message type, as text.
    type_name: str
    # Integer flag (presumably 1 when the account owner sent the message,
    # 0 otherwise -- confirm).
    is_sender: int
    # Presumably the identifier of the conversation partner -- confirm.
    talker: str
    # Presumably the group/room the message belongs to -- confirm.
    room_name: str
    # Message text.
    msg: str
    # Presumably a source/attachment reference -- confirm.
    src: str
    # Creation time as a pandas ``Timestamp`` (same casing caveat as
    # ``MsgSvrID``).
    CreateTime: Timestamp


@dataclass
class CutMessage:
    """Small record holding a sender flag, a cut-type label and a timestamp.

    ``@dataclass`` generates ``__init__``, ``__repr__`` and ``__eq__``;
    positional arguments follow the declaration order of the fields.

    NOTE(review): judging by the name, this presumably marks a point where a
    conversation is cut/split rather than carrying message content -- confirm
    against the code that produces it.
    """

    # Integer sender flag (presumably 1/0 -- confirm).
    is_sender: int
    # Text label describing the kind of cut.
    cut_type: str
    # Time associated with the cut, as a pandas ``Timestamp``.
    CreateTime: Timestamp


@dataclass
class QaPair:
    """Record for one question/answer sample with its context and a score.

    ``@dataclass`` generates ``__init__``, ``__repr__`` and ``__eq__``;
    positional arguments follow the declaration order of the fields.

    NOTE(review): the ``system`` / ``instruction`` / ``output`` / ``history``
    naming looks like an instruction-tuning sample layout -- confirm against
    the code that writes these records out.
    """

    # Numeric identifier of the pair.
    id: int
    # System prompt text.
    system: str
    # Prompt / question text.
    instruction: str
    # Answer text.
    output: str
    # Earlier turns as a list of string lists (presumably
    # ``[question, answer]`` pairs -- confirm).
    history: list[list[str]]
    # Time attached to the pair, as a pandas ``Timestamp``.
    time: Timestamp
    # Integer score; scale and meaning are not defined in this module.
    score: int


class QaPairScore(BaseModel):
    # Pydantic model pairing an integer ``id`` with an integer ``score``.
    # NOTE(review): the field names match ``QaPair.id`` / ``QaPair.score``,
    # so this presumably carries the score assigned to one QaPair -- confirm
    # against the code that produces and consumes it.
    id: int  # identifier (presumably of the scored QaPair -- confirm)
    score: int  # integer score; scale is not defined in this module


# Message type names, as strings.  Judging by the variable name these are the
# types to be skipped; the code that consumes the list is not in this module.
# The English glosses are literal translations of each entry, for readers who
# do not read Chinese -- the string values themselves are runtime data and
# must not be altered.
skip_type_list = [
    "添加好友",  # add friend
    "推荐公众号",  # recommend official account
    "动画表情",  # animated sticker
    "位置",  # location
    "文件",  # file
    "位置共享",  # location sharing
    "接龙",  # chain / sign-up list
    "引用回复",  # quoted reply
    "视频号直播或直播回放",  # Channels live stream or replay
    "用户上传的GIF表情",  # user-uploaded GIF sticker
    "文件(猜)",  # file (guess)
    "群公告",  # group announcement
    "视频号直播或直播回放等",  # Channels live stream or replay, etc.
    "游戏相关",  # game related
    "转账",  # money transfer
    "赠送红包封面",  # gifted red-packet cover
    "语音通话",  # voice call
    "企业微信打招呼(猜)",  # WeCom greeting (guess)
    "企业微信添加好友(猜)",  # WeCom add friend (guess)
    "系统通知",  # system notification
    "消息撤回1",  # message recall 1
    "拍一拍",  # pat / nudge
    "消息撤回5",  # message recall 5
    "消息撤回6",  # message recall 6
    "消息撤回33",  # message recall 33
    "消息撤回36",  # message recall 36
    "消息撤回57",  # message recall 57
    "邀请加群",  # group invitation
    "未知-11000,0",  # unknown-11000,0
]
# Types that have not been handled.
# Starts empty here; presumably populated elsewhere at runtime -- confirm.
unprocessed_type_list = []