update
Browse files- examples.json +1 -1
- main.py +27 -2
examples.json
CHANGED
@@ -5,6 +5,6 @@
|
|
5 |
["男人走进房间, 上床, 压上", 512, 0.75, 0.35, 1.8, "qgyd2021/chinese_porn_novel", false],
|
6 |
[
|
7 |
"电销场景意图识别。如果不能确定,请输出 “未知意图”。\n\nExamples:\n------------\ntext: 没关系啦 知道的\nintent: 肯定答复\n------------\ntext: 怎么能联系你\nintent: 查联系方式\n------------\ntext: 恩。让我想想吧。\nintent: 考虑一下\n------------\ntext: 说点有用的\nintent: 请讲重点\n------------\ntext: 唉唉\nintent: 语气词\n------------\ntext: 说快一点\nintent: 请讲重点\n------------\ntext: 再介绍一下\nintent: 要求复述\n------------\ntext: 从哪弄到我信息\nintent: 质疑隐私安全\n------------\ntext: 哎。。不是的\nintent: 不是\n------------\ntext: 给我电话号码\nintent: 查联系方式\n------------\ntext: 先看看吧\nintent: 考虑一下\n------------\ntext: 怎么知道道我的信息\nintent: 质疑隐私安全\n------------\ntext: 哎,再说吧,我再想想\nintent: 考虑一下\n------------\ntext: 不,我清醒。\nintent: 不是\n------------\ntext: 重说一次\nintent: 要求复述\n------------\ntext: 行了,晚安\nintent: 肯定答复\n------------\ntext: 额额额额\nintent: 语气词\n------------\ntext: 恩。哎再说吧我考虑一下hiahia\nintent:\n",
|
8 |
-
128, 0.75, 0.
|
9 |
]
|
10 |
]
|
|
|
5 |
["男人走进房间, 上床, 压上", 512, 0.75, 0.35, 1.8, "qgyd2021/chinese_porn_novel", false],
|
6 |
[
|
7 |
"电销场景意图识别。如果不能确定,请输出 “未知意图”。\n\nExamples:\n------------\ntext: 没关系啦 知道的\nintent: 肯定答复\n------------\ntext: 怎么能联系你\nintent: 查联系方式\n------------\ntext: 恩。让我想想吧。\nintent: 考虑一下\n------------\ntext: 说点有用的\nintent: 请讲重点\n------------\ntext: 唉唉\nintent: 语气词\n------------\ntext: 说快一点\nintent: 请讲重点\n------------\ntext: 再介绍一下\nintent: 要求复述\n------------\ntext: 从哪弄到我信息\nintent: 质疑隐私安全\n------------\ntext: 哎。。不是的\nintent: 不是\n------------\ntext: 给我电话号码\nintent: 查联系方式\n------------\ntext: 先看看吧\nintent: 考虑一下\n------------\ntext: 怎么知道道我的信息\nintent: 质疑隐私安全\n------------\ntext: 哎,再说吧,我再想想\nintent: 考虑一下\n------------\ntext: 不,我清醒。\nintent: 不是\n------------\ntext: 重说一次\nintent: 要求复述\n------------\ntext: 行了,晚安\nintent: 肯定答复\n------------\ntext: 额额额额\nintent: 语气词\n------------\ntext: 恩。哎再说吧我考虑一下hiahia\nintent:\n",
|
8 |
+
128, 0.75, 0.35, 1.2, "qgyd2021/few_shot_intent", true
|
9 |
]
|
10 |
]
|
main.py
CHANGED
@@ -6,6 +6,7 @@ import json
|
|
6 |
import os
|
7 |
import platform
|
8 |
import re
|
|
|
9 |
from typing import List
|
10 |
|
11 |
from project_settings import project_path
|
@@ -48,6 +49,29 @@ def repl2(match):
|
|
48 |
return result
|
49 |
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
def main():
|
52 |
args = get_args()
|
53 |
|
@@ -123,8 +147,9 @@ def main():
|
|
123 |
output = output[5:]
|
124 |
|
125 |
output = output.lstrip(" ,.!?")
|
126 |
-
output =
|
127 |
-
output = re.sub(r"([,。!?\u4e00-\u9fa5]) ",
|
|
|
128 |
|
129 |
output = output.replace("[SEP] ", "\n")
|
130 |
output = output.replace("[SEP]", "\n")
|
|
|
6 |
import os
|
7 |
import platform
|
8 |
import re
|
9 |
+
import string
|
10 |
from typing import List
|
11 |
|
12 |
from project_settings import project_path
|
|
|
49 |
return result
|
50 |
|
51 |
|
52 |
+
def remove_space_between_cn_en(text):
    """Drop space separators from *text* except between ASCII-looking tokens.

    The text is split on single space characters. A single space is kept
    between two neighbouring tokens only when the text emitted so far ends
    with an ASCII letter, digit or ``string.punctuation`` character
    (optionally followed by one trailing newline, because of how ``$``
    matches) and the next token starts with an ASCII letter or digit.
    Every other space is removed, so runs of spaces collapse and leading
    spaces are dropped.

    :param text: str, the text to clean up.
    :return: str, the cleaned text. Text without any space is returned
        unchanged; if *text* ends with a space, exactly one trailing space
        is kept.
    """
    if " " not in text:
        return text

    # re.escape keeps every punctuation character literal inside the class.
    # Interpolating string.punctuation raw only compiled by accident and
    # silently left the backslash out of the set.
    left_pattern = re.compile(f"[a-zA-Z0-9{re.escape(string.punctuation)}]$")
    right_pattern = re.compile(r"^[a-zA-Z0-9]")

    pieces = []
    # Only the last two characters emitted can influence a "[...]$" match
    # (the final character, or the one before a trailing newline), so track
    # just those instead of re-scanning the whole output for every token.
    tail = ""
    for token in text.split(" "):
        if not token:
            # Empty tokens come from leading, trailing or repeated spaces.
            continue
        if left_pattern.search(tail) and right_pattern.match(token):
            chunk = " " + token
        else:
            chunk = token
        pieces.append(chunk)
        tail = (tail + chunk)[-2:]

    result = "".join(pieces)
    if text.endswith(" "):
        result += " "
    return result
|
73 |
+
|
74 |
+
|
75 |
def main():
|
76 |
args = get_args()
|
77 |
|
|
|
147 |
output = output[5:]
|
148 |
|
149 |
output = output.lstrip(" ,.!?")
|
150 |
+
output = remove_space_between_cn_en(output)
|
151 |
+
# output = re.sub(r"([,。!?\u4e00-\u9fa5]) ([,。!?\u4e00-\u9fa5])", repl1, output)
|
152 |
+
# output = re.sub(r"([,。!?\u4e00-\u9fa5]) ", repl2, output)
|
153 |
|
154 |
output = output.replace("[SEP] ", "\n")
|
155 |
output = output.replace("[SEP]", "\n")
|