qingxu98 committed on
Commit
d245958
·
1 Parent(s): 8dd4d48
crazy_functions/game_fns/game_ascii_art.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui, update_ui_lastest_msg
2
+ from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
3
+ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
4
+ from request_llms.bridge_all import predict_no_ui_long_connection
5
+ from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
6
+ import random
7
+
8
+
9
class MiniGame_ASCII_Art(GptAcademicGameBaseState):
    """Guessing game: the model is asked to draw an animal as ASCII art and the user guesses it."""

    def step(self, prompt, chatbot, history):
        """Handle one user turn of the game (generator that yields UI updates).

        On the first turn (``step_cnt == 0``) an animal is picked at random and
        the model is asked to draw it.  On later turns the user's text is either
        the literal command ``exit`` (reveal the answer, set ``delete_game``) or
        a guess that is compared with the answer through ``is_same_thing``.
        """
        if self.step_cnt == 0:
            chatbot.append(["我画你猜(动物)", "请稍等..."])
        elif prompt.strip() == 'exit':
            # The user gave up: reveal the answer and flag the game for deletion.
            self.delete_game = True
            yield from update_ui_lastest_msg(lastmsg=f"谜底是{self.obj},游戏结束。", chatbot=chatbot, history=history, delay=0.)
            return
        else:
            chatbot.append([prompt, ""])
        yield from update_ui(chatbot=chatbot, history=history)

        if self.step_cnt == 0:
            self.lock_plugin(chatbot)
            self.cur_task = 'draw'

        if self.cur_task == 'draw':
            self.obj = random.choice(["狗","猫","鸟","鱼","老鼠","蛇"])
            inputs = "I want to play a game called Guess the ASCII art. You can draw the ASCII art and I will try to guess it. " + \
                f"This time you draw a {self.obj}. Note that you must not indicate what you have draw in the text, and you should only produce the ASCII art wrapped by ```. "
            raw_res = predict_no_ui_long_connection(inputs=inputs, llm_kwargs=self.llm_kwargs, history=[], sys_prompt="")
            # The task is switched before the reply is parsed, exactly as before.
            self.cur_task = 'identify user guess'
            res = get_code_block(raw_res)
            # In-place extension: the caller's history list receives the answer and the drawing.
            history.extend(['', f'the answer is {self.obj}', inputs, res])
            yield from update_ui_lastest_msg(lastmsg=res, chatbot=chatbot, history=history, delay=0.)

        elif self.cur_task == 'identify user guess':
            guessed_right = is_same_thing(self.obj, prompt, self.llm_kwargs)
            if guessed_right:
                self.delete_game = True
                final_msg = "你猜对了!"
            else:
                # Stay in the guessing state so the next turn is judged again.
                self.cur_task = 'identify user guess'
                final_msg = "猜错了,再试试,输入“exit”获取答案。"
            yield from update_ui_lastest_msg(lastmsg=final_msg, chatbot=chatbot, history=history, delay=0.)
crazy_functions/game_fns/game_interactive_story.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt asking the model to write the first scene; `{headstart}` is the opening sentence.
prompts_hs = """ 请以“{headstart}”为开头,编写一个小说的第一幕。

- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 字数要求:第一幕的字数少于300字,且少于2个段落。
"""

# Prompt asking the model for four candidate plot directions;
# `{previously_on_story}` is the story written so far.
prompts_interact = """ 小说的前文回顾:

{previously_on_story}


你是一个作家,根据以上的情节,给出4种不同的后续剧情发展方向,每个发展方向都精明扼要地用一句话说明。稍后,我将在这4个选择中,挑选一种剧情发展。

输出格式例如:
1. 后续剧情发展1
2. 后续剧情发展2
3. 后续剧情发展3
4. 后续剧情发展4
"""


# Prompt asking the model to write the next scene; `{choice}` is the list of
# offered directions and `{user_choice}` is what the user picked or typed.
prompts_resume = """小说的前文回顾:

{previously_on_story}


你是一个作家,我们正在互相讨论,确定后续剧情的发展。
在以下的剧情发展中,

{choice}

我认为更合理的是:{user_choice}。
请在前文的基础上(不要重复前文),围绕我选定的剧情情节,编写小说的下一幕。

- 禁止杜撰不符合我选择的剧情。
- 尽量短,不要包含太多情节,因为你接下来将会与用户互动续写下面的情节,要留出足够的互动空间。
- 不要重复前文。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 小说的下一幕字数少于300字,且少于2个段落。
"""


# Prompt asking the model to write the final scene around `{user_choice}`.
prompts_terminate = """小说的前文回顾:

{previously_on_story}


你是一个作家,我们正在互相讨论,确定后续剧情的发展。
现在,故事该结束了,我认为最合理的故事结局是:{user_choice}。

请在前文的基础上(不要重复前文),编写小说的最后一幕。

- 不要重复前文。
- 出现人物时,给出人物的名字。
- 积极地运用环境描写、人物描写等手法,让读者能够感受到你的故事世界。
- 积极地运用修辞手法,比如比喻、拟人、排比、对偶、夸张等等。
- 字数要求:最后一幕的字数少于1000字。
"""
64
+
65
+
66
+ from toolbox import CatchException, update_ui, update_ui_lastest_msg
67
+ from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
68
+ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
69
+ from request_llms.bridge_all import predict_no_ui_long_connection
70
+ from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
71
+ import random
72
+
73
+
74
class MiniGame_ResumeStory(GptAcademicGameBaseState):
    """Interactive story game: the model writes short scenes and the user steers the plot."""

    # Candidate opening sentences; one is chosen at random for every new game.
    story_headstart = [
        '先行者知道,他现在是全宇宙中唯一的一个人了。',
        '深夜,一个年轻人穿过天安门广场向纪念堂走去。在二十二世纪编年史中,计算机把他的代号定为M102。',
        '他知道,这最后一课要提前讲了。又一阵剧痛从肝部袭来,几乎使他晕厥过去。',
        '在距地球五万光年的远方,在银河系的中心,一场延续了两万年的星际战争已接近尾声。那里的太空中渐渐隐现出一个方形区域,仿佛灿烂的群星的背景被剪出一个方口。',
        '伊依一行三人乘坐一艘游艇在南太平洋上做吟诗航行,他们的目的地是南极,如果几天后能顺利到达那里,他们将钻出地壳去看诗云。',
        '很多人生来就会莫名其妙地迷上一样东西,仿佛他的出生就是要和这东西约会似的,正是这样,圆圆迷上了肥皂泡。'
    ]

    def begin_game_step_0(self, prompt, chatbot, history):
        """Initialise the per-game state (opening line, empty story, system prompt) at step 0."""
        self.headstart = random.choice(self.story_headstart)
        self.story = []
        chatbot.append(["互动写故事", f"这次的故事开头是:{self.headstart}"])
        self.sys_prompt_ = '你是一个想象力丰富的杰出作家。正在与你的朋友互动,一起写故事,因此你每次写的故事段落应少于300字(结局除外)。'

    def generate_story_image(self, story_paragraph):
        """Return an HTML snippet with an illustration for ``story_paragraph``.

        Illustration is best-effort: any ordinary failure (missing image
        plugin, model or network error) yields an empty string so the story
        can continue without a picture.
        """
        try:
            from crazy_functions.图片生成 import gen_image
            prompt_ = predict_no_ui_long_connection(inputs=story_paragraph, llm_kwargs=self.llm_kwargs, history=[], sys_prompt='你需要根据用户给出的小说段落,进行简短的环境描写。要求:80字以内。')
            image_url, image_path = gen_image(self.llm_kwargs, prompt_, '512x512', model="dall-e-2", quality='standard', style='natural')
            return f'<br/><div align="center"><img src="file={image_path}"></div>'
        except Exception:
            # `Exception` rather than a bare `except:` so KeyboardInterrupt,
            # SystemExit and GeneratorExit are not swallowed.
            return ''

    def _previously_on_story(self):
        """Join all scenes written so far, one per line, for use in the prompts."""
        return "".join(paragraph + '\n' for paragraph in self.story)

    def _show_paragraph_with_image(self, story_paragraph, chatbot, history):
        """Show the scene with a placeholder first, then again with its illustration."""
        yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>正在生成插图中 ...', chatbot=chatbot, history=history, delay=0.)
        yield from update_ui_lastest_msg(lastmsg=story_paragraph + '<br/>'+ self.generate_story_image(story_paragraph), chatbot=chatbot, history=history, delay=0.)

    def _ask_for_next_choices(self, hint, chatbot):
        """Ask the model for candidate plot directions and wait for the user's pick next turn."""
        inputs_ = prompts_interact.format(previously_on_story=self._previously_on_story())
        self.next_choices = yield from request_gpt_model_in_new_thread_with_ui_alive(
            inputs_, hint, self.llm_kwargs,
            chatbot,
            [],
            self.sys_prompt_
        )
        self.cur_task = 'user_choice'

    def step(self, prompt, chatbot, history):
        """Handle one user turn (generator that yields UI updates).

        ``cur_task`` selects what happens: ``'head_start'`` writes the first
        scene, ``'user_choice'`` continues the story along the user's choice,
        and ``'story_terminate'`` writes the ending and sets ``delete_game``.
        """
        # First, deal with initialisation and the special user commands.
        if self.step_cnt == 0:
            self.begin_game_step_0(prompt, chatbot, history)
            self.lock_plugin(chatbot)
            self.cur_task = 'head_start'
        else:
            if prompt.strip() in ('exit', '结束剧情'):
                # The user asked to quit: flag the game for deletion and stop.
                self.delete_game = True
                yield from update_ui_lastest_msg(lastmsg="游戏结束。", chatbot=chatbot, history=history, delay=0.)
                return
            if '剧情收尾' in prompt:
                self.cur_task = 'story_terminate'
        # Refresh the UI right away so the user is not kept waiting.
        yield from update_ui(chatbot=chatbot, history=history)

        # Main game logic.
        if self.cur_task == 'head_start':
            # First scene of the story.
            inputs_ = prompts_hs.format(headstart=self.headstart)
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, '故事开头', self.llm_kwargs,
                chatbot, [], self.sys_prompt_
            )
            self.story.append(story_paragraph)
            yield from self._show_paragraph_with_image(story_paragraph, chatbot, history)
            yield from self._ask_for_next_choices(
                '请在以下几种故事走向中,选择一种(当然,您也可以选择给出其他故事走向):', chatbot
            )

        elif self.cur_task == 'user_choice':
            # Continue the story according to the user's choice.
            # Drop the previous "please choose" entry from the chat window, if it is the last one.
            if '请在以下几种故事走向中,选择一种' in chatbot[-1][0]: chatbot.pop(-1)
            inputs_ = prompts_resume.format(previously_on_story=self._previously_on_story(), choice=self.next_choices, user_choice=prompt)
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, f'下一段故事(您的选择是:{prompt})。', self.llm_kwargs,
                chatbot, [], self.sys_prompt_
            )
            self.story.append(story_paragraph)
            yield from self._show_paragraph_with_image(story_paragraph, chatbot, history)
            yield from self._ask_for_next_choices(
                '请在以下几种故事走向中,选择一种。当然,您也可以给出您心中的其他故事走向。另外,如果您希望剧情立即收尾,请输入剧情走向,并以“剧情收尾”四个字提示程序。', chatbot
            )

        elif self.cur_task == 'story_terminate':
            # Write the ending according to the user's hint.
            inputs_ = prompts_terminate.format(previously_on_story=self._previously_on_story(), user_choice=prompt)
            story_paragraph = yield from request_gpt_model_in_new_thread_with_ui_alive(
                inputs_, f'故事收尾(您的选择是:{prompt})。', self.llm_kwargs,
                chatbot, [], self.sys_prompt_
            )
            yield from self._show_paragraph_with_image(story_paragraph, chatbot, history)

            # The story is finished: flag the game for deletion.
            self.delete_game = True
            return
crazy_functions/game_fns/game_utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from crazy_functions.json_fns.pydantic_io import GptJsonIO, JsonStringError
3
+ from request_llms.bridge_all import predict_no_ui_long_connection
4
def get_code_block(reply):
    """Return the single ```-fenced block found in ``reply``, fences included.

    Raises:
        RuntimeError: if ``reply`` contains no fenced block or more than one.
    """
    import re
    fenced = re.findall(r"```([\s\S]*?)```", reply)  # non-greedy: one match per fenced block
    if len(fenced) != 1:
        raise RuntimeError("GPT is not generating proper code.")
    (inner,) = fenced
    return f"```{inner}```"
11
+
12
def is_same_thing(a, b, llm_kwargs):
    """Ask the language model whether ``a`` (target) and ``b`` (user input) name the same thing.

    The model is queried twice: first for a free-form analysis, then again,
    with that analysis as history, for an answer in the JSON format described
    by ``GptJsonIO``.

    Returns:
        The parsed ``is_same_thing`` boolean, or ``False`` when the JSON
        answer cannot be parsed/repaired (``JsonStringError``).
    """
    from pydantic import BaseModel, Field

    class IsSameThing(BaseModel):
        is_same_thing: bool = Field(description="determine whether two objects are same thing.", default=False)

    def run_gpt_fn(inputs, sys_prompt, history=None):
        # `None` instead of a shared `[]` default: a mutable default would be
        # reused across calls if the callee ever modified it.
        return predict_no_ui_long_connection(
            inputs=inputs, llm_kwargs=llm_kwargs,
            history=[] if history is None else history, sys_prompt=sys_prompt, observe_window=[]
        )

    gpt_json_io = GptJsonIO(IsSameThing)
    # Fixed prompt typo: "Identity whether" -> "Identify whether".
    inputs_01 = "Identify whether the user input and the target is the same thing: \n target object: {a} \n user input object: {b} \n\n\n".format(a=a, b=b)
    inputs_01 += "\n\n\n Note that the user may describe the target object with a different language, e.g. cat and 猫 are the same thing."
    analyze_res_cot_01 = run_gpt_fn(inputs_01, "", [])

    inputs_02 = inputs_01 + gpt_json_io.format_instructions
    analyze_res = run_gpt_fn(inputs_02, "", [inputs_01, analyze_res_cot_01])

    try:
        res = gpt_json_io.generate_output_auto_repair(analyze_res, run_gpt_fn)
    except JsonStringError:
        return False
    return res.is_same_thing
crazy_functions/ipc_fns/mp.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+ import pickle
3
+ import multiprocessing
4
+
5
def run_in_subprocess_wrapper_func(v_args):
    """Child-process entry point: run the pickled call and report its outcome.

    ``v_args`` is a pickle of ``(func, args, kwargs, return_dict,
    exception_dict)``; it is produced by ``run_in_subprocess_with_timeout`` in
    the parent process, never from external input.  On success the return
    value is stored under ``return_dict['result']``.  On failure
    ``exception_dict['exception']`` receives
    ``(exception type, exception instance, formatted traceback text)``.
    """
    import traceback
    func, args, kwargs, return_dict, exception_dict = pickle.loads(v_args)
    try:
        return_dict['result'] = func(*args, **kwargs)
    except Exception as e:
        # Traceback objects cannot be pickled, so storing `sys.exc_info()` in
        # the manager dict fails and the error is lost.  Ship the exception
        # itself plus the traceback rendered as text instead.
        remote_traceback = traceback.format_exc()
        try:
            pickle.loads(pickle.dumps(e))
            transferable = e
        except Exception:
            # The exception itself cannot cross the process boundary.
            transferable = RuntimeError(f'{type(e).__name__}: {e}')
        exception_dict['exception'] = (type(transferable), transferable, remote_traceback)


def run_in_subprocess_with_timeout(func, timeout=60):
    """Wrap ``func`` so that each call runs in a child process with a time limit.

    Only applied when ``platform.system()`` is ``'Linux'``; on other platforms
    ``func`` is returned unchanged.

    Args:
        func: callable to wrap; it, its arguments and its result must be picklable.
        timeout: seconds to wait for the child process.

    Returns:
        A wrapper with the same call signature as ``func``.  The wrapper raises
        ``TimeoutError`` when the child does not finish in time, re-raises the
        exception raised by ``func`` in the child (chained to the child's
        traceback text), and raises ``RuntimeError`` when the child exits
        without reporting either a result or an exception.
    """
    if platform.system() != 'Linux':
        return func

    def wrapper(*args, **kwargs):
        # One manager, closed on exit: previously two manager processes were
        # started per call and never shut down.
        with multiprocessing.Manager() as manager:
            return_dict = manager.dict()
            exception_dict = manager.dict()
            v_args = pickle.dumps((func, args, kwargs, return_dict, exception_dict))
            process = multiprocessing.Process(target=run_in_subprocess_wrapper_func, args=(v_args,))
            process.start()
            process.join(timeout)
            if process.is_alive():
                process.terminate()
                process.join()   # reap the terminated child
                process.close()
                raise TimeoutError(f'功能单元{str(func)}未能在规定时间内完成任务')
            exit_code = process.exitcode
            process.close()
            # Copy the outcome out before the manager shuts down.
            failure = exception_dict.copy()
            outcome = return_dict.copy()

        if 'exception' in failure:
            # The child ran into an exception: re-raise it here.
            _, exc_value, remote_traceback = failure['exception']
            raise exc_value from RuntimeError(remote_traceback)
        if 'result' in outcome:
            return outcome['result']
        # Neither key present: the child died before reporting anything.
        raise RuntimeError(f'subprocess running {func} exited with code {exit_code} without returning a result')

    return wrapper
crazy_functions/pdf_fns/breakdown_txt.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crazy_functions.ipc_fns.mp import run_in_subprocess_with_timeout
2
+
3
def force_breakdown(txt, limit, get_token_fn):
    """Brute-force split, used when no punctuation or blank line can serve as a cut point.

    Tries split positions from the end of ``txt`` towards the start and
    returns ``(head, tail)`` for the first position whose head has fewer than
    ``limit`` tokens.  If no position qualifies, both returned strings are the
    fixed error text.
    """
    for split_at in range(len(txt) - 1, -1, -1):
        head = txt[:split_at]
        if get_token_fn(head) < limit:
            return head, txt[split_at:]
    return "Tiktoken未知错误", "Tiktoken未知错误"
10
+
11
+
12
def maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage):
    """Keep the actively processed text at a bounded size to speed up computation.

    When the active text is shorter than ``low_mark`` characters and the
    storage is not empty, the whole storage is appended to it.  When the
    active text (possibly just refilled) is longer than ``high_mark``
    characters, everything past ``high_mark`` is moved to the front of the
    storage.

    Returns:
        The updated ``(remain_txt_to_cut, remain_txt_to_cut_storage)`` pair.
    """
    low_mark = int(5e4)
    high_mark = int(1e5)
    active, parked = remain_txt_to_cut, remain_txt_to_cut_storage
    if parked and len(active) < low_mark:
        active, parked = active + parked, ""
    if len(active) > high_mark:
        active, parked = active[:high_mark], active[high_mark:] + parked
    return active, parked
26
+
27
+
28
def cut(limit, get_token_fn, txt_tocut, must_break_at_empty_line, break_anyway=False):
    """Split ``txt_tocut`` into fragments whose token count stays within ``limit``.

    Args:
        limit: maximum number of tokens per fragment.
        get_token_fn: callable returning the token count of a string.
        txt_tocut: the text to split.
        must_break_at_empty_line: when true, only empty lines are accepted as
            cut points; otherwise any line boundary is.
        break_anyway: when true, fall back to ``force_breakdown`` if no line
            boundary works instead of raising.

    Returns:
        The list of fragments, in order.

    Raises:
        RuntimeError: no suitable cut point exists and ``break_anyway`` is false.
    """
    res = []
    total_len = len(txt_tocut)
    fin_len = 0
    # For speed only a bounded window of the text is processed at a time;
    # the rest is parked in the storage string (see maintain_storage).
    remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(txt_tocut, "")

    while True:
        remain_tokens = get_token_fn(remain_txt_to_cut)
        if remain_tokens <= limit:
            # The active text fits into one fragment, no cutting needed.
            res.append(remain_txt_to_cut)
            fin_len += len(remain_txt_to_cut)
            if not remain_txt_to_cut_storage:
                break
            # Fix: text still parked in storage used to be dropped here when
            # the active window already fit the limit; pull it in and go on.
            remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage("", remain_txt_to_cut_storage)
            continue

        # The active text is too long: cut it at a line boundary.
        lines = remain_txt_to_cut.split('\n')

        # Estimate a cut position from the token ratio.
        estimated_line_cut = int(limit / remain_tokens * len(lines))

        # Walk backwards from the estimate looking for a usable cut line (cnt).
        cnt = 0
        for cnt in reversed(range(estimated_line_cut)):
            if must_break_at_empty_line and lines[cnt] != "":
                # Only empty lines qualify in this mode.
                continue
            prev = "\n".join(lines[:cnt])
            post = "\n".join(lines[cnt:])
            if get_token_fn(prev) < limit:
                break

        if cnt == 0:
            # No suitable cut point was found.
            if not break_anyway:
                raise RuntimeError(f"存在一行极长的文本!{remain_txt_to_cut}")
            # Fix: brute-force the text that is still pending, not the whole
            # original input (which re-emitted already processed text).
            prev, post = force_breakdown(remain_txt_to_cut, limit, get_token_fn)

        res.append(prev)
        fin_len += len(prev)
        # Prepare the next iteration.
        remain_txt_to_cut = post
        remain_txt_to_cut, remain_txt_to_cut_storage = maintain_storage(remain_txt_to_cut, remain_txt_to_cut_storage)
        process = fin_len/total_len
        print(f'正在文本切分 {int(process*100)}%')
        if len(remain_txt_to_cut.strip()) == 0:
            break
    return res
83
+
84
+
85
def breakdown_text_to_satisfy_token_limit_(txt, limit, llm_model="gpt-3.5-turbo"):
    """Split ``txt`` so every fragment satisfies the token ``limit``, trying several strategies.

    Strategies are tried in order and the first one that does not raise
    ``RuntimeError`` wins: empty lines, any line break, English full stops,
    Chinese full stops.  If all of them fail the text is cut by brute force.
    Token counts come from the tokenizer registered for ``llm_model``.
    """
    from request_llms.bridge_all import model_info
    enc = model_info[llm_model]['tokenizer']
    def get_token_fn(txt): return len(enc.encode(txt, disallowed_special=()))

    def at_empty_lines():
        # Attempt 1: cut at empty lines (\n\n).
        return cut(limit, get_token_fn, txt, must_break_at_empty_line=True)

    def at_line_breaks():
        # Attempt 2: cut at single line breaks (\n).
        return cut(limit, get_token_fn, txt, must_break_at_empty_line=False)

    def at_english_periods():
        # Attempt 3: cut at English full stops.  The Chinese full stop in the
        # marker is intentional; it only tags the inserted line breaks.
        res = cut(limit, get_token_fn, txt.replace('.', '。\n'), must_break_at_empty_line=False)
        return [r.replace('。\n', '.') for r in res]

    def at_chinese_periods():
        # Attempt 4: cut at Chinese full stops.
        res = cut(limit, get_token_fn, txt.replace('。', '。。\n'), must_break_at_empty_line=False)
        return [r.replace('。。\n', '。') for r in res]

    for strategy in (at_empty_lines, at_line_breaks, at_english_periods, at_chinese_periods):
        try:
            return strategy()
        except RuntimeError:
            continue
    # Attempt 5: nothing else worked, cut anywhere.
    return cut(limit, get_token_fn, txt, must_break_at_empty_line=False, break_anyway=True)
111
+
112
# Public entry point: the splitter wrapped so that each call is given 60 seconds.
breakdown_text_to_satisfy_token_limit = run_in_subprocess_with_timeout(breakdown_text_to_satisfy_token_limit_, timeout=60)

if __name__ == '__main__':
    # Manual smoke test: split a large text built from a sample PDF.
    from crazy_functions.crazy_utils import read_and_clean_pdf_text
    file_content, page_one = read_and_clean_pdf_text("build/assets/at.pdf")

    from request_llms.bridge_all import model_info
    # Double the content five times to get a big input.
    for _ in range(5):
        file_content = file_content + file_content

    print(len(file_content))
    TOKEN_LIMIT_PER_FRAGMENT = 2500
    res = breakdown_text_to_satisfy_token_limit(file_content, TOKEN_LIMIT_PER_FRAGMENT)
125
+
crazy_functions/vector_fns/__init__.py ADDED
File without changes
crazy_functions/vector_fns/general_file_loader.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # From project chatglm-langchain
2
+
3
+
4
+ from langchain.document_loaders import UnstructuredFileLoader
5
+ from langchain.text_splitter import CharacterTextSplitter
6
+ import re
7
+ from typing import List
8
+
9
class ChineseTextSplitter(CharacterTextSplitter):
    """Text splitter that breaks text into sentences using Chinese and Western punctuation."""

    def __init__(self, pdf: bool = False, sentence_size: int = None, **kwargs):
        """Store the options; remaining keyword arguments go to ``CharacterTextSplitter``.

        Args:
            pdf: when true, whitespace is normalised before splitting.
            sentence_size: length above which ``split_text`` splits a sentence
                further.  ``split_text`` compares lengths against it, so it
                must be set to an int before that method is used.
        """
        super().__init__(**kwargs)
        self.pdf = pdf
        self.sentence_size = sentence_size

    def split_text1(self, text: str) -> List[str]:
        """Split ``text`` at sentence-ending punctuation, keeping it with the preceding sentence."""
        if self.pdf:
            text = re.sub(r"\n{3,}", "\n", text)
            # Raw string: '\s' in a normal literal is an invalid escape sequence.
            text = re.sub(r'\s', ' ', text)
            text = text.replace("\n\n", "")
        sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?]["’”」』]{0,2}|(?=["‘“「『]{1,2}|$))')  # del :;
        sent_list = []
        for ele in sent_sep_pattern.split(text):
            if sent_sep_pattern.match(ele) and sent_list:
                # A separator: glue it onto the sentence it terminates.
                sent_list[-1] += ele
            elif ele:
                sent_list.append(ele)
        return sent_list

    def split_text(self, text: str) -> List[str]:  # NOTE: this logic still needs further optimisation
        """Split ``text`` into sentences, splitting over-long ones further.

        Line breaks are first inserted after sentence-ending punctuation.
        Every resulting sentence longer than ``self.sentence_size`` is then
        split at commas/periods, then at line breaks or runs of spaces, and
        finally at single spaces.
        """
        if self.pdf:
            text = re.sub(r"\n{3,}", r"\n", text)
            # Raw string: '\s' in a normal literal is an invalid escape sequence.
            text = re.sub(r'\s', " ", text)
            text = re.sub("\n\n", "", text)

        text = re.sub(r'([;;.!?。!?\?])([^”’])', r"\1\n\2", text)  # single-character sentence terminators
        text = re.sub(r'(\.{6})([^"’”」』])', r"\1\n\2", text)  # English ellipsis
        text = re.sub(r'(\…{2})([^"’”」』])', r"\1\n\2", text)  # Chinese ellipsis
        text = re.sub(r'([;;!?。!?\?]["’”」』]{0,2})([^;;!?,。!?\?])', r'\1\n\2', text)
        # If a terminator precedes a closing quote, the quote ends the sentence,
        # so the line break goes after the quote; the rules above keep the quotes.
        text = text.rstrip()  # drop any trailing line breaks at the end of the text
        # Dashes and English double quotes are not treated as separators here;
        # adjust the patterns above if that is needed.
        ls = [i for i in text.split("\n") if i]
        for ele in ls:
            if len(ele) > self.sentence_size:
                ele1 = re.sub(r'([,,.]["’”」』]{0,2})([^,,.])', r'\1\n\2', ele)
                ele1_ls = ele1.split("\n")
                for ele_ele1 in ele1_ls:
                    if len(ele_ele1) > self.sentence_size:
                        ele_ele1_marked = re.sub(r'([\n]{1,}| {2,}["’”」』]{0,2})([^\s])', r'\1\n\2', ele_ele1)
                        ele2_ls = ele_ele1_marked.split("\n")
                        for ele_ele2 in ele2_ls:
                            if len(ele_ele2) > self.sentence_size:
                                ele_ele3 = re.sub('( ["’”」』]{0,2})([^ ])', r'\1\n\2', ele_ele2)
                                ele2_id = ele2_ls.index(ele_ele2)
                                # Rebinding creates a new list; the loop keeps
                                # iterating over the list it started with.
                                ele2_ls = ele2_ls[:ele2_id] + [i for i in ele_ele3.split("\n") if i] + ele2_ls[
                                                                                                       ele2_id + 1:]
                        ele_id = ele1_ls.index(ele_ele1)
                        ele1_ls = ele1_ls[:ele_id] + [i for i in ele2_ls if i] + ele1_ls[ele_id + 1:]

                # Renamed from `id` to stop shadowing the builtin.
                ele_pos = ls.index(ele)
                ls = ls[:ele_pos] + [i for i in ele1_ls if i] + ls[ele_pos + 1:]
        return ls
63
+
64
def load_file(filepath, sentence_size):
    """Load ``filepath`` with ``UnstructuredFileLoader`` and split it with ``ChineseTextSplitter``.

    Returns whatever ``load_and_split`` returns for the file.
    """
    file_loader = UnstructuredFileLoader(filepath, mode="elements")
    return file_loader.load_and_split(
        text_splitter=ChineseTextSplitter(pdf=False, sentence_size=sentence_size)
    )
70
+
crazy_functions/vector_fns/vector_database.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # From project chatglm-langchain
2
+
3
+ import threading
4
+ from toolbox import Singleton
5
+ import os
6
+ import shutil
7
+ import os
8
+ import uuid
9
+ import tqdm
10
+ from langchain.vectorstores import FAISS
11
+ from langchain.docstore.document import Document
12
+ from typing import List, Tuple
13
+ import numpy as np
14
+ from crazy_functions.vector_fns.general_file_loader import load_file
15
+
16
# Short name -> model identifier of the available embedding models.
embedding_model_dict = {
    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
    "ernie-base": "nghuyong/ernie-3.0-base-zh",
    "text2vec-base": "shibing624/text2vec-base-chinese",
    "text2vec": "GanymedeNil/text2vec-large-chinese",
}

# Embedding model name
EMBEDDING_MODEL = "text2vec"

# Embedding running device
EMBEDDING_DEVICE = "cpu"

# Context-based prompt template; "{question}" and "{context}" must be kept.
PROMPT_TEMPLATE = """已知信息:
{context}

根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""

# Sentence length used when splitting text
SENTENCE_SIZE = 100

# Length of a single context chunk after matching
CHUNK_SIZE = 250

# LLM input history length
LLM_HISTORY_LEN = 3

# return top-k text chunk from vector store
VECTOR_SEARCH_TOP_K = 5

# Relevance score threshold for retrieval; range is roughly 0-1100 and 0 disables
# it.  Per the original note, values below 500 gave more precise matches in testing.
VECTOR_SEARCH_SCORE_THRESHOLD = 0

# "nltk_data" directory located one level above this file's directory.
NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")

FLAG_USER_NAME = uuid.uuid4().hex

# Whether cross-domain access is open; False by default, set to True to enable.
OPEN_CROSS_DOMAIN = False
57
+
58
def similarity_search_with_score_by_vector(
        self, embedding: List[float], k: int = 4
) -> "List[Tuple[Document, float]]":
    """Search the vector index and optionally merge neighbouring chunks into longer contexts.

    ``self`` is the vector store; this function reads its ``index``,
    ``index_to_docstore_id``, ``docstore``, ``chunk_conent``,
    ``score_threshold`` and ``chunk_size`` attributes.

    Args:
        embedding: query vector.
        k: number of nearest neighbours requested from the index.

    Returns:
        A list of documents, each with ``metadata["score"]`` set.  With
        ``chunk_conent`` false these are the raw hits; otherwise runs of
        consecutive index positions are merged into one document each.
        An empty list is returned when nothing matches.
        NOTE(review): the annotation promises ``(Document, float)`` tuples but
        bare documents are appended; confirm what callers expect.

    Raises:
        ValueError: a docstore lookup did not return a ``Document``.
    """

    def seperate_list(ls: List[int]) -> List[List[int]]:
        # Group a sorted, non-empty list of ints into runs of consecutive values.
        lists = []
        ls1 = [ls[0]]
        for pos in range(1, len(ls)):
            if ls[pos - 1] + 1 == ls[pos]:
                ls1.append(ls[pos])
            else:
                lists.append(ls1)
                ls1 = [ls[pos]]
        lists.append(ls1)
        return lists

    scores, indices = self.index.search(np.array([embedding], dtype=np.float32), k)
    docs = []
    id_set = set()
    store_len = len(self.index_to_docstore_id)
    for j, i in enumerate(indices[0]):
        if i == -1 or 0 < self.score_threshold < scores[0][j]:
            # This happens when not enough docs are returned, or the hit's
            # score is above the (enabled) threshold.
            continue
        _id = self.index_to_docstore_id[i]
        doc = self.docstore.search(_id)
        if not self.chunk_conent:
            if not isinstance(doc, Document):
                raise ValueError(f"Could not find document for id {_id}, got {doc}")
            doc.metadata["score"] = int(scores[0][j])
            docs.append(doc)
            continue
        id_set.add(i)
        docs_len = len(doc.page_content)
        # Grow the context outwards from the hit, one position on each side per
        # step, until adding a neighbour would exceed chunk_size.
        # (Loop variables renamed so they no longer shadow parameter `k`.)
        for offset in range(1, max(i, store_len - i)):
            break_flag = False
            for neighbor in [i + offset, i - offset]:
                if 0 <= neighbor < len(self.index_to_docstore_id):
                    _id0 = self.index_to_docstore_id[neighbor]
                    doc0 = self.docstore.search(_id0)
                    if docs_len + len(doc0.page_content) > self.chunk_size:
                        break_flag = True
                        break
                    elif doc0.metadata["source"] == doc.metadata["source"]:
                        # Only chunks from the same source are merged.
                        docs_len += len(doc0.page_content)
                        id_set.add(neighbor)
            if break_flag:
                break
    if not self.chunk_conent:
        return docs
    if len(id_set) == 0:
        # Fix: this early return used to require score_threshold > 0, so an
        # empty result with the threshold disabled crashed in seperate_list([]).
        return []
    id_list = sorted(list(id_set))
    id_lists = seperate_list(id_list)
    for id_seq in id_lists:
        for doc_pos in id_seq:
            if doc_pos == id_seq[0]:
                _id = self.index_to_docstore_id[doc_pos]
                doc = self.docstore.search(_id)
            else:
                _id0 = self.index_to_docstore_id[doc_pos]
                doc0 = self.docstore.search(_id0)
                # NOTE(review): this appends to the document object returned by
                # the docstore; confirm that mutating it in place is intended.
                doc.page_content += " " + doc0.page_content
        if not isinstance(doc, Document):
            raise ValueError(f"Could not find document for id {_id}, got {doc}")
        # Score of a merged run: the smallest score among its members that were direct hits.
        hit_positions = [indices[0].tolist().index(i) for i in id_seq if i in indices[0]]
        doc_score = min([scores[0][hit] for hit in hit_positions])
        doc.metadata["score"] = int(doc_score)
        docs.append(doc)
    return docs
127
+
128
+
129
class LocalDocQA:
    """Builds a FAISS vector store from local files and retrieves context for a query."""

    llm: object = None
    embeddings: object = None
    top_k: int = VECTOR_SEARCH_TOP_K
    chunk_size: int = CHUNK_SIZE
    chunk_conent: bool = True
    score_threshold: int = VECTOR_SEARCH_SCORE_THRESHOLD

    def init_cfg(self,
                 top_k=VECTOR_SEARCH_TOP_K,
                 ):
        """Reset ``llm`` to ``None`` and set the number of chunks to retrieve."""
        self.llm = None
        self.top_k = top_k

    def init_knowledge_vector_store(self,
                                    filepath,
                                    vs_path: "str | os.PathLike | None" = None,
                                    sentence_size=SENTENCE_SIZE,
                                    text2vec=None):
        """Load documents and add them to the vector store saved at ``vs_path``.

        Args:
            filepath: a single file path, a directory path, or an iterable of
                file paths.
            vs_path: directory of the vector store.  If it already exists its
                store is loaded and extended, otherwise a new store is built.
            sentence_size: sentence length handed to ``load_file``.
            text2vec: embedding object handed to FAISS.

        Returns:
            ``(vs_path, loaded_files)`` on success, or ``None`` when
            ``filepath`` is a string that does not exist or is a single file
            that fails to load.

        Raises:
            RuntimeError: no document could be loaded.
        """
        loaded_files = []
        failed_files = []
        # Defined up front so an existing path that is neither a file nor a
        # directory ends in the RuntimeError below instead of a NameError.
        docs = []
        if isinstance(filepath, str):
            if not os.path.exists(filepath):
                print("路径不存在")
                return None
            elif os.path.isfile(filepath):
                file = os.path.split(filepath)[-1]
                try:
                    # Fix: honour the sentence_size argument (the module
                    # constant was always used before).
                    docs = load_file(filepath, sentence_size)
                    print(f"{file} 已成功加载")
                    loaded_files.append(filepath)
                except Exception as e:
                    print(e)
                    print(f"{file} 未能成功加载")
                    return None
            elif os.path.isdir(filepath):
                docs = []
                # Fix: the file does `import tqdm`, so the progress bar is
                # `tqdm.tqdm`; calling the module itself raises TypeError.
                for file in tqdm.tqdm(os.listdir(filepath), desc="加载文件"):
                    fullfilepath = os.path.join(filepath, file)
                    try:
                        docs += load_file(fullfilepath, sentence_size)
                        loaded_files.append(fullfilepath)
                    except Exception as e:
                        print(e)
                        failed_files.append(file)

                if len(failed_files) > 0:
                    print("以下文件未能成功加载:")
                    for file in failed_files:
                        print(f"{file}\n")

        else:
            docs = []
            for file in filepath:
                docs += load_file(file, sentence_size)
                print(f"{file} 已成功加载")
                loaded_files.append(file)

        if len(docs) > 0:
            print("文件加载完毕,正在生成向量库")
            if vs_path and os.path.isdir(vs_path):
                try:
                    self.vector_store = FAISS.load_local(vs_path, text2vec)
                    self.vector_store.add_documents(docs)
                except Exception:
                    # The existing store could not be loaded or extended:
                    # rebuild it from the new documents.
                    self.vector_store = FAISS.from_documents(docs, text2vec)
            else:
                self.vector_store = FAISS.from_documents(docs, text2vec)  # docs is a list of Document

            self.vector_store.save_local(vs_path)
            return vs_path, loaded_files
        else:
            raise RuntimeError("文件加载失败,请检查文件格式是否正确")

    def get_loaded_file(self, vs_path):
        """Return the set of source names in the store, taken as the text after ``vs_path`` in each path."""
        ds = self.vector_store.docstore
        return {ds._dict[k].metadata['source'].split(vs_path)[-1] for k in ds._dict}

    def get_knowledge_based_conent_test(self, query, vs_path, chunk_conent,
                                        score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
                                        vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_size=CHUNK_SIZE,
                                        text2vec=None):
        """Retrieve the chunks related to ``query`` and build a prompt from them.

        Args:
            query: the question to look up.
            vs_path: path of the knowledge base (vector store directory).
            chunk_conent: whether neighbouring chunks are merged into the hits.
            score_threshold: score threshold for matches.
            vector_search_top_k: number of chunks requested (default 5).
            chunk_size: length limit of a merged context.
            text2vec: embedding object handed to FAISS.

        Returns:
            ``(response, prompt)`` where ``response`` holds the query and the
            source documents; ``prompt`` is ``""`` when nothing was found.
        """
        self.vector_store = FAISS.load_local(vs_path, text2vec)
        self.vector_store.chunk_conent = chunk_conent
        self.vector_store.score_threshold = score_threshold
        self.vector_store.chunk_size = chunk_size

        embedding = self.vector_store.embedding_function.embed_query(query)
        related_docs_with_score = similarity_search_with_score_by_vector(self.vector_store, embedding, k=vector_search_top_k)

        if not related_docs_with_score:
            response = {"query": query,
                        "source_documents": []}
            return response, ""
        prompt = f"{query}. 你必须利用以下文档中包含的信息回答这个问题: \n\n---\n\n"
        prompt += "\n\n".join([f"({k}): " + doc.page_content for k, doc in enumerate(related_docs_with_score)])
        prompt += "\n\n---\n\n"
        prompt = prompt.encode('utf-8', 'ignore').decode()  # avoid reading non-utf8 chars
        response = {"query": query, "source_documents": related_docs_with_score}
        return response, prompt
239
+
240
+
241
+
242
+
243
def construct_vector_store(vs_id, vs_path, files, sentence_size, history, one_conent, one_content_segmentation, text2vec):
    """Copy ``files`` into ``<vs_path>/<vs_id>`` and build or extend the vector store there.

    ``history``, ``one_conent`` and ``one_content_segmentation`` are accepted
    but not used by this function.

    Returns:
        ``(local_doc_qa, vs_path)``: the ``LocalDocQA`` helper and the path
        returned by ``init_knowledge_vector_store``.
    """
    for file in files:
        assert os.path.exists(file), "输入文件不存在:" + file
    import nltk
    if NLTK_DATA_PATH not in nltk.data.path: nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
    local_doc_qa = LocalDocQA()
    local_doc_qa.init_cfg()
    store_dir = os.path.join(vs_path, vs_id)
    # exist_ok avoids the check-then-create race when two callers build the
    # same store at the same time.
    os.makedirs(store_dir, exist_ok=True)
    filelist = []
    for file in files:
        file_name = file.name if not isinstance(file, str) else file
        filename = os.path.split(file_name)[-1]
        target = os.path.join(store_dir, filename)
        shutil.copyfile(file_name, target)
        filelist.append(target)
    # The unused `file_status` message that used to be built from the loaded
    # files was dead code and has been removed.
    vs_path, _loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, store_dir, sentence_size, text2vec)
    return local_doc_qa, vs_path
267
+
268
@Singleton
class knowledge_archive_interface():
    """Lock-guarded facade over the vector-store helpers for feeding and querying an archive."""

    def __init__(self) -> None:
        self.threadLock = threading.Lock()
        self.current_id = ""
        self.kai_path = None
        self.qa_handle = None
        self.text2vec_large_chinese = None

    def get_chinese_text2vec(self):
        """Return the embedding model, creating it on first use."""
        if self.text2vec_large_chinese is None:
            # Warm up the text vectorisation module.
            from toolbox import ProxyNetworkActivate
            print('Checking Text2vec ...')
            from langchain.embeddings.huggingface import HuggingFaceEmbeddings
            with ProxyNetworkActivate('Download_LLM'):  # temporarily activate the proxy network
                self.text2vec_large_chinese = HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

        return self.text2vec_large_chinese

    def feed_archive(self, file_manifest, vs_path, id="default"):
        """Add the files in ``file_manifest`` to the archive ``id`` stored under ``vs_path``."""
        # `with` releases the lock even when building the store raises; the
        # previous acquire()/release() pair left it held forever in that case.
        with self.threadLock:
            self.current_id = id
            self.qa_handle, self.kai_path = construct_vector_store(
                vs_id=self.current_id,
                vs_path=vs_path,
                files=file_manifest,
                sentence_size=100,
                history=[],
                one_conent="",
                one_content_segmentation="",
                text2vec=self.get_chinese_text2vec(),
            )

    def get_current_archive_id(self):
        """Return the id of the archive used most recently."""
        return self.current_id

    def get_loaded_file(self, vs_path):
        """Return the loaded file names as reported by the current QA handle."""
        return self.qa_handle.get_loaded_file(vs_path)

    def answer_with_archive_by_id(self, txt, id, vs_path):
        """Retrieve context for ``txt`` from the archive ``id``.

        If ``id`` differs from the current archive, the QA handle is rebuilt
        for it first (with no new files).

        Returns:
            The ``(resp, prompt)`` pair from ``get_knowledge_based_conent_test``.
        """
        # Retrieval settings used for every query issued through this method.
        score_threshold = 0
        top_k = 4
        chunk_size = 512
        with self.threadLock:  # released on every exit path, including exceptions
            if not self.current_id == id:
                self.current_id = id
                self.qa_handle, self.kai_path = construct_vector_store(
                    vs_id=self.current_id,
                    vs_path=vs_path,
                    files=[],
                    sentence_size=100,
                    history=[],
                    one_conent="",
                    one_content_segmentation="",
                    text2vec=self.get_chinese_text2vec(),
                )
            resp, prompt = self.qa_handle.get_knowledge_based_conent_test(
                query=txt,
                vs_path=self.kai_path,
                score_threshold=score_threshold,
                vector_search_top_k=top_k,
                chunk_conent=True,
                chunk_size=chunk_size,
                text2vec=self.get_chinese_text2vec(),
            )
        return resp, prompt
crazy_functions/互动小游戏.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui, update_ui_lastest_msg
2
+ from crazy_functions.multi_stage.multi_stage_utils import GptAcademicGameBaseState
3
+ from crazy_functions.crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
4
+ from request_llms.bridge_all import predict_no_ui_long_connection
5
+ from crazy_functions.game_fns.game_utils import get_code_block, is_same_thing
6
+
7
@CatchException
def 随机小游戏(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry point that drives the `MiniGame_ResumeStory` game.

    The incoming `history` is discarded; the game is continued with an empty one.
    """
    from crazy_functions.game_fns.game_interactive_story import MiniGame_ResumeStory as game_cls

    # Start from an empty history.
    history = []
    # Per the original note: continue the existing game instance when one was
    # already initialised, otherwise create a new one.
    game = game_cls.sync_state(
        chatbot,
        llm_kwargs,
        game_cls,
        plugin_name='MiniGame_ResumeStory',
        callback_fn='crazy_functions.互动小游戏->随机小游戏',
        lock_plugin=True,
    )
    yield from game.continue_game(prompt, chatbot, history)
23
+
24
+
25
@CatchException
def 随机小游戏1(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """Plugin entry point that drives the `MiniGame_ASCII_Art` game.

    The incoming `history` is discarded; the game is continued with an empty one.
    """
    from crazy_functions.game_fns.game_ascii_art import MiniGame_ASCII_Art as game_cls

    # Start from an empty history.
    history = []
    # Per the original note: continue the existing game instance when one was
    # already initialised, otherwise create a new one.
    game = game_cls.sync_state(
        chatbot,
        llm_kwargs,
        game_cls,
        plugin_name='MiniGame_ASCII_Art',
        callback_fn='crazy_functions.互动小游戏->随机小游戏1',
        lock_plugin=True,
    )
    yield from game.continue_game(prompt, chatbot, history)
crazy_functions/知识库问答.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import CatchException, update_ui, ProxyNetworkActivate, update_ui_lastest_msg, get_log_folder, get_user
2
+ from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, get_files_from_everything
3
+
4
+ install_msg ="""
5
+
6
+ 1. python -m pip install torch --index-url https://download.pytorch.org/whl/cpu
7
+
8
+ 2. python -m pip install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
9
+
10
+ 3. python -m pip install unstructured[all-docs] --upgrade
11
+
12
+ 4. python -c 'import nltk; nltk.download("punkt")'
13
+ """
14
+
15
@CatchException
def 知识库文件注入(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    Add the files found via `txt` to a knowledge archive and report the result in the chat.

    txt             text typed into the input box; here it is handed to `get_files_from_everything` to locate files
    llm_kwargs      gpt model parameters such as temperature and top_p (not used by this plugin)
    plugin_kwargs   plugin parameters; a non-empty `advanced_arg` selects the archive id, otherwise 'default'
    chatbot         handle of the chat display, used to show messages to the user
    history         chat history (discarded here)
    system_prompt   silent system prompt for gpt (not used by this plugin)
    web_port        port the application is running on (not used by this plugin)
    """
    history = []    # drop the history so the input cannot overflow

    # < -------------------- read parameters --------------- >
    if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""):
        plugin_kwargs.pop("advanced_arg")
    kai_id = plugin_kwargs.get("advanced_arg", 'default')

    chatbot.append((f"向`{kai_id}`知识库中添加文件。", "[Local Message] 从一批文件(txt, md, tex)中读取数据构建知识库, 然后进行问答。"))
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI

    # < -------------------- resolve dependencies --------------- >
    try:
        from crazy_functions.vector_fns.vector_database import knowledge_archive_interface
    except Exception as e:
        # Best effort: show the install instructions instead of crashing the plugin.
        chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return

    # < -------------------- collect files --------------- >
    file_manifest = []
    spl = ["txt", "doc", "docx", "email", "epub", "html", "json", "md", "msg", "pdf", "ppt", "pptx", "rtf"]
    for sp in spl:
        _, file_manifest_tmp, _ = get_files_from_everything(txt, type=f'.{sp}')
        file_manifest += file_manifest_tmp

    if not file_manifest:
        chatbot.append(["没有找到任何可读取文件", "当前支持的格式包括: txt, md, docx, pptx, pdf, json等"])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return

    # < ------------------- warm up the text embedding model --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在预热文本向量化模组, 如果是第一次运行, 将消耗较长时间下载中文向量化模型..."])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    print('Checking Text2vec ...')
    from langchain.embeddings.huggingface import HuggingFaceEmbeddings
    with ProxyNetworkActivate('Download_LLM'):    # temporarily activate the proxy network
        # The instance is discarded; it is created here only as a warm-up step.
        HuggingFaceEmbeddings(model_name="GanymedeNil/text2vec-large-chinese")

    # < ------------------- build the knowledge archive --------------- >
    chatbot.append(['<br/>'.join(file_manifest), "正在构建知识库..."])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
    print('Establishing knowledge archive ...')
    with ProxyNetworkActivate('Download_LLM'):    # temporarily activate the proxy network
        kai = knowledge_archive_interface()
        vs_path = get_log_folder(user=get_user(chatbot), plugin_name='vec_store')
        kai.feed_archive(file_manifest=file_manifest, vs_path=vs_path, id=kai_id)
    kai_files = kai.get_loaded_file(vs_path=vs_path)
    kai_files = '<br/>'.join(kai_files)
    chatbot.append(['构建完成', f"当前知识库内的有效文件:\n\n---\n\n{kai_files}\n\n---\n\n请切换至“知识库问答”插件进行知识库访问, 或者使用此插件继续上传更多文件。"])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
85
+
86
@CatchException
def 读取知识库作答(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port=-1):
    """Answer `txt` using a knowledge archive.

    The archive id comes from a non-empty `plugin_kwargs["advanced_arg"]`,
    otherwise 'default'. The prompt returned by the archive lookup is sent to
    the model, and `(prompt, reply)` is appended to `history`.
    """
    # < -------------------- resolve dependencies --------------- >
    try:
        from crazy_functions.vector_fns.vector_database import knowledge_archive_interface
    except Exception as e:
        chatbot.append(["依赖不足", f"{str(e)}\n\n导入依赖失败。请用以下命令安装" + install_msg])
        yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
        return

    # < -------------------- look up the archive --------------- >
    kai = knowledge_archive_interface()

    # An empty advanced argument is treated the same as a missing one.
    if plugin_kwargs.get("advanced_arg", None) == "":
        plugin_kwargs.pop("advanced_arg")
    kai_id = plugin_kwargs.get("advanced_arg", 'default')
    store_dir = get_log_folder(user=get_user(chatbot), plugin_name='vec_store')
    resp, prompt = kai.answer_with_archive_by_id(txt, kai_id, store_dir)

    # < -------------------- ask the model --------------- >
    shown_prompt = f'[知识库 {kai_id}] ' + prompt
    chatbot.append((txt, shown_prompt))
    # Refresh the UI right away, because the model request takes a while.
    yield from update_ui(chatbot=chatbot, history=history)
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=prompt,
        inputs_show_user=txt,
        llm_kwargs=llm_kwargs,
        chatbot=chatbot,
        history=[],
        sys_prompt=system_prompt,
    )
    history.extend([prompt, gpt_say])
    yield from update_ui(chatbot=chatbot, history=history) # refresh the UI
docs/GithubAction+AllCapacityBeta ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacity --network=host --build-arg http_proxy=http://localhost:10881 --build-arg https_proxy=http://localhost:10881 .
# docker build -t gpt-academic-all-capacity -f docs/GithubAction+AllCapacityBeta --network=host .
# docker run -it --net=host gpt-academic-all-capacity bash

# Start from an NVIDIA-based image for GPU support (the CUDA version reported by nvidia-smi on the host must be >= 11.3)
FROM fuqingxu/11.3.1-runtime-ubuntu20.04-with-texlive:latest

# use python3 as the system default python
WORKDIR /gpt
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.8

# # Optional: switch the pip index (the three lines below may be deleted)
# RUN echo '[global]' > /etc/pip.conf && \
#     echo 'index-url = https://mirrors.aliyun.com/pypi/simple/' >> /etc/pip.conf && \
#     echo 'trusted-host = mirrors.aliyun.com' >> /etc/pip.conf

# Install PyTorch
RUN python3 -m pip install torch torchvision --extra-index-url https://download.pytorch.org/whl/cu113
# Install pip dependencies
RUN python3 -m pip install openai numpy arxiv rich
RUN python3 -m pip install colorama Markdown pygments pymupdf
RUN python3 -m pip install python-docx moviepy pdfminer
RUN python3 -m pip install zh_langchain==0.2.1 pypinyin
RUN python3 -m pip install rarfile py7zr
RUN python3 -m pip install aliyun-python-sdk-core==2.13.3 pyOpenSSL webrtcvad scipy git+https://github.com/aliyun/alibabacloud-nls-python-sdk.git
# Fetch the source
WORKDIR /gpt
RUN git clone --depth=1 https://github.com/binary-husky/gpt_academic.git
WORKDIR /gpt/gpt_academic
RUN git clone --depth=1 https://github.com/OpenLMLab/MOSS.git request_llms/moss

RUN python3 -m pip install -r requirements.txt
RUN python3 -m pip install -r request_llms/requirements_moss.txt
RUN python3 -m pip install -r request_llms/requirements_qwen.txt
RUN python3 -m pip install -r request_llms/requirements_chatglm.txt
RUN python3 -m pip install -r request_llms/requirements_newbing.txt
RUN python3 -m pip install nougat-ocr

# Warm up the tiktoken module
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'

# Extra dependencies for the knowledge-base plugin
# (the apt package lists are removed in the same layer so they do not bloat the image)
RUN apt-get update && apt-get install -y libgl1 && rm -rf /var/lib/apt/lists/*
RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
# The extras spec is quoted so the shell cannot treat the brackets as a glob pattern
RUN pip3 install 'unstructured[all-docs]' --upgrade
RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'
RUN rm -rf /usr/local/lib/python3.8/dist-packages/tests


# COPY .cache /root/.cache
# COPY config_private.py config_private.py
# Launch
CMD ["python3", "-u", "main.py"]
docs/GithubAction+NoLocal+Vectordb ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# This Dockerfile builds the "no local model" environment; to use local models such as chatglm, see docs/Dockerfile+ChatGLM
# Build: edit `config.py` first, then run: docker build -t gpt-academic-nolocal-vs -f docs/GithubAction+NoLocal+Vectordb .
# Run: docker run --rm -it --net=host gpt-academic-nolocal-vs
FROM python:3.11

# Working directory
WORKDIR /gpt

# Copy the project files
COPY . .

# Install dependencies
RUN pip3 install -r requirements.txt

# Extra dependencies for the knowledge-base plugin
# (the apt package lists are removed in the same layer so they do not bloat the image)
RUN apt-get update && apt-get install -y libgl1 && rm -rf /var/lib/apt/lists/*
RUN pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
RUN pip3 install transformers protobuf langchain sentence-transformers faiss-cpu nltk beautifulsoup4 bitsandbytes tabulate icetk --upgrade
# The extras spec is quoted so the shell cannot treat the brackets as a glob pattern
RUN pip3 install 'unstructured[all-docs]' --upgrade
RUN python3 -c 'from check_proxy import warm_up_vectordb; warm_up_vectordb()'

# Optional: warm up modules
RUN python3 -c 'from check_proxy import warm_up_modules; warm_up_modules()'

# Launch
CMD ["python3", "-u", "main.py"]
request_llms/bridge_qwen_local.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name = "Qwen_Local"
2
+ cmd_to_install = "`pip install -r request_llms/requirements_qwen_local.txt`"
3
+
4
+ from toolbox import ProxyNetworkActivate, get_conf
5
+ from .local_llm_class import LocalLLMHandle, get_local_llm_predict_fns
6
+
7
+
8
+
9
+ # ------------------------------------------------------------------------------------------------------------------------
10
+ # 🔌💻 Local Model
11
+ # ------------------------------------------------------------------------------------------------------------------------
12
class GetQwenLMHandle(LocalLLMHandle):
    """`LocalLLMHandle` subclass for the locally loaded Qwen model."""

    def load_model_info(self):
        """Copy the module-level model name and install hint onto the handle."""
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        self.cmd_to_install = cmd_to_install
        self.model_name = model_name

    def load_model_and_tokenizer(self):
        """Load the tokenizer and model named by `QWEN_LOCAL_MODEL_SELECTION`.

        Returns:
            The `(model, tokenizer)` pair, also kept on `self._model` / `self._tokenizer`.
        """
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        from transformers import AutoModelForCausalLM, AutoTokenizer
        from transformers.generation import GenerationConfig

        with ProxyNetworkActivate('Download_LLM'):
            model_id = get_conf('QWEN_LOCAL_MODEL_SELECTION')
            self._tokenizer = AutoTokenizer.from_pretrained(
                model_id, trust_remote_code=True, resume_download=True
            )
            loaded = AutoModelForCausalLM.from_pretrained(
                model_id, device_map="auto", trust_remote_code=True
            )
            loaded = loaded.eval()
            # Per the original note: generation length, top_p and similar
            # hyper-parameters can be customised through this config.
            loaded.generation_config = GenerationConfig.from_pretrained(
                model_id, trust_remote_code=True
            )
            self._model = loaded

        return self._model, self._tokenizer

    def llm_stream_generator(self, **kwargs):
        """Yield each response produced by the model's `chat_stream` for the query."""
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the child process
        # All five entries are read (a missing one raises KeyError), although
        # only the query and the history are passed on to `chat_stream`.
        expected = ('query', 'max_length', 'top_p', 'temperature', 'history')
        query, max_length, top_p, temperature, history = [kwargs[key] for key in expected]

        stream = self._model.chat_stream(self._tokenizer, query, history=history)
        for partial in stream:
            yield partial

    def try_to_import_special_deps(self, **kwargs):
        """Import `modelscope`, which raises if the requirements are not installed."""
        # 🏃‍♂️🏃‍♂️🏃‍♂️ runs in the main process
        from importlib import import_module
        import_module('modelscope')
54
+
55
+
56
+ # ------------------------------------------------------------------------------------------------------------------------
57
+ # 🔌💻 GPT-Academic Interface
58
+ # ------------------------------------------------------------------------------------------------------------------------
59
+ predict_no_ui_long_connection, predict = get_local_llm_predict_fns(GetQwenLMHandle, model_name)
request_llms/com_qwenapi.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from http import HTTPStatus
2
+ from toolbox import get_conf
3
+ import threading
4
+ import logging
5
+
6
+ timeout_bot_msg = '[Local Message] Request timeout. Network error.'
7
+
8
class QwenRequestInstance():
    """Streaming client for the DashScope `Generation` API (Qwen models)."""

    def __init__(self):
        """Read `DASHSCOPE_API_KEY` from the configuration and hand it to `dashscope`.

        Raises:
            RuntimeError: if the configured key is an empty string.
        """
        import dashscope
        self.time_to_yield_event = threading.Event()
        self.time_to_exit_event = threading.Event()
        self.result_buf = ""

        api_key = get_conf("DASHSCOPE_API_KEY")
        if api_key == '':
            raise RuntimeError('请配置 DASHSCOPE_API_KEY')
        dashscope.api_key = api_key

    @staticmethod
    def _chunk_text(choice):
        """Return the text carried by one streamed choice, or "" when it has none."""
        try:
            text = choice.message.content
        except (AttributeError, KeyError, TypeError):
            return ""
        return text if isinstance(text, str) else ""

    def generate(self, inputs, llm_kwargs, history, system_prompt):
        """Stream a reply, yielding the accumulated text after every chunk.

        Args:
            inputs: the current user message.
            llm_kwargs: must contain `llm_model` ('qwen-turbo', 'qwen-plus' or
                'qwen-max'); `top_p` and `temperature` are optional.
            history: earlier conversation, passed to `generate_message_payload`.
            system_prompt: passed to `generate_message_payload`.

        Yields:
            `self.result_buf`, the text accumulated so far. On a non-OK status
            an error message is appended to it and the stream stops.

        Returns:
            The final `self.result_buf` (as the generator's return value).

        Raises:
            KeyError: if `llm_kwargs['llm_model']` is not a supported model.
        """
        from dashscope import Generation
        QWEN_MODEL = {
            'qwen-turbo': Generation.Models.qwen_turbo,
            'qwen-plus': Generation.Models.qwen_plus,
            'qwen-max': Generation.Models.qwen_max,
        }[llm_kwargs['llm_model']]
        top_p = llm_kwargs.get('top_p', 0.8)
        # Nudge top_p off the exact bounds 0 and 1
        # (presumably the service rejects them -- TODO confirm).
        if top_p == 0: top_p += 1e-5
        if top_p == 1: top_p -= 1e-5

        self.result_buf = ""
        responses = Generation.call(
            model=QWEN_MODEL,
            messages=generate_message_payload(inputs, llm_kwargs, history, system_prompt),
            top_p=top_p,
            temperature=llm_kwargs.get('temperature', 1.0),
            result_format='message',
            stream=True,
            incremental_output=True
        )

        for response in responses:
            if response.status_code != HTTPStatus.OK:
                self.result_buf += f"[Local Message] 请求错误:状态码:{response.status_code},错误码:{response.code},消息:{response.message}"
                yield self.result_buf
                break

            choice = response.output.choices[0]
            finish_reason = choice.finish_reason
            if finish_reason == 'stop':
                # With incremental output the closing chunk may still carry the
                # last piece of text; keep it instead of dropping it.
                self.result_buf += self._chunk_text(choice)
                yield self.result_buf
                break
            elif finish_reason == 'length':
                # Keep any text in the closing chunk, then add the truncation notice.
                self.result_buf += self._chunk_text(choice)
                self.result_buf += "[Local Message] 生成长度过长,后续输出被截断"
                yield self.result_buf
                break
            else:
                self.result_buf += choice.message.content
                yield self.result_buf
        logging.info(f'[raw_input] {inputs}')
        logging.info(f'[response] {self.result_buf}')
        return self.result_buf
67
+
68
+
69
def generate_message_payload(inputs, llm_kwargs, history, system_prompt):
    """Build the chat `messages` list from the system prompt, history and new input.

    The system prompt ('Hello!' when empty) is sent as a first user message,
    answered by a fixed "Certainly!". `history` is read as consecutive
    (question, answer) pairs; a trailing unpaired entry is ignored.

    - A pair with an empty question overwrites the content of the latest message
      with its answer.
    - A pair whose answer is empty or equals `timeout_bot_msg` is skipped.
    - Every other pair is appended as a user/assistant exchange.

    `inputs` is appended last as the current user message. `llm_kwargs` is unused.
    """
    opening = system_prompt if system_prompt != '' else 'Hello!'
    messages = [
        {"role": "user", "content": opening},
        {"role": "assistant", "content": "Certainly!"},
    ]

    for turn in range(len(history) // 2):
        question = history[2 * turn]
        answer = history[2 * turn + 1]
        if question == "":
            messages[-1]['content'] = answer
            continue
        if answer == "" or answer == timeout_bot_msg:
            continue
        messages.append({"role": "user", "content": question})
        messages.append({"role": "assistant", "content": answer})

    messages.append({"role": "user", "content": inputs})
    return messages
request_llms/requirements_qwen_local.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ modelscope
2
+ transformers_stream_generator
3
+ auto-gptq
4
+ optimum
5
+ urllib3<2
tests/test_vector_plugins.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 对项目中的知识库(向量库)插件进行测试。运行方法:直接运行 python tests/test_vector_plugins.py
3
+ """
4
+
5
+
6
import os, sys


def validate_path():
    """Change the working directory to the assumed project root and append it to `sys.path`.

    The root is taken to be the parent of the directory containing this file.
    `__file__` is made absolute first: when it is a bare relative filename,
    `os.path.dirname(__file__)` is '' and appending '/..' would resolve to
    the filesystem root.
    """
    dir_name = os.path.dirname(os.path.abspath(__file__))
    root_dir_assume = os.path.abspath(os.path.join(dir_name, os.pardir))
    os.chdir(root_dir_assume)
    sys.path.append(root_dir_assume)


validate_path()  # switch to the project root

if __name__ == "__main__":
    from tests.test_utils import plugin_test

    plugin_test(plugin='crazy_functions.知识库问答->知识库文件注入', main_input="./README.md")

    plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="What is the installation method?")

    plugin_test(plugin='crazy_functions.知识库问答->读取知识库作答', main_input="远程云服务器部署?")
themes/cookies.py ADDED
File without changes