import openai
import requests
import random
import json
from hashlib import md5
from os import path as osp
import os
import csv
import threading

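# Short model aliases mapped to their OpenRouter model identifiers.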
MODEL_NAME_DICT = {
    "gpt-4":"openai/gpt-4",
    "gpt-4o":"openai/gpt-4o",
    "gpt-4o-mini":"openai/gpt-4o-mini",
    "gpt-3.5-turbo":"openai/gpt-3.5-turbo",
    "deepseek-r1":"deepseek/deepseek-r1",
    "deepseek-v3":"deepseek/deepseek-chat",
    "gemini-2":"google/gemini-2.0-flash-001",
    "gemini-1.5":"google/gemini-flash-1.5",
    "llama3-70b": "meta-llama/llama-3.3-70b-instruct",
    "qwen-turbo":"qwen/qwen-turbo",
    "qwen-plus":"qwen/qwen-plus",
    "qwen-max":"qwen/qwen-max",
    "qwen-2.5-72b":"qwen/qwen-2.5-72b-instruct",
    "claude-3.5-sonnet":"anthropic/claude-3.5-sonnet",
    "phi-4":"microsoft/phi-4",
}

def get_models(model_name):
    # Return an LLM client for the given model name; prefer OpenRouter when its API key is configured.
    if os.getenv("OPENROUTER_API_KEY", default="") and "YOUR" not in os.getenv("OPENROUTER_API_KEY", default="") and model_name in MODEL_NAME_DICT:
        from modules.llm.OpenRouter import OpenRouter
        return OpenRouter(model=MODEL_NAME_DICT[model_name])
    elif model_name == 'openai':
        from modules.llm.LangChainGPT import LangChainGPT
        return LangChainGPT()
    elif model_name.startswith('gpt-3.5'):
        from modules.llm.LangChainGPT import LangChainGPT
        return LangChainGPT(model="gpt-3.5-turbo")
    elif model_name == 'gpt-4':
        from modules.llm.LangChainGPT import LangChainGPT
        return LangChainGPT(model="gpt-4")
    elif model_name == 'gpt-4o':
        from modules.llm.LangChainGPT import LangChainGPT
        return LangChainGPT(model="gpt-4o")
    elif model_name == "gpt-4o-mini":
        from modules.llm.LangChainGPT import LangChainGPT
        return LangChainGPT(model="gpt-4o-mini")
    elif model_name.startswith("claude-3-5"):
        from modules.llm.Claude import Claude
        return Claude(model="claude-3-5-sonnet-20241022")
    elif model_name in ["qwen-turbo","qwen-plus","qwen-max"]:
        from modules.llm.Qwen import Qwen
        return Qwen(model = model_name)
    elif model_name.startswith('doubao'):
        from modules.llm.Doubao import Doubao
        return Doubao()
    elif model_name.startswith('gemini-2'):
        from modules.llm.Gemini import Gemini
        return Gemini("gemini-2.0-flash")
    elif model_name.startswith('gemini-1.5'):
        from modules.llm.Gemini import Gemini
        return Gemini("gemini-1.5-flash")
    elif model_name.startswith("deepseek"):
        from modules.llm.DeepSeek import DeepSeek
        return DeepSeek()
    else:
        print(f"Warning: undefined model {model_name}, using gpt-4o-mini instead.")
        from modules.llm.LangChainGPT import LangChainGPT
        return LangChainGPT(model="gpt-4o-mini")

def load_config(filepath):
    with open(filepath, "r", encoding="utf-8") as file:
        args = json.load(file)
    return args

def save_config(args,filepath):
    with open(filepath, "w", encoding ="utf8") as json_file:
        json.dump(args,json_file,indent = 1,ensure_ascii = False)
    return

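# Resolve a path relative to this file's directory unless it is already absolute.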
def smart_path(path):
    file_dir = osp.dirname(osp.abspath(__file__))
    if osp.isabs(path):
        return path
    else:
        return osp.join(file_dir,path)
args = load_config(smart_path("./config.json"))

# Baidu preparation
endpoint = "http://api.fanyi.baidu.com"
path = "/api/trans/vip/translate"
url = endpoint + path
headers = {"Content-Type": "application/x-www-form-urlencoded"}
# MD5 helper used to sign Baidu translate requests
def make_md5(s, encoding="utf-8"):
    return md5(s.encode(encoding)).hexdigest()

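# Call the Baidu translate API; each request is signed with MD5(appid + query + salt + secret_key).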
def get_baidu_completion(text,api_id,api_key,from_lang,to_lang):
    salt = random.randint(32768, 65536)
    sign = make_md5(api_id + text + str(salt) + api_key)
    payload = {"appid": api_id, "q": text, "from": from_lang, "to": to_lang, "salt": salt, "sign": sign}
    r = requests.post(url, params=payload, headers=headers)
    result = r.json()
    return result["trans_result"][0]["dst"]

# OPENAI preparation
openai_api_key = args["openai_api_settings"]["openai_api_key"]
time_limit = float(args["openai_api_settings"]["time_limit"])
client = openai.OpenAI(api_key = openai_api_key)

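# Runs an OpenAI chat completion in a background thread so the caller can enforce a wall-clock timeout.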
class GPTThread(threading.Thread):
    def __init__(self, model, messages, temperature):
        super().__init__()
        self.model = model
        self.messages = messages
        self.temperature = temperature
        self.result = ""
    def terminate(self):
        # Note: this only clears a flag; it cannot abort the in-flight API request.
        self._running = False
    def run(self):
        response = client.chat.completions.create(
            model=self.model,
            messages=self.messages,
            temperature=self.temperature,
        )
        self.result = response.choices[0].message.content
    
def get_gpt_completion(prompt, time_limit=10, model="gpt-4o-mini"):
    messages = [{"role": "user", "content": prompt}]
    temperature = random.uniform(0,1)
    thread = GPTThread(model, messages,temperature)
    thread.start()
    thread.join(time_limit)
    if thread.is_alive():
        thread.terminate()
        print("请求超时")
        return "TimeoutError", False
    else:
        return thread.result, True
    
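# Same timeout pattern as GPTThread, but for the provider-agnostic wrappers returned by get_models().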
class LLMThread(threading.Thread):
    def __init__(self, llm, prompt, temperature):
        super().__init__()
        self.llm = llm
        self.prompt = prompt
        self.temperature = temperature
        self.result = ""
    def terminate(self):
        # Note: this only clears a flag; it cannot abort the in-flight API request.
        self._running = False
    def run(self):
        self.result = self.llm.chat(self.prompt, temperature = self.temperature)
    
def get_llm_completion(prompt, time_limit = 10, model_name="gpt-4o-mini"):
    llm = get_models(model_name)
    temperature = 0.7
    thread = LLMThread(llm, prompt,temperature)
    thread.start()
    thread.join(time_limit)
    if thread.is_alive():
        thread.terminate()
        print("请求超时")
        return "TimeoutError", False
    else:
        return thread.result, True
    
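# Zero-pad a number to a fixed width, e.g. left_pad_zero(7, 3) -> "007".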
def left_pad_zero(number, digit):
    number_str = str(number)
    padding_count = digit - len(number_str)
    padded_number_str = "0" * padding_count + number_str
    return padded_number_str

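# Generate sequential string ids padded one digit wider than the largest index.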
def generate_ids(num: int):
    length = len(str(num))+1
    ids = []
    for i in range(num):
        ids.append(left_pad_zero(i,length))
    return ids

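# Convert uploaded CSV files to JSON dicts keyed by id; ids are generated when the id column is missing.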
def convert_to_json(files, text_col, name_col, id_col):
    out_files = []
    for file_target in files:
        dic = {}
        path = file_target.name
        dir = osp.dirname(path)
        base_name = osp.basename(path)
        new_name = base_name[:-4]+".json"
        new_path = osp.join(dir,new_name)
        with open(path,"r",encoding="utf-8") as f:
            reader = csv.DictReader(f)
            line_num = sum(1 for _ in open(path,"r",encoding="utf-8"))
            fieldnames = reader.fieldnames if reader.fieldnames else []
            if id_col not in fieldnames:
                ids = generate_ids(line_num)
                i = 0
                for row in reader:
                    dic[ids[i]]={"name":row[name_col],"text":row[text_col]}
                    for field in fieldnames:
                        if field not in (name_col,text_col):
                            dic[ids[i]][field] = row[field]
                    i += 1
            else:
                for row in reader:
                    dic[row[id_col]]={"name":row[name_col],"text":row[text_col]}
                    for field in fieldnames:
                        if field not in (name_col,text_col,id_col):
                            dic[row[id_col]][field] = row[field]
                
        with open(new_path, "w", encoding= "utf-8") as f2:
            json.dump(dic,f2,indent=1,ensure_ascii=False)
        out_files.append(new_path)
    return out_files

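# Convert JSON dicts back to CSV, adding an "id" column and filling missing fields with empty strings.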
def convert_to_csv(files):
    out_files = []
    for file_target in files:
        path = file_target.name
        dir = osp.dirname(path)
        base_name = osp.basename(path)
        new_name = osp.splitext(base_name)[0] + ".csv"
        new_path = osp.join(dir,new_name)
        with open(path, "r", encoding= "utf-8") as f:
            dic = json.load(f)
        field_names = [] 
        for value in dic.values():
            for field in value.keys():
                if field not in field_names: field_names.append(field)
        for key in dic.keys():
            dic[key]["id"] = key
            for field in field_names:
                if field not in dic[key]:
                    dic[key][field] = ""
        field_names.insert(0,"id")
        with open(new_path, "w", encoding= "utf-8",newline="") as f2:
            writer = csv.DictWriter(f2,fieldnames=field_names)
            writer.writeheader()
            writer.writerows(list(dic.values()))
        out_files.append(new_path)
    return out_files
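
# Minimal usage sketch (hypothetical; assumes config.json holds valid API keys and that
# baidu_api_id / baidu_api_key are supplied by the caller):
#   reply, ok = get_llm_completion("Hello!", time_limit=15, model_name="gpt-4o-mini")
#   translated = get_baidu_completion("Hello", baidu_api_id, baidu_api_key, "en", "zh")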