"""Thin wrapper classes around several chat LLM backends.

ChatGLM      -- calls a remote ChatGLM HTTP service
LocalChatGLM -- drives a ChatGLM model loaded in-process via transformers
OpenAI3/4    -- call the OpenAI chat completion API through the api2d forward proxy
"""

from typing import Optional, List

import requests

# from langchain.llms.utils import enforce_stop_tokens
# import torch
# import logging
# from transformers import AutoTokenizer, AutoModel, AutoConfig

# logging.basicConfig(filename='chat_log.txt', level=logging.INFO)

DEVICE = "cuda"
# api2d forward key (defined here but not referenced below).
FORWARD_KEY = 'fk198719-Pmvv22OqZiovaxRq6YxCzkTcd6UVVX5O0'


# def torch_gc():
#     if torch.cuda.is_available():
#         with torch.cuda.device(DEVICE):
#             torch.cuda.empty_cache()
#             torch.cuda.ipc_collect()


class ChatGLM:
    """Client for a remote ChatGLM service exposed over HTTP."""

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    URL = 'http://183.131.3.48:9200'
    HEADERS = {'Content-Type': 'application/json'}

    @property
    def _llm_type(self) -> str:
        return "ChatGLM"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        if history:
            # Drop turns whose first element is None (the system message),
            # then keep only the most recent history_len turns.
            history = [i for i in history if i[0] is not None]
            history = history[-self.history_len:]
        params = {'tokenizers': self.tokenizer,
                  'prompt': prompt,
                  'history': history,
                  'top_p': self.top_p,
                  'max_length': self.max_length,
                  'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['response']
        # question = prompt.split('question:\n')[-1]
        # self.history = self.history + [[prompt, response]]
        return answer


class LocalChatGLM:
    """Runs a ChatGLM model loaded in-process (see the commented-out load_model below)."""

    max_length: int = 10000
    temperature: float = 0
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []

    @property
    def _llm_type(self) -> str:
        return "ChatGLM"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        history = history or []
        response, _ = self.model.chat(
            self.tokenizer,
            prompt,
            history=history[-self.history_len:] if self.history_len > 0 else [],
            max_length=self.max_length,
            temperature=self.temperature,
        )
        # torch_gc()
        # if stop is not None:
        #     response = enforce_stop_tokens(response, stop)
        # Record only the question part of the prompt together with the answer.
        question = prompt.split('question:\n')[-1]
        self.history = self.history + [[question, response]]
        return response

    # @classmethod
    # def load_model(cls, model_name_or_path: str = "THUDM/chatglm-6b"):
    #     tokenizer = AutoTokenizer.from_pretrained(
    #         model_name_or_path,
    #         trust_remote_code=True
    #     )
    #     if torch.cuda.is_available() and DEVICE.lower().startswith("cuda"):
    #         model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .half()
    #             .cuda()
    #         )
    #     else:
    #         model = (
    #             AutoModel.from_pretrained(
    #                 model_name_or_path,
    #                 trust_remote_code=True)
    #             .float()
    #             .to(DEVICE)
    #         )
    #     llm = cls()
    #     llm.tokenizer = tokenizer
    #     llm.model = model
    #     return llm
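

# --- Optional helper (illustrative sketch, not part of the original module) ---------
# The __call__ methods in this file reference langchain's enforce_stop_tokens only in
# commented-out lines. If that dependency is unavailable, a minimal stand-in with the
# same intent (truncate the text at the first occurrence of any stop string) could
# look like this. The name _enforce_stop_tokens is hypothetical.
def _enforce_stop_tokens(text: str, stop: List[str]) -> str:
    """Return text truncated at the first occurrence of any string in stop."""
    import re
    if not stop:
        return text
    # Keep everything before the first match of any stop string.
    return re.split("|".join(re.escape(s) for s in stop), text, maxsplit=1)[0]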
class OpenAI3:
    """Client for gpt-3.5-turbo served through the api2d forward proxy."""

    max_length: int = 10000
    temperature: float = 0.2
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json',
               'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-3.5-turbo"

    @property
    def _llm_type(self) -> str:
        return "OPENAI3"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        # Single-turn request: the prompt is sent as one user message.
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME,
                  "messages": message,
                  'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer


class OpenAI4:
    """Client for gpt-4 served through the api2d forward proxy."""

    max_length: int = 10000
    temperature: float = 0.2
    top_p = 0.9
    tokenizer: object = None
    model: object = None
    history_len: int = 10
    history = []
    HEADERS = {'Content-Type': 'application/json',
               'Authorization': 'Bearer fk198719-pHAOCyaUXohoZBl0KfRvYf4AuHhWm8pm'}
    URL = 'https://openai.api2d.net/v1/chat/completions'
    MODEL_NAME = "gpt-4"

    @property
    def _llm_type(self) -> str:
        return "OPENAI4"

    def __call__(self, prompt: str,
                 history: Optional[List[List[str]]] = None,
                 stop: Optional[List[str]] = None) -> str:
        message = [{"role": "user", "content": prompt}]
        params = {"model": self.MODEL_NAME,
                  "messages": message,
                  'temperature': self.temperature}
        response = requests.post(self.URL, headers=self.HEADERS, json=params).json()
        answer = response['choices'][0]['message']['content']
        # if stop is not None:
        #     answer = enforce_stop_tokens(answer, stop)
        return answer
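

# --- Usage sketch (illustrative only) ------------------------------------------------
# The endpoints and keys come verbatim from the class attributes above; reachable,
# working services at those URLs are assumed.
if __name__ == "__main__":
    glm = ChatGLM()
    print(glm("What is the capital of France?", history=[]))

    gpt35 = OpenAI3()
    print(gpt35("Summarise the previous answer in five words."))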