Spaces:
Runtime error
Runtime error
####################### | |
''' | |
License: MIT | |
''' | |
####################### | |
##### Dependencies | |
""" IMPORTANT: Uncomment the following line if you are in a Colab/Notebook environment """ | |
#!pip install gradio einops accelerate bitsandbytes transformers | |
##### | |
import gradio as gr | |
import transformers | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import torch | |
import random | |
import spaces | |
import re | |
def cut_text_after_last_token(text, token):
    """Return the text that follows the last occurrence of ``token``.

    Args:
        text: String to search.
        token: Marker substring to look for.

    Returns:
        The portion of ``text`` after the final ``token``, with leading and
        trailing whitespace stripped, or ``None`` when ``token`` does not
        occur in ``text``.
    """
    try:
        marker_start = text.rindex(token)
    except ValueError:
        # rindex raises where rfind would have returned -1.
        return None
    tail_start = marker_start + len(token)
    return text[tail_start:].strip()
class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
    """Stopping criterion that fires when a sentinel token sequence appears.

    Only tokens at position ``starting_idx`` or later in each sequence are
    searched, so anything before that index (e.g. the prompt, when
    ``starting_idx`` is the prompt length) can never trigger the stop.
    """

    def __init__(self, sentinel_token_ids: torch.LongTensor,
                 starting_idx: int):
        """Store the sentinel ids and the index at which searching begins.

        Args:
            sentinel_token_ids: Token ids of the sentinel sequence; its last
                dimension is the sentinel length.
            starting_idx: First position in each sequence that is searched.
        """
        super().__init__()
        self.sentinel_token_ids = sentinel_token_ids
        self.starting_idx = starting_idx

    def __call__(self, input_ids: torch.LongTensor,
                 _scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True as soon as any sequence contains the sentinel.

        Args:
            input_ids: Batch of token id sequences generated so far.
            _scores: Unused; accepted to satisfy the criteria call signature.
            **kwargs: Ignored; accepted so extra arguments forwarded by a
                ``StoppingCriteriaList`` do not raise.

        Returns:
            True if the sentinel occurs at or after ``starting_idx`` in at
            least one sequence of the batch, otherwise False.
        """
        sentinel_length = self.sentinel_token_ids.shape[-1]
        for sample in input_ids:
            trimmed_sample = sample[self.starting_idx:]
            # unfold() cannot build windows longer than the tensor itself.
            if trimmed_sample.shape[-1] < sentinel_length:
                continue
            # Every contiguous window of sentinel length, compared in one shot
            # instead of one Python-level iteration per window.
            windows = trimmed_sample.unfold(0, sentinel_length, 1)
            matches = torch.eq(windows, self.sentinel_token_ids).all(dim=-1)
            if bool(matches.any()):
                return True
        return False
# Model identifier passed to both from_pretrained() calls below.
model_path = 'freecs/ArtificialThinker-Phi2'

# Run on CUDA when PyTorch reports it as available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Half-precision weights with 4-bit loading disabled.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    load_in_4bit=False,
    torch_dtype=torch.float16,
)
# Remove this .to() call if load_in_4bit / load_in_8bit is set to True.
model = model.to(device)
# Matches the collapsible reasoning block that phine() prepends to its replies.
# DOTALL lets "." cross newlines so blocks whose reasoning spans several lines
# are removed too.
_DETAILS_PATTERN = re.compile(r'<details>.*?</details>', re.DOTALL)


def _build_context(history):
    """Render earlier chat turns as alternating prompt/response sections."""
    if not history:
        return ""
    sections = []
    turn_index = 0
    for exchange in history:
        for text in exchange:
            if turn_index % 2 == 0:
                sections.append(f"""\n<|prompt|>{text}\n""")
            else:
                # Drop the reasoning block of earlier replies so that only the
                # visible answer is fed back to the model.
                answer = _DETAILS_PATTERN.sub('', text)
                sections.append(f"""<|response|>{answer}""")
            turn_index += 1
    return "".join(sections)


def _generate_completion(prompt, temperature, top_p, top_k, repetition_penalty):
    """Sample a continuation of ``prompt``; return prompt plus output decoded."""
    tokenized = tokenizer(prompt, return_tensors="pt").to(device)
    sentinel_ids = tokenizer(
        "<|endoftext|>",
        add_special_tokens=False,
        return_tensors="pt",
    ).input_ids.to(device)
    # Built per call so the search for the sentinel starts right after THIS
    # prompt rather than after an earlier, shorter one.
    stopping_criteria_list = transformers.StoppingCriteriaList([
        _SentinelTokenStoppingCriteria(
            sentinel_token_ids=sentinel_ids,
            starting_idx=tokenized.input_ids.shape[-1],
        )
    ])
    output_ids = model.generate(
        **tokenized,
        stopping_criteria=stopping_criteria_list,
        do_sample=True,
        max_length=2048,
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),  # UI sliders may deliver floats; top_k must be an int
        repetition_penalty=repetition_penalty,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def phine(message, history, temperature, top_p, top_k, repetition_penalty, sys_message):
    """Answer ``message`` in two passes: generate reasoning, then a response.

    The first generation continues a prompt ending in ``<|reasoning|>``; the
    text after the last such marker is then inserted into a second prompt
    ending in ``<|response|>``, from which the final answer is generated.

    NOTE(review): ``spaces`` is imported by this file but never used; if this
    app is deployed on ZeroGPU hardware this function presumably needs the
    ``@spaces.GPU`` decorator - confirm against the deployment.

    Args:
        message: The user's latest message.
        history: Earlier exchanges; each item is iterated and its elements
            are treated as alternating user / assistant texts.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus sampling value forwarded to ``model.generate``.
        top_k: Top-k sampling value; converted to ``int`` before use.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.
        sys_message: System prompt placed at the start of the prompt.

    Returns:
        A string holding the reasoning inside a collapsible ``<details>``
        block, followed by a blank line and the response.
    """
    context = _build_context(history)
    prompt_head = f"""\n<|system|>{sys_message}""" + context + "\n<|prompt|>" + message

    # Pass 1: let the model produce its reasoning.
    reasoning_prompt = prompt_head + "<|endoftext|>\n<|reasoning|>"
    completion = _generate_completion(
        reasoning_prompt, temperature, top_p, top_k, repetition_penalty)
    reasoning = cut_text_after_last_token(completion, "<|reasoning|>")
    if reasoning is None:
        # Marker missing from the decoded text: fall back to empty reasoning
        # instead of failing on the string concatenation below.
        reasoning = ""

    # Pass 2: feed the reasoning back and ask for the final response.
    response_prompt = prompt_head + "\n<|reasoning|>" + reasoning + "\n<|response|>"
    completion = _generate_completion(
        response_prompt, temperature, top_p, top_k, repetition_penalty)
    response = cut_text_after_last_token(completion, "<|response|>")
    if response is None:
        # Avoid rendering the literal text "None" in the chat window.
        response = ""

    return f"""<details><summary>Reasoning (Click Me)</summary>{reasoning}</details>\n\n{response}"""
# Chat UI around phine(). The additional inputs are passed to phine() after
# (message, history), in the order listed here:
# temperature, top_p, top_k, repetition_penalty, sys_message.
demo = gr.ChatInterface(
    phine,
    title="ArtificialThinker Demo on GPU",
    description="A demo of [ArtificialThinker](https://huggingface.co/freecs/ArtificialThinker-Phi2) on GPU. ArtificialThinker is a 2.7B parameter model based on Phi 2. The model is suitable for commercial use and is licensed under the MIT license. I am not responsible for any outputs you generate. You are solely responsible for ensuring that your usage of the model complies with applicable laws and regulations. I am not affiliated with the authors of the model.",
    additional_inputs=[
        gr.Slider(0.1, 2.0, label="temperature", value=0.3),
        # top_p is a cumulative probability, so the slider stops at 1.0;
        # larger values are rejected by the sampler.
        gr.Slider(0.1, 1.0, label="Top P", value=0.9),
        gr.Slider(1, 500, label="Top K", value=50),
        gr.Slider(0.1, 2.0, label="Repetition Penalty", value=1.1),
        gr.Textbox(
            label="System Prompt",
            max_lines=1,
            interactive=True,
            value="You are an AI assistant named Phine developed by FreeCS.org. You are polite and smart.",
        ),
    ],
)
if __name__ == "__main__":
    # Enable the request queue, then start the server with a share link.
    # debug is off; switch it to True only when troubleshooting.
    queued_demo = demo.queue()
    queued_demo.launch(debug=False, share=True)