Tonic committed
Commit a3c3064 • 1 Parent(s): 27d5e20

Update app.py

Files changed (1)
  1. app.py +99 -70
app.py CHANGED
@@ -1,84 +1,113 @@
- import os
- import math
- import transformers
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
  import gradio as gr
- import sentencepiece
-
- title = "Welcome to Tonic's 🐋🐳Orca-2-13B!"
- description = "You can use [🐋🐳microsoft/Orca-2-13b](https://huggingface.co/microsoft/Orca-2-13b) via API using Gradio by scrolling down and clicking 'Use via API', or privately by [cloning this space on huggingface](https://huggingface.co/spaces/Tonic1/TonicsOrca2?duplicate=true). [Join my active builders' server on discord](https://discord.gg/VqTxc76K3u). Big thanks to the HuggingFace Organisation for the Community Grant."
-
- # os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:50'
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- model_name = "microsoft/Orca-2-13b"
- # offload_folder = './model_weights'
-
- # if not os.path.exists(offload_folder):
- #     os.makedirs(offload_folder)
-
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
- model = AutoModelForCausalLM.from_pretrained(model_name)
- model = model.to(torch.bfloat16)
- model = model.to(device)
-
-
- class OrcaChatBot:
-     def __init__(self, model, tokenizer, system_message="You are Orca, an AI language model created by Microsoft. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."):
-         self.model = model
-         self.tokenizer = tokenizer
-         self.system_message = system_message
-         self.conversation_history = None
-
-     def predict(self, user_message, temperature=0.4, max_new_tokens=70, top_p=0.99, repetition_penalty=1.9):
-         # Prepare the prompt
-         prompt = f"<|im_start|>system\n{self.system_message}<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant" if self.conversation_history is None else self.conversation_history + f"<|im_end|>\n<|im_start|>user\n{user_message}<|im_end|>\n<|im_start|>assistant"
-
-         # Encode the prompt
-         inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
-         input_ids = inputs["input_ids"].to(self.model.device)
-
-         # Generate a response
-         output_ids = self.model.generate(
-             input_ids,
-             max_length=input_ids.shape[1] + max_new_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             repetition_penalty=repetition_penalty,
-             pad_token_id=self.tokenizer.eos_token_id,
-             do_sample=True  # Enable sampling-based generation
-         )
-
-         # Decode the generated response
-         response = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
-
-         # Update conversation history
-         self.conversation_history = self.tokenizer.decode(output_ids[0], skip_special_tokens=False)
-
-         return response
-
- Orca_bot = OrcaChatBot(model, tokenizer)
-
- def gradio_predict(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty):
-     full_message = f"{system_message}\n{user_message}" if system_message else user_message
-     return Orca_bot.predict(full_message, temperature, max_new_tokens, top_p, repetition_penalty)
-
  iface = gr.Interface(
-     fn=gradio_predict,
      title=title,
      description=description,
-     inputs=[
-         gr.Textbox(label="Your Message", type="text", lines=3),
-         gr.Textbox(label="Introduce a Character Here or Set a Scene (system prompt)", type="text", lines=2),
-         gr.Slider(label="Max new tokens", value=1200, minimum=25, maximum=4096, step=1),
-         gr.Slider(label="Temperature", value=0.7, minimum=0.05, maximum=1.0, step=0.05),
-         gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05),
-         gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)
-     ],
      outputs="text",
      theme="ParityError/Anime"
  )

- # Launch the Gradio interface
  iface.launch()
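For reference, the removed OrcaChatBot formats its prompt in the ChatML style that Orca-2 expects. The snippet below is a minimal illustration (not part of the commit) of the string that predict builds on the first turn, when conversation_history is None; the system and user messages are example values.

# Illustration only: the ChatML-style prompt built by the removed OrcaChatBot.predict
system_message = "You are Orca, an AI language model created by Microsoft."
user_message = "What is photosynthesis?"
prompt = (
    f"<|im_start|>system\n{system_message}<|im_end|>\n"
    f"<|im_start|>user\n{user_message}<|im_end|>\n"
    f"<|im_start|>assistant"
)
print(prompt)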
 
+ from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
+ from peft import PeftModel, PeftConfig
  import torch
  import gradio as gr
+ import random
+ import textwrap
+
+ # Functions to Wrap the Prompt Correctly
+ def wrap_text(text, width=90):
+     lines = text.split('\n')
+     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+     wrapped_text = '\n'.join(wrapped_lines)
+     return wrapped_text
+
+ def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:", max_length=512):
+     """
+     Generates text using a large language model, given a user input and a system prompt.
+     Args:
+         user_input: The user's input text to generate a response for.
+         system_prompt: Optional system prompt.
+         max_length: Maximum number of tokens in the generated sequence.
+     Returns:
+         A string containing the generated text.
+     """
+     # Combine user input and system prompt in the Mistral instruction format
+     formatted_input = f"<s>[INST]{system_prompt} {user_input}[/INST]"
+
+     # Encode the input text
+     encodeds = tokenizer(formatted_input, return_tensors="pt", add_special_tokens=False)
+     model_inputs = encodeds.to(device)
+
+     # Generate a response using the PEFT model
+     output = peft_model.generate(
+         **model_inputs,
+         max_length=max_length,
+         use_cache=True,
+         early_stopping=True,
+         bos_token_id=peft_model.config.bos_token_id,
+         eos_token_id=peft_model.config.eos_token_id,
+         pad_token_id=peft_model.config.eos_token_id,
+         temperature=0.1,
+         do_sample=True
+     )
+
+     # Decode the response
+     response_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+     return response_text
+
+ # Define the device
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # Use the base model's ID
+ base_model_id = "mistralai/Mistral-7B-v0.1"
+ model_directory = "Tonic/mistralmed"
+
+ # Instantiate the Tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True, padding_side="left")
+ # tokenizer = AutoTokenizer.from_pretrained("Tonic/mistralmed", trust_remote_code=True, padding_side="left")
+ tokenizer.pad_token = tokenizer.eos_token
+ tokenizer.padding_side = 'left'
+
+ # Specify the configuration class for the model
+ #model_config = AutoConfig.from_pretrained(base_model_id)
+
+ # Load the PEFT model with the specified configuration
+ #peft_model = AutoModelForCausalLM.from_pretrained(base_model_id, config=model_config)
+
+ # Load the base model and attach the PEFT adapter from Tonic/mistralmed
+ peft_config = PeftConfig.from_pretrained("Tonic/mistralmed")
+ peft_model = MistralForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True)
+ peft_model = PeftModel.from_pretrained(peft_model, "Tonic/mistralmed")
+
+ class ChatBot:
+     def __init__(self):
+         # Initialize the ChatBot class with an empty history
+         self.history = []
+
+     def predict(self, user_input, system_prompt="You are an expert medical analyst:"):
+         # Combine the user's input with the system prompt
+         formatted_input = f"<s>[INST]{system_prompt} {user_input}[/INST]"
+
+         # Encode the formatted input using the tokenizer
+         user_input_ids = tokenizer.encode(formatted_input, return_tensors="pt")
+
+         # Generate a response using the PEFT model
+         response = peft_model.generate(input_ids=user_input_ids, max_length=512, pad_token_id=tokenizer.eos_token_id)
+
+         # Decode the generated response to text
+         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
+
+         return response_text  # Return the generated response
+
+ bot = ChatBot()
+
+ title = "👋🏻Welcome to Tonic's MistralMed Chat🚀"
+ description = "You can use this Space to test out the current model [(Tonic/MistralMed)](https://huggingface.co/Tonic/MistralMed) or duplicate this Space and use it locally or on 🤗HuggingFace. [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
+ examples = [["[Question:] What is the proper treatment for buccal herpes?", "You are a medicine and public health expert, you will receive a question, answer the question, and provide a complete answer"]]

  iface = gr.Interface(
+     fn=bot.predict,
      title=title,
      description=description,
+     examples=examples,
+     inputs=["text", "text"],  # Take user input and system prompt separately
      outputs="text",
      theme="ParityError/Anime"
  )

  iface.launch()
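Because the new interface takes two plain-text inputs (the user message and the system prompt) and returns text, the running app can also be exercised programmatically. The snippet below is a minimal sketch using gradio_client, not part of the commit; the local URL and the commented-out Space ID are placeholders to be replaced with wherever this app is actually served.

# pip install gradio_client
from gradio_client import Client

# Point at a locally running copy of app.py (placeholder URL),
# or at the published Space ID if one exists (hypothetical example below).
client = Client("http://127.0.0.1:7860/")
# client = Client("Tonic/MistralMed")

result = client.predict(
    "[Question:] What is the proper treatment for buccal herpes?",  # user input
    "You are a medicine and public health expert, you will receive a question, answer the question, and provide a complete answer",  # system prompt
    api_name="/predict",
)
print(result)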