Pclanglais committed on
Commit f6bae77
1 Parent(s): ae00ef2

Update app.py

Files changed (1)
  1. app.py +9 -27
app.py CHANGED
@@ -1,7 +1,7 @@
 import optimum
 import transformers
 from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
-from optimum.bettertransformer import BetterTransformer
+from vllm import LLM, SamplingParams
 import torch
 import gradio as gr
 import json
@@ -19,10 +19,7 @@ repetition_penalty=1.7
 
 model_name = "Pclanglais/Bellay"
 
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
-model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
-                                                          device_map="auto"
-                                                          )
+llm = LLM(model_name)
 
 
 styles_prompts_dict = {
@@ -68,28 +65,13 @@ class MistralChatBot:
 
         system_prompt = styles_prompts_dict[style]
 
-        # Combine the user and assistant messages into a conversation
-        conversation = "<|im_start|>system\n" + system_prompt + "<|im_end|>\n<|im_start|>user\n" + user_message + "<|im_end|>\n<|im_start|>assistant\n"
-        # Encode the conversation using the tokenizer
-        input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
-        input_ids = input_ids.to(device)
-        # Generate a response using the Falcon model
-        response = model.generate(
-            input_ids=input_ids,
-            use_cache=False,
-            early_stopping=False,
-            bos_token_id=model.config.bos_token_id,
-            eos_token_id=model.config.eos_token_id,
-            pad_token_id=model.config.eos_token_id,
-            temperature=0.5,
-            do_sample=True,
-            max_new_tokens=max_new_tokens,
-            top_p=top_p,
-            repetition_penalty=repetition_penalty
-        )
-        # Decode the generated response to text
-        response_text = tokenizer.decode(response[0], skip_special_tokens=True)
-        return response_text
+        sampling_params = SamplingParams(temperature=0.7, top_p=.95, max_tokens=500, presence_penalty = 2)
+        detailed_prompt = "<|im_start|>system\n" + system_prompt + "<|im_end|>\n<|im_start|>user"""
+        detailed_prompt = detailed_prompt + "\n" + user_input + "<|im_end|>\n<|im_start|>assistant\n"
+        prompts = [detailed_prompt]
+        outputs = llm.generate(prompts, sampling_params, use_tqdm = False)
+        generated_text = outputs[0].outputs[0].text
+        return generated_text
 
     def predict_simple(self, user_message, style):
         system_prompt = styles_prompts_dict[style]
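
For reference, a minimal self-contained sketch of the vLLM generation path this commit switches to, assuming vllm is installed and the Pclanglais/Bellay weights can be loaded; the system prompt and user message below are hypothetical stand-ins for the values the app pulls from styles_prompts_dict and the Gradio input.

    # Sketch of the new inference path: load once, build a ChatML-style prompt, generate.
    from vllm import LLM, SamplingParams

    # Loads the model a single time, replacing the transformers AutoModelForCausalLM setup.
    llm = LLM("Pclanglais/Bellay")

    system_prompt = "You are Bellay, a French-language literary assistant."  # hypothetical
    user_message = "Write a short poem about the Loire."                     # hypothetical

    # Same ChatML-style template the commit builds by string concatenation.
    prompt = (
        "<|im_start|>system\n" + system_prompt + "<|im_end|>\n"
        "<|im_start|>user\n" + user_message + "<|im_end|>\n"
        "<|im_start|>assistant\n"
    )

    # Sampling settings mirroring the committed values.
    sampling_params = SamplingParams(temperature=0.7, top_p=0.95, max_tokens=500, presence_penalty=2.0)

    outputs = llm.generate([prompt], sampling_params, use_tqdm=False)
    print(outputs[0].outputs[0].text)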