vilarin committed on
Commit
0486bff
·
verified ·
1 Parent(s): 7cb9567

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -26
app.py CHANGED
@@ -6,16 +6,15 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStream
6
  import gradio as gr
7
  from threading import Thread
8
 
9
- MODEL_LIST = ["meta-llama/Meta-Llama-3.1-8B-Instruct", "meta-llama/Meta-Llama-3.1-70B-Instruct"]
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
  MODEL = os.environ.get("MODEL_ID")
12
 
13
- TITLE = "<h1><center>Meta-Llama3.1-Chat</center></h1>"
14
 
15
  PLACEHOLDER = """
16
  <center>
17
- <p>😊Hi! How can I help you today?</p><br>
18
- <p>✨Select Meta-Llama3.1-8B/70B in Advanced Options</p>
19
  </center>
20
  """
21
 
@@ -40,20 +39,14 @@ quantization_config = BitsAndBytesConfig(
40
  bnb_4bit_use_double_quant=True,
41
  bnb_4bit_quant_type= "nf4")
42
 
43
-
44
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
45
- model_8b = AutoModelForCausalLM.from_pretrained(
46
- MODEL_LIST[0],
47
- torch_dtype=torch.bfloat16,
48
- device_map="auto",
49
- quantization_config=quantization_config)
50
- model_70b = AutoModelForCausalLM.from_pretrained(
51
- MODEL_LIST[1],
52
  torch_dtype=torch.bfloat16,
53
  device_map="auto",
54
  quantization_config=quantization_config)
55
 
56
- @spaces.GPU(duration=120)
57
  def stream_chat(
58
  message: str,
59
  history: list,
@@ -63,7 +56,6 @@ def stream_chat(
63
  top_p: float = 1.0,
64
  top_k: int = 20,
65
  penalty: float = 1.2,
66
- choice: str = "Meta-Llama-3.1-8B"
67
  ):
68
  print(f'message: {message}')
69
  print(f'history: {history}')
@@ -79,11 +71,6 @@ def stream_chat(
79
 
80
  conversation.append({"role": "user", "content": message})
81
 
82
- if choice == "Meta-Llama-3.1-8B":
83
- model = model_8b
84
- else:
85
- model = model_70b
86
-
87
  input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
88
 
89
  streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
@@ -118,7 +105,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
118
  fn=stream_chat,
119
  chatbot=chatbot,
120
  fill_height=True,
121
- additional_inputs_accordion=gr.Accordion(label="⚙️ Advanced Options", open=False, render=False),
122
  additional_inputs=[
123
  gr.Textbox(
124
  value="You are a helpful assistant",
@@ -165,12 +152,6 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
165
  label="Repetition penalty",
166
  render=False,
167
  ),
168
- gr.Radio(
169
- ["Meta-Llama-3.1-8B", "Meta-Llama-3.1-70B"],
170
- value="Meta-Llama-3.1-8B",
171
- label="Load Model",
172
- render=False,
173
- ),
174
  ],
175
  examples=[
176
  ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],
 
6
  import gradio as gr
7
  from threading import Thread
8
 
9
+ MODEL_LIST = ["meta-llama/Meta-Llama-3.1-8B-Instruct"]
10
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
11
  MODEL = os.environ.get("MODEL_ID")
12
 
13
+ TITLE = "<h1><center>Meta-Llama3.1-8B</center></h1>"
14
 
15
  PLACEHOLDER = """
16
  <center>
17
+ <p>Hi! How can I help you today?</p>
 
18
  </center>
19
  """
20
 
 
39
  bnb_4bit_use_double_quant=True,
40
  bnb_4bit_quant_type= "nf4")
41
 
 
42
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
43
+ model = AutoModelForCausalLM.from_pretrained(
44
+ MODEL,
 
 
 
 
 
45
  torch_dtype=torch.bfloat16,
46
  device_map="auto",
47
  quantization_config=quantization_config)
48
 
49
+ @spaces.GPU()
50
  def stream_chat(
51
  message: str,
52
  history: list,
 
56
  top_p: float = 1.0,
57
  top_k: int = 20,
58
  penalty: float = 1.2,
 
59
  ):
60
  print(f'message: {message}')
61
  print(f'history: {history}')
 
71
 
72
  conversation.append({"role": "user", "content": message})
73
 
 
 
 
 
 
74
  input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
75
 
76
  streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
 
105
  fn=stream_chat,
106
  chatbot=chatbot,
107
  fill_height=True,
108
+ additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
109
  additional_inputs=[
110
  gr.Textbox(
111
  value="You are a helpful assistant",
 
152
  label="Repetition penalty",
153
  render=False,
154
  ),
 
 
 
 
 
 
155
  ],
156
  examples=[
157
  ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],