mikemin027 committed on
Commit
fe9bde4
1 Parent(s): abff1a0

Update app.py

Files changed (1)
  1. app.py +8 -35
app.py CHANGED
@@ -7,6 +7,9 @@ llm = Llama.from_pretrained(
     filename="Marco-o1-Q4_K_M.gguf",
 )
 
+# Access the tokenizer from the Llama model
+tokenizer = llm.get_tokenizer()
+
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -19,48 +22,18 @@ def respond(
     tokenized_messages = []
 
     # Tokenize the system message
-    tokenized_messages.append(llm.tokenizer.encode(system_message))
+    tokenized_messages.append(tokenizer.encode(system_message))
 
     # Tokenize the history messages
     for val in history:
         if val[0]:
-            tokenized_messages.append(llm.tokenizer.encode(val[0]))  # User message
+            tokenized_messages.append(tokenizer.encode(val[0]))  # User message
         if val[1]:
-            tokenized_messages.append(llm.tokenizer.encode(val[1]))  # Assistant message
+            tokenized_messages.append(tokenizer.encode(val[1]))  # Assistant message
 
     # Tokenize the current user message
-    tokenized_messages.append(llm.tokenizer.encode(message))
+    tokenized_messages.append(tokenizer.encode(message))
 
     response = ""
 
-    # Use llm.create_completion with tokenized messages
-    for token in llm.create_completion(
-        tokenized_messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token_content = token['choices'][0]['delta']['content']
-        response += token_content
-        yield response
-
-# Gradio demo setup
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="", label="System message"),
-        gr.Slider(minimum=1, maximum=8192, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-if __name__ == "__main__":
-    demo.launch()
+    # Use llm.create_completion with tokenized message
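
Two notes on this change. As committed, the hunk ends after the `# Use llm.create_completion with tokenized message` comment, so `respond` no longer calls the model, and the `gr.ChatInterface` wiring is removed outright. Separately, the deleted loop read `token['choices'][0]['delta']['content']`, which is the chunk shape of chat completions; plain `create_completion` streams its text under `choices[0]['text']`. A minimal sketch of how the tokenize-and-stream flow could be finished with llama-cpp-python's documented calls (`Llama.tokenize`, which takes UTF-8 bytes, and `create_completion`, which accepts a token list as the prompt) might look like the following; `llm` is the instance created above, everything else is illustrative, not the committed code:

# Sketch only: flatten the conversation into one token list
# and stream a completion from it.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Llama.tokenize expects bytes, not str
    tokens = llm.tokenize(system_message.encode("utf-8"))
    for user_msg, assistant_msg in history:
        if user_msg:
            tokens += llm.tokenize(user_msg.encode("utf-8"), add_bos=False)
        if assistant_msg:
            tokens += llm.tokenize(assistant_msg.encode("utf-8"), add_bos=False)
    tokens += llm.tokenize(message.encode("utf-8"), add_bos=False)

    response = ""
    for chunk in llm.create_completion(
        tokens,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Streamed completion chunks carry text here, not under delta.content
        response += chunk["choices"][0]["text"]
        yield response

Note that concatenating raw token lists skips the model's chat template, so the model sees the turns as one undifferentiated string; in practice `llm.create_chat_completion(messages=[...], stream=True)` applies the GGUF's built-in template and is usually the simpler route for a chat Space.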