akhaliq (HF Staff) committed
Commit deec06c · verified · 1 Parent(s): de39f7f

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +21 -21
app.py CHANGED
@@ -1,50 +1,50 @@
 import gradio as gr
 from transformers import pipeline
 import torch
+import spaces
 
 # Initialize the model pipeline
-model_id = "facebook/MobileLLM-R1-950M"
+model_id = "facebook/MobileLLM-1B"
 pipe = pipeline(
     "text-generation",
     model=model_id,
-    torch_dtype="auto",
+    torch_dtype=torch.float16,
     device_map="auto",
 )
 
+@spaces.GPU(duration=120)
 def respond(message, history):
-    # Convert history to messages format
-    messages = []
-
-    # Add conversation history
+    # Build prompt from history
+    prompt = ""
     for user_msg, assistant_msg in history:
         if user_msg:
-            messages.append({"role": "user", "content": user_msg})
+            prompt += f"User: {user_msg}\n"
         if assistant_msg:
-            messages.append({"role": "assistant", "content": assistant_msg})
+            prompt += f"Assistant: {assistant_msg}\n"
 
     # Add current message
-    messages.append({"role": "user", "content": message})
+    prompt += f"User: {message}\nAssistant: "
 
-    # Generate response
-    outputs = pipe(
-        messages,
-        max_new_tokens=512,
+    # Generate response with streaming
+    response = ""
+    for token in pipe(
+        prompt,
+        max_new_tokens=256,
         temperature=0.7,
         do_sample=True,
         pad_token_id=pipe.tokenizer.eos_token_id,
-    )
-
-    # Extract only the assistant's response
-    generated_text = outputs[0]["generated_text"]
-    assistant_response = generated_text[-1]["content"]
-
-    return assistant_response
+        return_full_text=False,
+        stream=True,
+    ):
+        chunk = token[0]["generated_text"]
+        response = chunk
+        yield response
 
 # Create the chat interface
 demo = gr.ChatInterface(
     fn=respond,
     title="MobileLLM Chat",
-    description="Chat with Facebook's MobileLLM-R1-950M model",
+    description="Chat with Facebook's MobileLLM-1B model",
     examples=[
         "Write a Python function that returns the square of a number.",
         "Compute: 1-2+3-4+5- ... +99-100.",