made1570 committed
Commit 5fe0a09 · verified · 1 Parent(s): c9cc26c

Update app.py

Files changed (1):
  1. app.py +35 -46
app.py CHANGED
@@ -4,69 +4,58 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 from unsloth import FastModel
 
-# Set environment for Hugging Face Spaces or CUDA
+# Set environment for Hugging Face Spaces
 os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
 
-# Load your model
+# Load the model from Hugging Face Model Hub
 model_repo_id = 'adarsh3601/my_gemma3_pt'
 
+# Load model and tokenizer using FastModel
 model, tokenizer = FastModel.from_pretrained(
     model_name=model_repo_id,
     max_seq_length=2048,
-    load_in_4bit=True,
+    load_in_4bit=True,  # Load model with 4-bit quantization
     load_in_8bit=False,
     full_finetuning=False
 )
 
-# Define the main chat function with history
-def chat(user_input, history):
-    # Reformat history into messages
-    messages = []
-    for human, assistant in history:
-        messages.append({"role": "user", "content": [{"type": "text", "text": human}]})
-        messages.append({"role": "assistant", "content": [{"type": "text", "text": assistant}]})
+# Function to generate text based on user input
+def generate_text(user_input):
+    # Prepare the input as per the model's expected format
+    messages = [{
+        "role": "user",
+        "content": [{"type": "text", "text": user_input}]
+    }]
 
-    # Append the new user message
-    messages.append({"role": "user", "content": [{"type": "text", "text": user_input}]})
-
-    # Apply template
-    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
-
+    text = tokenizer.apply_chat_template(
+        messages,
+        add_generation_prompt=True,  # Must add for generation
+    )
+
+    # Generate output with model
     with torch.no_grad():
         output = model.generate(
-            **tokenizer([prompt], return_tensors="pt").to("cuda"),
-            max_new_tokens=512,
+            **tokenizer([text], return_tensors="pt").to("cuda"),
+            max_new_tokens=512,  # Adjust if you need more tokens
             temperature=1.0,
             top_p=0.95,
             top_k=64,
-            streamer=None
+            streamer=None  # You can set a streamer if needed
         )
 
-    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    # Try to strip the original prompt to get the model's actual reply
-    reply = decoded[len(prompt):].strip()
-
-    # Append reply to history
-    history.append((user_input, reply))
-    return history, history
-
-# Gradio interface with state (chat history)
-with gr.Blocks() as demo:
-    gr.Markdown("## 🤖 Chat with Gemma-3")
-    chatbot = gr.Chatbot()
-    state = gr.State([])  # holds the message history
-
-    with gr.Row():
-        txt = gr.Textbox(placeholder="Type a message and hit enter...", show_label=False).style(container=False)
-
-    def user_submit(message, history):
-        return "", history + [[message, ""]]
-
-    txt.submit(user_submit, [txt, state], [txt, state], queue=False).then(
-        chat, [txt, state], [chatbot, state]
-    )
-
-# Launch with public sharing
+    # Decode the model output and return the result
+    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
+    return decoded_output
+
+# Build the Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
+    outputs=gr.Textbox(lines=2, placeholder="Generated text will appear here..."),
+    title="Gemma-3 Model",
+    description="This is a simple interface to interact with the Gemma-3 model. Enter a prompt and see the generated response."
+)
+
+# Launch the app
 if __name__ == "__main__":
-    demo.launch(share=True)
+    iface.launch(share=True)
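
Note: in the committed generate_text, apply_chat_template is called without tokenize=False, so with current transformers releases it returns token IDs rather than a prompt string, and the tokenizer([text], ...) call that follows would re-encode (or fail on) that list; the decoded output also still contains the prompt, a point the removed chat function handled with its len(prompt) slice. Below is a minimal sketch of the function with both points addressed. It assumes the stock transformers tokenizer API and the same model/tokenizer objects; it is not part of this commit.

def generate_text(user_input):
    messages = [{"role": "user", "content": [{"type": "text", "text": user_input}]}]

    # tokenize=False makes apply_chat_template return the prompt as a string,
    # so the tokenizer call below encodes it exactly once.
    text = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=False,
    )

    inputs = tokenizer([text], return_tensors="pt").to("cuda")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=1.0,
            top_p=0.95,
            top_k=64,
        )

    # Decode only the newly generated tokens so the reply does not repeat the prompt.
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

Slicing off the first inputs["input_ids"].shape[-1] tokens keeps only the completion, which is more robust than trimming the decoded string by prompt length.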