made1570 committed
Commit c742ff6 · verified · 1 Parent(s): 7be2f4a

Update app.py

Files changed (1)
app.py +33 -41
app.py CHANGED
@@ -1,49 +1,41 @@
 import torch
-from transformers import AutoProcessor, AutoModelForImageTextToText, TextStreamer
+from transformers import AutoProcessor, AutoModelForImageTextToText
 from peft import PeftModel
 import gradio as gr
 
-# Load base model and processor
-base_model_id = "unsloth/gemma-3-12b-it-unsloth-bnb-4bit"
-adapter_model_id = "adarsh3601/my_gemma3_pt"
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-processor = AutoProcessor.from_pretrained(base_model_id)
-model = AutoModelForImageTextToText.from_pretrained(base_model_id, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto")
-
-# Apply adapter (LoRA)
-model = PeftModel.from_pretrained(model, adapter_model_id)
-model.eval()
-
-streamer = TextStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
-
-# Helper to format messages using the chat template
-def format_chat(messages):
-    formatted = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    return formatted
-
-# Chat function
-def chat(message, history):
-    messages = []
-
-    # Format history into messages
-    for user_msg, bot_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": bot_msg})
-
-    messages.append({"role": "user", "content": message})
-    prompt = format_chat(messages)
-
-    inputs = processor(prompt, return_tensors="pt").to(device)
-
+# Set up device (CPU or GPU)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Load processor and model
+model_name = "adarsh3601/my_gemma_pt3"  # Change to your model path
+processor = AutoProcessor.from_pretrained(model_name)
+model = AutoModelForImageTextToText.from_pretrained(model_name).to(device)
+
+# Optional: If using PEFT model with adapter
+# adapter_model_id = "your_adapter_model_id"  # Uncomment and replace if using adapter
+# model = PeftModel.from_pretrained(model, adapter_model_id)
+
+# Define function to process the user input
+def chat(prompt):
+    # Prepare the message in the format the model expects
+    messages = [{"role": "user", "content": prompt}]
+
+    # Process the input using the processor
+    inputs = processor(messages, return_tensors="pt").to(device)
+
+    # Generate the output from the model
     with torch.no_grad():
-        outputs = model.generate(**inputs, max_new_tokens=512, streamer=streamer)
+        outputs = model.generate(**inputs, max_length=200)
 
-    decoded = processor.batch_decode(outputs, skip_special_tokens=True)[0]
-    response = decoded.split("<end_of_turn>")[0].strip().split("<start_of_turn>model")[-1].strip()
-    return response
+    # Decode and return the response
+    return processor.decode(outputs[0], skip_special_tokens=True)
 
 # Gradio interface
-gui = gr.ChatInterface(fn=chat, title="Gemma-3 Chatbot", description="Fine-tuned on adarsh3601/my_gemma_pt3")
-
-gui.launch()
+gr.Interface(
+    fn=chat,
+    inputs="text",
+    outputs="text",
+    title="Gemma Chat Model",
+    description="Chat with Gemma3 model",
+    live=True
+).launch(share=False)  # share=False for Hugging Face Spaces
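
For reference, the removed version ran each prompt through the processor's chat template before tokenizing, which is what produces Gemma's <start_of_turn>/<end_of_turn> turn markers; the new code passes the raw messages list to the processor instead. A minimal sketch of the template-based approach on top of the new single-turn design — the name chat_with_template is hypothetical, and it assumes the processor, model, and device defined above:

def chat_with_template(prompt):
    # Hypothetical variant of chat(): format the turn via the chat
    # template first, as the removed code's format_chat() helper did.
    messages = [{"role": "user", "content": prompt}]
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = processor(text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=200)
    return processor.decode(outputs[0], skip_special_tokens=True)

Note that decoding the raw output this way returns the prompt along with the reply; the removed code split the decoded text on <start_of_turn>model and <end_of_turn> to isolate just the model's turn.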