made1570 committed on
Commit 11a35d1 · verified · Parent: 49830c8

Update app.py

Files changed (1)
  1. app.py +49 -35
app.py CHANGED
@@ -1,38 +1,52 @@
- import gradio as gr
- from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
-
- # Load the tokenizer from the Hugging Face Hub
- tokenizer = AutoTokenizer.from_pretrained("adarsh3601/my_gemma3_pt")
-
- # Load the model from Hugging Face Hub (Assuming you are using a transformer model here)
- model = AutoModelForCausalLM.from_pretrained("adarsh3601/my_gemma3_pt")
-
- # Function to generate response using the model
- def generate_response(input_text):
-     # Tokenize the input text
-     inputs = tokenizer(input_text, return_tensors="pt")
-
-     # Generate output using the model
-     with torch.no_grad():  # Disable gradients for inference
-         outputs = model.generate(inputs['input_ids'], max_length=50)  # You can adjust max_length and other parameters
-
-     # Decode the output and return it
-     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- # Create a Gradio interface
- def create_gradio_interface():
-     # Interface with a text input and a text output
-     interface = gr.Interface(
-         fn=generate_response,  # Function to call for generation
-         inputs=gr.Textbox(label="Enter Input Text"),  # Textbox for user input
-         outputs=gr.Textbox(label="Generated Response"),  # Textbox for output text
-         title="Text Generation with My Model",  # Title for the interface
-         description="Enter some text to generate a response using the trained model."  # Description
-     )
-     return interface
-
- # Launch the Gradio interface
  if __name__ == "__main__":
-     interface = create_gradio_interface()
-     interface.launch()
 
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import PeftModel
  import torch
+ from flask import Flask, request, jsonify
+ from flask_cors import CORS
+
+ # Setup
+ app = Flask(__name__)
+ CORS(app)  # Enable CORS for frontend/backend calls
+
+ # Load base model + adapter
+ base_model_name = "unsloth/gemma-3-12b-it-unsloth-bnb-4bit"
+ adapter_name = "adarsh3601/my_gemma3_pt"
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_quant_type="nf4"
+ )
+
+ base_model = AutoModelForCausalLM.from_pretrained(
+     base_model_name,
+     quantization_config=bnb_config,
+     device_map="auto"
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+ model = PeftModel.from_pretrained(base_model, adapter_name)
+
+ @app.route("/chat", methods=["POST"])
+ def chat():
+     try:
+         data = request.json
+         prompt = data.get("message", "")
+
+         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+         outputs = model.generate(**inputs, max_new_tokens=150, do_sample=True)
+         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         return jsonify({"response": response})
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ # For Hugging Face Spaces to detect the server
+ @app.route("/", methods=["GET"])
+ def root():
+     return "HF Space backend running"
+
  if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=7860)
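
For reference, a minimal client sketch for the new backend. It assumes the Flask app is reachable at http://localhost:7860 (a placeholder; substitute the deployed Space URL). The /chat route reads a JSON body with a "message" field and returns {"response": ...} on success or {"error": ...} with status 500 on failure:

import requests

BASE_URL = "http://localhost:7860"  # placeholder; point this at the actual Space

# Send a prompt to the /chat endpoint and print the generated text
resp = requests.post(
    f"{BASE_URL}/chat",
    json={"message": "Write a one-sentence greeting."},
    timeout=300,  # generation on a 12B model can take a while
)
resp.raise_for_status()
print(resp.json()["response"])

A plain GET on the root route should return "HF Space backend running", which doubles as a simple health check for the server.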