marttinbell committed
Commit da07111 · verified · 1 Parent(s): df1081e

Update app.py

Files changed (1)
  1. app.py +51 -43
app.py CHANGED
@@ -1,51 +1,59 @@
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-
-# Load the model and tokenizer
-model_name = "TheBloke/Mistral-7B-v0.1-AWQ"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
-
-def generate_response(prompt, max_tokens=512, temperature=0.7):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=max_tokens,
-        do_sample=True,
-        temperature=temperature,
-    )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-# Gradio respond function
-def respond(message, history, system_message, max_tokens, temperature, top_p, hf_token=None):
-    prompt = system_message + "\n"
-    for h in history:
-        prompt += f"User: {h['user']}\nBot: {h['bot']}\n"
-    prompt += f"User: {message}\nBot: "
-
-    response = generate_response(prompt, max_tokens=max_tokens, temperature=temperature)
-    yield response
-
-# Gradio ChatInterface
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-    ],
 )
-
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.Text("Login not required for local model")
-    chatbot.render()
-
-if __name__ == "__main__":
-    demo.launch()
+# app.py
+from awq import AutoAWQForCausalLM
+from transformers import AutoTokenizer
 import torch
+import gradio as gr
+
+# Model name from Hugging Face
+MODEL_NAME = "TheBloke/Mistral-7B-v0.1-AWQ"
+
+# Load the model
+print("🚀 Loading Mistral 7B v0.1 AWQ model...")
+model = AutoAWQForCausalLM.from_quantized(
+    MODEL_NAME,
+    fuse_layers=True,
+    trust_remote_code=False,
+    safetensors=True
 )
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=False)
+print("✅ Model loaded successfully!")
+
+# Text generation function
+def generate_text(prompt, temperature, max_tokens):
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+    with torch.no_grad():
+        outputs = model.generate(
+            inputs.input_ids,
+            max_new_tokens=max_tokens,
+            temperature=temperature,
+            top_p=0.9,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id,
+        )
+
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Clean the output (remove the original prompt from the response)
+    if prompt in response:
+        response = response[len(prompt):].strip()
+
+    return response
+
+
+# Gradio Interface
+interface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=3, placeholder="Ask Mistral something...", label="Prompt"),
+        gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(50, 1024, value=512, step=10, label="Max Tokens")
+    ],
+    outputs=gr.Textbox(lines=10, label="Response"),
+    title="🧠 Mistral 7B v0.1 AWQ",
+    description="Run the quantized Mistral 7B v0.1 model locally or on Google Colab using Gradio.",
+    theme="default"
+)
+
+if __name__ == "__main__":
+    interface.launch(share=True)
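
Side note on the cleanup step in generate_text: matching the raw prompt string against the decoded output can fail when detokenization changes whitespace, which would leave the prompt echoed back in the response. A minimal token-level alternative is sketched below, assuming the same model and tokenizer objects as in app.py above; this reworked function body is an illustration, not part of this commit:

# Sketch: decode only the newly generated tokens instead of string-matching the prompt.
# Assumes `model` and `tokenizer` are loaded as in app.py above (not part of the commit).
import torch

def generate_text(prompt, temperature, max_tokens):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Everything past the input length in outputs[0] is newly generated text.
    new_tokens = outputs[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

Slicing at inputs.input_ids.shape[1] works because causal LMs return the prompt tokens followed by the generated tokens in outputs[0], so no string comparison against the prompt is needed.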