hin123123 committed on
Commit b0aa81d · verified · 1 Parent(s): 818af83

Create app.py

Files changed (1)
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
+ from huggingface_hub import login
+ import os
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import PeftModel
+ import gradio as gr
+
+ # Log in using the secret token
+ login(os.environ["HF_TOKEN"])
+
+ # Base model
+ base_model = "mistralai/Mistral-7B-v0.3"
+
+ # Your adapter model on HF
+ adapter_model = "hin123123/theralingua-mistral-7b-word"
+
+ # Quantization config for efficiency
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4"
+ )
+
+ # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
+
+ # Load base model with low CPU memory usage
+ model = AutoModelForCausalLM.from_pretrained(
+     base_model,
+     quantization_config=quantization_config,
+     device_map="auto",
+     low_cpu_mem_usage=True  # Streams to GPU if available, avoids full RAM load
+ )
+
+ # Apply LoRA adapter
+ model = PeftModel.from_pretrained(model, adapter_model)
+
+ def generate_text(input_text, max_new_tokens=100, temperature=0.7):
+     # Move inputs to wherever device_map="auto" placed the model,
+     # rather than hard-coding "cuda"/"cpu"
+     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
+
+     with torch.inference_mode():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=max_new_tokens,
+             temperature=temperature,
+             do_sample=True,
+             top_p=0.9
+         )
+
+     generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return generated
+
+ demo = gr.Interface(
+     fn=generate_text,
+     inputs=[
+         gr.Textbox(label="Input Text", placeholder="Enter your prompt here..."),
+         gr.Slider(label="Max New Tokens", minimum=50, maximum=500, value=100, step=50),
+         gr.Slider(label="Temperature", minimum=0.1, maximum=1.5, value=0.7, step=0.1)
+     ],
+     outputs=gr.Textbox(label="Generated Output"),
+     title="Theralingua-Mistral-7B-Word Demo",
+     description="Enter text to generate output from the model."
+ )
+
+ # Launch the demo (Spaces handles sharing automatically)
+ demo.launch()
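
For context on the BitsAndBytesConfig above: NF4 stores each base-model weight in roughly 4 bits, which is what lets a 7B model fit on a single small GPU. A back-of-envelope sketch; the ~7.25B parameter count for Mistral-7B-v0.3 is an assumption, and real usage adds quantization metadata, activations, and the fp16 LoRA weights on top:

# Rough VRAM estimate for the 4-bit base weights (assumed ~7.25B params)
params = 7.25e9
bytes_per_param = 0.5  # nf4: 4 bits per weight
print(f"~{params * bytes_per_param / 1024**3:.1f} GB")  # ~3.4 GB for weights alone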
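
Once this app.py is running in a Space, the Interface can also be queried programmatically with gradio_client. A minimal sketch, with a hypothetical Space id, since the commit does not show where the app is hosted:

from gradio_client import Client

client = Client("hin123123/theralingua-demo")  # hypothetical Space id, replace with the real one
result = client.predict(
    "Enter your prompt here...",  # Input Text
    100,                          # Max New Tokens
    0.7,                          # Temperature
    api_name="/predict",          # default endpoint name for a gr.Interface
)
print(result)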