Spaces:

plutostack
/

PlutoAI

Running

App Files Community

plutostack committed 4 days ago

Commit

439d32c

verified ·

1 Parent(s): 91dd5aa

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -6

app.py CHANGED Viewed

@@ -2,17 +2,56 @@ import os
 os.system("pip3 install transformers")
 os.system("pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")
 os.system("pip3 install tensorflow")
 import gradio as gr
-from transformers import pipeline
-pipe = pipeline("text-generation", model="nvidia/Llama-3.1-Nemotron-Nano-8B-v1", trust_remote_code=True)
 def textgen(request):
     messages = [
-    {"role": "user", "content": str(request)},
     ]
-    outputs = pipe(messages)
     return outputs[0]["generated_text"][-1]['content']
-demo = gr.Interface(fn=textgen, inputs="text", outputs="text")
-demo.launch()

 os.system("pip3 install transformers")
 os.system("pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")
 os.system("pip3 install tensorflow")
+os.system("pip3 install bitsandbytes accelerate")
 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
+import torch
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True
+)
+model = AutoModelForCausalLM.from_pretrained(
+    "nvidia/Llama-3.1-Nemotron-Nano-8B-v1",
+    quantization_config=bnb_config,
+    device_map="auto",
+    trust_remote_code=True
+)
+tokenizer = AutoTokenizer.from_pretrained("nvidia/Llama-3.1-Nemotron-Nano-8B-v1")
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    device_map="auto"
+)
 def textgen(request):
     messages = [
+        {"role": "user", "content": str(request)},
     ]
+    outputs = pipe(
+        messages,
+        max_new_tokens=512,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.9
+    )
     return outputs[0]["generated_text"][-1]['content']
+demo = gr.Interface(
+    fn=textgen,
+    inputs=gr.Textbox(label="Ваш запрос", placeholder="Введите ваш вопрос здесь..."),
+    outputs=gr.Textbox(label="Ответ модели"),
+    title="Chat with Llama-3.1-Nemotron-Nano (4-bit quantized)",
+    description="Квантованная 4-bit версия модели NVIDIA Llama-3.1-Nemotron-Nano-8B"
+)
+demo.launch(share=True)