plutostack committed on
Commit
439d32c
·
verified ·
1 Parent(s): 91dd5aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -6
app.py CHANGED
@@ -2,17 +2,56 @@ import os
2
  os.system("pip3 install transformers")
3
  os.system("pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")
4
  os.system("pip3 install tensorflow")
 
 
5
  import gradio as gr
6
- from transformers import pipeline
 
 
 
 
 
 
 
 
 
7
 
8
- pipe = pipeline("text-generation", model="nvidia/Llama-3.1-Nemotron-Nano-8B-v1", trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def textgen(request):
11
  messages = [
12
- {"role": "user", "content": str(request)},
13
  ]
14
- outputs = pipe(messages)
 
 
 
 
 
 
 
 
15
  return outputs[0]["generated_text"][-1]['content']
16
 
17
- demo = gr.Interface(fn=textgen, inputs="text", outputs="text")
18
- demo.launch()
 
 
 
 
 
 
 
 
2
  os.system("pip3 install transformers")
3
  os.system("pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu")
4
  os.system("pip3 install tensorflow")
5
+ os.system("pip3 install bitsandbytes accelerate")
6
+
7
  import gradio as gr
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
9
+ import torch
10
+
11
+ bnb_config = BitsAndBytesConfig(
12
+ load_in_4bit=True,
13
+ bnb_4bit_quant_type="nf4",
14
+ bnb_4bit_compute_dtype=torch.float16,
15
+ bnb_4bit_use_double_quant=True
16
+ )
17
+
18
 
19
+ model = AutoModelForCausalLM.from_pretrained(
20
+ "nvidia/Llama-3.1-Nemotron-Nano-8B-v1",
21
+ quantization_config=bnb_config,
22
+ device_map="auto",
23
+ trust_remote_code=True
24
+ )
25
+ tokenizer = AutoTokenizer.from_pretrained("nvidia/Llama-3.1-Nemotron-Nano-8B-v1")
26
+
27
+ pipe = pipeline(
28
+ "text-generation",
29
+ model=model,
30
+ tokenizer=tokenizer,
31
+ device_map="auto"
32
+ )
33
 
34
  def textgen(request):
35
  messages = [
36
+ {"role": "user", "content": str(request)},
37
  ]
38
+
39
+ outputs = pipe(
40
+ messages,
41
+ max_new_tokens=512,
42
+ do_sample=True,
43
+ temperature=0.7,
44
+ top_p=0.9
45
+ )
46
+
47
  return outputs[0]["generated_text"][-1]['content']
48
 
49
+ demo = gr.Interface(
50
+ fn=textgen,
51
+ inputs=gr.Textbox(label="Ваш запрос", placeholder="Введите ваш вопрос здесь..."),
52
+ outputs=gr.Textbox(label="Ответ модели"),
53
+ title="Chat with Llama-3.1-Nemotron-Nano (4-bit quantized)",
54
+ description="Квантованная 4-bit версия модели NVIDIA Llama-3.1-Nemotron-Nano-8B"
55
+ )
56
+
57
+ demo.launch(share=True)