Update app.py
app.py CHANGED
@@ -1,4 +1,47 @@
 import gradio as gr
+import os
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    HfArgumentParser,
+    TrainingArguments,
+    pipeline,
+    logging,
+)
+
+
+################################################################################
+# bitsandbytes parameters
+################################################################################
+
+# Activate 4-bit precision base model loading
+use_4bit = True
+
+# Compute dtype for 4-bit base models
+bnb_4bit_compute_dtype = "float16"
+
+# Quantization type (fp4 or nf4)
+bnb_4bit_quant_type = "nf4"
+
+# Activate nested quantization for 4-bit base models (double quantization)
+use_nested_quant = False
+
+
+################################################################################
+# SFT parameters
+################################################################################
+
+# Maximum sequence length to use
+max_seq_length = None
+
+# Pack multiple short examples in the same input sequence to increase efficiency
+packing = False
+
+# Load the entire model on GPU 0
+device_map = {"": 0}
+
 from transformers import pipeline
 
 # Initialize the pipeline with the LLaMA model