ksh-nyp committed
Commit 9005c68
1 Parent(s): 0a70c0f

Update app.py

Files changed (1)
  1. app.py +43 -0
app.py CHANGED
@@ -1,4 +1,47 @@
 import gradio as gr
+import os
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    HfArgumentParser,
+    TrainingArguments,
+    pipeline,
+    logging,
+)
+
+
+################################################################################
+# bitsandbytes parameters
+################################################################################
+
+# Activate 4-bit precision base model loading
+use_4bit = True
+
+# Compute dtype for 4-bit base models
+bnb_4bit_compute_dtype = "float16"
+
+# Quantization type (fp4 or nf4)
+bnb_4bit_quant_type = "nf4"
+
+# Activate nested quantization for 4-bit base models (double quantization)
+use_nested_quant = False
+
+
+################################################################################
+# SFT parameters
+################################################################################
+
+# Maximum sequence length to use
+max_seq_length = None
+
+# Pack multiple short examples in the same input sequence to increase efficiency
+packing = False
+
+# Load the entire model on GPU 0
+device_map = {"": 0}
+
 from transformers import pipeline
 
 # Initialize the pipeline with the LLaMA model
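
Note: the quantization flags this commit adds are declared but not yet consumed anywhere in the hunk. A minimal sketch of how they would typically be wired into a BitsAndBytesConfig and a 4-bit model load, assuming a hypothetical model_name (none is shown in this diff) and a CUDA-capable GPU:

    # Hypothetical checkpoint; the diff does not show which LLaMA model app.py uses
    model_name = "NousResearch/Llama-2-7b-chat-hf"

    # Resolve the compute dtype string ("float16") to the actual torch dtype
    compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

    # Assemble the 4-bit quantization config from the flags defined above
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=use_4bit,
        bnb_4bit_quant_type=bnb_4bit_quant_type,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=use_nested_quant,
    )

    # Load the base model in 4-bit entirely on GPU 0, per device_map = {"": 0}
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map=device_map,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)

The getattr(torch, bnb_4bit_compute_dtype) step is just the usual way to turn the "float16" string into torch.float16 before handing it to BitsAndBytesConfig.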
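
Similarly, max_seq_length and packing are fine-tuning knobs. In the QLoRA recipes this configuration mirrors, they are handed to trl's SFTTrainer, which this commit does not import; the sketch below assumes an older trl release (circa 0.7, where SFTTrainer still accepted these constructor arguments) plus hypothetical train_dataset and peft_config objects:

    from trl import SFTTrainer

    # Hypothetical training arguments; none are specified in this diff.
    # TrainingArguments is already imported from transformers at the top of app.py.
    training_arguments = TrainingArguments(
        output_dir="./results",
        per_device_train_batch_size=4,
    )

    # Hand the SFT parameters defined above to the trainer
    trainer = SFTTrainer(
        model=model,                    # quantized model from the previous sketch
        train_dataset=train_dataset,    # hypothetical: a dataset with a "text" field
        peft_config=peft_config,        # hypothetical: a LoRA config from peft
        dataset_text_field="text",
        max_seq_length=max_seq_length,  # None -> SFTTrainer falls back to its default
        tokenizer=tokenizer,
        args=training_arguments,
        packing=packing,
    )
    trainer.train()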
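
Finally, the hunk ends at the "# Initialize the pipeline with the LLaMA model" comment without showing the call itself. One hypothetical continuation, reusing the quantized model and tokenizer from the first sketch and the gradio import at the top of app.py (the real model ID and generation settings are outside this diff):

    # Hypothetical pipeline setup; the actual call sits outside this hunk
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=200,
    )

    # Example Gradio wiring, since app.py imports gradio as gr
    def generate(prompt):
        return pipe(prompt)[0]["generated_text"]

    demo = gr.Interface(fn=generate, inputs="text", outputs="text")
    demo.launch()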