---
license: apache-2.0
datasets:
- Mayank082000/Multilingual_Sentences_with_Sentences
language:
- en
- hi
- pa
library_name: adapter-transformers
pipeline_tag: text-generation
tags:
- job-search
- skill-development
- foreign-counseling
---
# Fine-Tuned Llama 2 Model for Multilingual Text Generation

This repository contains adapters for the `adapter-transformers` library, aimed at multilingual text generation. It leverages datasets such as `siddeo99/sidtestfiverrmulti` and supports multiple languages, including English, Hindi, and Punjabi.

## Installation

To install the necessary libraries, use pip (the `!` prefix is for notebook environments; drop it in a shell):
```python
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
!pip install pyarrow
```
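
The metadata above lists `Mayank082000/Multilingual_Sentences_with_Sentences` under `datasets`. As a minimal sketch (assuming the dataset is publicly available on the Hub; the split name is an assumption), it can be inspected with the `datasets` library:
```python
from datasets import load_dataset

# "train" is an assumed split name; check the dataset card for the actual splits.
dataset = load_dataset("Mayank082000/Multilingual_Sentences_with_Sentences", split="train")
print(dataset[0])
```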
# GPU-Enabled Devices (for CPU-only usage, skip this section and use the CPU code below)

## Import libraries
```python
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
)
from peft import LoraConfig, PeftModel
```

## Configuration Parameters
```python
# The model that you want to load from the Hugging Face Hub
model_name = "siddeo99/job_search_category"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

# Map the entire model to GPU 0
device_map = {"": 0}
```

### Loading Configuration

```python
# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map,
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)
```
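
The `LoraConfig` above only describes the adapter shape; on its own it does not attach any trained weights. A minimal sketch of loading the adapter from the Hub with `peft` (the repository ID below is a placeholder, not confirmed by this card; substitute the actual adapter repository):
```python
from peft import PeftModel

# Placeholder adapter repository ID; replace with the ID of this adapter repo.
adapter_repo = "Mayank082000/llama2-multilingual-adapter"
model = PeftModel.from_pretrained(model, adapter_repo)
model.eval()
```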
## Text Generation
```python
prompt = "What is a large language model?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])
```
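
The `<s>[INST] ... [/INST]` wrapper follows the Llama 2 chat prompt format. Sampling behaviour can be tuned through the usual `generate` keyword arguments; a minimal sketch (the values below are illustrative, not tuned for this model):
```python
result = pipe(
    f"<s>[INST] {prompt} [/INST]",
    do_sample=True,
    temperature=0.7,  # illustrative value
    top_p=0.9,        # illustrative value
)
print(result[0]["generated_text"])
```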

# CPU-Only Usage (slower than GPU)

## Import libraries
```python
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)
from peft import LoraConfig
```
## Run the model on CPU
```python
# Base model name and LoRA hyper-parameters (same values as in the GPU section,
# redefined here so this section can be run on its own)
model_name = "siddeo99/job_search_category"
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1

model = AutoModelForCausalLM.from_pretrained(model_name)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Run the text generation pipeline with the fine-tuned model.
# Hindi prompt: "What are the requirements for applying for a work visa to Australia from India?"
prompt = "भारत से ऑस्ट्रेलिया में एक कार्य वीजा के लिए आवेदन करने के लिए क्या आवश्यकताएं हैं?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])
```
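
If the LoRA adapter has been attached with `PeftModel.from_pretrained` (as sketched in the GPU section), it can optionally be merged into the base weights for simpler deployment. A minimal sketch, assuming `model` is a `PeftModel` with the adapter loaded and the output path is a placeholder:
```python
# Merge the LoRA weights into the base model and drop the adapter wrappers.
merged_model = model.merge_and_unload()

# Placeholder output directory for the merged checkpoint.
merged_model.save_pretrained("merged-llama2-multilingual")
tokenizer.save_pretrained("merged-llama2-multilingual")
```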