Tao Wu committed on
Commit
032427b
1 Parent(s): 26cf43b

quantization

Browse files
Files changed (2) hide show
  1. app/embedding_setup.py +8 -4
  2. requirements.txt +0 -0
app/embedding_setup.py CHANGED
@@ -2,7 +2,7 @@ from langchain_community.vectorstores import Chroma
2
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
3
 
4
  from langchain.docstore.document import Document
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
6
  from peft import PeftModel
7
  from config import *
8
  import os
@@ -34,9 +34,13 @@ retriever = db.as_retriever(search_kwargs={"k": TOP_K})
34
  lora_weights_rec = REC_LORA_MODEL
35
  lora_weights_exp = EXP_LORA_MODEL
36
  hf_auth = os.environ.get("hf_token")
 
 
 
 
 
37
 
38
-
39
- tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=hf_auth)
40
 
41
 
42
  first_token = 'First'
@@ -130,7 +134,7 @@ def compare_docs_with_context(doc_a, doc_b, target_occupation_name, target_occup
130
 
131
  #courses = f"First: name: {doc_a.metadata['name']} description:{doc_a.metadata['description']} Second: name: {doc_b.metadata['name']} description:{Sdoc_b.metadata['description']}"
132
  courses = f"First: name: {doc_a.metadata['name']} learning outcomes:{doc_a.metadata['skills'][:2000]} Second: name: {doc_b.metadata['name']} learning outcomes:{doc_b.metadata['skills'][:2000]}"
133
- target_occupation = f"name: {target_occupation_name} description: {target_occupation_dsp}"
134
  skill_gap = skill_gap
135
  prompt = generate_prompt(target_occupation, skill_gap, courses)
136
  prompt = [prompt]
 
2
  from langchain_community.embeddings import HuggingFaceBgeEmbeddings
3
 
4
  from langchain.docstore.document import Document
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig,BitsAndBytesConfig
6
  from peft import PeftModel
7
  from config import *
8
  import os
 
34
  lora_weights_rec = REC_LORA_MODEL
35
  lora_weights_exp = EXP_LORA_MODEL
36
  hf_auth = os.environ.get("hf_token")
37
+ quantization_config = BitsAndBytesConfig(
38
+ load_in_4bit=True,
39
+ bnb_4bit_compute_dtype=torch.float16,
40
+ bnb_4bit_quant_type="nf4"
41
+ )
42
 
43
+ tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, quantization_config=quantization_config, token=hf_auth)
 
44
 
45
 
46
  first_token = 'First'
 
134
 
135
  #courses = f"First: name: {doc_a.metadata['name']} description:{doc_a.metadata['description']} Second: name: {doc_b.metadata['name']} description:{Sdoc_b.metadata['description']}"
136
  courses = f"First: name: {doc_a.metadata['name']} learning outcomes:{doc_a.metadata['skills'][:2000]} Second: name: {doc_b.metadata['name']} learning outcomes:{doc_b.metadata['skills'][:2000]}"
137
+ target_occupation = f"name: {target_occupation_name} description: {target_occupation_dsp[:2000]}"
138
  skill_gap = skill_gap
139
  prompt = generate_prompt(target_occupation, skill_gap, courses)
140
  prompt = [prompt]
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ