arjunanand13 committed
Commit 4841c33
1 Parent(s): f623b49

Update app.py

Files changed (1)
  1. app.py +32 -6
app.py CHANGED
@@ -18,6 +18,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.chains import ConversationalRetrievalChain
+from huggingface_hub import InferenceClient
 
 # Login to Hugging Face using a token
 # huggingface_hub.login(HF_TOKEN)
@@ -38,12 +39,37 @@ device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 # bnb_4bit_compute_dtype=bfloat16
 # )
 
-tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", token=HF_TOKEN)
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", token=HF_TOKEN)  # .to("cuda:0")
-terminators = [
-    tokenizer.eos_token_id,
-    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
-]
+# tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", token=HF_TOKEN)
+# model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct", device_map="auto", token=HF_TOKEN)  # .to("cuda:0")
+# terminators = [
+#     tokenizer.eos_token_id,
+#     tokenizer.convert_tokens_to_ids("<|eot_id|>"),
+# ]
+
+# Load the model config, the quantized model, and the tokenizer, then build
+# a text-generation pipeline on top of them.
+model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
+model_config = transformers.AutoConfig.from_pretrained(
+    model_id,
+    # use_auth_token=hf_auth
+)
+model = transformers.AutoModelForCausalLM.from_pretrained(
+    model_id,
+    trust_remote_code=True,
+    config=model_config,
+    quantization_config=bnb_config,
+    # use_auth_token=hf_auth
+)
+model.eval()
+tokenizer = transformers.AutoTokenizer.from_pretrained(
+    model_id,
+    # use_auth_token=hf_auth
+)
+generate_text = transformers.pipeline(
+    model=model, tokenizer=tokenizer,
+    return_full_text=True,
+    task='text-generation',
+    do_sample=True,  # temperature only takes effect when sampling is enabled
+    temperature=0.01,
+    max_new_tokens=512,
+)
 
 """
 Setting up the stop list to define stopping criteria.
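Note: the new loading code passes quantization_config=bnb_config, but the only trace of bnb_config in the visible context is the commented-out fragment `# bnb_4bit_compute_dtype=bfloat16` / `# )` above the hunk; if the config is not defined elsewhere in app.py, that line raises a NameError. A minimal sketch of a plausible definition, assuming the standard 4-bit BitsAndBytesConfig setup that fragment points at (the NF4 and double-quantization choices are assumptions):

import torch
from transformers import BitsAndBytesConfig

# Hypothetical reconstruction of the commented-out bnb_config: 4-bit NF4
# quantization with bfloat16 compute, matching the fragment left in context.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)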
 
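The trailing context line "Setting up the stop list to define stopping criteria." suggests the code after this hunk builds a custom StoppingCriteria. A minimal sketch of the usual pattern, assuming the Llama 3 <|eot_id|> terminator from the removed code is still the intended stop token (the class name and wiring are illustrative):

import torch
from transformers import StoppingCriteria, StoppingCriteriaList

# Token ids that should end generation, taken from the removed `terminators` list.
stop_token_ids = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Stop as soon as the last generated token is one of the stop ids.
        return input_ids[0, -1].item() in stop_token_ids

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

The pipeline above could then pass stopping_criteria=stopping_criteria so generation halts at the terminator instead of running to max_new_tokens.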
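Given the HuggingFaceEmbeddings, FAISS, and ConversationalRetrievalChain imports at the top of the file, generate_text is presumably wrapped as a LangChain LLM and wired into a retrieval chain. A sketch of that wiring under those assumptions; `docs` and the embedding model name are stand-ins, not taken from the commit:

from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain

# Wrap the transformers pipeline so LangChain can drive it as an LLM.
llm = HuggingFacePipeline(pipeline=generate_text)

# Embed the app's documents (`docs` is hypothetical) and index them with FAISS.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embeddings)

chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)
result = chain({"question": "What does the document say?", "chat_history": []})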
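The newly imported InferenceClient is not used in either visible hunk, so it presumably serves code elsewhere in app.py that calls the hosted model instead of (or alongside) the locally loaded one. A minimal sketch of typical usage, reusing the model id from the commented-out lines; the prompt and parameters are illustrative:

from huggingface_hub import InferenceClient

client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct", token=HF_TOKEN)

# Chat-style call against the hosted model.
response = client.chat_completion(
    messages=[{"role": "user", "content": "Summarize the indexed documents."}],
    max_tokens=512,
)
print(response.choices[0].message.content)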