# Bihar-Now-Then / model.py
import os
import subprocess

from dotenv import load_dotenv

load_dotenv()

try:
    # Read credentials from the environment (populated by .env locally).
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
except TypeError:
    # os.getenv returned None, so no .env was found; fall back to the CI-injected
    # secret. "${{ secrets.PINECONE_API_KEY }}" is GitHub Actions syntax and only
    # resolves if the workflow substitutes it before this script runs.
    PINECONE_API_KEY = (
        subprocess.check_output(["bash", "-c", "echo ${{ secrets.PINECONE_API_KEY }}"])
        .decode("utf-8")
        .strip()
    )
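# Expected .env contents for local runs (a sketch; the values are placeholders,
# only the variable names are taken from the code above):
#   HUGGINGFACEHUB_API_TOKEN=hf_...
#   PINECONE_API_KEY=...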
from typing import Any, List, Mapping, Optional

import pinecone
import torch
from langchain import HuggingFacePipeline, LLMChain, PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from langchain.vectorstores import Pinecone
from transformers import pipeline
class CustomLLM(LLM):
    """Minimal LangChain wrapper around a local Dolly text-generation pipeline."""

    model_name = "databricks/dolly-v2-3b"
    num_output = 128
    # Class-level attribute so the model is loaded once, at import time.
    pipeline = pipeline(
        model=model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        return_full_text=True,
        do_sample=False,
        max_new_tokens=128,
    )

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        prompt_length = len(prompt)
        response = self.pipeline(prompt, max_new_tokens=self.num_output)[0]["generated_text"]
        # return_full_text=True echoes the prompt, so slice it off and
        # return only the newly generated tokens.
        return response[prompt_length:]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        return "custom"
def get_llm(model_name, pinecone_index, llm_model):
    """Build the QA chain and the Pinecone document search it retrieves from."""
    # Embedding model for queries, e.g. "bert-large-uncased" or "t5-large".
    model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
    embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

    pinecone.init(
        api_key=PINECONE_API_KEY,
        environment="us-east-1-aws",
    )
    index = pinecone.Index(pinecone_index)
    # print(index.describe_index_stats())
    # "text" is the metadata field that stores the document body.
    docsearch = Pinecone(index, embeddings.embed_query, "text")

    instruct_pipeline = pipeline(
        model=llm_model,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        return_full_text=True,
        do_sample=False,
        max_new_tokens=128,
    )
    llm = HuggingFacePipeline(pipeline=instruct_pipeline)

    template = """Context: {context}
Question: {question}
Answer: Let's go step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question", "context"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain, docsearch
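# How the returned pair composes (a sketch mirroring the __main__ block below):
# retrieve the top-k passages from Pinecone, concatenate them as context, then
# run the chain over the question plus that context.
#
#   llm_chain, docsearch = get_llm("bert-large-uncased", "bert-large-uncased",
#                                  "databricks/dolly-v2-3b")
#   docs = docsearch.similarity_search("What is the capital of Bihar?", k=3)
#   answer = llm_chain.predict(question="What is the capital of Bihar?",
#                              context="".join(d.page_content for d in docs))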
if __name__ == "__main__":
    model_name = "bert-large-uncased"
    pinecone_index = "bert-large-uncased"
    llm_model = "databricks/dolly-v2-3b"
    llm_chain, docsearch = get_llm(model_name, pinecone_index, llm_model)
    print(":" * 40)
    questions = [
        "What is the name of the first Hindi newspaper published in Bihar?",
        "What is the capital of Bihar?",
        "Brief about the Gupta Dynasty",
    ]
    for question in questions:
        # Retrieve the three most similar passages and join them into one context string.
        context_docs = docsearch.similarity_search(question, k=3)
        content = "".join(doc.page_content for doc in context_docs)
        print(question)
        response = llm_chain.predict(question=question, context=content)
        print(f"{response}\n{'--' * 25}")