import os
import subprocess
from typing import Any, List, Mapping, Optional

import pinecone
import torch
from dotenv import load_dotenv
from langchain import HuggingFacePipeline, LLMChain, PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms.base import LLM
from langchain.vectorstores import Pinecone
from transformers import pipeline

# Load credentials from a local .env file if present.
load_dotenv()
try:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
except TypeError:
    # os.environ rejects None when the token is not set locally; in that case
    # read the Pinecone key from the CI secret instead.
    PINECONE_API_KEY = subprocess.check_output(
        ["bash", "-c", "echo ${{ secrets.PINECONE_API_KEY }}"]
    ).decode("utf-8").strip()


class CustomLLM(LLM):
    """LangChain wrapper around a local Dolly text-generation pipeline."""

    model_name = "databricks/dolly-v2-3b"
    num_output = 128
    # The pipeline is built once, when the class is defined, so the model is
    # loaded a single time and shared by every call.
    pipeline = pipeline(
        model=model_name,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        return_full_text=True,
        do_sample=False,
        max_new_tokens=128,
    )

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        prompt_length = len(prompt)
        response = self.pipeline(prompt, max_new_tokens=self.num_output)[0]["generated_text"]
        # Only return the newly generated tokens, not the echoed prompt.
        return response[prompt_length:]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self) -> str:
        return "custom"


def get_llm(model_name, pinecone_index, llm_model):
    # Embedding model (e.g. "bert-large-uncased") used for the Pinecone queries.
    model_kwargs = {"device": "cuda" if torch.cuda.is_available() else "cpu"}
    embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

    # Connect to the existing Pinecone index and wrap it as a LangChain vector store.
    pinecone.init(api_key=PINECONE_API_KEY, environment="us-east-1-aws")
    index = pinecone.Index(pinecone_index)
    docsearch = Pinecone(index, embeddings.embed_query, "text")

    # Load the instruction-following model and wrap it for LangChain.
    instruct_pipeline = pipeline(
        model=llm_model,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
        return_full_text=True,
        do_sample=False,
        max_new_tokens=128,
    )
    llm = HuggingFacePipeline(pipeline=instruct_pipeline)

    template = """Context: {context}

Question: {question}

Answer: Let's go step by step."""
    prompt = PromptTemplate(template=template, input_variables=["question", "context"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain, docsearch


if __name__ == "__main__":
    model_name = "bert-large-uncased"       # embedding model
    pinecone_index = "bert-large-uncased"   # Pinecone index name
    llm_model = "databricks/dolly-v2-3b"    # instruction-following LLM
    llm_chain, docsearch = get_llm(model_name, pinecone_index, llm_model)
    print(":" * 40)

    questions = [
        "what is the name of the first Hindi newspaper published in Bihar?",
        "what is the capital of Bihar?",
        "Brief about the Gupta Dynasty",
    ]
    for question in questions:
        # Retrieve the three most similar chunks and concatenate them as the context.
        context_docs = docsearch.similarity_search(question, k=3)
        content = "".join(doc.page_content for doc in context_docs)
        print(question)
        response = llm_chain.predict(question=question, context=content)
        print(f"{response}\n{'--' * 25}")