#---
#- Author: Jaelin Lee
#- Date: Mar 16, 2024
#- Description: Calls the HuggingFace API to generate a natural-language response.
#- Credit: The initial code is from Abhishek Dutta.
#          Most of the code is kept as he created it.
#          I only converted it into a class and tweaked the prompt
#          so it can be fed into the `streamlit_app.py` file.
#---

import os
import warnings

from dotenv import load_dotenv
from langchain_community.llms import HuggingFaceHub
from langchain_community.llms import OpenAI
# from langchain.llms import HuggingFaceHub, OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

warnings.filterwarnings("ignore")


class LLLResponseGenerator:

    def __init__(self):
        print("initialized")

    def llm_inference(
        self,
        model_type: str,
        question: str,
        prompt_template: str,
        context: str,
        ai_tone: str,
        questionnaire: str,
        user_text: str,
        openai_model_name: str = "",
        # hf_repo_id: str = "tiiuae/falcon-7b-instruct",
        hf_repo_id: str = "mistralai/Mistral-7B-Instruct-v0.2",
        temperature: float = 0.5,
        max_length: int = 128 * 4,
    ) -> str:
        """Call a HuggingFace/OpenAI model for inference.

        Given a question, a prompt template, and other parameters, this function
        calls the relevant API to fetch LLM inference results.

        Args:
            model_type: The LLM vendor's name. Can be either 'huggingface' or 'openai'.
            question: The question to be asked to the LLM.
            prompt_template: The prompt template itself.
            context: Instructions for the LLM.
            ai_tone: Can be either empathy, encouragement, or suggest medical help.
            questionnaire: Can be either depression, anxiety, or adhd.
            user_text: Response given by the user.
            openai_model_name: Name of the OpenAI model to use when model_type is 'openai'.
            hf_repo_id: The HuggingFace model's repo_id.
            temperature: (Default: 0.5). Range: float (0.0-100.0). The temperature of the
                sampling operation. 1 means regular sampling, 0 means always take the
                highest score, and 100.0 gets closer to uniform probability.
            max_length: Maximum length in tokens of the generated output.

        Returns:
            A Python string which contains the inference result.

        HuggingFace repo_id examples:
            - google/flan-t5-xxl
            - tiiuae/falcon-7b-instruct
        """
        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=[
                "context",
                "ai_tone",
                "questionnaire",
                "question",
                "user_text",
            ],
        )

        if model_type == "openai":
            # https://api.python.langchain.com/en/stable/llms/langchain.llms.openai.OpenAI.html#langchain.llms.openai.OpenAI
            llm = OpenAI(
                model_name=openai_model_name,
                temperature=temperature,
                max_tokens=max_length,
            )
            llm_chain = LLMChain(prompt=prompt, llm=llm)
            return llm_chain.run(
                context=context,
                ai_tone=ai_tone,
                questionnaire=questionnaire,
                question=question,
                user_text=user_text,
            )

        elif model_type == "huggingface":
            # https://python.langchain.com/docs/integrations/llms/huggingface_hub
            llm = HuggingFaceHub(
                repo_id=hf_repo_id,
                model_kwargs={"temperature": temperature, "max_length": max_length},
            )
            llm_chain = LLMChain(prompt=prompt, llm=llm)
            response = llm_chain.run(
                context=context,
                ai_tone=ai_tone,
                questionnaire=questionnaire,
                question=question,
                user_text=user_text,
            )
            print(response)

            # Extract only the text that follows the "Response;" marker from the template.
            response_start_index = response.find("Response;")
            return response[response_start_index + len("Response;"):].strip()

        else:
            print(
                "Please pass a valid model_type: it must be either 'openai' or 'huggingface'."
            )


if __name__ == "__main__":
    # Please ensure you have a .env file available with 'HUGGINGFACEHUB_API_TOKEN' and 'OPENAI_API_KEY' values.
    # Load variables from the local .env file (requires the python-dotenv package).
    load_dotenv()
    HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')

    context = "You are a mental health supporting non-medical assistant. DO NOT PROVIDE any medical advice with conviction."

    ai_tone = "EMPATHY"
    questionnaire = "ADHD"
    question = (
        "How often do you find yourself having trouble focusing on tasks or activities?"
    )
    user_text = "I feel distracted all the time, and I am never able to finish"

    # The user may have signs of {questionnaire}.
    template = """INSTRUCTIONS: {context}

    Respond to the user with a tone of {ai_tone}.

    Question asked to the user: {question}

    Response by the user: {user_text}

    Provide some advice and ask a relevant question back to the user.

    Response;
    """

    temperature = 0.5
    max_length = 128 * 4

    model = LLLResponseGenerator()

    llm_response = model.llm_inference(
        model_type="huggingface",
        question=question,
        prompt_template=template,
        context=context,
        ai_tone=ai_tone,
        questionnaire=questionnaire,
        user_text=user_text,
        temperature=temperature,
        max_length=max_length,
    )
    print(llm_response)
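
    # A minimal sketch (not part of the original script) of the equivalent OpenAI call:
    # it reuses the same prompt and simply switches model_type. The model name
    # "gpt-3.5-turbo-instruct" is an assumed placeholder; substitute any completion
    # model available to your OPENAI_API_KEY. Uncomment to try it.
    # llm_response = model.llm_inference(
    #     model_type="openai",
    #     question=question,
    #     prompt_template=template,
    #     context=context,
    #     ai_tone=ai_tone,
    #     questionnaire=questionnaire,
    #     user_text=user_text,
    #     openai_model_name="gpt-3.5-turbo-instruct",
    #     temperature=temperature,
    #     max_length=max_length,
    # )
    # print(llm_response)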