"""Application bootstrap: loads environment configuration, builds the shared
embeddings model, initializes the sources database, and constructs the
HuggingFace LLM endpoint used by the rest of the app.

Module-level exports (read by other modules):
    embeddings          -- HuggingFaceEmbeddings instance (CPU, un-normalized)
    db                  -- DataBaseHandler with all tables created
    SEVEN_B_LLM_MODEL   -- HuggingFaceEndpoint for the 7B model
"""

import os

from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint

from rag_app.database.db_handler import DataBaseHandler

# Populate os.environ from a local .env file before any getenv calls.
load_dotenv()

SQLITE_FILE_NAME = os.getenv('SOURCES_CACHE')
VECTOR_DATABASE_LOCATION = os.getenv('VECTOR_DATABASE_LOCATION')
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
# Repo id string for the 7B endpoint; kept distinct from the endpoint object
# below so the name SEVEN_B_LLM_MODEL is not rebound from str to endpoint.
SEVEN_B_LLM_REPO_ID = os.getenv("SEVEN_B_LLM_MODEL")
BERT_MODEL = os.getenv("BERT_MODEL")
FAISS_INDEX_PATH = os.getenv("FAISS_INDEX_PATH")
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

# Embeddings run locally on CPU; normalization left off to match the
# existing FAISS index (vectors were stored un-normalized).
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Sources cache database; create_all_tables is idempotent table setup.
db = DataBaseHandler()
db.create_all_tables()

# This model is used for tasks that a larger model may not need to do;
# as of currently we have been getting MODEL OVERLOADED errors with
# huggingface on the larger models.
SEVEN_B_LLM_MODEL = HuggingFaceEndpoint(
    repo_id=SEVEN_B_LLM_REPO_ID,
    temperature=0.1,         # Controls randomness in response generation (lower value means less random)
    max_new_tokens=1024,     # Maximum number of new tokens to generate in responses
    repetition_penalty=1.2,  # Penalty for repeating the same words (higher value increases penalty)
    return_full_text=False,  # If False, only the newly generated text is returned; if True, the input is included as well
)