import os

import streamlit as st
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.llms.huggingface_pipeline import HuggingFacePipeline
# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import google.generativeai as genai
import git  # pip install gitpython

genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# quantization_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torch.bfloat16
# )

# Embed on CPU with the default sentence-transformers model.
model_kwargs = {'device': 'cpu'}
embeddings = HuggingFaceEmbeddings(model_kwargs=model_kwargs)

# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", device_map='auto', quantization_config=quantization_config)
# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=1000)
# llm = HuggingFacePipeline(pipeline=pipe)

# def clone_repo(repo):
#     if os.path.exists("githubCode") and os.path.isdir("githubCode"):
#         print("File already exists!!")
#         pass
#     else:
#         print("Cloning repo!!")
#         git.Repo.clone_from(repo, "githubCode")

# git.Repo.clone_from("https://github.com/Divyansh3021/Github_code_assistant.git", "githubCode")

llm = genai.GenerativeModel('gemini-pro')


def get_folder_paths(directory="githubCode"):
    # Start with the root itself so top-level files are indexed as well.
    folder_paths = [directory]
    for root, dirs, files in os.walk(directory):
        if '.git' in dirs:
            # Skip the .git folder entirely
            dirs.remove('.git')
        for dir_name in dirs:
            folder_paths.append(os.path.join(root, dir_name))
    return folder_paths


directory_paths = get_folder_paths()
directory_paths.append("Code")
print("directory_paths: ", directory_paths)

# Concatenate every source file into a single text file for indexing.
with open("Code.txt", "w", encoding='utf-8') as output:
    for directory_path in directory_paths:
        if not os.path.isdir(directory_path):
            continue
        for filename in os.listdir(directory_path):
            if filename.endswith((".py", ".ipynb", ".js", ".ts")):
                filepath = os.path.join(directory_path, filename)
                with open(filepath, "r", encoding='utf-8') as file:
                    code = file.read()
                output.write(f"Filepath: {filepath}:\n\n")
                output.write(code + "\n\n")

# for filename in os.listdir(directory_path):
#     if filename.endswith(".txt"):  # Only process .txt files
#         file_path = os.path.join(directory_path, filename)

loader = TextLoader("Code.txt", encoding="utf-8")
pages = loader.load_and_split()

# Split data into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=20,
    add_start_index=True,
)
chunks = text_splitter.split_documents(pages)

# Store data into the vector database
db = Chroma.from_documents(chunks, embedding=embeddings, persist_directory="test_index")
db.persist()

# Load the database
vectordb = Chroma(persist_directory="test_index", embedding_function=embeddings)

# Load the retriever
retriever = vectordb.as_retriever(search_kwargs={"k": 3})


# Generate the assistant's response: retrieve relevant chunks, then ask Gemini.
def generate_assistant_response(question):
    context = retriever.get_relevant_documents(question)
    qna_prompt_template = f"""### [INST] Instruction: You will be provided with questions and context. Your task is to find the answers to the questions using the given data.
Context: ``` {context} ```
### Question: {question} [/INST]"""
    print("Context: ", context)
    answer = llm.generate_content(qna_prompt_template).text
    return answer

# print(generate_assistant_response("Tell me about the instructor_embeddings function."))
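
# --- Minimal usage sketch (not part of the original flow) ---
# A hedged example of wiring generate_assistant_response into a Streamlit chat
# UI, since streamlit is imported above but never used. Assumes Streamlit >= 1.24
# (st.chat_message / st.chat_input); the title and placeholder strings are
# illustrative only. In a real app the indexing steps above should be cached
# (e.g. behind st.cache_resource) so they do not rerun on every interaction.

st.title("GitHub Code Assistant")

# Keep the chat history across Streamlit reruns.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay earlier turns.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Handle a new question from the user.
if question := st.chat_input("Ask about the codebase"):
    st.session_state.messages.append({"role": "user", "content": question})
    with st.chat_message("user"):
        st.markdown(question)

    answer = generate_assistant_response(question)
    st.session_state.messages.append({"role": "assistant", "content": answer})
    with st.chat_message("assistant"):
        st.markdown(answer)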