import os

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# Alternative embedding backend, kept for reference:
# from langchain_community.embeddings import HuggingFaceHubEmbeddings
# embeddings = HuggingFaceHubEmbeddings(model="thuan9889/llama_embedding_model_v1")

# Google Generative AI embeddings used to index the repository code
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GOOGLE_API_KEY"],
    task_type="retrieval_document",
)

# Gemini chat model used to answer questions over the retrieved code chunks
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.2,
    convert_system_message_to_human=True,
)

def get_folder_paths(directory="githubCode"):
    """Collect every subdirectory path under `directory`, skipping .git folders."""
    folder_paths = []
    for root, dirs, files in os.walk(directory):
        if ".git" in dirs:
            # Don't descend into the .git folder
            dirs.remove(".git")
        for dir_name in dirs:
            folder_paths.append(os.path.join(root, dir_name))
    return folder_paths

# Directories to scan: subfolders of the cloned repo plus the local "Code" folder
directory_paths = get_folder_paths()
directory_paths.append("Code")
print("directory_paths: ", directory_paths)

# Concatenate every source file into a single Code.txt, each prefixed with its path
with open("Code.txt", "w", encoding="utf-8") as output:
    for directory_path in directory_paths:
        for filename in os.listdir(directory_path):
            if filename.endswith((".py", ".ipynb", ".js", ".ts")):
                filepath = os.path.join(directory_path, filename)
                with open(filepath, "r", encoding="utf-8") as file:
                    code = file.read()
                    output.write(f"Filepath: {filepath}:\n\n")
                    output.write(code + "\n\n")

# Load the concatenated code file and split it into overlapping chunks
loader = TextLoader("Code.txt", encoding="utf-8")
pages = loader.load_and_split()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
context = "\n\n".join(str(p.page_content) for p in pages)
texts = text_splitter.split_text(context)

# Embed the chunks into Chroma and expose a top-3 retriever
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 3})

# Retrieval-augmented QA chain: Gemini answers using the retrieved code chunks
qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True,
)

def ask(question):
    """Run the question through the RetrievalQA chain and return the answer text."""
    answer = qa_chain.invoke({"query": question})
    print(answer)
    return answer["result"]

# print(ask("Tell me about the instructor_embeddings function."))
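
# The module imports streamlit but defines no UI above. A minimal chat front-end
# over ask() might look like the sketch below; the widget layout, titles, and
# session-state handling are illustrative assumptions, not taken from the original code.
st.title("Chat with your codebase")

if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the conversation so far
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# Accept a new question and answer it with the RetrievalQA chain
if prompt := st.chat_input("Ask about the code"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)
    response = ask(prompt)
    st.session_state.messages.append({"role": "assistant", "content": response})
    with st.chat_message("assistant"):
        st.markdown(response)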