Spaces:
Sleeping
Sleeping
File size: 2,584 Bytes
670064c d445e66 670064c d445e66 18602ef d445e66 670064c 18602ef d445e66 8adec04 670064c 88b5d3f 670064c 18602ef 670064c aaa0257 670064c 8adec04 d445e66 670064c 8adec04 670064c d445e66 8adec04 670064c 8adec04 d445e66 18602ef d445e66 670064c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import os
import streamlit as st
from langchain_community.embeddings import HuggingFaceHubEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
import git
# embeddings = HuggingFaceHubEmbeddings(model="thuan9889/llama_embedding_model_v1")
from chromadb.utils import embedding_functions
# Google Generative AI embedder (document mode) and the Gemini chat model.
# Both read the API key via os.environ['GOOGLE_API_KEY'] and therefore raise
# KeyError at import time if the variable is not set.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=os.environ['GOOGLE_API_KEY'], task_type="retrieval_document")
model = ChatGoogleGenerativeAI(model="gemini-pro",google_api_key=os.environ['GOOGLE_API_KEY'],temperature=0.2,convert_system_message_to_human=True)
def get_folder_paths(directory="githubCode"):
    """Collect the paths of every sub-directory under *directory*.

    Walks the tree rooted at *directory*, pruning any ``.git`` folder so
    repository metadata is never descended into or returned.  The root
    *directory* itself is not included in the result.
    """
    collected = []
    for root, dirs, _files in os.walk(directory):
        # Prune .git in place so os.walk skips the whole subtree.
        if '.git' in dirs:
            dirs.remove('.git')
        collected.extend(os.path.join(root, name) for name in dirs)
    return collected
# Concatenate every source file found under the collected folders (plus the
# top-level "Code" folder) into a single Code.txt, each file preceded by a
# "Filepath:" header so the model can cite where a snippet came from.
directory_paths = get_folder_paths()
directory_paths.append("Code")
print("directory_paths: ", directory_paths)
source_files = [
    os.path.join(folder, fname)
    for folder in directory_paths
    for fname in os.listdir(folder)
    if fname.endswith((".py", ".ipynb", ".js", ".ts"))
]
with open("Code.txt", "w", encoding='utf-8') as output:
    for filepath in source_files:
        with open(filepath, "r", encoding='utf-8') as src:
            output.write(f"Filepath: {filepath}:\n\n")
            output.write(src.read() + "\n\n")
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
# Read the Code.txt dump produced above back in as LangChain documents.
loader = TextLoader("Code.txt", encoding="utf-8")
pages = loader.load_and_split()
# Split data into chunks: re-join all loaded pages into one string, then cut
# it into 4000-char pieces; the 200-char overlap preserves context across
# chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=200)
context = "\n\n".join(str(p.page_content) for p in pages)
texts = text_splitter.split_text(context)
# Embed the chunks into an in-memory Chroma store and expose it as a
# retriever returning the top 3 matches per query.
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k":3})
# RetrievalQA chain: fetches relevant chunks via vector_index and feeds them
# to the Gemini model; return_source_documents=True keeps the retrieved
# chunks in the response alongside the generated answer.
qa_chain = RetrievalQA.from_chain_type(
    model,
    retriever=vector_index,
    return_source_documents=True
)
# Function to generate assistant's response using ask function
def ask(question):
    """Answer *question* via the RetrievalQA chain.

    Prints the full chain response (answer plus source documents) for
    debugging and returns only the generated answer text.
    """
    response = qa_chain({"query": question})
    print(response)
    return response['result']
# print(generate_assistant_response("Tell me about the instructor_embeddings function.")) |