Divyanshh committed
Commit d445e66
1 Parent(s): 8336356

Update util.py

Files changed (1)
  1. util.py +18 -63
util.py CHANGED
@@ -1,48 +1,17 @@
 import os
 import streamlit as st
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from sentence_transformers import SentenceTransformer
+from langchain_community.embeddings import HuggingFaceHubEmbeddings
 from langchain_community.vectorstores import Chroma
-# from langchain.llms.huggingface_pipeline import HuggingFacePipeline
-# from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
-from langchain.chains.question_answering import load_qa_chain
-from langchain.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
 
-# import os
-import google.generativeai as genai
+import git
 
-import git # pip install gitpython
+from chromadb.utils import embedding_functions
 
-genai.configure(api_key = os.environ['GOOGLE_API_KEY'])
+embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=os.environ['GOOGLE_API_KEY'], task_type="retrieval_query")
 
-# quantization_config = BitsAndBytesConfig(
-#     load_in_4bit=True,
-#     bnb_4bit_compute_dtype=torch.bfloat16
-# )
-
-
-model_kwargs = {'device': 'cpu'}
-embeddings = HuggingFaceEmbeddings(model_name="michaelfeil/ct2fast-e5-small", model_kwargs=model_kwargs)
-# embeddings = SentenceTransformer(model_name_or_path="All-MiniLM-L6-v2")
-
-# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
-# model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", device_map='auto', quantization_config = quantization_config)
-
-# pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens = 1000)
-# llm = HuggingFacePipeline(pipeline=pipe)
-
-
-# def clone_repo(repo):
-#     if os.path.exists("githubCode") and os.path.isdir("githubCode"):
-#         print("File already exists!!")
-#         pass
-#     else:
-#         print("Cloning repo!!")
-#         git.Repo.clone_from(repo, "githubCode")
-
-# git.Repo.clone_from("https://github.com/Divyansh3021/Github_code_assistant.git", "githubCode")
-
-llm = genai.GenerativeModel('gemini-pro')
+model = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=os.environ['GOOGLE_API_KEY'], temperature=0.2, convert_system_message_to_human=True)
 
 def get_folder_paths(directory = "githubCode"):
     folder_paths = []
@@ -71,40 +40,26 @@ with open("Code.txt", "w", encoding='utf-8') as output:
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import TextLoader
 
-# for filename in os.listdir(directory_path):
-#     if filename.endswith(".txt"): # Only process PD files
-#         file_path = os.path.join(directory_path, filename)
 loader = TextLoader("Code.txt", encoding="utf-8")
 pages = loader.load_and_split()
 
 # Split data into chunks
-text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size = 2000,
-    chunk_overlap = 20,
-    add_start_index = True,
-)
-chunks = text_splitter.split_documents(pages)
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
+context = "\n\n".join(str(p.page_content) for p in pages)
+texts = text_splitter.split_text(context)
 
-# Store data into database
-db = Chroma.from_documents(chunks, embedding=embeddings, persist_directory="test_index")
-db.persist()
+vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})
 
-# Load the database
-vectordb = Chroma(persist_directory="test_index", embedding_function = embeddings)
-
-# Load the retriver
-retriever = vectordb.as_retriever(search_kwargs = {"k": 3})
+qa_chain = RetrievalQA.from_chain_type(
+    model,
+    retriever=vector_index,
+    return_source_documents=True
+
+)
 
 # Function to generate assistant's response using ask function
 def generate_assistant_response(question):
-    context = retriever.get_relevant_documents(question)
-    qna_prompt_template = f"""### [INST] Instruction: You will be provided with questions and context. Your task is to find the answers to the questions using the given data.'
-    Context: ```
-    {context}
-    ```
-    ### Question: {question} [/INST]"""
-    print("Context: ", context)
-    answer = llm.generate_content(qna_prompt_template).text
-    return answer
+    answer = qa_chain({"query": question})
+    return answer['result']
 
 # print(generate_assistant_response("Tell me about the instructor_embeddings function."))
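Taken together, the code after this commit implements the following retrieval flow. The block below is a minimal reconstruction, not part of the commit, assuming GOOGLE_API_KEY is set and Code.txt has already been written by the repo-walking code above; the query string is illustrative:

    import os

    from langchain.chains import RetrievalQA
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.document_loaders import TextLoader
    from langchain_community.vectorstores import Chroma
    from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

    # Gemini embeddings for retrieval and gemini-pro for answering, as in the commit.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        google_api_key=os.environ["GOOGLE_API_KEY"],
        task_type="retrieval_query",
    )
    model = ChatGoogleGenerativeAI(
        model="gemini-pro",
        google_api_key=os.environ["GOOGLE_API_KEY"],
        temperature=0.2,
        convert_system_message_to_human=True,
    )

    # Load the concatenated repo dump and split it into overlapping chunks.
    pages = TextLoader("Code.txt", encoding="utf-8").load_and_split()
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
    texts = splitter.split_text("\n\n".join(p.page_content for p in pages))

    # Build an in-memory Chroma index and wrap it as a top-5 retriever.
    retriever = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k": 5})
    qa_chain = RetrievalQA.from_chain_type(model, retriever=retriever, return_source_documents=True)

    result = qa_chain({"query": "Tell me about the instructor_embeddings function."})
    print(result["result"])                 # answer text
    print(len(result["source_documents"]))  # retrieved chunks backing the answer

Because return_source_documents=True is set, the chain returns a dict rather than a bare string, which is why the new generate_assistant_response indexes answer['result'].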
 
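One behavioral change worth flagging: the removed code persisted the Chroma index to test_index and reloaded it on start, while the new code re-embeds Code.txt and rebuilds the index in memory on every run. It also leaves HuggingFaceHubEmbeddings, git, and chromadb.utils.embedding_functions imported but unused. If re-embedding on each start becomes too slow, the old persistence pattern still works with the new embeddings; a sketch continuing from the one above, with the test_index directory name carried over from the removed code:

    # First run: build the index once and write it to disk.
    db = Chroma.from_texts(texts, embeddings, persist_directory="test_index")
    db.persist()

    # Later runs: reopen the persisted index instead of re-embedding Code.txt.
    vectordb = Chroma(persist_directory="test_index", embedding_function=embeddings)
    retriever = vectordb.as_retriever(search_kwargs={"k": 5})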