Spaces:
Sleeping
Sleeping
SalehAhmad
committed on
Commit
•
93aa82a
1
Parent(s):
7ca28e9
Upload 5 files
Browse files- .gitattributes +1 -0
- RAG.py +137 -0
- app.py +27 -0
- requirements.txt +110 -0
- vector_db/.milvus_example.db.lock +0 -0
- vector_db/milvus_example.db +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
vector_db/milvus_example.db filter=lfs diff=lfs merge=lfs -text
|
RAG.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
load_dotenv()
|
6 |
+
import shutil
|
7 |
+
|
8 |
+
from langchain_milvus import Milvus
|
9 |
+
from langchain_ollama import OllamaEmbeddings
|
10 |
+
from langchain_openai import OpenAIEmbeddings
|
11 |
+
from langchain_openai import ChatOpenAI
|
12 |
+
from git import Repo
|
13 |
+
from langchain_community.document_loaders import GitLoader
|
14 |
+
|
15 |
+
class GitHubGPT:
    """RAG chatbot that answers questions about a cloned GitHub codebase.

    Pipeline: clone (or reload) a repository under ./Data/Repos, embed its
    files into a local Milvus-Lite vector store, retrieve the top-k chunks
    for a user prompt, and ask an OpenAI chat model with that context.
    Requires OPENAI_API_KEY in the environment (loaded via dotenv upstream).
    """

    def __init__(self):
        self.OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
        self.embeddings = self.__initialize_embeddings()
        self.vector_db = self.__initialize_vector_db()
        self.llm = self.__initialize_llm()
        self.system_prompt = self.__initialize_system_prompt()

    def __initialize_embeddings(self):
        """Build the OpenAI embedding model used for indexing and retrieval."""
        return OpenAIEmbeddings(
            model="text-embedding-3-small",
            openai_api_key=self.OPENAI_API_KEY,
        )

    def __initialize_vector_db(self):
        """Create (or connect to) the local Milvus-Lite vector store."""
        # exist_ok avoids a crash if the directory already exists
        # (the original exists-check could race with another process).
        os.makedirs("./vector_db", mode=0o777, exist_ok=True)
        return Milvus(
            embedding_function=self.embeddings,
            connection_args={"uri": "./vector_db/milvus_example.db"},
            auto_id=True,
            collection_name="github_gpt",
        )

    def __initialize_llm(self):
        """Configure the chat model used to generate answers."""
        return ChatOpenAI(
            model="gpt-4o",
            temperature=0.25,
            max_tokens=None,
            timeout=None,
            max_retries=3,
        )

    def __initialize_system_prompt(self):
        """Return the system prompt that frames every LLM call."""
        # Typos fixed vs. the original prompt ("genetate", "relavant").
        return '''
        What are you? A well informed, intelligent chatbot which can talk to a given codebase.
        What do you do? You are always given some file content from a codebase and a question/prompt. Your job is to generate a response.
        What should be the tone of your output? It should be friendly, helpful, confident, narrative.
        What outputs can we expect from you? You can be asked to generate documentations, code, or anything else only relevant to the given codebase content.
        '''

    @staticmethod
    def __clean_repo_name(name):
        """Make a repo name filesystem/collection friendly ('-' -> '_')."""
        return name.replace('-', '_')

    @staticmethod
    def __declean_repo_name(name):
        """Inverse of __clean_repo_name ('_' -> '-')."""
        return name.replace('_', '-')

    def __add_repo_data_to_db(self):
        """Load all documents via self.loader and index them in Milvus."""
        data = self.loader.load()
        print(f'Length of Data to Add: {len(data)}')
        print('Adding Data to Milvus Vector DB')
        self.vector_db.add_documents(documents=data)
        print('Done Adding Data to Milvus Vector DB')

    def add_repo(self, repo_url, branch="master"):
        """Clone *repo_url*, wiping any previously cloned repo, and index it.

        Args:
            repo_url: HTTPS/SSH URL of the repository to clone.
            branch: Branch to check out. Defaults to "master" for backward
                compatibility; pass "main" (or any branch) for repos that
                use a different default branch.
        """
        repo_name = repo_url.split('/')[-1]
        repo_save_path = "./Data/Repos"
        # Start from a clean slate so stale files are never indexed.
        if os.path.exists(repo_save_path):
            shutil.rmtree(repo_save_path)
        os.makedirs(repo_save_path)
        repo_save_path = repo_save_path + "/" + self.__clean_repo_name(repo_name)

        print(f'Cloning the repo from: {repo_url}')
        repo = Repo.clone_from(
            repo_url,
            to_path=repo_save_path,
            branch=branch
        )
        print(f'Repo Cloned to: {repo_save_path}')
        self.repo_save_path = repo_save_path
        self.branch = repo.head.reference
        self.loader = GitLoader(repo_path=repo_save_path, branch=self.branch)
        self.__add_repo_data_to_db()

    def load_repo(self, branch="master"):
        """Index the first repo already present under ./Data/Repos.

        Args:
            branch: Branch to read documents from; defaults to "master"
                for backward compatibility.
        """
        repo_save_path = "./Data/Repos"
        # NOTE(review): assumes exactly one repo has been cloned; the first
        # directory listing entry is taken as the repo.
        repo_name = os.listdir(repo_save_path)[0]
        self.repo_save_path = repo_save_path + "/" + repo_name
        self.branch = branch
        print(f'Loading repo: {repo_name}')
        print(f'Branch: {self.branch}')
        print(f'Repo path: {self.repo_save_path}')
        self.loader = GitLoader(repo_path=self.repo_save_path, branch=self.branch)
        self.__add_repo_data_to_db()

    def __retrieve_documents(self, prompt, k=3):
        """Return the top-*k* most similar documents to *prompt*."""
        return self.vector_db.similarity_search(
            prompt,
            k=k
        )

    @staticmethod
    def __concatenate_documents(documents):
        """Join retrieved documents into one labeled context string."""
        print(f'Length of docs to concatenate: {len(documents)}')
        All_content = ''
        for idx, doc in enumerate(documents):
            print(f"Retrieved Document: {idx} --- [{doc.metadata}]")
            All_content += "Chunk:" + str(idx) + ":\n" + doc.page_content + "\n\n"
        print("\n\n")
        return All_content

    def query(self, prompt):
        """Answer *prompt* using retrieved codebase context; returns text."""
        retrieved_documents = self.__retrieve_documents(prompt)
        context = self.__concatenate_documents(retrieved_documents)

        messages = [
            (
                "system",
                f"{self.system_prompt}",
            ),
            (
                "human",
                f"Context from codebase:{context}\nUser query prompt:{prompt}\nResponse:\n",
            )
        ]

        response = self.llm.invoke(messages)
        return response.content
app.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
from dotenv import load_dotenv
import os

# Assuming the GitHubGPT class is in the same directory
from RAG import GitHubGPT

# Load environment variables
load_dotenv()

# Cache the chatbot in session state: Streamlit re-executes this whole
# script on every widget interaction, so a bare `GitHubGPT()` would
# rebuild the embeddings client and reopen the Milvus DB on each rerun.
if "gpt_bot" not in st.session_state:
    st.session_state.gpt_bot = GitHubGPT()
gpt_bot = st.session_state.gpt_bot

# Streamlit UI
st.title("GitHubGPT Chatbot")
st.write("Interact with your codebase through this RAG-based chatbot!")

# User input
user_input = st.text_input("Ask a question about the codebase:")

if st.button("Get Response"):
    if user_input:
        # Get response from the chatbot
        response = gpt_bot.query(user_input)
        st.write("Response:", response)
    else:
        st.write("Please enter a question.")
|
requirements.txt
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiohappyeyeballs==2.4.0
|
2 |
+
aiohttp==3.10.5
|
3 |
+
aiosignal==1.3.1
|
4 |
+
altair==5.4.1
|
5 |
+
annotated-types==0.7.0
|
6 |
+
anyio==4.4.0
|
7 |
+
asttokens==2.4.1
|
8 |
+
attrs==24.2.0
|
9 |
+
blinker==1.8.2
|
10 |
+
cachetools==5.5.0
|
11 |
+
certifi==2024.7.4
|
12 |
+
charset-normalizer==3.3.2
|
13 |
+
click==8.1.7
|
14 |
+
comm==0.2.2
|
15 |
+
dataclasses-json==0.6.7
|
16 |
+
debugpy==1.8.5
|
17 |
+
decorator==5.1.1
|
18 |
+
distro==1.9.0
|
19 |
+
environs==9.5.0
|
20 |
+
executing==2.0.1
|
21 |
+
frozenlist==1.4.1
|
22 |
+
gitdb==4.0.11
|
23 |
+
GitPython==3.1.43
|
24 |
+
greenlet==3.0.3
|
25 |
+
grpcio==1.63.0
|
26 |
+
h11==0.14.0
|
27 |
+
httpcore==1.0.5
|
28 |
+
httpx==0.27.2
|
29 |
+
idna==3.8
|
30 |
+
ipykernel==6.29.5
|
31 |
+
ipython==8.26.0
|
32 |
+
jedi==0.19.1
|
33 |
+
Jinja2==3.1.4
|
34 |
+
jiter==0.5.0
|
35 |
+
jsonpatch==1.33
|
36 |
+
jsonpointer==3.0.0
|
37 |
+
jsonschema==4.23.0
|
38 |
+
jsonschema-specifications==2023.12.1
|
39 |
+
jupyter_client==8.6.2
|
40 |
+
jupyter_core==5.7.2
|
41 |
+
langchain==0.2.15
|
42 |
+
langchain-community==0.2.13
|
43 |
+
langchain-core==0.2.35
|
44 |
+
langchain-milvus==0.1.4
|
45 |
+
langchain-ollama==0.1.1
|
46 |
+
langchain-openai==0.1.23
|
47 |
+
langchain-text-splitters==0.2.2
|
48 |
+
langsmith==0.1.105
|
49 |
+
markdown-it-py==3.0.0
|
50 |
+
MarkupSafe==2.1.5
|
51 |
+
marshmallow==3.22.0
|
52 |
+
matplotlib-inline==0.1.7
|
53 |
+
mdurl==0.1.2
|
54 |
+
milvus-lite==2.4.9
|
55 |
+
multidict==6.0.5
|
56 |
+
mypy-extensions==1.0.0
|
57 |
+
narwhals==1.5.5
|
58 |
+
nest-asyncio==1.6.0
|
59 |
+
numpy==1.26.4
|
60 |
+
ollama==0.3.1
|
61 |
+
openai==1.42.0
|
62 |
+
orjson==3.10.7
|
63 |
+
packaging==24.1
|
64 |
+
pandas==2.2.2
|
65 |
+
parso==0.8.4
|
66 |
+
pexpect==4.9.0
|
67 |
+
pillow==10.4.0
|
68 |
+
platformdirs==4.2.2
|
69 |
+
prompt_toolkit==3.0.47
|
70 |
+
protobuf==5.27.3
|
71 |
+
psutil==6.0.0
|
72 |
+
ptyprocess==0.7.0
|
73 |
+
pure_eval==0.2.3
|
74 |
+
pyarrow==17.0.0
|
75 |
+
pydantic==2.8.2
|
76 |
+
pydantic_core==2.20.1
|
77 |
+
pydeck==0.9.1
|
78 |
+
Pygments==2.18.0
|
79 |
+
pymilvus==2.4.5
|
80 |
+
python-dateutil==2.9.0.post0
|
81 |
+
python-dotenv==1.0.1
|
82 |
+
pytz==2024.1
|
83 |
+
PyYAML==6.0.2
|
84 |
+
pyzmq==26.2.0
|
85 |
+
referencing==0.35.1
|
86 |
+
regex==2024.7.24
|
87 |
+
requests==2.32.3
|
88 |
+
rich==13.8.0
|
89 |
+
rpds-py==0.20.0
|
90 |
+
scipy==1.14.1
|
91 |
+
six==1.16.0
|
92 |
+
smmap==5.0.1
|
93 |
+
sniffio==1.3.1
|
94 |
+
SQLAlchemy==2.0.32
|
95 |
+
stack-data==0.6.3
|
96 |
+
streamlit==1.38.0
|
97 |
+
tenacity==8.5.0
|
98 |
+
tiktoken==0.7.0
|
99 |
+
toml==0.10.2
|
100 |
+
tornado==6.4.1
|
101 |
+
tqdm==4.66.5
|
102 |
+
traitlets==5.14.3
|
103 |
+
typing-inspect==0.9.0
|
104 |
+
typing_extensions==4.12.2
|
105 |
+
tzdata==2024.1
|
106 |
+
ujson==5.10.0
|
107 |
+
urllib3==2.2.2
|
108 |
+
watchdog==4.0.2
|
109 |
+
wcwidth==0.2.13
|
110 |
+
yarl==1.9.4
|
vector_db/.milvus_example.db.lock
ADDED
File without changes
|
vector_db/milvus_example.db
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee45ed323d10fe46a53948bc0376bb78f33801725a318856b70196bee23fb3fc
|
3 |
+
size 19869696
|