Spaces:

LAWGPT
/

attorneygpt

Running

App Files Files Community

LAWGPT commited on Mar 3

Commit

fd762a6

•

1 Parent(s): c0b2749

upload 10 files

Browse files

Files changed (11) hide show

.gitattributes +2 -0
Ingest.py +18 -0
README.md +44 -12
app.py +121 -0
attorney.svg +2 -0
data/ipc_law.pdf +3 -0
ipc_vector_db/index.faiss +3 -0
ipc_vector_db/index.pkl +3 -0
logo.png +0 -0
requirements.txt +9 -0
user.svg +5 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/ipc_law.pdf filter=lfs diff=lfs merge=lfs -text
+ipc_vector_db/index.faiss filter=lfs diff=lfs merge=lfs -text

Ingest.py ADDED Viewed

	@@ -0,0 +1,18 @@

+from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import FAISS
+# Build the vector index used by app.py: load the PDFs, split them into chunks, embed the chunks, and save the index.
+# Load the PDF files found in the `data` directory, parsing them with PyPDFLoader.
+loader = DirectoryLoader('data', glob="./*.pdf", loader_cls=PyPDFLoader)
+documents = loader.load()
+# Split the loaded documents into overlapping chunks (chunk_size=1024, chunk_overlap=200).
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
+texts = text_splitter.split_documents(documents)
+# Embedding model pinned to a fixed revision; app.py loads the saved index with the same model name and revision.
+embedings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1",model_kwargs={"trust_remote_code":True,"revision":"289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
+# Create vector embeddings for the chunks and store them in a FAISS index.
+faiss_db = FAISS.from_documents(texts, embedings)
+# Save the vector index to the local `ipc_vector_db` directory.
+faiss_db.save_local("ipc_vector_db")

README.md CHANGED Viewed

@@ -1,12 +1,44 @@
----
-title: Attorneygpt
-emoji: ⚡
-colorFrom: blue
-colorTo: yellow
-sdk: streamlit
-sdk_version: 1.31.1
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+<h1 align="center">LawGPT - RAG based Generative AI Attorney Chatbot</h1>
+<h3 align="center">Know Your Rights! Better Citizen, Better Nation!</h3>
+<p align="center">
+<img src="https://github.com/harshitv804/LawGPT/assets/100853494/ecff5d3c-f105-4ba2-a93a-500282f0bf00" width="700"/>
+</p>
+## About The Project
+LawGPT is a RAG based generative AI attorney chatbot that is trained using Indian Penal Code data. This project was developed using Streamlit, LangChain, and the TogetherAI API for the LLM. Ask the attorney any question and it will give you the right justice as per the IPC. Are you a noob in knowing your rights? Then this is for you!
+<br>
+<div align="center">
+  <br>
+  <video src="https://github.com/harshitv804/LawGPT/assets/100853494/b6711fd6-87df-4a37-ba24-317c50dc6f8f" width="400" />
+  <br>
+</div>
+### Check out the live demo on Hugging Face <a href="https://huggingface.co/spaces/harshitv804/LawGPT"><img src="https://static.vecteezy.com/system/resources/previews/009/384/880/non_2x/click-here-button-clipart-design-illustration-free-png.png" width="120" height="auto"></a>
+## Getting Started
+#### 1. Clone the repository:
+   - ```
+     git clone https://github.com/harshitv804/LawGPT.git
+     ```
+#### 2. Install necessary packages:
+   - ```
+     pip install -r requirements.txt
+     ```
+#### 3. Run the `Ingest.py` file, preferably on Kaggle or Colab for faster embedding processing, then download the `ipc_vector_db` folder from the output and save it locally.
+#### 4. Sign up with Together AI today and get $25 worth of free credit! 🎉 Whether you choose to use it for a short-term project or opt for a long-term commitment, Together AI offers cost-effective solutions compared to the OpenAI API. 🚀 You also have the flexibility to explore other Language Models (LLMs) or APIs if you prefer. For a comprehensive list of options, check out this link: [python.langchain.com/docs/integrations/llms](https://python.langchain.com/docs/integrations/llms) . Once signed up, seamlessly integrate Together AI into your Python environment by setting the API Key as an environment variable. 💻✨
+   - ```
+      os.environ["TOGETHER_API_KEY"] = "YOUR_TOGETHER_API_KEY"
+     ```
+   - If you are going to host it in streamlit, huggingface or other...
+      - Save it in the secrets variable provided by the hosting service, with the name `TOGETHER_API_KEY` and the value `YOUR_TOGETHER_API_KEY`.
+#### 5. To run the `app.py` file, open a terminal and type `streamlit run FULL_FILE_PATH_OF_APP.PY`.
+## Contact
+If you have any questions or feedback, please reach out to [harshitvenkatesan88@gmail.com]

app.py ADDED Viewed

	@@ -0,0 +1,121 @@

+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.prompts import PromptTemplate
+from langchain_together import Together
+import os
+from langchain.memory import ConversationBufferWindowMemory
+from langchain.chains import ConversationalRetrievalChain
+import streamlit as st
+import time
+st.set_page_config(page_title="AttroneyGPT")
+col1, col2, col3 = st.columns([1,8,1])
+with col2:
+    st.image("logo.png")
+# Inject a raw <style> block (unsafe_allow_html=True) that restyles the input border and
+# buttons and hides several built-in Streamlit UI elements (status-widget button, main menu,
+# deploy button, footer, decoration bar, image "View fullscreen" button).
+# NOTE(review): `margin-top: 0 !important;` below is not inside any selector. Browsers will
+# likely parse it as part of the following selector and drop the
+# `div.stButton > button:first-child` rule — confirm and move it into the intended rule.
+st.markdown(
+    """
+    <style>
+    div[data-baseweb="input"] input {
+            border-color: #000000;
+        }
+    margin-top: 0 !important;
+div.stButton > button:first-child {
+    background-color: #808080;
+    color:white;
+}
+div.stButton > button:active {
+    background-color: #808080;
+    color : white;
+}
+   div[data-testid="stStatusWidget"] div button {
+        display: none;
+        }
+    .reportview-container {
+            margin-top: -2em;
+        }
+        #MainMenu {visibility: hidden;}
+        .stDeployButton {display:none;}
+        footer {visibility: hidden;}
+        #stDecoration {display:none;}
+    button[title="View fullscreen"]{
+    visibility: hidden;}
+        </style>
+""",
+    unsafe_allow_html=True,
+)
+def reset_conversation():
+  st.session_state.messages = []
+  st.session_state.memory.clear()
+# Create the transcript list and the conversation memory only when the session does not
+# already hold them, so existing values are kept.
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "memory" not in st.session_state:
+    # Windowed buffer memory (k=2) exposed to the chain under the "chat_history" key.
+    st.session_state.memory = ConversationBufferWindowMemory(k=2, memory_key="chat_history",return_messages=True)
+embeddings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1",model_kwargs={"trust_remote_code":True,"revision":"289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
+db = FAISS.load_local("ipc_vector_db", embeddings)
+db_retriever = db.as_retriever(search_type="similarity",search_kwargs={"k": 4})
+# Prompt for the answer-generation step. It is filled with three variables: the retrieved
+# context, the chat history, and the user's question.
+# NOTE(review): the template opens with `<s>[INST]` but ends with `</s>[INST]` rather than a
+# closing `[/INST]`; this does not look like the Mistral instruct format — confirm against
+# the model's documented prompt format before changing it.
+prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the human's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the human's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
+CONTEXT: {context}
+CHAT HISTORY: {chat_history}
+QUESTION: {question}
+ANSWER:
+</s>[INST]
+"""
+prompt = PromptTemplate(template=prompt_template,
+                        input_variables=['context', 'question', 'chat_history'])
+# You can also use other LLMs options from https://python.langchain.com/docs/integrations/llms. Here I have used TogetherAI API
+TOGETHER_AI_API= os.environ['TOGETHER_AI']="2a7c5dcdbb1049a39117ac0865c4d04008d49db31aa85a3258603817af16dbd0"
+llm = Together(
+    model="mistralai/Mistral-7B-Instruct-v0.2",
+    temperature=0.5,
+    max_tokens=1024,
+    together_api_key=f"{TOGETHER_AI_API}"
+)
+# Build the conversational retrieval chain from the LLM, the per-session memory, the FAISS
+# retriever, and the custom prompt (passed to the combine-documents step).
+qa = ConversationalRetrievalChain.from_llm(
+    llm=llm,
+    memory=st.session_state.memory,
+    retriever=db_retriever,
+    combine_docs_chain_kwargs={'prompt': prompt}
+)
+# Replay the stored transcript so earlier turns are drawn again on this run.
+for message in st.session_state.messages:
+    role = message.get("role")
+    content = message.get("content")
+    # Messages with role "human" get the user avatar; every other role gets the attorney avatar.
+    with st.chat_message(role, avatar="user.svg" if role == "human" else "attorney.svg"):
+        st.write(content)
+input_prompt = st.chat_input("message LAWGpt.....")
+if input_prompt:
+    with st.chat_message("human",avatar="user.svg"):
+        st.write(input_prompt)
+    st.session_state.messages.append({"role":"human","content":input_prompt})
+    full_response = " "
+    with st.chat_message("bot",avatar="attorney.svg"):
+        with st.spinner("Thinking..."):
+            result = qa.invoke(input=input_prompt)
+            message_placeholder = st.empty()
+            full_response = "⚠️ **_Note: This offers basic legal advice and is not a complete substitute for consulting a human attorney_** \n\n\n"
+        for chunk in result["answer"]:
+            full_response+=chunk
+            time.sleep(0.02)
+            message_placeholder.markdown(full_response+" ▌")
+        st.button('Reset All Chat 🗑️', on_click=reset_conversation)
+    st.session_state.messages.append({"role": "ai", "content": result["answer"], "avatar": "attorney.svg"})

attorney.svg ADDED Viewed

data/ipc_law.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e67161633a056f77848221ab30c49b26199c66cc844ee559ac47d2ca5dea9256
+size 20102169

ipc_vector_db/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65e031779b9bc95bbd86fac051363b8ece9e2c25fa46aaed8780b7e528553518
+size 24066093

ipc_vector_db/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bc978bf1541e161611bd1d547f5b46c62b7b8739142909b1aaac4cd7879f703
+size 7836258

logo.png ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+langchain
+pypdf
+transformers
+sentence-transformers
+accelerate
+faiss-cpu
+streamlit
+langchain-fireworks
+langchain-community
+langchain-together
+einops

user.svg ADDED Viewed