LAWGPT commited on
Commit
fd762a6
1 Parent(s): c0b2749

upload 10 files

Browse files
Files changed (11) hide show
  1. .gitattributes +2 -0
  2. Ingest.py +18 -0
  3. README.md +44 -12
  4. app.py +121 -0
  5. attorney.svg +2 -0
  6. data/ipc_law.pdf +3 -0
  7. ipc_vector_db/index.faiss +3 -0
  8. ipc_vector_db/index.pkl +3 -0
  9. logo.png +0 -0
  10. requirements.txt +9 -0
  11. user.svg +5 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/ipc_law.pdf filter=lfs diff=lfs merge=lfs -text
37
+ ipc_vector_db/index.faiss filter=lfs diff=lfs merge=lfs -text
Ingest.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import PyPDFLoader,DirectoryLoader
2
+ from langchain.embeddings import HuggingFaceEmbeddings
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_community.vectorstores import FAISS
5
+
6
+ loader = DirectoryLoader('data', glob="./*.pdf", loader_cls=PyPDFLoader)
7
+ documents = loader.load()
8
+
9
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
10
+ texts = text_splitter.split_documents(documents)
11
+
12
+ embedings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1",model_kwargs={"trust_remote_code":True,"revision":"289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
13
+
14
+ # Create vector embeddings for the text chunks and store them in a FAISS index
15
+ faiss_db = FAISS.from_documents(texts, embedings)
16
+
17
+ # Save and export the vector embeddings database
18
+ faiss_db.save_local("ipc_vector_db")
README.md CHANGED
@@ -1,12 +1,44 @@
1
- ---
2
- title: Attorneygpt
3
- emoji: ⚡
4
- colorFrom: blue
5
- colorTo: yellow
6
- sdk: streamlit
7
- sdk_version: 1.31.1
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <h1 align="center">LawGPT - RAG based Generative AI Attorney Chatbot</h1>
2
+ <h3 align="center">Know Your Rights! Better Citizen, Better Nation!</h3>
3
+
4
+ <p align="center">
5
+ <img src="https://github.com/harshitv804/LawGPT/assets/100853494/ecff5d3c-f105-4ba2-a93a-500282f0bf00" width="700"/>
6
+ </p>
7
+
8
+ ## About The Project
9
+ LawGPT is a RAG-based generative AI attorney chatbot grounded in Indian Penal Code (IPC) data. The project was developed using Streamlit, LangChain, and the TogetherAI API for the LLM. Ask the attorney any question and it will answer according to the IPC. Are you new to knowing your rights? Then this is for you!
10
+ <br>
11
+
12
+ <div align="center">
13
+ <br>
14
+ <video src="https://github.com/harshitv804/LawGPT/assets/100853494/b6711fd6-87df-4a37-ba24-317c50dc6f8f" width="400" />
15
+ <br>
16
+ </div>
17
+
18
+
19
+
20
+
21
+ ### Check out the live demo on Hugging Face <a href="https://huggingface.co/spaces/harshitv804/LawGPT"><img src="https://static.vecteezy.com/system/resources/previews/009/384/880/non_2x/click-here-button-clipart-design-illustration-free-png.png" width="120" height="auto"></a>
22
+
23
+ ## Getting Started
24
+
25
+ #### 1. Clone the repository:
26
+ - ```
27
+ git clone https://github.com/harshitv804/LawGPT.git
28
+ ```
29
+ #### 2. Install necessary packages:
30
+ - ```
31
+ pip install -r requirements.txt
32
+ ```
33
+ #### 3. Run the `Ingest.py` file, preferably on Kaggle or Colab for faster embedding processing, then download the `ipc_vector_db` folder from the output and save it locally.
34
+ #### 4. Sign up with Together AI today and get $25 worth of free credit! 🎉 Whether you choose to use it for a short-term project or opt for a long-term commitment, Together AI offers cost-effective solutions compared to the OpenAI API. 🚀 You also have the flexibility to explore other Language Models (LLMs) or APIs if you prefer. For a comprehensive list of options, check out this link: [python.langchain.com/docs/integrations/llms](https://python.langchain.com/docs/integrations/llms) . Once signed up, seamlessly integrate Together AI into your Python environment by setting the API Key as an environment variable. 💻✨
35
+ - ```
36
+ os.environ["TOGETHER_API_KEY"] = "YOUR_TOGETHER_API_KEY"
37
+ ```
38
+ - If you are going to host it on Streamlit, Hugging Face, or another platform:
39
+ - Save it in the secrets variable provided by the hosting platform, with the name `TOGETHER_API_KEY` and the value `YOUR_TOGETHER_API_KEY`.
40
+
41
+ #### 5. To run the `app.py` file, open the CMD terminal and type `streamlit run FULL_FILE_PATH_OF_APP.PY`.
42
+
43
+ ## Contact
44
+ If you have any questions or feedback, please reach out to [harshitvenkatesan88@gmail.com](mailto:harshitvenkatesan88@gmail.com).
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.vectorstores import FAISS
2
+ from langchain_community.embeddings import HuggingFaceEmbeddings
3
+ from langchain.prompts import PromptTemplate
4
+ from langchain_together import Together
5
+ import os
6
+ from langchain.memory import ConversationBufferWindowMemory
7
+ from langchain.chains import ConversationalRetrievalChain
8
+ import streamlit as st
9
+ import time
10
+
11
+ st.set_page_config(page_title="AttorneyGPT")  # browser-tab title (spelling corrected)
12
+ col1, col2, col3 = st.columns([1,8,1])
13
+ with col2:
14
+ st.image("logo.png")
15
+
16
+ st.markdown(
17
+ """
18
+ <style>
19
+ div[data-baseweb="input"] input {
20
+ border-color: #000000;
21
+ }
22
+ margin-top: 0 !important;
23
+ div.stButton > button:first-child {
24
+ background-color: #808080;
25
+ color:white;
26
+ }
27
+ div.stButton > button:active {
28
+ background-color: #808080;
29
+ color : white;
30
+ }
31
+
32
+ div[data-testid="stStatusWidget"] div button {
33
+ display: none;
34
+ }
35
+
36
+ .reportview-container {
37
+ margin-top: -2em;
38
+ }
39
+ #MainMenu {visibility: hidden;}
40
+ .stDeployButton {display:none;}
41
+ footer {visibility: hidden;}
42
+ #stDecoration {display:none;}
43
+ button[title="View fullscreen"]{
44
+ visibility: hidden;}
45
+ </style>
46
+ """,
47
+ unsafe_allow_html=True,
48
+ )
49
+
50
+ def reset_conversation():
51
+ st.session_state.messages = []
52
+ st.session_state.memory.clear()
53
+
54
+ if "messages" not in st.session_state:
55
+ st.session_state.messages = []
56
+
57
+ if "memory" not in st.session_state:
58
+ st.session_state.memory = ConversationBufferWindowMemory(k=2, memory_key="chat_history",return_messages=True)
59
+
60
+ embeddings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1",model_kwargs={"trust_remote_code":True,"revision":"289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
61
+ db = FAISS.load_local("ipc_vector_db", embeddings)
62
+ db_retriever = db.as_retriever(search_type="similarity",search_kwargs={"k": 4})
63
+
64
+ prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the human's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the human's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
65
+ CONTEXT: {context}
66
+ CHAT HISTORY: {chat_history}
67
+ QUESTION: {question}
68
+ ANSWER:
69
+ </s>[INST]
70
+ """
71
+
72
+ prompt = PromptTemplate(template=prompt_template,
73
+ input_variables=['context', 'question', 'chat_history'])
74
+
75
+ # Other LLM options are listed at https://python.langchain.com/docs/integrations/llms; this app uses the Together AI API.
76
+ TOGETHER_AI_API= os.environ['TOGETHER_AI']="2a7c5dcdbb1049a39117ac0865c4d04008d49db31aa85a3258603817af16dbd0"
77
+ llm = Together(
78
+ model="mistralai/Mistral-7B-Instruct-v0.2",
79
+ temperature=0.5,
80
+ max_tokens=1024,
81
+ together_api_key=f"{TOGETHER_AI_API}"
82
+ )
83
+
84
+ qa = ConversationalRetrievalChain.from_llm(
85
+ llm=llm,
86
+ memory=st.session_state.memory,
87
+ retriever=db_retriever,
88
+ combine_docs_chain_kwargs={'prompt': prompt}
89
+ )
90
+
91
+ for message in st.session_state.messages:
92
+ role = message.get("role")
93
+ content = message.get("content")
94
+
95
+ with st.chat_message(role, avatar="user.svg" if role == "human" else "attorney.svg"):
96
+ st.write(content)
97
+
98
+ input_prompt = st.chat_input("message LAWGpt.....")
99
+
100
+ if input_prompt:
101
+ with st.chat_message("human",avatar="user.svg"):
102
+ st.write(input_prompt)
103
+
104
+ st.session_state.messages.append({"role":"human","content":input_prompt})
105
+ full_response = " "
106
+ with st.chat_message("bot",avatar="attorney.svg"):
107
+ with st.spinner("Thinking..."):
108
+ result = qa.invoke(input=input_prompt)
109
+
110
+ message_placeholder = st.empty()
111
+
112
+ full_response = "⚠️ **_Note: This offers basic legal advice and is not a complete substitute for consulting a human attorney_** \n\n\n"
113
+ for chunk in result["answer"]:
114
+ full_response+=chunk
115
+ time.sleep(0.02)
116
+
117
+ message_placeholder.markdown(full_response+" ▌")
118
+ st.button('Reset All Chat 🗑️', on_click=reset_conversation)
119
+
120
+ st.session_state.messages.append({"role": "ai", "content": result["answer"], "avatar": "attorney.svg"})
121
+
attorney.svg ADDED
data/ipc_law.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67161633a056f77848221ab30c49b26199c66cc844ee559ac47d2ca5dea9256
3
+ size 20102169
ipc_vector_db/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65e031779b9bc95bbd86fac051363b8ece9e2c25fa46aaed8780b7e528553518
3
+ size 24066093
ipc_vector_db/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bc978bf1541e161611bd1d547f5b46c62b7b8739142909b1aaac4cd7879f703
3
+ size 7836258
logo.png ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ pypdf
3
+ transformers
4
+ sentence-transformers
5
+ accelerate
6
+ faiss-cpu
7
+ streamlit
8
+ langchain-together
9
+ einops
user.svg ADDED