Spaces:
Sleeping
Sleeping
saptharishi
commited on
Commit
•
34cb636
1
Parent(s):
ab2b55a
Upload 9 files
Browse files- .gitattributes +1 -0
- Ingest.py +18 -0
- README.md +6 -6
- app.py +121 -0
- attorney.svg +2 -0
- ipc_vector_db/index.faiss +3 -0
- ipc_vector_db/index.pkl +3 -0
- logo.png +0 -0
- requirements.txt +11 -0
- user.svg +5 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
ipc_vector_db/index.faiss filter=lfs diff=lfs merge=lfs -text
|
Ingest.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ingest.py - one-off script that builds the FAISS index consumed by app.py.
#
# Pipeline: load every PDF directly inside ``data/`` -> split the pages into
# overlapping chunks -> embed each chunk -> persist the index to
# ``ipc_vector_db/``.
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
# Imported from langchain_community (not the deprecated ``langchain.embeddings``
# re-export) so that ingestion and app.py use the same class.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

loader = DirectoryLoader('data', glob="./*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200)
texts = text_splitter.split_documents(documents)

# FAISS.from_documents cannot size an index from zero vectors and would fail
# with an opaque IndexError; stop early with an actionable message instead.
if not texts:
    raise SystemExit("No text chunks were produced from the PDFs in 'data/'; nothing to index.")

# The model name and pinned revision must match the ones app.py uses when it
# loads the index, otherwise query vectors and stored vectors are not comparable.
embeddings = HuggingFaceEmbeddings(
    model_name="nomic-ai/nomic-embed-text-v1",
    model_kwargs={
        "trust_remote_code": True,
        "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7",
    },
)

# Create the vector embeddings and store them in a FAISS index.
faiss_db = FAISS.from_documents(texts, embeddings)

# Save the vector database to disk so it can be shipped with the app.
faiss_db.save_local("ipc_vector_db")
|
README.md
CHANGED
@@ -1,12 +1,12 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: LawGPT - RAG based AI Attorney Chatbot
|
3 |
+
emoji: ⚖️
|
4 |
+
colorFrom: red
|
5 |
+
colorTo: pink
|
6 |
sdk: streamlit
|
7 |
+
sdk_version: 1.31.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
11 |
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.vectorstores import FAISS
|
2 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
3 |
+
from langchain.prompts import PromptTemplate
|
4 |
+
from langchain_together import Together
|
5 |
+
import os
|
6 |
+
from langchain.memory import ConversationBufferWindowMemory
|
7 |
+
from langchain.chains import ConversationalRetrievalChain
|
8 |
+
import streamlit as st
|
9 |
+
import time
|
10 |
+
|
11 |
+
# Page chrome: browser-tab title plus the logo centred in the middle of three
# columns (the two outer columns only provide horizontal padding).
st.set_page_config(page_title="AttorneyGPT")
col1, col2, col3 = st.columns([1, 8, 1])
with col2:
    st.image("logo.png")

# Custom CSS injected into the page: restyles inputs and buttons and hides
# parts of Streamlit's default chrome (menu, deploy button, footer, ...).
#
# A bare ``margin-top: 0 !important;`` declaration used to sit between the
# first two rules. Outside of a rule block it is invalid CSS and it was parsed
# as part of the following selector, which silently discarded the
# ``div.stButton > button:first-child`` rule. It has been removed so that rule
# applies again.
st.markdown(
    """
    <style>
    div[data-baseweb="input"] input {
        border-color: #000000;
    }
    div.stButton > button:first-child {
        background-color: #808080;
        color:white;
    }
    div.stButton > button:active {
        background-color: #808080;
        color : white;
    }

    div[data-testid="stStatusWidget"] div button {
        display: none;
    }

    .reportview-container {
        margin-top: -2em;
    }
    #MainMenu {visibility: hidden;}
    .stDeployButton {display:none;}
    footer {visibility: hidden;}
    #stDecoration {display:none;}
    button[title="View fullscreen"]{
        visibility: hidden;}
    </style>
    """,
    unsafe_allow_html=True,
)
|
49 |
+
|
50 |
+
def reset_conversation():
    """Clear the visible chat transcript and the chain's conversation memory.

    Used as the ``on_click`` callback of the reset button.
    """
    state = st.session_state
    # Transcript shown in the UI.
    state.messages = []
    # History that is fed back to the LLM on each question.
    state.memory.clear()
|
53 |
+
|
54 |
+
# Per-session state, created only when the key is not present yet.
# ``messages`` holds the transcript rendered in the UI; ``memory`` is the
# windowed history (last k=2 exchanges) handed to the retrieval chain.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

if "memory" not in st.session_state:
    st.session_state["memory"] = ConversationBufferWindowMemory(
        k=2,
        memory_key="chat_history",
        return_messages=True,
    )
|
59 |
+
|
60 |
+
@st.cache_resource(show_spinner=False)
def _load_vector_store():
    """Load the embedding model and the on-disk FAISS index once per process.

    Streamlit re-executes the script on every interaction; caching keeps the
    model download/initialisation and the index load from being repeated on
    each rerun.

    Returns:
        A ``(embeddings, db)`` tuple: the HuggingFace embedding wrapper and
        the FAISS vector store loaded from ``ipc_vector_db/``.
    """
    model = HuggingFaceEmbeddings(
        model_name="nomic-ai/nomic-embed-text-v1",
        model_kwargs={
            "trust_remote_code": True,
            "revision": "289f532e14dbbbd5a04753fa58739e9ba766f3c7",
        },
    )
    # SECURITY NOTE: allow_dangerous_deserialization=True unpickles
    # ipc_vector_db/index.pkl. That is only acceptable because the index is
    # produced by this project's own Ingest.py and shipped with the app;
    # never point this at an index obtained from an untrusted source.
    store = FAISS.load_local("ipc_vector_db", model, allow_dangerous_deserialization=True)
    return model, store


embeddings, db = _load_vector_store()
# Plain similarity search returning the 4 closest chunks per question.
db_retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
|
63 |
+
|
64 |
+
# Prompt sent to the Mistral instruct model for every question. The retrieved
# chunks, the windowed chat history and the user question are substituted into
# the three placeholders.
#
# Mistral's instruct format wraps the instruction as ``[INST] ... [/INST]``.
# The template previously ended with ``</s>[INST]``, i.e. an end-of-sequence
# token followed by a second, never-closed instruction; it now closes the
# instruction block with ``[/INST]``.
prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Sericultural related Queries!!.
CONTEXT: {context}
CHAT HISTORY: {chat_history}
QUESTION: {question}
ANSWER:
[/INST]
"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=['context', 'question', 'chat_history'],
)
|
74 |
+
|
75 |
+
# The LLM is served through the Together AI API. Any other LangChain LLM
# integration (https://python.langchain.com/docs/integrations/llms) can be
# swapped in here.
#
# SECURITY: the API key is read from the TOGETHER_AI environment variable
# (for example a Space secret). It must never be written into the source:
# a key committed to a repository is exposed to everyone who can read it and
# has to be revoked.
TOGETHER_AI_API = os.environ.get("TOGETHER_AI")
if not TOGETHER_AI_API:
    # Fail with a readable message in the UI instead of an authentication
    # error on the first question.
    st.error("The TOGETHER_AI environment variable is not set; configure it with your Together AI API key.")
    st.stop()

llm = Together(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.5,
    max_tokens=1024,
    together_api_key=TOGETHER_AI_API,
)

# Retrieval-augmented conversation chain: fetches context through the
# retriever, keeps history in the per-session memory and answers using the
# custom prompt.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=st.session_state.memory,
    retriever=db_retriever,
    combine_docs_chain_kwargs={'prompt': prompt},
)
|
90 |
+
|
91 |
+
# Replay the stored transcript so earlier messages stay visible.
for message in st.session_state.messages:
    role = message.get("role")
    content = message.get("content")

    # Only the human side has a custom avatar image. Any other role falls
    # back to Streamlit's default icon for that role (avatar=None); the
    # previous value "bot" was neither a preset name, an emoji nor an image
    # path.
    with st.chat_message(role, avatar="user.svg" if role == "human" else None):
        st.write(content)

input_prompt = st.chat_input("message LAWGpt.....")

if input_prompt:
    with st.chat_message("human", avatar="user.svg"):
        st.write(input_prompt)

    st.session_state.messages.append({"role": "human", "content": input_prompt})

    # Rendered under the same role ("ai") that is stored below, so the reply
    # looks the same live and when it is replayed from the transcript.
    with st.chat_message("ai"):
        with st.spinner("Thinking..."):
            result = qa.invoke(input=input_prompt)

        message_placeholder = st.empty()

        # The disclaimer is shown with the answer but is not stored in the
        # transcript.
        full_response = "⚠️ **_Note: This offers basic legal advice and is not a complete substitute for consulting a human attorney_** \n\n\n"
        # Typewriter effect: reveal the answer one character at a time with a
        # trailing cursor.
        for chunk in result["answer"]:
            full_response += chunk
            time.sleep(0.02)
            message_placeholder.markdown(full_response + " ▌")
        # Final render without the cursor so it does not stay on screen.
        message_placeholder.markdown(full_response)
        st.button('Reset All Chat 🗑️', on_click=reset_conversation)

    st.session_state.messages.append({"role": "ai", "content": result["answer"], "avatar": "bot"})
|
121 |
+
|
attorney.svg
ADDED
ipc_vector_db/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cac55439623c498f6c66b3effc3672a8a06236532be126130c3c14b117a8e92b
|
3 |
+
size 1090605
|
ipc_vector_db/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:185276a728ae14de633abfb4299490fd74d4e8eb78df96527500407234c6f5f7
|
3 |
+
size 321925
|
logo.png
ADDED
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
pypdf
|
3 |
+
transformers
|
4 |
+
sentence-transformers
|
5 |
+
accelerate
|
6 |
+
faiss-cpu
|
7 |
+
streamlit
|
8 |
+
langchain-fireworks
|
9 |
+
einops
|
10 |
+
langchain_together
|
11 |
+
|
user.svg
ADDED