Spaces:
Sleeping
Sleeping
Rohil Bansal
committed on
Commit
·
96dad0a
1
Parent(s):
d85a4da
Initial Commit. LegalAlly.
Browse files- .gitattributes +5 -0
- Dockerfile +10 -0
- README.md +1 -1
- assets/Black Bold Initial AI Business Logo (1).png +3 -0
- assets/Black Bold Initial AI Business Logo.jpg +3 -0
- docker-compose.yml +19 -0
- internet-law-concept-with-3d-rendering-cute-robot-hold-gavel-judge_493806-6140.jpg +3 -0
- ipc_vector_db/index.faiss +3 -0
- ipc_vector_db/index.pkl +3 -0
- law-judgement-justice-equality-concept.jpg +3 -0
- notebooks/model.ipynb +3 -0
- notebooks/model.py +118 -0
- requirements.txt +0 -0
- src/app/__init__.py +0 -0
- src/app/__pycache__/__init__.cpython-311.pyc +0 -0
- src/app/__pycache__/logger.cpython-311.pyc +0 -0
- src/app/__pycache__/settings.cpython-311.pyc +0 -0
- src/app/logger.py +6 -0
- src/app/main.py +111 -0
- src/app/prompts.py +8 -0
- src/app/settings.py +9 -0
- src/data/Indian_Penal_Code_Book.pdf +3 -0
- src/data/__pycache__/embeddings.cpython-311.pyc +0 -0
- src/data/__pycache__/vector_db.cpython-311.pyc +0 -0
- src/data/_init__.py +0 -0
- src/data/dataloader.py +0 -0
- src/data/embeddings.py +5 -0
- src/data/vector_db.py +28 -0
- src/mlflow/__init__.py +0 -0
- src/mlflow/experiment-tracking.py +9 -0
- src/mlflow/mlflow-setup.py +6 -0
- src/run.py +7 -0
- tests/test.py +11 -0
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.pdf filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.faiss filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.ipynb filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
40 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9-slim

WORKDIR /app

# src/app/main.py imports `app` and `data` as top-level packages; both live
# under src/, so that directory has to be on the interpreter's import path.
ENV PYTHONPATH=/app/src

# Install dependencies first so this layer is reused when only code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Streamlit's default port; docker-compose.yml publishes 8501:8501.
EXPOSE 8501

CMD ["streamlit", "run", "src/app/main.py"]
|
README.md
CHANGED
@@ -5,7 +5,7 @@ colorFrom: green
|
|
5 |
colorTo: pink
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.38.0
|
8 |
-
app_file:
|
9 |
pinned: false
|
10 |
license: unknown
|
11 |
---
|
|
|
5 |
colorTo: pink
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.38.0
|
8 |
+
app_file: src/run.py
|
9 |
pinned: false
|
10 |
license: unknown
|
11 |
---
|
assets/Black Bold Initial AI Business Logo (1).png
ADDED
![]() |
Git LFS Details
|
assets/Black Bold Initial AI Business Logo.jpg
ADDED
![]() |
Git LFS Details
|
docker-compose.yml
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version: '3.8'
|
2 |
+
|
3 |
+
services:
|
4 |
+
web:
|
5 |
+
build: .
|
6 |
+
ports:
|
7 |
+
- "8501:8501"
|
8 |
+
volumes:
|
9 |
+
- .:/app
|
10 |
+
environment:
|
11 |
+
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
12 |
+
mlflow:
|
13 |
+
image: mlflow/mlflow:latest
|
14 |
+
ports:
|
15 |
+
- "5000:5000"
|
16 |
+
environment:
|
17 |
+
- MLFLOW_TRACKING_URI=http://mlflow:5000
|
18 |
+
- AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
|
19 |
+
- AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
|
internet-law-concept-with-3d-rendering-cute-robot-hold-gavel-judge_493806-6140.jpg
ADDED
![]() |
Git LFS Details
|
ipc_vector_db/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daed6e305b10ccabd99cbe76a4e5ae6ab7d6bdd06d784253112d63b54f47cb37
|
3 |
+
size 18247725
|
ipc_vector_db/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a58e22af7ab6a30e45af4fc6d5a4c144423bcab622731a0ded139edf5fc4d4e
|
3 |
+
size 5925124
|
law-judgement-justice-equality-concept.jpg
ADDED
![]() |
Git LFS Details
|
notebooks/model.ipynb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ed0386c9c5ecd3a71e82822e1248435d51f4946c0b8d984d5336838029bad3d
|
3 |
+
size 83863
|
notebooks/model.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.vectorstores import FAISS
|
2 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
3 |
+
from langchain.prompts import PromptTemplate
|
4 |
+
from langchain_together import Together
|
5 |
+
import os
|
6 |
+
from langchain.memory import ConversationBufferWindowMemory
|
7 |
+
from langchain.chains import ConversationalRetrievalChain
|
8 |
+
import streamlit as st
|
9 |
+
import time
|
10 |
+
st.set_page_config(page_title="LawGPT")
|
11 |
+
col1, col2, col3 = st.columns([1,4,1])
|
12 |
+
with col2:
|
13 |
+
|
14 |
+
st.image("assets/Black Bold Initial AI Business Logo.jpg")
|
15 |
+
|
16 |
+
|
17 |
+
st.markdown(
|
18 |
+
"""
|
19 |
+
<style>
|
20 |
+
.stApp, .ea3mdgi6{
|
21 |
+
background-color:#000000;
|
22 |
+
}
|
23 |
+
div.stButton > button:first-child {
|
24 |
+
background-color: #ffd0d0;
|
25 |
+
}
|
26 |
+
div.stButton > button:active {
|
27 |
+
# background-color: #ff6262;
|
28 |
+
}
|
29 |
+
div[data-testid="stStatusWidget"] div button {
|
30 |
+
display: none;
|
31 |
+
}
|
32 |
+
|
33 |
+
.reportview-container {
|
34 |
+
margin-top: -2em;
|
35 |
+
}
|
36 |
+
#MainMenu {visibility: hidden;}
|
37 |
+
.stDeployButton {display:none;}
|
38 |
+
footer {visibility: hidden;}
|
39 |
+
#stDecoration {display:none;}
|
40 |
+
button[title="View fullscreen"]{
|
41 |
+
visibility: hidden;}
|
42 |
+
button:first-child{
|
43 |
+
background-color : transparent !important;
|
44 |
+
}
|
45 |
+
</style>
|
46 |
+
""",
|
47 |
+
unsafe_allow_html=True,
|
48 |
+
)
|
49 |
+
|
50 |
+
def reset_conversation():
    """Empty the chat transcript and clear the memory kept in session state."""
    # Replace the transcript with a new empty list.
    st.session_state["messages"] = []
    # Clear the ConversationBufferWindowMemory stored under "memory".
    st.session_state["memory"].clear()
|
53 |
+
|
54 |
+
if "messages" not in st.session_state:
|
55 |
+
st.session_state["messages"] = []
|
56 |
+
|
57 |
+
if "memory" not in st.session_state:
|
58 |
+
st.session_state["memory"] = ConversationBufferWindowMemory(k=2, memory_key="chat_history",return_messages=True)
|
59 |
+
|
60 |
+
embedings = HuggingFaceEmbeddings(model_name="nomic-ai/nomic-embed-text-v1",model_kwargs={"trust_remote_code":True,"revision":"289f532e14dbbbd5a04753fa58739e9ba766f3c7"})
|
61 |
+
db = FAISS.load_local("./ipc_vector_db", embedings, allow_dangerous_deserialization=True)
|
62 |
+
db_retriever = db.as_retriever(search_type="similarity",search_kwargs={"k": 4})
|
63 |
+
|
64 |
+
prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the user's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the user's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
|
65 |
+
CONTEXT: {context}
|
66 |
+
CHAT HISTORY: {chat_history}
|
67 |
+
QUESTION: {question}
|
68 |
+
ANSWER:
|
69 |
+
</s>[INST]
|
70 |
+
"""
|
71 |
+
|
72 |
+
prompt = PromptTemplate(template=prompt_template,
|
73 |
+
input_variables=['context', 'question', 'chat_history'])
|
74 |
+
|
75 |
+
|
76 |
+
|
77 |
+
|
78 |
+
# The Together API key is read from the environment so that no credential is
# stored in the repository; a missing variable fails fast with KeyError.
# NOTE: the key that used to be hard-coded here has been published and should
# be revoked and rotated.
llm = Together(
    model="mistralai/Mistral-7B-Instruct-v0.2",
    temperature=0.5,
    max_tokens=1024,
    together_api_key=os.environ["TOGETHER_API_KEY"],
)
|
84 |
+
|
85 |
+
# Streamlit re-executes this script on every interaction, so the chain must use
# the memory object persisted in st.session_state. A memory constructed here
# would start empty on each rerun and would never be the object that
# reset_conversation() clears.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=st.session_state["memory"],
    retriever=db_retriever,
    combine_docs_chain_kwargs={'prompt': prompt},
)
|
91 |
+
for message in st.session_state.get("messages", []):
|
92 |
+
with st.chat_message(message.get("role")):
|
93 |
+
st.write(message.get("content"))
|
94 |
+
|
95 |
+
|
96 |
+
input_prompt = st.chat_input("Say something")
|
97 |
+
|
98 |
+
if input_prompt:
|
99 |
+
with st.chat_message("user"):
|
100 |
+
st.write(input_prompt)
|
101 |
+
|
102 |
+
st.session_state.messages.append({"role":"user","content":input_prompt})
|
103 |
+
|
104 |
+
with st.chat_message("assistant"):
|
105 |
+
with st.status("Thinking 💡...",expanded=True):
|
106 |
+
result = qa.invoke(input=input_prompt)
|
107 |
+
|
108 |
+
message_placeholder = st.empty()
|
109 |
+
|
110 |
+
full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
|
111 |
+
for chunk in result["answer"]:
|
112 |
+
full_response+=chunk
|
113 |
+
time.sleep(0.02)
|
114 |
+
|
115 |
+
message_placeholder.markdown(full_response+" ▌")
|
116 |
+
st.button('Reset All Chat 🗑️', on_click=reset_conversation)
|
117 |
+
|
118 |
+
st.session_state.messages.append({"role":"assistant","content":result["answer"]})
|
requirements.txt
ADDED
Binary file (156 Bytes). View file
|
|
src/app/__init__.py
ADDED
File without changes
|
src/app/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (166 Bytes). View file
|
|
src/app/__pycache__/logger.cpython-311.pyc
ADDED
Binary file (520 Bytes). View file
|
|
src/app/__pycache__/settings.cpython-311.pyc
ADDED
Binary file (515 Bytes). View file
|
|
src/app/logger.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
def setup_logger(level=logging.INFO):
    """Configure root logging and return this module's logger.

    Args:
        level: Threshold passed to ``logging.basicConfig``. Defaults to
            ``logging.INFO``, so a call without arguments behaves as before.

    Returns:
        The ``logging.Logger`` named after this module (``__name__``).
    """
    # basicConfig does nothing when the root logger already has handlers, so
    # calling this function repeatedly does not stack duplicate handlers.
    logging.basicConfig(level=level)
    return logging.getLogger(__name__)
|
src/app/main.py
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import OpenAIEmbeddings
|
2 |
+
from langchain.llms import OpenAI
|
3 |
+
import streamlit as st
|
4 |
+
import time
|
5 |
+
import logging
|
6 |
+
import os
|
7 |
+
from langchain.memory import ConversationBufferWindowMemory
|
8 |
+
from langchain.chains import ConversationalRetrievalChain
|
9 |
+
from langchain.prompts import PromptTemplate
|
10 |
+
|
11 |
+
from app.settings import load_env_variables
|
12 |
+
from app.logger import setup_logger
|
13 |
+
from data.vector_db import load_vector_db, save_vector_db
|
14 |
+
from data.embeddings import get_openai_embeddings
|
15 |
+
|
16 |
+
# Load environment variables and setup logging
|
17 |
+
openai_api_key = load_env_variables()
|
18 |
+
setup_logger()
|
19 |
+
|
20 |
+
st.set_page_config(page_title="LawGPT")
|
21 |
+
col1, col2, col3 = st.columns([1, 4, 1])
|
22 |
+
with col2:
|
23 |
+
st.image("assets/Black Bold Initial AI Business Logo.jpg")
|
24 |
+
|
25 |
+
st.markdown("""
|
26 |
+
<style>
|
27 |
+
.stApp, .ea3mdgi6{ background-color:#000000; }
|
28 |
+
div.stButton > button:first-child { background-color: #ffd0d0; }
|
29 |
+
div.stButton > button:active { background-color: #ff6262; }
|
30 |
+
div[data-testid="stStatusWidget"] div button { display: none; }
|
31 |
+
.reportview-container { margin-top: -2em; }
|
32 |
+
#MainMenu {visibility: hidden;}
|
33 |
+
.stDeployButton {display:none;}
|
34 |
+
footer {visibility: hidden;}
|
35 |
+
#stDecoration {display:none;}
|
36 |
+
button[title="View fullscreen"]{ visibility: hidden;}
|
37 |
+
button:first-child{ background-color : transparent !important; }
|
38 |
+
</style>
|
39 |
+
""", unsafe_allow_html=True)
|
40 |
+
|
41 |
+
def reset_conversation():
    """Start over: drop all chat messages and clear the session's memory."""
    session = st.session_state
    # A new empty list replaces the stored transcript.
    session["messages"] = []
    # Clear the ConversationBufferWindowMemory stored under "memory".
    session["memory"].clear()
|
44 |
+
|
45 |
+
if "messages" not in st.session_state:
|
46 |
+
st.session_state["messages"] = []
|
47 |
+
|
48 |
+
if "memory" not in st.session_state:
|
49 |
+
st.session_state["memory"] = ConversationBufferWindowMemory(k=2, memory_key="chat_history", return_messages=True)
|
50 |
+
|
51 |
+
# Use OpenAI embeddings
|
52 |
+
embeddings = get_openai_embeddings(openai_api_key)
|
53 |
+
|
54 |
+
# Placeholder data for creating the vector database
|
55 |
+
data = [
|
56 |
+
"Example legal text 1",
|
57 |
+
"Example legal text 2",
|
58 |
+
"Example legal text 3",
|
59 |
+
# Add more data as needed
|
60 |
+
]
|
61 |
+
|
62 |
+
# Load vector database using FAISS
|
63 |
+
db_path = "./ipc_vector_db/vectordb"
|
64 |
+
vector_db = load_vector_db(db_path, embeddings, data)
|
65 |
+
db_retriever = vector_db.as_retriever(search_type="similarity", search_kwargs={"k": 4})
|
66 |
+
|
67 |
+
prompt_template = """<s>[INST]This is a chat template and As a legal chat bot specializing in Indian Penal Code queries, your primary objective is to provide accurate and concise information based on the user's questions. Do not generate your own questions and answers. You will adhere strictly to the instructions provided, offering relevant context from the knowledge base while avoiding unnecessary details. Your responses will be brief, to the point, and in compliance with the established format. If a question falls outside the given context, you will refrain from utilizing the chat history and instead rely on your own knowledge base to generate an appropriate response. You will prioritize the user's query and refrain from posing additional questions. The aim is to deliver professional, precise, and contextually relevant information pertaining to the Indian Penal Code.
|
68 |
+
CONTEXT: {context}
|
69 |
+
CHAT HISTORY: {chat_history}
|
70 |
+
QUESTION: {question}
|
71 |
+
ANSWER:
|
72 |
+
</s>[INST]
|
73 |
+
"""
|
74 |
+
|
75 |
+
prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question', 'chat_history'])
|
76 |
+
|
77 |
+
# Use OpenAI LLM
|
78 |
+
# text-davinci-003 has been retired by OpenAI; gpt-3.5-turbo-instruct is the
# completions-style replacement and works with the same OpenAI LLM wrapper.
# The key loaded once by load_env_variables() is reused instead of re-reading
# the environment.
llm = OpenAI(
    model_name="gpt-3.5-turbo-instruct",
    temperature=0.5,
    max_tokens=1024,
    openai_api_key=openai_api_key,
)
|
79 |
+
|
80 |
+
# Streamlit re-executes this script on every interaction, so the chain must use
# the memory object persisted in st.session_state. A memory constructed here
# would start empty on each rerun and would never be the object that
# reset_conversation() clears.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    memory=st.session_state["memory"],
    retriever=db_retriever,
    combine_docs_chain_kwargs={'prompt': prompt},
)
|
86 |
+
|
87 |
+
for message in st.session_state.get("messages", []):
|
88 |
+
with st.chat_message(message.get("role")):
|
89 |
+
st.write(message.get("content"))
|
90 |
+
|
91 |
+
input_prompt = st.chat_input("Say something")
|
92 |
+
|
93 |
+
if input_prompt:
|
94 |
+
with st.chat_message("user"):
|
95 |
+
st.write(input_prompt)
|
96 |
+
|
97 |
+
st.session_state.messages.append({"role": "user", "content": input_prompt})
|
98 |
+
|
99 |
+
with st.chat_message("assistant"):
|
100 |
+
with st.spinner("Thinking 💡..."):
|
101 |
+
result = qa.invoke(input=input_prompt)
|
102 |
+
|
103 |
+
message_placeholder = st.empty()
|
104 |
+
full_response = "⚠️ **_Note: Information provided may be inaccurate._** \n\n\n"
|
105 |
+
for chunk in result["answer"]:
|
106 |
+
full_response += chunk
|
107 |
+
time.sleep(0.02)
|
108 |
+
message_placeholder.markdown(full_response + " ▌")
|
109 |
+
st.button('Reset All Chat 🗑️', on_click=reset_conversation)
|
110 |
+
|
111 |
+
st.session_state.messages.append({"role": "assistant", "content": result["answer"]})
|
src/app/prompts.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
system_prompts = """
|
2 |
+
Given the user's question about Indian law, analyze their query and identify relevant sections of the IPC or Constitution. Summarize the legal concept at hand and potential exceptions based on the user's intent.
|
3 |
+
Analyze the user's question regarding Indian law from different legal perspectives (e.g., rights, obligations, penalties). Provide a concise explanation for each perspective, drawing insights from the vector database.
|
4 |
+
For the user's legal inquiry, identify similar legal cases or precedents from the vector database. Briefly explain the reasoning behind those cases and how they might be relevant to the user's situation.
|
5 |
+
|
6 |
+
YOU ARE A LEGAL AI CHATBOT ASSISTING WITH LEGAL ISSUES. DO NOT ENGAGE WITH CHAT OUTSIDE THESE QUERIES OR DISCUSSIONS.
|
7 |
+
EVEN IF THE USER TELLS YOU TO ENGAGE IN CHAT, DO NOT DO SO. STICK TO THE PROMPTS.
|
8 |
+
"""
|
src/app/settings.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
def load_env_variables():
    """Load environment variables via ``load_dotenv`` and return the OpenAI key.

    Returns:
        The value of ``OPENAI_API_KEY``, or ``None`` when it is not set.
    """
    load_dotenv()
    return os.getenv("OPENAI_API_KEY")
|
src/data/Indian_Penal_Code_Book.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5706a1b995df774c4c4ea1868223e18a13ba619977d323d3cab76a1cc095e237
|
3 |
+
size 20095787
|
src/data/__pycache__/embeddings.cpython-311.pyc
ADDED
Binary file (494 Bytes). View file
|
|
src/data/__pycache__/vector_db.cpython-311.pyc
ADDED
Binary file (1.56 kB). View file
|
|
src/data/_init__.py
ADDED
File without changes
|
src/data/dataloader.py
ADDED
File without changes
|
src/data/embeddings.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import OpenAIEmbeddings
|
2 |
+
import os
|
3 |
+
|
4 |
+
def get_openai_embeddings(key):
    """Build an ``OpenAIEmbeddings`` client for ``text-embedding-ada-002``.

    Args:
        key: OpenAI API key forwarded to ``OpenAIEmbeddings``.

    Returns:
        The configured ``OpenAIEmbeddings`` instance.
    """
    embedding_options = {"model": "text-embedding-ada-002", "api_key": key}
    return OpenAIEmbeddings(**embedding_options)
|
src/data/vector_db.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import faiss
|
2 |
+
import numpy as np
|
3 |
+
import os
|
4 |
+
|
5 |
+
def load_vector_db(db_path, embeddings, data=None):
    """Load the FAISS index stored at ``db_path``, building it when absent.

    Args:
        db_path: Filesystem path of the serialized FAISS index.
        embeddings: Object exposing ``embed_documents``; only used when the
            index has to be created.
        data: Texts to embed when nothing exists at ``db_path``.

    Returns:
        The FAISS index read from disk, or the newly created one.

    Raises:
        ValueError: If no index exists and ``data`` is ``None`` or empty.
    """
    # NOTE(review): src/app/main.py calls .as_retriever() on the returned
    # object; a raw faiss index does not appear to provide that method --
    # confirm whether a LangChain vector store was intended here.
    if os.path.exists(db_path):
        return faiss.read_index(db_path)

    # An empty collection cannot seed an index (there is no vector to take the
    # dimension from), so it is rejected together with None.
    if data is None or len(data) == 0:
        raise ValueError("Data must be provided to create the vector database.")
    return create_vector_db(embeddings, data, db_path)
|
16 |
+
|
17 |
+
def save_vector_db(vector_db, db_path):
    """Write the FAISS index ``vector_db`` to ``db_path``.

    Args:
        vector_db: FAISS index to serialize.
        db_path: Destination file path.
    """
    # Make sure the destination folder exists so that saving to a fresh
    # location does not fail only because the directory is missing.
    parent_dir = os.path.dirname(db_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    faiss.write_index(vector_db, db_path)
|
20 |
+
|
21 |
+
def create_vector_db(embeddings, data, db_path):
    """Embed ``data``, store the vectors in a flat L2 FAISS index, and save it.

    Args:
        embeddings: Object whose ``embed_documents`` maps a list of texts to a
            list of equal-length vectors.
        data: Non-empty list of texts to index.
        db_path: File path the index is written to.

    Returns:
        The populated ``faiss.IndexFlatL2``.

    Raises:
        ValueError: If ``data`` is ``None`` or empty.
    """
    if data is None or len(data) == 0:
        raise ValueError("Data must be provided to create the vector database.")

    # FAISS works on float32 vectors; convert explicitly rather than handing it
    # the float64 array NumPy would build from plain Python floats.
    vectors = np.asarray(embeddings.embed_documents(data), dtype="float32")
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    faiss.write_index(index, db_path)
    return index
|
src/mlflow/__init__.py
ADDED
File without changes
|
src/mlflow/experiment-tracking.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import mlflow
|
2 |
+
|
3 |
+
def log_experiment_params(params):
    """Log every entry of ``params`` as an MLflow parameter.

    Args:
        params: Mapping of parameter name to value.
    """
    # Nothing to log: stay a no-op, exactly like looping over an empty mapping.
    if not params:
        return
    # One batched call instead of one tracking request per key.
    mlflow.log_params(params)
|
6 |
+
|
7 |
+
def log_experiment_metrics(metrics):
    """Log every entry of ``metrics`` as an MLflow metric.

    Args:
        metrics: Mapping of metric name to numeric value.
    """
    # Nothing to log: stay a no-op, exactly like looping over an empty mapping.
    if not metrics:
        return
    # One batched call instead of one tracking request per key.
    mlflow.log_metrics(metrics)
|
src/mlflow/mlflow-setup.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import mlflow
|
2 |
+
from mlflow import log_metric, log_param, log_artifact
|
3 |
+
|
4 |
+
def setup_mlflow(tracking_uri="http://mlflow:5000", experiment_name="legalai_experiment"):
    """Point MLflow at a tracking server and select the active experiment.

    Args:
        tracking_uri: Address of the MLflow tracking server. The default is the
            value that was previously hard-coded.
        experiment_name: Experiment to activate. The default is the value that
            was previously hard-coded.
    """
    # NOTE(review): this module sits in a package directory that is itself
    # named `mlflow` (src/mlflow/__init__.py). With src/ on the import path,
    # `import mlflow` may resolve to that local package instead of the MLflow
    # library -- confirm and consider renaming the directory.
    mlflow.set_tracking_uri(tracking_uri)
    mlflow.set_experiment(experiment_name)
|
src/run.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# legalaibot/src/run.py
|
2 |
+
import os
|
3 |
+
import subprocess
|
4 |
+
|
5 |
+
if __name__ == "__main__":
    # Derive every path from this file's location so the launcher works no
    # matter which directory it is started from.
    src_dir = os.path.dirname(os.path.abspath(__file__))
    repo_root = os.path.dirname(src_dir)

    # app/main.py imports `app` and `data` as top-level packages, so src/ must
    # be on the child process's import path.
    child_env = dict(os.environ)
    child_env["PYTHONPATH"] = src_dir + os.pathsep + child_env.get("PYTHONPATH", "")

    # Run from the repository root: main.py opens its assets and vector store
    # through paths relative to the working directory.
    completed = subprocess.run(
        ["streamlit", "run", os.path.join(src_dir, "app", "main.py")],
        cwd=repo_root,
        env=child_env,
    )
    # Propagate Streamlit's exit status instead of always exiting with 0.
    raise SystemExit(completed.returncode)
|
tests/test.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import unittest
|
2 |
+
from notebooks.model import qa
|
3 |
+
|
4 |
+
class TestLawGPT(unittest.TestCase):
    """Smoke test for the ``qa`` chain imported from notebooks.model."""

    def test_basic_query(self):
        """The answer to a Section 302 question should mention that section."""
        answer = qa.invoke(input="What is Section 302 in IPC?")["answer"]
        self.assertIn("Section 302", answer)


if __name__ == "__main__":
    unittest.main()
|