AnkitPatil committed on
Commit
d71358e
·
verified ·
1 Parent(s): 2f7d772

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. DOC From Adv.pdf +3 -0
  3. README.md +4 -4
  4. app.py +153 -0
  5. requirements.txt +9 -0
  6. vector_embeddings.py +56 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ DOC[[:space:]]From[[:space:]]Adv.pdf filter=lfs diff=lfs merge=lfs -text
DOC From Adv.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be83c647a805649bf42a3587664975ad1df26d24517654aba3a21cd6141a7acc
3
+ size 2246483
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
- title: LexifyAI
3
- emoji: 🐨
4
  colorFrom: purple
5
- colorTo: red
6
  sdk: streamlit
7
- sdk_version: 1.37.0
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
+ title: Lexify
3
+ emoji: 🏆
4
  colorFrom: purple
5
+ colorTo: purple
6
  sdk: streamlit
7
+ sdk_version: 1.36.0
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import warnings

warnings.filterwarnings("ignore")

# Chroma needs a newer sqlite3 than some hosts (e.g. HF Spaces) provide:
# swap in the pysqlite3-binary build BEFORE anything imports chromadb,
# which binds to the sqlite3 module at import time. The original did this
# swap after the langchain imports (and imported sys / set the warning
# filter twice); hoisting it here makes the shim reliable.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

import os

import streamlit as st
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
17
+
18
# Load environment variables from .env file (expects HUGGINGFACEHUB_API_TOKEN).
load_dotenv()

# Chroma persistence directory lives next to this file; it is produced
# by vector_embeddings.py.
data_directory = os.path.join(os.path.dirname(__file__), "data")

# Fail fast with a clear message. The original assigned os.getenv(...)
# straight into os.environ, which raises an opaque
# "TypeError: str expected, not NoneType" when the variable is unset.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token is None:
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; add it to your .env file "
        "or the Space secrets."
    )
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Load the vector store from disk with the same embedding model that
# built it (see vector_embeddings.py) — mixing models would break retrieval.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_store = Chroma(embedding_function=embedding_model, persist_directory=data_directory)

# Hosted Llama-3 8B Instruct via the Hugging Face Hub inference API.
hf_hub_llm = HuggingFaceHub(
    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
    model_kwargs={"temperature": 1, "max_new_tokens": 1024},
)
35
+
36
# System prompt for every retrieval-augmented query. The "stuff" chain
# injects the retrieved chunks as {context} and the user message as
# {question}; the trailing "Answer:" is what get_response() slices on.
prompt_template = """
You are an AI chatbot specializing in the domain of law,
focusing on the recent changes made by the Indian government on July 1 2024 from the old THE INDIAN PENAL CODE(IPC) law to the new Bharatiya Nyaya Sanhita(BNS) law, 2023.
Your task is to provide information about this transition.
Here are your specific instructions:

1. **Simple Definitions**: Provide a brief, easy-to-understand definition of the BNS law for the general public.
2. **Codes Comparison**: Share the sections and clauses for both the IPC and the BNS, highlighting the changes.
3. **Punishments and Revisions**: Detail the punishments, penalties, and any improvements or revisions made in the BNS law.
4. **Detailed Comparison**: Conduct a comprehensive comparison between the IPC and the BNS.
5. **Articles and Videos**: Include references to relevant articles and videos discussing the new BNS law from authoritative sources.

Ensure the information is accurate, concise, and accessible to users with varying levels of legal knowledge.

Now, when the user interacts with you by saying 'hi', 'hello', or 'how are you', respond in an interactive manner to engage them effectively in a single line.
Do not call yourself as chatbot, call yourself as Lexify.

User Query:
{context}

Question: {question}
Answer:
"""

custom_prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# "stuff" chain: concatenate all retrieved documents into {context}.
# FIX: the original passed as_retriever(top_k=3); VectorStoreRetriever has
# no such field, so the kwarg was silently ignored and the default k was
# used. search_kwargs={"k": 3} actually limits retrieval to the top 3.
rag_chain = RetrievalQA.from_chain_type(
    llm=hf_hub_llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": custom_prompt})
67
+
68
def get_response(question):
    """Run the RAG chain on *question* and return only the answer text.

    HuggingFaceHub completions typically echo the prompt, which ends in
    "Answer:", so everything after the first "Answer:" marker is the
    model's reply. FIX: the original did
    ``find("Answer:") + len("Answer:")`` unconditionally — when the
    marker is absent, find() returns -1 and the slice silently dropped
    the first 6 characters. Now the full response is returned instead.
    """
    result = rag_chain({"query": question})
    response_text = result["result"]
    marker = "Answer:"
    idx = response_text.find(marker)
    if idx == -1:  # marker missing — return the whole completion untruncated
        return response_text.strip()
    return response_text[idx + len(marker):].strip()
74
+
75
# Streamlit app
# Remove whitespace from the top of the page and sidebar.
# NOTE: doubled braces ({{ }}) survive .format() as literal CSS braces;
# only {padding_top}/{padding_bottom} are substituted.
st.markdown(
    """
    <style>
    .appview-container .main .block-container {{
        padding-top: {padding_top}rem;
        padding-bottom: {padding_bottom}rem;
    }}

    </style>""".format(
        padding_top=1, padding_bottom=1
    ),
    unsafe_allow_html=True,
)


# Page header rendered as raw HTML (red underline under the title).
st.markdown("""
<h3 style='text-align: left; color: black; padding-top: 35px; border-bottom: 3px solid red;'>
LexifyAI: Your Personal Law Assistant
</h3>""", unsafe_allow_html=True)

# Static sidebar copy, re-rendered on every Streamlit rerun.
side_bar_message = """
Hi! 👋 I'm here to help you with your Law Queries. What would you like to know or explore?
\nHere are some areas you might be interested in:
1. **IPC Laws**
2. **BNS Laws**
3. **Comparing Both**
4. **And Many More** 🌞

Feel free to ask me anything about Law and Justice!
"""

with st.sidebar:
    st.title('🤖LexifyAI')
    st.markdown(side_bar_message)
111
+
112
+
113
+
114
+
115
+
116
+
117
# Greeting shown as the assistant's first message (and after "Clear Chat").
initial_message = """
Hi there! I'm your Law and Justice Bot 🤖
Here are some questions you might ask me:\n
⚖️ When BNS Law was made?\n
⚖️ What is IPC?\n
⚖️ On which date BNS was Implemented in the Country?\n
"""

# Store LLM generated responses
# (session_state persists the transcript across Streamlit reruns)
if "messages" not in st.session_state.keys():
    st.session_state.messages = [{"role": "assistant", "content": initial_message}]

# Display or clear chat messages
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

def clear_chat_history():
    # Button callback: reset the transcript to just the greeting.
    st.session_state.messages = [{"role": "assistant", "content": initial_message}]
st.button('Clear Chat', on_click=clear_chat_history)

# User-provided prompt
# (st.chat_input returns the text only on the rerun right after submit;
# the walrus binds `prompt` only in that case)
if prompt := st.chat_input():
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

# Generate a new response if last message is not from assistant.
# This is only true on the rerun where the user just submitted, so
# `prompt` is defined whenever this branch executes.
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        with st.spinner("Hold on, I'm fetching the latest Legal advice for you..."):
            response = get_response(prompt)
            placeholder = st.empty()
            full_response = response # Directly use the response
            placeholder.markdown(full_response)
    message = {"role": "assistant", "content": full_response}
    st.session_state.messages.append(message)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ chromadb==0.5.3
2
+ pysqlite3-binary
3
+ protobuf==3.20.*
4
+ streamlit==1.36.0
5
+ pypdf==4.2.0
6
+ langchain==0.2.5
7
+ langchain-community==0.2.5
8
+ langchain-huggingface==0.0.3
9
+ python-dotenv
vector_embeddings.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Build the Chroma vector store for the Lexify app from a source PDF.

Reads Dataset.pdf, splits it into overlapping chunks, embeds them with
all-MiniLM-L6-v2, and persists the store to the "data" directory that
app.py loads at startup.
"""
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
import os
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceInstructEmbeddings  # NOTE(review): unused — kept for compatibility
from dotenv import load_dotenv
from collections import OrderedDict

# Load environment variables from .env file
load_dotenv()

# Fail fast with a clear message. The original assigned os.getenv(...)
# straight into os.environ, which raises an opaque TypeError when the
# variable is unset.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token is None:
    raise RuntimeError("HUGGINGFACEHUB_API_TOKEN is not set; add it to your .env file.")
os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# Load the PDF
loader = PyPDFLoader("Dataset.pdf")  # Provide your PDF path here
documents = loader.load()

# Split the text into overlapping chunks for retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
texts = text_splitter.split_documents(documents)

# Initialize the embedding model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Smoke-test the embedding model on a single chunk. FIX: the original
# embedded the ENTIRE corpus here and discarded the result, so every
# chunk was embedded twice (add_documents below embeds again).
try:
    if texts:
        embedding_model.embed_documents([texts[0].page_content])
    print("Vector Embeddings created successfully")
except Exception as e:
    print(f"Error creating vector embeddings: {e}")

# Initialize Chroma vector store (persisted to "data", read by app.py).
vector_store = Chroma(embedding_function=embedding_model, persist_directory="data")

# Add documents to the vector store (embeds and persists each chunk).
vector_store.add_documents(documents=texts)

# Validate the setup
try:
    # Test query to validate data retrieval
    test_query = "What are some popular items for winter?"
    results = vector_store.search(query=test_query, search_type='similarity')

    # Deduplicate results by page content while preserving ranking order.
    unique_results = OrderedDict()
    for doc in results:
        if doc.page_content not in unique_results:
            unique_results[doc.page_content] = doc

    # Convert unique results to a list and limit to top 3
    final_results = list(unique_results.values())[:3]
    print(f"Unique query results: {final_results}")
except Exception as e:
    print(f"Error during test query: {e}")