Upload 6 files
- .env +9 -0
- .gitattributes +1 -0
- chunks.txt +0 -0
- constitution_py.py +155 -0
- embeddings.npy +3 -0
- index.faiss +3 -0
- lm.py +15 -0
.env
ADDED
@@ -0,0 +1,9 @@
GROQ_API_KEY_1="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"
GROQ_API_KEY_2="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"


#GROQ_API_KEY = "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"
LANGCHAIN_API_KEY = "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"
LANGCHAIN_PROJECT = "WORK"
LANGCHAIN_TRACING_V2 = "true"
#GOOGLE_API_KEY = "684ff9d7746ed6e9d54268345c5ad41655ffac7d"
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+index.faiss filter=lfs diff=lfs merge=lfs -text
chunks.txt
ADDED
The diff for this file is too large to render. See raw diff.
constitution_py.py
ADDED
@@ -0,0 +1,155 @@
import warnings
warnings.filterwarnings("ignore")

import re
import os
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from lm import get_query_llm, get_answer_llm  # custom LLM wrapper functions (see lm.py)

# Initialize LLMs
q_llm = get_query_llm()
a_llm = get_answer_llm()

# Load the sentence-transformer model once, globally
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
save_dir = "saved_data"

def load_embeddings_and_index(save_dir="saved_data"):
    embedding = np.load(os.path.join(save_dir, "embeddings.npy"))
    index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
    with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
        chunks = [line.strip() for line in f]
    return embedding, index, chunks

# Keywords that mark an explanation-style query
similar_words = [
    "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",
    "demonstrate", "illustrate", "interpret", "expand on", "go over", "walk through", "define",
    "unpack", "decode", "shed light on", "analyze", "discuss", "make clear", "reveal", "disclose",
    "comment on", "talk about", "lay out", "spell out", "express", "delve into", "explore",
    "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight"
]

def is_explanation_query(query):
    # Returns True when NO explanation keyword is present, i.e. the user
    # wants the exact constitutional wording rather than an explanation.
    return not any(word in query.lower() for word in similar_words)

def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    numbers = re.findall(r"\d+", query)

    # Fast path: a query like "article 11" maps directly to a stored chunk
    if "article" in query.lower() and numbers:
        article_number = str(numbers[0])
        for chunk in chunks:
            if chunk.lower().startswith(f"article;{article_number}"):
                return [chunk], is_explanation_query(query)

    # Otherwise fall back to semantic search over the FAISS index
    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding).astype("float32")
    distances, indices = index.search(query_embedding, top_k)
    relevant_chunks = [chunks[i] for i in indices[0]]
    return relevant_chunks, is_explanation_query(query)

# Prompt to refine the query
refine_prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful legal assistant. Your job is to clean user queries by fixing grammar and spelling only. Do not expand or explain them. If a number is mentioned, return it in digit form."),
    ("human", "{query}")
])
refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)

# Define the response schema
class LegalResponse(BaseModel):
    title: str = Field(..., description="The title of the article or answer")
    answer: str = Field(..., description="The assistant's answer to the user's query")
    is_relevant: bool = Field(..., description="True if the query is relevant to the Constitution of Pakistan, otherwise False")
    article_number: str = Field(..., description="Mentioned article number if available, else empty string")

parser = PydanticOutputParser(pydantic_object=LegalResponse)

# Prompt for direct article wording
answer_prompt_template_query = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal assistant with expertise in the Constitution of Pakistan. "
     "Return the answer in a structured format. "
     "Your task is to extract and present the exact constitutional text, without paraphrasing, ensuring accuracy and fidelity to the original wording. "
     "Always return the title."),
    ("human",
     "User Query: {query}\n\n"
     "Instructions:\n"
     "0. Return the title.\n"
     "1. Return the exact wording from the Constitution.\n"
     "2. If a query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), return only the exact wording of that clause from the Constitution — do not include the full article unless required by structure.\n"
     "3. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).\n"
     "4. Extract and return the article number if it is mentioned, with the sub-clause if one is given (e.g., 1, 2, or 1(a)).\n\n"
     "Context:\n{context}\n\n"
     "{format_instructions}\n")
])

answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)

# Prompt for explanation-style answers
explanation_prompt_template_query = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
     "Return the answer in a well-structured format. "
     "Your task is to provide clear, in-depth explanations strictly based on the constitutional text. "
     "Do not refer to real-world examples, cases, or interpretations beyond the Constitution itself."),
    ("human",
     "User Query: {query}\n\n"
     "Based on the provided context, follow these steps:\n\n"
     "1. Provide a comprehensive and detailed explanation strictly based on the Constitution of Pakistan.\n"
     "2. If the query refers to any law, article, amendment, clause, or provision that is passed, explain it thoroughly — "
     "including what it means, how it is structured, and how it functions within the Constitution.\n"
     "3. Avoid using any real-world references, court cases, or practical examples.\n"
     "4. Indicate whether the query is relevant to the Constitution of Pakistan (True/False).\n"
     "5. Extract and return the article number if any is mentioned in the query.\n\n"
     "Context:\n{context}\n\n"
     "{format_instructions}\n\n")
])

answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template_query, output_parser=parser)

# Load data
embeddings, index, chunks = load_embeddings_and_index(save_dir)

# Main function
def get_legal_response(query):
    try:
        refined_query = refine_chain.run(query=query)
    except Exception as e:
        print(f"[Refinement Error] Using raw query instead: {e}")
        refined_query = query

    print("\nRefined Query:", refined_query)

    relevant_chunks, flag = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)

    print("\nTop Relevant Chunks:")
    for i, chunk in enumerate(relevant_chunks, 1):
        print(f"\nChunk {i}:\n{'-'*50}\n{chunk}")

    context = "\n\n".join(relevant_chunks)

    if flag:
        # Exact-wording path: no explanation keyword was found in the query
        response = answer_chain_article.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
    else:
        # Explanation path
        response = answer_chain_explanation.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())

    return {
        "title": response.title,
        "answer": response.answer,
        "is_relevant": response.is_relevant,
        "article_number": response.article_number
    }
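For quick verification, a minimal driver for this module might look like the sketch below. It is not part of the commit: it assumes the saved_data/ artifacts exist and the Groq keys in .env are valid, and the query string is illustrative only.

if __name__ == "__main__":
    # Hypothetical smoke test; exercises the public entry point above.
    result = get_legal_response("Explain Article 6 of the Constitution of Pakistan")
    print(result["title"])
    print(result["answer"])
    print("Relevant:", result["is_relevant"], "| Article:", result["article_number"])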
embeddings.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7594f7c33917c21f7168af5f896d08589c2f9644e6f31096665d0738cf8a7aed
size 427136
index.faiss
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:78883b11d6015e1f366dfe10d42b8582b09906c836eba65dd1dd5bf3f8391471
size 427053
lm.py
ADDED
@@ -0,0 +1,15 @@
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq

load_dotenv()
key1 = os.getenv('GROQ_API_KEY_1')
key2 = os.getenv('GROQ_API_KEY_2')


# Two Groq clients on separate API keys: one for query refinement,
# one for answering.
def get_query_llm():
    return ChatGroq(model="mistral-saba-24b", api_key=key1)

def get_answer_llm():
    return ChatGroq(model="mistral-saba-24b", api_key=key2)
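A hedged smoke test for lm.py might look like this; it assumes GROQ_API_KEY_1 and GROQ_API_KEY_2 resolve to valid keys, and relies on the standard LangChain chat-model interface, where invoke() accepts a plain string and returns a message with a .content field.

# Hypothetical check, not part of the commit.
from lm import get_query_llm, get_answer_llm

q_llm = get_query_llm()
a_llm = get_answer_llm()
print(q_llm.invoke("Reply with OK").content)
print(a_llm.invoke("Reply with OK").content)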