Saim-11 committed on
Commit aa7cb11 · verified · 1 Parent(s): d61f47a

Upload 6 files

Files changed (7)
  1. .env +9 -0
  2. .gitattributes +1 -0
  3. chunks.txt +0 -0
  4. constitution_py.py +155 -0
  5. embeddings.npy +3 -0
  6. index.faiss +3 -0
  7. lm.py +15 -0
.env ADDED
@@ -0,0 +1,9 @@
+ GROQ_API_KEY_1="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"
+ GROQ_API_KEY_2="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"
+
+
+ #GROQ_API_KEY = "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"
+ LANGCHAIN_API_KEY = "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"
+ LANGCHAIN_PROJECT = "WORK"
+ LANGCHAIN_TRACING_V2 = "true"
+ #GOOGLE_API_KEY = "684ff9d7746ed6e9d54268345c5ad41655ffac7d"
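Note: lm.py (below) loads GROQ_API_KEY_1 and GROQ_API_KEY_2 from this file via python-dotenv; the LANGCHAIN_* variables enable LangSmith tracing under the "WORK" project once they are in the environment.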
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ index.faiss filter=lfs diff=lfs merge=lfs -text
chunks.txt ADDED
The diff for this file is too large to render. See raw diff
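Although chunks.txt is not rendered here, retrieve_relevant_chunks in constitution_py.py (below) assumes one chunk per line, with article chunks prefixed article;<number> so a named article can be looked up directly. A hypothetical line, for illustration only:

    article;19 Freedom of speech, etc. Every citizen shall have the right to freedom of speech ...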
 
constitution_py.py ADDED
@@ -0,0 +1,155 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ import re
+ import os
+ import numpy as np
+ import faiss
+ from sentence_transformers import SentenceTransformer
+ from langchain.chains import LLMChain
+ from langchain_core.prompts import ChatPromptTemplate
+ from pydantic import BaseModel, Field
+ from langchain.output_parsers import PydanticOutputParser
+ from lm import get_query_llm, get_answer_llm  # custom LLM wrapper functions
+
+ # Initialize LLMs
+ q_llm = get_query_llm()
+ a_llm = get_answer_llm()
+
+ # Load the sentence-transformer model once, globally
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+ save_dir = "saved_data"
+
+ def load_embeddings_and_index(save_dir="saved_data"):
+     embeddings = np.load(os.path.join(save_dir, "embeddings.npy"))
+     index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
+     with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
+         chunks = [line.strip() for line in f]
+     return embeddings, index, chunks
+
+ # Verbs that signal the user wants an explanation rather than exact wording
+ explanation_words = [
+     "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",
+     "demonstrate", "illustrate", "interpret", "expand on", "go over", "walk through", "define",
+     "unpack", "decode", "shed light on", "analyze", "discuss", "make clear", "reveal", "disclose",
+     "comment on", "talk about", "lay out", "spell out", "express", "delve into", "explore",
+     "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight"
+ ]
+
+ def is_exact_wording_query(query):
+     # True when the query contains none of the explanation verbs,
+     # i.e. the user wants the article's exact wording
+     return not any(word in query.lower() for word in explanation_words)
+
+ def retrieve_relevant_chunks(query, index, chunks, top_k=5):
+     numbers = re.findall(r"\d+", query)
+
+     # Direct lookup when the query names a specific article, e.g. "Article 19"
+     if "article" in query.lower() and numbers:
+         article_number = numbers[0]
+         for chunk in chunks:
+             if chunk.lower().startswith(f"article;{article_number}"):
+                 return [chunk], is_exact_wording_query(query)
+
+     # Otherwise fall back to semantic search over the FAISS index
+     query_embedding = np.array(embedding_model.encode([query])).astype("float32")
+     distances, indices = index.search(query_embedding, top_k)
+     relevant_chunks = [chunks[i] for i in indices[0]]
+     return relevant_chunks, is_exact_wording_query(query)
+
+ # Prompt to refine the query
+ refine_prompt_template = ChatPromptTemplate.from_messages([
+     ("system", "You are a helpful legal assistant. Your job is to clean user queries by fixing grammar and spelling only. Do not expand or explain them. If a number is mentioned, return it in digit form."),
+     ("human", "{query}")
+ ])
+ refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)
+
+ # Define the structured response schema
+ class LegalResponse(BaseModel):
+     title: str = Field(..., description="Title of the article or topic being answered")
+     answer: str = Field(..., description="The assistant's answer to the user's query")
+     is_relevant: bool = Field(..., description="True if the query is relevant to the Constitution of Pakistan, otherwise False")
+     article_number: str = Field(..., description="Mentioned article number if available, else empty string")
+
+ parser = PydanticOutputParser(pydantic_object=LegalResponse)
+
+ # Prompt for returning the exact article wording
+ answer_prompt_template_query = ChatPromptTemplate.from_messages([
+     ("system",
+      "You are a legal assistant with expertise in the Constitution of Pakistan. "
+      "Return the answer in a structured format. "
+      "Your task is to extract and present the exact constitutional text, without paraphrasing, "
+      "ensuring accuracy and fidelity to the original wording. Always return the title."),
+     ("human",
+      "User Query: {query}\n\n"
+      "Instructions:\n"
+      "0. Return the title.\n"
+      "1. Return the exact wording from the Constitution.\n"
+      "2. If a query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), "
+      "return only the exact wording of that clause; do not include the full article unless required by structure.\n"
+      "3. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).\n"
+      "4. Extract and return the article number if it is mentioned, with the sub-clause if given, e.g. 1, 2, or 1(a).\n\n"
+      "Context:\n{context}\n\n"
+      "{format_instructions}\n")
+ ])
+
+ answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)
+
+ # Prompt for explanation-style answers
+ explanation_prompt_template_query = ChatPromptTemplate.from_messages([
+     ("system",
+      "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
+      "Return the answer in a well-structured format. "
+      "Your task is to provide clear, in-depth explanations strictly based on the constitutional text. "
+      "Do not refer to real-world examples, cases, or interpretations beyond the Constitution itself."),
+     ("human",
+      "User Query: {query}\n\n"
+      "Based on the provided context, follow these steps:\n\n"
+      "1. Provide a comprehensive and detailed explanation strictly based on the Constitution of Pakistan.\n"
+      "2. If the query refers to any law, article, amendment, clause, or provision that has been passed, explain it thoroughly, "
+      "including what it means, how it is structured, and how it functions within the Constitution.\n"
+      "3. Avoid using any real-world references, court cases, or practical examples.\n"
+      "4. Indicate whether the query is relevant to the Constitution of Pakistan (True/False).\n"
+      "5. Extract and return the article number if any is mentioned in the query.\n\n"
+      "Context:\n{context}\n\n"
+      "{format_instructions}\n")
+ ])
+
+ answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template_query, output_parser=parser)
+
+ # Load data
+ embeddings, index, chunks = load_embeddings_and_index(save_dir)
+
+ # Main entry point
+ def get_legal_response(query):
+     try:
+         refined_query = refine_chain.run(query=query)
+     except Exception as e:
+         print(f"[Refinement Error] Using raw query instead: {e}")
+         refined_query = query
+
+     print("\nRefined Query:", refined_query)
+
+     relevant_chunks, flag = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
+
+     print("\nTop Relevant Chunks:")
+     for i, chunk in enumerate(relevant_chunks, 1):
+         print(f"\nChunk {i}:\n{'-' * 50}\n{chunk}")
+
+     context = "\n\n".join(relevant_chunks)
+
+     if flag:
+         # Exact-wording query: quote the article text verbatim
+         response = answer_chain_article.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
+     else:
+         # Explanation query: produce a structured explanation
+         response = answer_chain_explanation.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
+
+     return {
+         "title": response.title,
+         "answer": response.answer,
+         "is_relevant": response.is_relevant,
+         "article_number": response.article_number,
+     }
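A minimal usage sketch for this module, assuming the saved_data/ artifacts below are in place and the .env keys are valid; the query is illustrative:

    from constitution_py import get_legal_response

    result = get_legal_response("What does Article 19 say?")
    print(result["title"])
    print(result["answer"])
    print(result["is_relevant"], result["article_number"])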
embeddings.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7594f7c33917c21f7168af5f896d08589c2f9644e6f31096665d0738cf8a7aed
+ size 427136
index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78883b11d6015e1f366dfe10d42b8582b09906c836eba65dd1dd5bf3f8391471
+ size 427053
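The script that produced embeddings.npy and index.faiss is not part of this commit. A minimal sketch of how they could be regenerated, assuming the same all-MiniLM-L6-v2 model used in constitution_py.py; the flat L2 index type is an assumption, not confirmed by the commit:

    import numpy as np
    import faiss
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-MiniLM-L6-v2")
    with open("saved_data/chunks.txt", encoding="utf-8") as f:
        chunks = [line.strip() for line in f]

    # Encode every chunk; all-MiniLM-L6-v2 yields 384-dim vectors
    embeddings = model.encode(chunks).astype("float32")
    np.save("saved_data/embeddings.npy", embeddings)

    # Exact (brute-force) L2 index over the chunk embeddings
    index = faiss.IndexFlatL2(embeddings.shape[1])  # assumed index type
    index.add(embeddings)
    faiss.write_index(index, "saved_data/index.faiss")

For what it's worth, the ~427 KB file sizes above are consistent with a few hundred 384-dimensional float32 vectors.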
lm.py ADDED
@@ -0,0 +1,15 @@
+ import os
+ from dotenv import load_dotenv
+ from langchain_groq import ChatGroq
+
+ # Load the Groq API keys from .env
+ load_dotenv()
+ key1 = os.getenv("GROQ_API_KEY_1")
+ key2 = os.getenv("GROQ_API_KEY_2")
+
+ def get_query_llm():
+     # LLM used to refine/clean the user query
+     return ChatGroq(model="mistral-saba-24b", api_key=key1)
+
+ def get_answer_llm():
+     # LLM used to generate the final structured answer
+     return ChatGroq(model="mistral-saba-24b", api_key=key2)
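Presumably the two keys exist to spread Groq rate limits across the query-refinement and answer chains; both wrappers return the same mistral-saba-24b model.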