naitik31 committed on
Commit
2df226a
1 Parent(s): 649f58b

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ LawGPT/data/ipc_law.pdf filter=lfs diff=lfs merge=lfs -text
37
+ LawGPT/Lawbot511.png filter=lfs diff=lfs merge=lfs -text
LawGPT/LawGPT.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.vectorstores import Chroma
2
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
+ from transformers import pipeline
4
+ import torch
5
+ from langchain.llms import HuggingFacePipeline
6
+ from langchain.embeddings import SentenceTransformerEmbeddings
7
+ from langchain.chains import RetrievalQA
8
+ import gradio as gr
9
+
10
def chat(chat_history, user_input):
    """Stream the QA chain's answer into the chat transcript.

    Generator used as the Gradio click handler for the submit button. It
    sends ``user_input`` to the module-level ``qa_chain`` and then yields
    the transcript repeatedly, each time with one more character of the
    answer appended, so the UI shows the reply being typed out.

    Args:
        chat_history: Current list of ``(user_message, bot_message)`` pairs
            from the Chatbot component. ``None`` is treated as an empty
            transcript.
        user_input: The question text, passed to the chain as ``"query"``.

    Yields:
        A new list consisting of the previous transcript followed by
        ``(user_input, partial_answer)``. The incoming list is never
        mutated.
    """
    # A missing transcript would make the list concatenation below raise
    # TypeError, so normalise it to an empty list.
    history = list(chat_history or [])

    # The chain is built with return_source_documents=True, so it returns a
    # dict; only the generated text under "result" is shown to the user.
    answer = qa_chain({"query": user_input})["result"] or ""

    if not answer:
        # Without this, an empty answer would yield nothing and the user's
        # question would silently vanish from the chat window.
        yield history + [(user_input, "")]
        return

    # Emit successively longer prefixes to produce the typing effect.
    for end in range(1, len(answer) + 1):
        yield history + [(user_input, answer[:end])]
18
+
19
# --- Answer generator -------------------------------------------------------
# Seq2seq checkpoint used for both the tokenizer and the model weights.
checkpoint = "MBZUAI/LaMini-Flan-T5-783M"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint,
    device_map="auto",
    torch_dtype=torch.float32,
)

# --- Retrieval store --------------------------------------------------------
# Opens the Chroma collection persisted under "ipc_vector_data" and embeds
# incoming queries with the sentence-transformers model named below.
embeddings = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/multi-qa-mpnet-base-dot-v1"
)
db = Chroma(
    persist_directory="ipc_vector_data",
    embedding_function=embeddings,
)

# --- Text generation pipeline -----------------------------------------------
# Generation settings are collected in one place and forwarded unchanged to
# the transformers pipeline.
generation_settings = {
    "max_length": 512,
    "do_sample": True,
    "temperature": 0.3,
    "top_p": 0.95,
}
pipe = pipeline(
    "text2text-generation",
    model=base_model,
    tokenizer=tokenizer,
    **generation_settings,
)
local_llm = HuggingFacePipeline(pipeline=pipe)

# --- Retrieval-augmented QA chain -------------------------------------------
# Similarity search with k=2 feeds the "stuff" chain; source documents are
# returned alongside the generated answer.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
qa_chain = RetrievalQA.from_chain_type(
    llm=local_llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)
46
+
47
# Gradio front end: banner image, chat transcript, input box and three buttons.
with gr.Blocks() as gradioUI:

    # The asset committed with this script is "Lawbot511.png" (capital L).
    # The lower-case spelling only resolves on case-insensitive filesystems
    # and fails with a missing-file error elsewhere.
    gr.Image("Lawbot511.png")

    with gr.Row():
        chatbot = gr.Chatbot()
    with gr.Row():
        input_query = gr.TextArea(label="Input", show_copy_button=True)

    with gr.Row():
        with gr.Column():
            submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            clear_input_btn = gr.Button("Clear Input")
        with gr.Column():
            clear_chat_btn = gr.Button("Clear Chat")

    # Submit runs two handlers: `chat` streams the answer into the
    # transcript, and the second one empties the input box.
    submit_btn.click(chat, [chatbot, input_query], chatbot)
    submit_btn.click(lambda: gr.update(value=""), None, input_query, queue=False)

    # Returning None resets the target component.
    clear_input_btn.click(lambda: None, None, input_query, queue=False)
    clear_chat_btn.click(lambda: None, None, chatbot, queue=False)

# `chat` is a generator, so the queue is enabled before launching.
gradioUI.queue().launch()
LawGPT/Lawbot511.png ADDED

Git LFS Details

  • SHA256: bf394ee73e68a0fe9a3d4c8de7158ef519ca3c1af93bb507b232ea1528caaaf8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.03 MB
LawGPT/VectorEmbeddings.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.document_loaders import PyPDFLoader, DirectoryLoader
2
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
3
+ from langchain.embeddings import SentenceTransformerEmbeddings
4
+ from langchain.vectorstores import Chroma
5
+
6
# One-off indexing script: load the PDFs under "data", split them into
# overlapping chunks, embed the chunks and store them in a Chroma collection.

# Directory the collection is written to.
persist_directory = "ipc_vector_data"

# Load every PDF directly inside the "data" directory.
loader = DirectoryLoader("data", glob="./*.pdf", loader_cls=PyPDFLoader)
documents = loader.load()

# Split into 500-character chunks with a 200-character overlap between
# neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

# Embed the chunks and build the collection in `persist_directory`.
embeddings = SentenceTransformerEmbeddings(
    model_name="multi-qa-mpnet-base-dot-v1"
)
db = Chroma.from_documents(
    texts,
    embeddings,
    persist_directory=persist_directory,
)
15
+
LawGPT/data/ipc_law.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67161633a056f77848221ab30c49b26199c66cc844ee559ac47d2ca5dea9256
3
+ size 20102169
LawGPT/ipc_vector_data/chroma.sqlite3 ADDED
Binary file (127 kB). View file
 
LawGPT/requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ transformers
3
+ torch
4
+ gradio
5
+ sentence-transformers
6
+ accelerate
7
+ chromadb
8
+ pypdf