vedsadani committed on
Commit
4ea3ce6
1 Parent(s): 4f34caf

Create app_wip.py

Browse files
Files changed (1) hide show
  1. app_wip.py +71 -0
app_wip.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.vectorstores import FAISS
2
+ from langchain.chains import ConversationalRetrievalChain
3
+ from langchain.llms import HuggingFaceHub
4
+ from getpass import getpass
5
+ import os
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from google.colab import drive
8
+ from langchain.document_loaders import PyPDFDirectoryLoader
9
+ from langchain.document_loaders.csv_loader import CSVLoader
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain.memory import ConversationSummaryBufferMemory
12
+ import io
13
+ import contextlib
14
+ import pandas as pd
15
+ from PyPDF2 import PdfReader
16
+ from langchain_community.vectorstores import FAISS
17
+ from langchain_community.embeddings import HuggingFaceEmbeddings
18
+ from langchain_community.document_loaders import PyPDFDirectoryLoader
19
+ from langchain_community.llms import HuggingFaceEndpoint
20
+ from langchain_community.document_loaders import PyPDFLoader
21
+ import gradio as gr
22
+
23
def process_file(fileobj):
    """Copy an uploaded file into the ``Docs`` folder and run the MoP check.

    Args:
        fileobj: The upload handed over by the Gradio ``"file"`` input.
            Either an object exposing the uploaded file's path as ``.name``
            or a plain path string is accepted.

    Returns:
        Whatever ``MOP`` returns for the ``Docs`` folder; this is the text
        shown in the interface's output box.
    """
    # Imported locally because the module's import block does not provide
    # shutil; the original raised NameError on the copy below.
    import shutil

    destination_folder = "Docs"
    # The original passed the upload object itself to os.path.basename(),
    # which raises TypeError for a file wrapper. Resolve the path once and
    # use it for both the file name and the copy source.
    source_path = getattr(fileobj, "name", fileobj)
    file_name = os.path.basename(source_path)
    destination_path = os.path.join(destination_folder, file_name)
    shutil.copyfile(source_path, destination_path)
    return MOP(destination_folder)
29
+
30
def MOP(path):
    """Check a MoP document against the standard template with a RAG chain.

    Loads the PDFs under *path*, splits them into overlapping chunks, indexes
    the chunks in a FAISS store and asks the hosted
    ``mistralai/Mixtral-8x7B-Instruct-v0.1`` model whether the content
    document covers the template's Pre-Check and Post-Check procedures.

    Args:
        path: Directory holding the template PDF and the document to check.

    Returns:
        The text following the "Helpful Answer:" marker in the chain output
        when that marker is present, otherwise the raw chain output.

    Raises:
        ValueError: If no text chunks could be produced from the PDFs under
            *path* (the vector store cannot be built from an empty list).
    """
    # Local imports: the module's import block provides neither name, so the
    # original raised NameError on first use.
    import re
    from langchain.chains import RetrievalQA

    loader = PyPDFDirectoryLoader(path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
    text_chunks = text_splitter.split_documents(docs)
    if not text_chunks:
        # Fail with a clear message instead of an obscure error from FAISS.
        raise ValueError(f"No readable PDF text found in {path!r}")

    embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-base")
    vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)

    filename_to_keep = 'Dummy_standard MoP_template_new.pdf'
    # NOTE(review): delete_files_except is not defined or imported anywhere in
    # this file, so this call raises NameError until it is provided.
    # Presumably it removes every file in `path` except the template and
    # returns the name of the document that was checked -- confirm.
    prompt_file = delete_files_except(filename_to_keep, path)

    repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"temperature": 0.1, "max_new_tokens": 2048},
    )
    # k equals the chunk count, so every chunk is retrieved and the "stuff"
    # chain places all of them into a single prompt.
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": len(text_chunks)},
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        verbose=True,
    )

    # The template name is interpolated from filename_to_keep so the prompt
    # and the kept file cannot drift apart; the rendered text is unchanged.
    message = f"""<s> [INST] You have two documents:
Template Document: "{filename_to_keep}"
Content Document: "{prompt_file}"
Your task is to determine whether the Content Document effectively incorporates the context and instructions of the Pre-Check Procedures and Post-Check Procedures specified in the Template Document, without the requirement for them to be under specified labels. The focus should be on the presence of the context and instructions rather than their exact placement.
Provide a "Yes" or "No" response indicating whether the Content Document accurately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures as outlined in the Template Document.
Additionally, identify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures if present, regardless of their placement within the Content Document.
Instructions:
Review the context and instructions of the Pre-Check Procedures and Post-Check Procedures detailed in the Template Document ("{filename_to_keep}").
Assess whether the Content Document ("{prompt_file}") includes the necessary context and instructions for the Pre-Check Procedures and Post-Check Procedures, regardless of their specific placement or labeling.
Provide a "Yes" if the Content Document adequately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures, or "No" if there are significant gaps or omissions.
If the answer is "No," specify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures, emphasizing their importance in the Content Document.
Ensure careful consideration of the context and instructions provided in the Template Document ("{filename_to_keep}") while evaluating the alignment of the Content Document ("{prompt_file}"). [/INST] </s>"""

    result = qa.run(message)
    # Keep only what follows the "Helpful Answer:" marker when it is present;
    # otherwise hand back the raw output.
    pattern = r"Helpful Answer:\n\n(.*)"
    match = re.search(pattern, result, re.DOTALL)
    if match:
        return match.group(1)
    return result
63
+
64
# Gradio front end: a single file upload is passed to process_file and the
# returned string is rendered as plain text.
demo = gr.Interface(fn=process_file, inputs=["file"], outputs="text")
demo.launch()