Spaces:

IProject-10
/

DQAsystem12

Sleeping

App Files Files Community

IProject-10 committed on Aug 18

Commit

744ae18

•

1 Parent(s): 920b5d6

Upload 2 files

Browse files

Files changed (2) hide show

app.py +125 -0
requirements.txt +8 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import os
+from rank_bm25 import BM25Okapi
+from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering
+import torch
+import gradio as gr
+from docx import Document
+import pdfplumber
+# Load the fine-tuned BERT-based QA model and tokenizer
+model_name = "IProject-10/roberta-base-finetuned-squad2"  # Replace with your model name
+qa_model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Set up the device for BERT
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+qa_model.to(device)
+qa_model.eval()
+# Create a pipeline for retrieval-augmented QA
+retrieval_qa_pipeline = pipeline(
+    "question-answering",
+    model=qa_model,
+    tokenizer=tokenizer,
+    device=device.index if torch.cuda.is_available() else -1
+)
+def extract_text_from_file(file):
+    # Determine the file extension
+    file_extension = os.path.splitext(file.name)[1].lower()
+    text = ""
+    try:
+        if file_extension == ".txt":
+            with open(file.name, "r") as f:
+                text = f.read()
+        elif file_extension == ".docx":
+            doc = Document(file.name)
+            for para in doc.paragraphs:
+                text += para.text + "\n"
+        elif file_extension == ".pdf":
+            with pdfplumber.open(file.name) as pdf:
+                for page in pdf.pages:
+                    text += page.extract_text() + "\n"
+        else:
+            raise ValueError("Unsupported file format: {}".format(file_extension))
+    except Exception as e:
+        text = str(e)
+    return text
+def load_passages(files):
+    passages = []
+    for file in files:
+        passage = extract_text_from_file(file)
+        passages.append(passage)
+    return passages
+def highlight_answer(context, answer):
+    start_index = context.find(answer)
+    if start_index != -1:
+        end_index = start_index + len(answer)
+        highlighted_context = f"{context[:start_index]}_________<<{context[start_index:end_index]}>>_________{context[end_index:]}"
+        return highlighted_context
+    else:
+        return context
+def answer_question(question, files):
+    try:
+        # Load passages from the uploaded files
+        passages = load_passages(files)
+        # Create an index using BM25
+        bm25 = BM25Okapi([passage.split() for passage in passages])
+        # Retrieve relevant passages using BM25
+        tokenized_query = question.split()
+        candidate_passages = bm25.get_top_n(tokenized_query, passages, n=3)
+        bm25_scores = bm25.get_scores(tokenized_query)
+        # Extract answer using the pipeline for each candidate passage
+        answers_with_context = []
+        for passage in candidate_passages:
+            answer = retrieval_qa_pipeline(question=question, context=passage)
+            bm25_score = bm25_scores[passages.index(passage)]
+            answer_with_context = {
+                "context": passage,
+                "answer": answer["answer"],
+                "BM25-score": bm25_score  # BM25 confidence score for this passage
+            }
+            answers_with_context.append(answer_with_context)
+        # Choose the answer with the highest model confidence score
+        best_answer = max(answers_with_context, key=lambda x: x["BM25-score"])
+        # Highlight the answer in the context
+        highlighted_context = highlight_answer(best_answer["context"], best_answer["answer"])
+        return best_answer["answer"], highlighted_context, best_answer["BM25-score"]
+    except Exception as e:
+        return str(e), "", ""
+# Define Gradio interface
+iface = gr.Interface(
+    fn=answer_question,
+    inputs=[
+        gr.Textbox(lines=2, placeholder="Enter your question here...", label="Question"),
+        gr.Files(label="Upload text, Word, or PDF files")
+    ],
+    outputs=[
+        gr.Textbox(label="Answer"),
+        gr.Textbox(label="Context"),
+        gr.Textbox(label="BM25 Score")
+    ],
+    title="Question Answering Model",
+    description="Upload a text document and ask a question from the content",
+    css="""
+    .container { max-width: 800px; margin: auto; }
+    .interface-title { font-family: Arial, sans-serif; font-size: 24px; font-weight: bold; }
+    .interface-description { font-family: Arial, sans-serif; font-size: 16px; margin-bottom: 20px; }
+    .input-textbox, .output-textbox { font-family: Arial, sans-serif; font-size: 14px; }
+    .error { color: red; font-family: Arial, sans-serif; font-size: 14px; }
+    """
+)
+# Launch the interface
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+numpy
+torch
+transformers
+gradio
+python-docx
+pdfplumber
+rank-bm25