|
from transformers import BertForQuestionAnswering, BertTokenizer
|
|
import torch
|
|
from summarize import summarize_text,extract_text_from_pdf
|
|
|
|
def load_qa_model(model_path="D:/code/bert_easy/bert-large-uncased-whole-word-masking-finetuned-squad"):
    """Return a ``(model, tokenizer)`` pair for BERT question answering.

    Args:
        model_path: Location handed unchanged to ``from_pretrained`` for both
            the model and the tokenizer.

    Returns:
        A 2-tuple ``(BertForQuestionAnswering, BertTokenizer)``.
    """
    # The model is loaded first, so a bad path surfaces from the model loader.
    qa_model = BertForQuestionAnswering.from_pretrained(model_path)
    qa_tokenizer = BertTokenizer.from_pretrained(model_path)
    return qa_model, qa_tokenizer
|
|
|
|
def get_answer(question, context, model, tokenizer):
    """Extract the most likely answer span for ``question`` from the encoded input.

    The question and context are encoded together as one sequence (truncated
    to 512 tokens), the model is run without gradient tracking, and the
    highest-scoring token span is converted back to text.

    Args:
        question: The question text.
        context: The passage that is encoded alongside the question.
        model: Callable that accepts the tokenizer output as keyword arguments
            and returns an object with ``start_logits`` and ``end_logits``
            whose first row is used (single-example batch).
        tokenizer: Tokenizer providing ``__call__``, ``convert_ids_to_tokens``
            and ``convert_tokens_to_string``.

    Returns:
        The answer text produced by ``tokenizer.convert_tokens_to_string``.

    Note:
        The span is searched over the whole encoded sequence; it is not
        restricted to context tokens, so it can include question or special
        tokens if the model scores them highest.
    """
    inputs = tokenizer(question, context, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        outputs = model(**inputs)

    start_logits = outputs.start_logits[0]
    end_logits = outputs.end_logits[0]
    seq_len = start_logits.size(0)

    # Choosing start and end with two independent argmax calls can put the end
    # before the start, which yields an empty answer. Score every pair jointly
    # and rule out inverted spans instead. When the independent maxima already
    # form a valid span, that same pair is still the joint maximum.
    span_scores = start_logits[:, None] + end_logits[None, :]
    positions = torch.arange(seq_len, device=span_scores.device)
    inverted = positions[:, None] > positions[None, :]
    span_scores = span_scores.masked_fill(inverted, float("-inf"))

    # argmax works on the flattened (start, end) grid; unravel it by row length.
    answer_start, answer_end = divmod(int(torch.argmax(span_scores)), seq_len)

    # The end index is inclusive, hence the +1 in the slice.
    answer_ids = inputs["input_ids"][0][answer_start:answer_end + 1]
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(answer_ids))

    return answer
|
|
|
|
if __name__ == "__main__":
    # Demo run: summarize one PDF, then ask a single question about the summary.
    source_pdf = "C://Users/HP/Downloads/study/cis/CIS Fundamentals.pdf"
    demo_question = "what is cloud computing ?"

    # The question is answered against the summary, not the full extracted text.
    pdf_summary = summarize_text(extract_text_from_pdf(source_pdf))

    qa_model, qa_tokenizer = load_qa_model()
    predicted_answer = get_answer(demo_question, pdf_summary, qa_model, qa_tokenizer)

    print("Summary:", pdf_summary)
    print("Answer:", predicted_answer)
|
|
|