File size: 2,448 Bytes
e778d13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import streamlit as st
from transformers import BertForQuestionAnswering, BertTokenizer
import torch
from io import BytesIO
import PyPDF2
import pandas as pd

# Create the per-session interaction log (question, answer, satisfaction
# records) the first time the script runs for this session.
if 'qa_log' not in st.session_state:
    st.session_state['qa_log'] = []

def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_file: Binary file-like object exposing ``read()`` whose content
            is a PDF document. It is read in full from its current position.

    Returns:
        The text extracted from each page, in page order, joined with
        newlines. A page that yields no text contributes an empty string.
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file.read()))
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; ``or ""`` keeps the join safe if a page yields
    # no text at all.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

def answer_question(question, context, model, tokenizer):
    """Extract the span of ``context`` that best answers ``question``.

    Args:
        question: Question text.
        context: Text to search for the answer. Only the context is
            truncated when the encoded pair exceeds 512 tokens.
        model: Callable question-answering model. It is invoked with the
            tokenizer's encoded inputs as keyword arguments plus
            ``return_dict=True`` and must return an object exposing
            ``start_logits`` and ``end_logits``.
        tokenizer: Tokenizer providing ``encode_plus``,
            ``convert_ids_to_tokens`` and ``convert_tokens_to_string``.

    Returns:
        The decoded answer string, or an empty string when the
        highest-scoring end position does not come after the start position.
    """
    inputs = tokenizer.encode_plus(
        question,
        context,
        add_special_tokens=True,
        return_tensors="pt",
        truncation="only_second",
        max_length=512,
    )

    # Inference only: disable autograd so no graph is recorded for the
    # forward pass.
    with torch.no_grad():
        outputs = model(**inputs, return_dict=True)

    # Convert to plain ints so they are ordinary slice bounds.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1  # exclusive bound

    # Start and end are chosen independently, so the span can be inverted;
    # that slice would be empty, so report "no answer" explicitly.
    if answer_end <= answer_start:
        return ""

    input_ids = inputs["input_ids"].tolist()[0]
    answer_tokens = tokenizer.convert_ids_to_tokens(
        input_ids[answer_start:answer_end]
    )
    return tokenizer.convert_tokens_to_string(answer_tokens)

QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"


@st.cache_resource
def load_qa_model():
    """Load the SQuAD-finetuned BERT model and its tokenizer once.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids re-reading the model weights on each rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model = BertForQuestionAnswering.from_pretrained(QA_MODEL_NAME)
    tokenizer = BertTokenizer.from_pretrained(QA_MODEL_NAME)
    return model, tokenizer


st.title("Resume Question Answering")

uploaded_file = st.file_uploader("Upload your resume (PDF format only)", type=["pdf"])

if uploaded_file is not None:
    resume_text = extract_text_from_pdf(uploaded_file)
    st.write("Resume Text:")
    st.write(resume_text)

    user_question = st.text_input("Ask a question based on your resume:")

    if user_question:
        model, tokenizer = load_qa_model()

        answer = answer_question(user_question, resume_text, model, tokenizer)
        st.write("Answer:")
        st.write(answer)

        # Ask for user feedback on satisfaction
        satisfaction = st.radio('Are you satisfied with the answer?', ('Yes', 'No'), key='satisfaction')

        # Log the interaction. Changing the radio selection reruns the whole
        # script, so an unconditional append would add a duplicate row for
        # the same question on every click; instead, refresh the last row
        # when it describes the same question/answer pair.
        entry = {
            'Question': user_question,
            'Answer': answer,
            'Satisfaction': satisfaction,
        }
        qa_log = st.session_state.qa_log
        last_entry = qa_log[-1] if qa_log else None
        if (
            last_entry is not None
            and last_entry['Question'] == user_question
            and last_entry['Answer'] == answer
        ):
            qa_log[-1] = entry
        else:
            qa_log.append(entry)

        # Display the log in a table format
        st.write("Interaction Log:")
        log_df = pd.DataFrame(qa_log)
        st.dataframe(log_df)