sanjay11 committed on
Commit
e778d13
1 Parent(s): dc53284

Create bertimproved.py

Browse files
Files changed (1) hide show
  1. bertimproved.py +71 -0
bertimproved.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import BertForQuestionAnswering, BertTokenizer
3
+ import torch
4
+ from io import BytesIO
5
+ import PyPDF2
6
+ import pandas as pd
7
+
8
# Seed the per-session interaction log on the first run only; later reruns
# must keep the entries that were already recorded.
if "qa_log" not in st.session_state:
    st.session_state["qa_log"] = []
11
+
12
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_file: Binary file-like object exposing ``read()``; its full
            content is read into memory and parsed with ``PyPDF2.PdfReader``.

    Returns:
        The extracted text of all pages, concatenated in page order with no
        separator between pages. Pages that yield no text contribute nothing.
    """
    pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file.read()))
    # Defensive ``or ""``: a page whose extraction yields a falsy value
    # (e.g. None) must not break the concatenation with a TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
18
+
19
def answer_question(question, context, model, tokenizer):
    """Extract the most likely answer span for ``question`` from ``context``.

    The question/context pair is encoded as a single sequence capped at 512
    tokens; only the context is truncated (``truncation="only_second"``), so
    text beyond that limit is never seen by the model.

    Args:
        question: The question text.
        context: The text to search for an answer.
        model: Callable accepting the encoded inputs as keyword arguments
            (plus ``return_dict=True``) and returning an object with
            ``start_logits`` and ``end_logits`` tensors.
        tokenizer: Object providing ``encode_plus``,
            ``convert_ids_to_tokens`` and ``convert_tokens_to_string``.

    Returns:
        The decoded answer string. It is built from an empty token list when
        the predicted end position does not lie after the predicted start.
    """
    inputs = tokenizer.encode_plus(
        question,
        context,
        add_special_tokens=True,
        return_tensors="pt",
        truncation="only_second",
        max_length=512,
    )

    # Pure inference: disable gradient tracking so no autograd graph is
    # built and kept alive for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs, return_dict=True)

    # Convert to plain ints so the slice below does not rely on tensors
    # being usable as sequence indices.
    answer_start = int(torch.argmax(outputs.start_logits))
    answer_end = int(torch.argmax(outputs.end_logits)) + 1  # exclusive bound

    input_ids = inputs["input_ids"].tolist()[0]
    answer_tokens = tokenizer.convert_ids_to_tokens(
        input_ids[answer_start:answer_end]
    )
    return tokenizer.convert_tokens_to_string(answer_tokens)
38
+
39
# Checkpoint that supplies both the QA model weights and its vocabulary.
QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"


@st.cache_resource
def load_qa_model():
    """Load the question-answering model and tokenizer, cached by Streamlit.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the checkpoint from being reloaded on each of those reruns.

    Returns:
        A ``(model, tokenizer)`` tuple built from ``QA_MODEL_NAME``.
    """
    qa_model = BertForQuestionAnswering.from_pretrained(QA_MODEL_NAME)
    qa_tokenizer = BertTokenizer.from_pretrained(QA_MODEL_NAME)
    return qa_model, qa_tokenizer


st.title("Resume Question Answering")

uploaded_file = st.file_uploader("Upload your resume (PDF format only)", type=["pdf"])

if uploaded_file is not None:
    resume_text = extract_text_from_pdf(uploaded_file)
    st.write("Resume Text:")
    st.write(resume_text)

    user_question = st.text_input("Ask a question based on your resume:")

    if user_question:
        model, tokenizer = load_qa_model()

        answer = answer_question(user_question, resume_text, model, tokenizer)
        st.write("Answer:")
        st.write(answer)

        # Ask for user feedback on satisfaction
        satisfaction = st.radio('Are you satisfied with the answer?', ('Yes', 'No'), key='satisfaction')

        # Log the interaction. Changing the radio selection reruns the script
        # with the same question and answer, so in that case refresh the
        # latest row instead of appending a duplicate one.
        entry = {
            'Question': user_question,
            'Answer': answer,
            'Satisfaction': satisfaction,
        }
        qa_log = st.session_state.qa_log
        is_same_interaction = (
            bool(qa_log)
            and qa_log[-1]['Question'] == user_question
            and qa_log[-1]['Answer'] == answer
        )
        if is_same_interaction:
            qa_log[-1] = entry
        else:
            qa_log.append(entry)

        # Display the log in a table format
        st.write("Interaction Log:")
        log_df = pd.DataFrame(st.session_state.qa_log)
        st.dataframe(log_df)