Ahmad-Moiz committed on
Commit
b0a12fe
1 Parent(s): d8bdbdd

Upload text_utils.py

Browse files
Files changed (1) hide show
  1. text_utils.py +120 -0
text_utils.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from langchain.prompts import PromptTemplate
3
+
4
+
5
def clean_pdf_text(text: str) -> str:
    """Return text extracted from a PDF with in-text citations stripped.

    All cleaning is currently delegated to ``remove_citations``.
    """
    # TODO: Remove References/Bibliography section.
    cleaned = remove_citations(text)
    return cleaned
9
+
10
+
11
# Parenthesised author-year citations such as "(Smith, 2020)",
# "(Smith & Jones, 2020a)" or "(Smith, 2020; O'Brien et al., 2021)".
# The character class is a superset of the former ``[A-Za-z0-9,.\s]``:
# ``\w`` is Unicode-aware (accented author names), and ``;``, ``&``,
# apostrophes (ASCII and U+2019), hyphens and en dashes (U+2013) cover
# multi-source citations and compound surnames. An optional lowercase
# letter after the year handles "2020a"-style disambiguation.
_AUTHOR_YEAR_CITATION = re.compile(
    r"\([\w,.;&'\u2019\-\u2013\s]+\s\d{4}[a-z]?\)"
)

# Bracketed numeric citations such as "[1]", "[3-5]" or "[3, 33, 49, 51]".
# En-dash ranges are accepted as well. The separator requires at least one
# whitespace character (``\s+``, not ``\s*``) so that a comma can never be
# consumed by both the character class and the group, which would make the
# pattern prone to catastrophic backtracking.
_NUMERIC_CITATION = re.compile(
    r"\[[0-9,\-\u2013]+(?:,\s+[0-9,\-\u2013]+)*\]"
)


def remove_citations(text: str) -> str:
    """Remove in-text citations from a string.

    Two citation styles are stripped:

    * author-year citations in parentheses, e.g. ``(Smith, 2020)``,
      ``(Smith & Jones, 2020a)``, ``(Smith, 2020; Lee et al., 2021)``;
    * numeric citations in square brackets, e.g. ``[1]``, ``[3-5]``,
      ``[3, 33, 49, 51]``.

    Only the citation itself is deleted; surrounding whitespace is left
    untouched, so a removed citation may leave a double space behind.

    Args:
        text: The text to clean.

    Returns:
        ``text`` with every matching citation removed.
    """
    without_author_year = _AUTHOR_YEAR_CITATION.sub('', text)
    return _NUMERIC_CITATION.sub('', without_author_year)
18
+
19
+
20
# Grading prompt: asks for a CORRECT/INCORRECT grade followed by an
# explanation of why the student answer is correct or incorrect.
template = """You are a teacher grading a quiz.
You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either CORRECT or INCORRECT.

Example Format:
QUESTION: question here
STUDENT ANSWER: student's answer here
TRUE ANSWER: true answer here
GRADE: CORRECT or INCORRECT here

Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!

QUESTION: {query}
STUDENT ANSWER: {result}
TRUE ANSWER: {answer}
GRADE:

And explain why the STUDENT ANSWER is correct or incorrect.
"""

GRADE_ANSWER_PROMPT = PromptTemplate(
    template=template,
    input_variables=["query", "result", "answer"],
)
40
+
41
# Grading prompt that additionally asks the model to identify potential
# sources of bias in the question and in the true answer.
template = """You are a teacher grading a quiz.
You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either CORRECT or INCORRECT.
You are also asked to identify potential sources of bias in the question and in the true answer.

Example Format:
QUESTION: question here
STUDENT ANSWER: student's answer here
TRUE ANSWER: true answer here
GRADE: CORRECT or INCORRECT here

Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!

QUESTION: {query}
STUDENT ANSWER: {result}
TRUE ANSWER: {answer}
GRADE:

And explain why the STUDENT ANSWER is correct or incorrect, identify potential sources of bias in the QUESTION, and identify potential sources of bias in the TRUE ANSWER.
"""

GRADE_ANSWER_PROMPT_BIAS_CHECK = PromptTemplate(
    template=template,
    input_variables=["query", "result", "answer"],
)
62
+
63
# Criteria-based grading prompt: the model reasons step by step about the
# listed criteria and then prints CORRECT or INCORRECT on its own line.
# The criteria lines previously ended with a stray, unbalanced double quote
# (apparently left over from a dict literal); those have been removed so the
# prompt text sent to the model is clean.
template = """You are assessing a submitted student answer to a question relative to the true answer based on the provided criteria:

***
QUESTION: {query}
***
STUDENT ANSWER: {result}
***
TRUE ANSWER: {answer}
***
Criteria:
relevance: Is the submission referring to a real quote from the text?
conciseness: Is the answer concise and to the point?
correct: Is the answer correct?
***
Does the submission meet the criterion? First, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then print the "CORRECT" or "INCORRECT" (without quotes or punctuation) on its own line corresponding to the correct answer.
Reasoning:
"""

GRADE_ANSWER_PROMPT_OPENAI = PromptTemplate(input_variables=["query", "result", "answer"], template=template)
82
+
83
# Short grading prompt: the template ends right after "GRADE:", so no
# explanation is requested from the model.
template = """You are a teacher grading a quiz.
You are given a question, the student's answer, and the true answer, and are asked to score the student answer as either CORRECT or INCORRECT.

Example Format:
QUESTION: question here
STUDENT ANSWER: student's answer here
TRUE ANSWER: true answer here
GRADE: CORRECT or INCORRECT here

Grade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin!

QUESTION: {query}
STUDENT ANSWER: {result}
TRUE ANSWER: {answer}
GRADE:"""

GRADE_ANSWER_PROMPT_FAST = PromptTemplate(
    template=template,
    input_variables=["query", "result", "answer"],
)
100
+
101
# Retrieval-grading prompt: asks whether the retrieved context ({result}) is
# relevant to the question and why it does or does not support {answer}.
template = """
Given the question: \n
{query}
Decide if the following retrieved context is relevant: \n
{result}
Answer in the following format: \n
"Context is relevant: True or False." \n
And explain why it supports or does not support the correct answer: {answer}"""

GRADE_DOCS_PROMPT = PromptTemplate(
    template=template,
    input_variables=["query", "result", "answer"],
)
111
+
112
# Short retrieval-grading prompt: asks only for the True/False relevance
# verdict, with no explanation requested.
template = """
Given the question: \n
{query}
Decide if the following retrieved context is relevant to the {answer}: \n
{result}
Answer in the following format: \n
"Context is relevant: True or False." \n """

GRADE_DOCS_PROMPT_FAST = PromptTemplate(
    template=template,
    input_variables=["query", "result", "answer"],
)