Spaces:

oliverwang15
/

DAN_AI

Sleeping

App Files Files Community

oliverwang15 committed on Oct 25, 2023

Commit

4831f4c

•

1 Parent(s): 4d47539

Multi-files input supported

Browse files

Files changed (3) hide show

app.py +28 -9
backend.py +134 -39
openai.py +9 -2

app.py CHANGED Viewed

@@ -31,7 +31,7 @@ with gr.Blocks(theme="dark") as demo:
                         label='Enter your OpenAI API key here',
                         type='password')
-                file = gr.File(label='Upload your .txt file here', file_types=['.txt'])
                 questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")
@@ -42,14 +42,23 @@ with gr.Blocks(theme="dark") as demo:
             with gr.Group():
                 gr.Markdown(f'<center><h1>Output</h1></center>')
                 gr.Markdown(f'<center><p>The answer to your question is :</p></center>')
                 question_box = gr.Textbox(label='Question')
                 answer_box = gr.Textbox(label='Answer')
                 # reference_box = gr.Textbox(label='Reference')
                 highlighted_text = gr.outputs.HTML(label="Highlighted Text")
-                with gr.Row():
-                    btn_last_question = gr.Button(value='Last Question')
-                    btn_next_question = gr.Button(value='Next Question')
             # Correctness
             with gr.Group():
@@ -103,7 +112,7 @@ with gr.Blocks(theme="dark") as demo:
     btn_submit_txt.click(
             backend.process_file,
             inputs=[file, questions, openai_key],
-            outputs=[question_box, answer_box, highlighted_text, correct_answer, correct_reference],
         )
     btn_submit_correctness.click(   # TODO
@@ -114,15 +123,25 @@ with gr.Blocks(theme="dark") as demo:
     # Switch question button
     btn_last_question.click(
-            backend.process_last,
-            outputs=[question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
-        )
     btn_next_question.click(
             backend.process_next,
-            outputs=[question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
         )
     # Download button
     btn_download_answer.click(
             backend.download_answer,

                         label='Enter your OpenAI API key here',
                         type='password')
+                file = gr.File(label='Upload your .txt file here', file_types=['.txt'], file_count = 'multiple')
                 questions = gr.CheckboxGroup(choices = QUESTIONS, value = QUESTIONS, label="Questions", info="Please select the question you want to ask")
             with gr.Group():
                 gr.Markdown(f'<center><h1>Output</h1></center>')
                 gr.Markdown(f'<center><p>The answer to your question is :</p></center>')
+                filename_box = gr.Textbox(label = "File")
                 question_box = gr.Textbox(label='Question')
                 answer_box = gr.Textbox(label='Answer')
                 # reference_box = gr.Textbox(label='Reference')
                 highlighted_text = gr.outputs.HTML(label="Highlighted Text")
+                with gr.Group():
+                    gr.Markdown("<center><h4>Please select different questions</h4></center>")
+                    with gr.Row():
+                        btn_last_question = gr.Button(value='Last Question')
+                        btn_next_question = gr.Button(value='Next Question')
+                with gr.Group():
+                    gr.Markdown("<center><h4>Please select different passages</h4></center>")
+                    with gr.Row():
+                        btn_last_passage = gr.Button(value='Last Passage')
+                        btn_next_passage = gr.Button(value='Next Passage')
             # Correctness
             with gr.Group():
     btn_submit_txt.click(
             backend.process_file,
             inputs=[file, questions, openai_key],
+            outputs=[filename_box, question_box, answer_box, highlighted_text, correct_answer, correct_reference],
         )
     btn_submit_correctness.click(   # TODO
     # Switch question button
     btn_last_question.click(
+        backend.process_last,
+        outputs=[filename_box, question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
+    )
     btn_next_question.click(
             backend.process_next,
+            outputs=[filename_box, question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
         )
+    # Switch passage button
+    btn_last_passage.click(
+        backend.switch_last_passage,
+        outputs=[filename_box, question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
+    )
+    btn_next_passage.click(
+        backend.switch_next_passage,
+        outputs=[filename_box, question_box, answer_box, highlighted_text, correct_answer, correct_reference, save_results, answer_correct, reference_correct],
+    )
     # Download button
     btn_download_answer.click(
             backend.download_answer,

backend.py CHANGED Viewed

@@ -12,7 +12,7 @@ class Backend:
         self.agent = OpenAI()
         self.prompt = Prompt()
-    def read_file(self, file):
         # read the file
         if file is not None:
             with open(file.name, 'r') as f:
@@ -21,6 +21,19 @@ class Backend:
             raise gr.Error("You need to upload a file first")
         return text
     def highlight_text(self, text, highlight_list):
         # Find the original sentences
         # Split the passage into sentences
@@ -40,42 +53,61 @@ class Backend:
         text = text.replace('\n', f" <br /> ")
         # add scroll bar
-        text = f'<div style="height: 500px; overflow: auto;">{text}</div>'
         return text
-    def process_file(self, file, questions, openai_key):
         # record the questions
         self.questions = questions
-        # get the text
-        self.text = self.read_file(file)
         # make the prompt
-        prompt = self.prompt.get(self.text, questions, 'v3')
         # interact with openai
-        res = self.agent(prompt, with_history = False, temperature = 0.1, model = 'gpt-3.5-turbo-16k', api_key = openai_key)
-        res = self.prompt.process_result(res, 'v3')
-        # for multiple questions
-        self.gpt_result = res
-        self.curret_question = 0
         self.totel_question = len(res.keys())
         # make a dataframe to record everything
-        self.ori_answer_df = pd.DataFrame(res).T
-        self.answer_df = pd.DataFrame(res).T
         # default first question
         res = res['Question 1']
-        question = self.questions[self.curret_question]
         self.answer = res['answer']
         self.highlighted_out = res['original sentences']
         highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
         self.highlighted_out = '\n'.join(self.highlighted_out)
-        return question, self.answer, highlighted_out_html, self.answer, self.highlighted_out
     def process_results(self, answer_correct, correct_answer, reference_correct, correct_reference):
         if not hasattr(self, 'clicked_correct_answer'):
@@ -87,75 +119,138 @@ class Backend:
         if not hasattr(self, 'answer_df'):
             raise gr.Error("You need to submit the document first")
-        if self.curret_question >= self.totel_question or self.curret_question < 0:
             raise gr.Error("No more questions, please return back")
         # record the answer
-        self.answer_df.loc[f'Question {self.curret_question + 1}', 'answer_correct'] = answer_correct
-        self.answer_df.loc[f'Question {self.curret_question + 1}', 'reference_correct'] = reference_correct
         if self.clicked_correct_answer == True:
             if hasattr(self, 'answer'):
-                self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_answer'] = self.answer
             else:
                 raise gr.Error("You need to submit the document first")
         else:
-            self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_answer'] = correct_answer
         if self.clicked_correct_reference == True:
             if hasattr(self, 'highlighted_out'):
-                self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_reference'] = self.highlighted_out
             else:
                 raise gr.Error("You need to submit the document first")
         else:
-            self.answer_df.loc[f'Question {self.curret_question + 1}', 'correct_reference'] = correct_reference
         gr.Info('Results saved!')
         return "Results saved!"
     def process_next(self):
-        self.curret_question += 1
         if hasattr(self, 'clicked_correct_answer'):
             del self.clicked_correct_answer
         if hasattr(self, 'clicked_correct_reference'):
             del self.clicked_correct_reference
-        if self.curret_question >= self.totel_question:
-            # self.curret_question -= 1
-            return "No more questions!", "No more questions!", "No more questions!", 'No more questions!', 'No more questions!', 'Still need to click the button above to save the results', None, None
         else:
-            res = self.gpt_result[f'Question {self.curret_question + 1}']
-            question = self.questions[self.curret_question]
             self.answer = res['answer']
             self.highlighted_out = res['original sentences']
             highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
             self.highlighted_out = '\n'.join(self.highlighted_out)
-            return question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
     def process_last(self):
-        self.curret_question -= 1
         if hasattr(self, 'clicked_correct_answer'):
             del self.clicked_correct_answer
         if hasattr(self, 'clicked_correct_reference'):
             del self.clicked_correct_reference
-        if self.curret_question < 0:
-            # self.curret_question += 1
-            return "No more questions!", "No more questions!", "No more questions!", 'No more questions!', 'No more questions!', 'Still need to click the button above to save the results', None, None
         else:
-            res = self.gpt_result[f'Question {self.curret_question + 1}']
-            question = self.questions[self.curret_question]
             self.answer = res['answer']
             self.highlighted_out = res['original sentences']
             highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
             self.highlighted_out = '\n'.join(self.highlighted_out)
-            return question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
     def download_answer(self, path = './tmp', name = 'answer.xlsx'):
         os.makedirs(path, exist_ok = True)
         path = os.path.join(path, name)
-        self.ori_answer_df['questions'] = self.questions
         self.ori_answer_df.to_excel(path, index = False)
         return path
@@ -163,7 +258,7 @@ class Backend:
     def download_corrected(self, path = './tmp', name = 'corrected_answer.xlsx'):
         os.makedirs(path, exist_ok = True)
         path = os.path.join(path, name)
-        self.answer_df['questions'] = self.questions
         self.answer_df.to_excel(path, index = False)
         return path

         self.agent = OpenAI()
         self.prompt = Prompt()
+    def read_file_single(self, file):
         # read the file
         if file is not None:
             with open(file.name, 'r') as f:
             raise gr.Error("You need to upload a file first")
         return text
+    def read_file(self, files):
+        # read the file
+        text_list = []
+        self.filename_list = []
+        if files is not None:
+            for file in files:
+                with open(file.name, 'r', encoding='utf-8') as f:
+                    text_list.append(f.read())
+                    self.filename_list.append(file.name.split('\\')[-1])
+        else:
+            raise gr.Error("You need to upload a file first")
+        return text_list
     def highlight_text(self, text, highlight_list):
         # Find the original sentences
         # Split the passage into sentences
         text = text.replace('\n', f" <br /> ")
         # add scroll bar
+        text = f'<div style="height: 300px; overflow: auto;">{text}</div>'
         return text
+    def process_file(self, file, questions, openai_key, progress = gr.Progress()):
         # record the questions
         self.questions = questions
+        # get the text_list
+        self.text_list = self.read_file(file)
         # make the prompt
+        prompt_list = [self.prompt.get(text, questions, 'v3') for text in self.text_list]
         # interact with openai
+        self.res_list = []
+        for prompt in progress.tqdm(prompt_list, desc = 'Generating answers...'):
+            res = self.agent(prompt, with_history = False, temperature = 0.1, model = 'gpt-3.5-turbo-16k', api_key = openai_key)
+            res = self.prompt.process_result(res, 'v3')
+            self.res_list.append(res)
+        # Use the first file as default
+        # Use the first question for multiple questions
+        gpt_res = self.res_list[0]
+        self.gpt_result = gpt_res
+        self.current_question = 0
         self.totel_question = len(res.keys())
+        self.current_passage = 0
+        self.total_passages = len(self.res_list)
         # make a dataframe to record everything
+        self.ori_answer_df = pd.DataFrame()
+        self.answer_df = pd.DataFrame()
+        for i, res in enumerate(self.res_list):
+            tmp = pd.DataFrame(res).T
+            tmp = tmp.reset_index()
+            tmp = tmp.rename(columns={"index":"question_id"})
+            tmp['filename'] = self.filename_list[i]
+            tmp['question'] = self.questions
+            self.ori_answer_df = pd.concat([tmp, self.ori_answer_df])
+            self.answer_df = pd.concat([tmp, self.answer_df])
         # default first question
         res = res['Question 1']
+        question = self.questions[self.current_question]
         self.answer = res['answer']
+        self.text = self.text_list[0]
         self.highlighted_out = res['original sentences']
         highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
         self.highlighted_out = '\n'.join(self.highlighted_out)
+        file_name = self.filename_list[self.current_passage]
+        return file_name, question, self.answer, highlighted_out_html, self.answer, self.highlighted_out
     def process_results(self, answer_correct, correct_answer, reference_correct, correct_reference):
         if not hasattr(self, 'clicked_correct_answer'):
         if not hasattr(self, 'answer_df'):
             raise gr.Error("You need to submit the document first")
+        if self.current_question >= self.totel_question or self.current_question < 0:
             raise gr.Error("No more questions, please return back")
         # record the answer
+        condition = (self.answer_df['question_id'] == f'Question {self.current_question + 1}' ) & \
+            (self.answer_df['filename'] == self.filename_list[self.current_passage])
+        self.answer_df.loc[condition, 'answer_correct'] = answer_correct
+        self.answer_df.loc[condition, 'reference_correct'] = reference_correct
+        # self.answer_df.loc[f'Question {self.current_question + 1}', 'answer_correct'] = answer_correct
+        # self.answer_df.loc[f'Question {self.current_question + 1}', 'reference_correct'] = reference_correct
         if self.clicked_correct_answer == True:
             if hasattr(self, 'answer'):
+                self.answer_df.loc[condition, 'correct_answer'] = self.answer
             else:
                 raise gr.Error("You need to submit the document first")
         else:
+            # self.answer_df.loc[f'Question {self.current_question + 1}', 'correct_answer'] = correct_answer
+            self.answer_df.loc[condition, 'correct_answer'] = correct_answer
         if self.clicked_correct_reference == True:
             if hasattr(self, 'highlighted_out'):
+                self.answer_df.loc[condition, 'correct_reference'] = self.highlighted_out
             else:
                 raise gr.Error("You need to submit the document first")
         else:
+            self.answer_df.loc[condition, 'correct_reference'] = correct_reference
         gr.Info('Results saved!')
         return "Results saved!"
     def process_next(self):
+        self.current_question += 1
         if hasattr(self, 'clicked_correct_answer'):
             del self.clicked_correct_answer
         if hasattr(self, 'clicked_correct_reference'):
             del self.clicked_correct_reference
+        if self.current_question >= self.totel_question:
+            # self.current_question -= 1
+            return "No more questions!", "No more questions!", "No more questions!", "No more questions!", 'No more questions!', 'No more questions!', 'Still need to click the button above to save the results', None, None
         else:
+            res = self.gpt_result[f'Question {self.current_question + 1}']
+            question = self.questions[self.current_question]
             self.answer = res['answer']
             self.highlighted_out = res['original sentences']
             highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
             self.highlighted_out = '\n'.join(self.highlighted_out)
+            file_name = self.filename_list[self.current_passage]
+            return file_name, question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
     def process_last(self):
+        self.current_question -= 1
+        # To make sure to correct the answer first
         if hasattr(self, 'clicked_correct_answer'):
             del self.clicked_correct_answer
         if hasattr(self, 'clicked_correct_reference'):
             del self.clicked_correct_reference
+        # check question boundary
+        if self.current_question < 0:
+            # self.current_question += 1
+            return "No more questions!", "No more questions!", "No more questions!", "No more questions!", 'No more questions!', 'No more questions!', 'Still need to click the button above to save the results', None, None
         else:
+            res = self.gpt_result[f'Question {self.current_question + 1}']
+            question = self.questions[self.current_question]
             self.answer = res['answer']
             self.highlighted_out = res['original sentences']
             highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
             self.highlighted_out = '\n'.join(self.highlighted_out)
+            file_name = self.filename_list[self.current_passage]
+            return file_name, question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
+    def switch_next_passage(self):
+        self.current_question = 0
+        # To make sure to correct the answer first
+        if hasattr(self, 'clicked_correct_answer'):
+            del self.clicked_correct_answer
+        if hasattr(self, 'clicked_correct_reference'):
+            del self.clicked_correct_reference
+        self.current_passage += 1
+        if self.current_passage >= self.total_passages:
+            # self.current_passage -= 1
+            return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
+        else:
+            gpt_res = self.res_list[self.current_passage]
+            self.gpt_result = gpt_res
+            res = self.gpt_result[f'Question {self.current_question + 1}']
+            question = self.questions[self.current_question]
+            self.answer = res['answer']
+            self.highlighted_out = res['original sentences']
+            highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
+            self.highlighted_out = '\n'.join(self.highlighted_out)
+            file_name = self.filename_list[self.current_passage]
+            return file_name, question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
+    def switch_last_passage(self):
+        self.current_question = 0
+        # To make sure to correct the answer first
+        if hasattr(self, 'clicked_correct_answer'):
+            del self.clicked_correct_answer
+        if hasattr(self, 'clicked_correct_reference'):
+            del self.clicked_correct_reference
+        self.current_passage -= 1
+        if self.current_passage < 0:
+            # self.current_passage += 1
+            return "No more passages!", "No more passages!", "No more passages!", "No more passages!", 'No more passages!', 'No more passages!', 'Still need to click the button above to save the results', None, None
+        else:
+            gpt_res = self.res_list[self.current_passage]
+            self.gpt_result = gpt_res
+            res = self.gpt_result[f'Question {self.current_question + 1}']
+            question = self.questions[self.current_question]
+            self.answer = res['answer']
+            self.highlighted_out = res['original sentences']
+            highlighted_out_html = self.highlight_text(self.text, self.highlighted_out)
+            self.highlighted_out = '\n'.join(self.highlighted_out)
+            file_name = self.filename_list[self.current_passage]
+            return file_name, question, self.answer, highlighted_out_html, 'Please judge on the generated answer', 'Please judge on the generated answer', 'Still need to click the button above to save the results', None, None
     def download_answer(self, path = './tmp', name = 'answer.xlsx'):
         os.makedirs(path, exist_ok = True)
         path = os.path.join(path, name)
+        # self.ori_answer_df['questions'] = self.questions
         self.ori_answer_df.to_excel(path, index = False)
         return path
     def download_corrected(self, path = './tmp', name = 'corrected_answer.xlsx'):
         os.makedirs(path, exist_ok = True)
         path = os.path.join(path, name)
+        # self.answer_df['questions'] = self.questions
         self.answer_df.to_excel(path, index = False)
         return path

openai.py CHANGED Viewed

@@ -32,7 +32,14 @@ class OpenAI:
         }, headers={
             'Authorization': f"Bearer {api_key}"
         })
-        # print(resp.json())
         self.history.append(resp.json()['choices'][0]['message'])
-        return resp.json()['choices'][0]['message']['content']

         }, headers={
             'Authorization': f"Bearer {api_key}"
         })
+        print(resp)
         self.history.append(resp.json()['choices'][0]['message'])
+        res = resp.json()['choices'][0]['message']['content']
+        # with open("tmp_res.txt", 'w') as f:
+        #     f.write(res)
+        # with open("tmp_res.txt", 'r') as f:
+        #     res = f.read()
+        return res