jpangas committed on
Commit
46fbfbe
·
verified ·
1 Parent(s): 15f76b2

Track history of chatbots conversations and add lint (#2)

Browse files

- Add pre-commit (c07abc1d45582681539f7d472bbb65f8f73789f9)

Files changed (6) hide show
  1. .gitignore +2 -0
  2. .pre-commit-config.yaml +7 -0
  3. app.py +15 -9
  4. pyproject.toml +2 -0
  5. requirements.txt +2 -1
  6. utils.py +33 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ question_answers
.pre-commit-config.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.4.9
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
app.py CHANGED
@@ -1,23 +1,26 @@
1
- from grobid_client.grobid_client import GrobidClient
2
  import time
 
3
  import gradio as gr
 
 
 
4
  from langchain_community.document_loaders.generic import GenericLoader
5
  from langchain_community.document_loaders.parsers import GrobidParser
6
- from langchain_openai import OpenAIEmbeddings
7
- from langchain_core.vectorstores import InMemoryVectorStore
8
- from langchain_openai import ChatOpenAI
9
- from langchain import hub
10
  from langchain_core.documents import Document
 
 
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter
12
  from langgraph.graph import START, StateGraph
13
  from typing_extensions import List, TypedDict
14
- import xmltodict
 
15
 
16
 
17
  class PaperQA:
18
  def __init__(self):
19
  self.qa_graph = None
20
  self.current_file = None
 
21
 
22
  class State(TypedDict):
23
  question: str
@@ -40,7 +43,6 @@ class PaperQA:
40
  )
41
  dict_information = xmltodict.parse(information[2])
42
  title = dict_information["tei"]["teiHeader"]["fileDesc"]["titleStmt"]["title"]
43
- abstract = dict_information["tei"]["teiHeader"]["profileDesc"]["abstract"]["p"]
44
  return title
45
 
46
  def initiate_graph(self, file):
@@ -93,6 +95,10 @@ class PaperQA:
93
  return "Please upload a PDF file first and wait for it to be loaded!"
94
 
95
  response = self.qa_graph.invoke({"question": question})
 
 
 
 
96
  return response["answer"]
97
 
98
  def slow_echo(self, message, history):
@@ -119,7 +125,7 @@ def main():
119
  label="Status of Upload", value="No Paper Uploaded", interactive=False
120
  )
121
 
122
- chat_interface = gr.ChatInterface(qa_app.slow_echo, type="messages")
123
 
124
  file_input.upload(fn=qa_app.initiate_graph, inputs=file_input, outputs=textbox)
125
 
@@ -127,4 +133,4 @@ def main():
127
 
128
 
129
  if __name__ == "__main__":
130
- main()
 
 
1
  import time
2
+
3
  import gradio as gr
4
+ import xmltodict
5
+ from grobid_client.grobid_client import GrobidClient
6
+ from langchain import hub
7
  from langchain_community.document_loaders.generic import GenericLoader
8
  from langchain_community.document_loaders.parsers import GrobidParser
 
 
 
 
9
  from langchain_core.documents import Document
10
+ from langchain_core.vectorstores import InMemoryVectorStore
11
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
12
  from langchain_text_splitters import RecursiveCharacterTextSplitter
13
  from langgraph.graph import START, StateGraph
14
  from typing_extensions import List, TypedDict
15
+
16
+ from utils import create_scheduler, save_feedback
17
 
18
 
19
  class PaperQA:
20
  def __init__(self):
21
  self.qa_graph = None
22
  self.current_file = None
23
+ self.scheduler, self.feedback_file = create_scheduler()
24
 
25
  class State(TypedDict):
26
  question: str
 
43
  )
44
  dict_information = xmltodict.parse(information[2])
45
  title = dict_information["tei"]["teiHeader"]["fileDesc"]["titleStmt"]["title"]
 
46
  return title
47
 
48
  def initiate_graph(self, file):
 
95
  return "Please upload a PDF file first and wait for it to be loaded!"
96
 
97
  response = self.qa_graph.invoke({"question": question})
98
+ if response["answer"] != "Please upload a PDF file first!":
99
+ save_feedback(
100
+ self.scheduler, self.feedback_file, question, response["answer"]
101
+ )
102
  return response["answer"]
103
 
104
  def slow_echo(self, message, history):
 
125
  label="Status of Upload", value="No Paper Uploaded", interactive=False
126
  )
127
 
128
+ gr.ChatInterface(qa_app.slow_echo, type="messages")
129
 
130
  file_input.upload(fn=qa_app.initiate_graph, inputs=file_input, outputs=textbox)
131
 
 
133
 
134
 
135
  if __name__ == "__main__":
136
+ main()
pyproject.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [tool.ruff.lint]
2
+ select = ["E4", "E7", "E9", "F", "I", "T10", "CPY"]
requirements.txt CHANGED
@@ -8,4 +8,5 @@ langchain-community
8
  langgraph
9
  langchain-core
10
  xmltodict
11
- lxml
 
 
8
  langgraph
9
  langchain-core
10
  xmltodict
11
+ lxml
12
+ pre-commit
utils.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import uuid
3
+ from pathlib import Path
4
+
5
+ from huggingface_hub import CommitScheduler
6
+
7
+
8
def create_scheduler():
    """Set up periodic uploads of recorded Q&A files to the dataset repo.

    Returns:
        tuple[CommitScheduler, Path]: the scheduler that pushes the watched
        folder every 5 minutes, and a unique per-session JSON Lines file
        path inside that folder for this process to append to.
    """
    # One fresh file per session so concurrent app instances never collide.
    feedback_file = Path("question_answers/") / f"data_{uuid.uuid4()}.json"

    # NOTE(review): repo_id has no namespace prefix — presumably resolved
    # against the authenticated user/org; confirm against deployment config.
    scheduler = CommitScheduler(
        repo_id="paper-extractor-bot-history",
        repo_type="dataset",
        folder_path=feedback_file.parent,
        path_in_repo="data",
        every=5,  # minutes between commits
    )
    return scheduler, feedback_file
22
+
23
+
24
def save_feedback(
    scheduler: CommitScheduler, feedback_file: Path, question: str, answer: str
) -> None:
    """Append one question/answer pair to *feedback_file* as a JSON Lines record.

    Holds the scheduler's lock for the whole write so a scheduled commit
    never uploads a half-written line.
    """
    record = json.dumps({"input": question, "answer": answer})
    with scheduler.lock, feedback_file.open("a") as handle:
        handle.write(record)
        handle.write("\n")