Spaces:
Sleeping
Sleeping
Track history of chatbot conversations and add lint (#2)
Browse files- Add pre-commit (c07abc1d45582681539f7d472bbb65f8f73789f9)
- .gitignore +2 -0
- .pre-commit-config.yaml +7 -0
- app.py +15 -9
- pyproject.toml +2 -0
- requirements.txt +2 -1
- utils.py +33 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
question_answers
|
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
repos:
|
2 |
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
3 |
+
rev: v0.4.9
|
4 |
+
hooks:
|
5 |
+
- id: ruff
|
6 |
+
args: [--fix]
|
7 |
+
- id: ruff-format
|
app.py
CHANGED
@@ -1,23 +1,26 @@
|
|
1 |
-
from grobid_client.grobid_client import GrobidClient
|
2 |
import time
|
|
|
3 |
import gradio as gr
|
|
|
|
|
|
|
4 |
from langchain_community.document_loaders.generic import GenericLoader
|
5 |
from langchain_community.document_loaders.parsers import GrobidParser
|
6 |
-
from langchain_openai import OpenAIEmbeddings
|
7 |
-
from langchain_core.vectorstores import InMemoryVectorStore
|
8 |
-
from langchain_openai import ChatOpenAI
|
9 |
-
from langchain import hub
|
10 |
from langchain_core.documents import Document
|
|
|
|
|
11 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
12 |
from langgraph.graph import START, StateGraph
|
13 |
from typing_extensions import List, TypedDict
|
14 |
-
|
|
|
15 |
|
16 |
|
17 |
class PaperQA:
|
18 |
def __init__(self):
|
19 |
self.qa_graph = None
|
20 |
self.current_file = None
|
|
|
21 |
|
22 |
class State(TypedDict):
|
23 |
question: str
|
@@ -40,7 +43,6 @@ class PaperQA:
|
|
40 |
)
|
41 |
dict_information = xmltodict.parse(information[2])
|
42 |
title = dict_information["tei"]["teiHeader"]["fileDesc"]["titleStmt"]["title"]
|
43 |
-
abstract = dict_information["tei"]["teiHeader"]["profileDesc"]["abstract"]["p"]
|
44 |
return title
|
45 |
|
46 |
def initiate_graph(self, file):
|
@@ -93,6 +95,10 @@ class PaperQA:
|
|
93 |
return "Please upload a PDF file first and wait for it to be loaded!"
|
94 |
|
95 |
response = self.qa_graph.invoke({"question": question})
|
|
|
|
|
|
|
|
|
96 |
return response["answer"]
|
97 |
|
98 |
def slow_echo(self, message, history):
|
@@ -119,7 +125,7 @@ def main():
|
|
119 |
label="Status of Upload", value="No Paper Uploaded", interactive=False
|
120 |
)
|
121 |
|
122 |
-
|
123 |
|
124 |
file_input.upload(fn=qa_app.initiate_graph, inputs=file_input, outputs=textbox)
|
125 |
|
@@ -127,4 +133,4 @@ def main():
|
|
127 |
|
128 |
|
129 |
if __name__ == "__main__":
|
130 |
-
main()
|
|
|
|
|
1 |
import time
|
2 |
+
|
3 |
import gradio as gr
|
4 |
+
import xmltodict
|
5 |
+
from grobid_client.grobid_client import GrobidClient
|
6 |
+
from langchain import hub
|
7 |
from langchain_community.document_loaders.generic import GenericLoader
|
8 |
from langchain_community.document_loaders.parsers import GrobidParser
|
|
|
|
|
|
|
|
|
9 |
from langchain_core.documents import Document
|
10 |
+
from langchain_core.vectorstores import InMemoryVectorStore
|
11 |
+
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
|
12 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
13 |
from langgraph.graph import START, StateGraph
|
14 |
from typing_extensions import List, TypedDict
|
15 |
+
|
16 |
+
from utils import create_scheduler, save_feedback
|
17 |
|
18 |
|
19 |
class PaperQA:
|
20 |
def __init__(self):
|
21 |
self.qa_graph = None
|
22 |
self.current_file = None
|
23 |
+
self.scheduler, self.feedback_file = create_scheduler()
|
24 |
|
25 |
class State(TypedDict):
|
26 |
question: str
|
|
|
43 |
)
|
44 |
dict_information = xmltodict.parse(information[2])
|
45 |
title = dict_information["tei"]["teiHeader"]["fileDesc"]["titleStmt"]["title"]
|
|
|
46 |
return title
|
47 |
|
48 |
def initiate_graph(self, file):
|
|
|
95 |
return "Please upload a PDF file first and wait for it to be loaded!"
|
96 |
|
97 |
response = self.qa_graph.invoke({"question": question})
|
98 |
+
if response["answer"] != "Please upload a PDF file first!":
|
99 |
+
save_feedback(
|
100 |
+
self.scheduler, self.feedback_file, question, response["answer"]
|
101 |
+
)
|
102 |
return response["answer"]
|
103 |
|
104 |
def slow_echo(self, message, history):
|
|
|
125 |
label="Status of Upload", value="No Paper Uploaded", interactive=False
|
126 |
)
|
127 |
|
128 |
+
gr.ChatInterface(qa_app.slow_echo, type="messages")
|
129 |
|
130 |
file_input.upload(fn=qa_app.initiate_graph, inputs=file_input, outputs=textbox)
|
131 |
|
|
|
133 |
|
134 |
|
135 |
if __name__ == "__main__":
|
136 |
+
main()
|
pyproject.toml
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[tool.ruff.lint]
|
2 |
+
select = ["E4", "E7", "E9", "F", "I", "T10", "CPY"]
|
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ langchain-community
|
|
8 |
langgraph
|
9 |
langchain-core
|
10 |
xmltodict
|
11 |
-
lxml
|
|
|
|
8 |
langgraph
|
9 |
langchain-core
|
10 |
xmltodict
|
11 |
+
lxml
|
12 |
+
pre-commit
|
utils.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import uuid
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
from huggingface_hub import CommitScheduler
|
6 |
+
|
7 |
+
|
8 |
+
def create_scheduler():
    """Set up background persistence for chatbot Q&A history.

    Builds a per-session JSON Lines target file (unique via UUID so
    concurrent sessions never collide) and a Hugging Face
    ``CommitScheduler`` that pushes the containing folder to the
    ``paper-extractor-bot-history`` dataset repo every 5 minutes.

    Returns:
        tuple: ``(scheduler, feedback_file)`` — the running
        ``CommitScheduler`` and the ``Path`` this session appends to.
    """
    # Unique filename per app session; the scheduler commits the folder.
    target = Path("question_answers/") / f"data_{uuid.uuid4()}.json"
    return (
        CommitScheduler(
            repo_id="paper-extractor-bot-history",
            repo_type="dataset",
            folder_path=target.parent,
            path_in_repo="data",
            every=5,  # minutes between background commits
        ),
        target,
    )
|
22 |
+
|
23 |
+
|
24 |
+
def save_feedback(
    scheduler: CommitScheduler, feedback_file: Path, question: str, answer: str
) -> None:
    """Append one question/answer record to the JSON Lines feedback file.

    Serializes the pair as a single JSON object per line. The write happens
    under ``scheduler.lock`` so the scheduler's background commit thread
    never snapshots a half-written line.
    """
    record = json.dumps({"input": question, "answer": answer})
    with scheduler.lock:
        with feedback_file.open("a") as fp:
            fp.write(record + "\n")
|