jpangas committed on
Commit
46fbfbe
·
verified ·
1 Parent(s): 15f76b2

Track history of chatbots conversations and add lint (#2)

Browse files

- Add pre-commit (c07abc1d45582681539f7d472bbb65f8f73789f9)

Files changed (6) hide show
  1. .gitignore +2 -0
  2. .pre-commit-config.yaml +7 -0
  3. app.py +15 -9
  4. pyproject.toml +2 -0
  5. requirements.txt +2 -1
  6. utils.py +33 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ question_answers
.pre-commit-config.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.4.9
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
app.py CHANGED
@@ -1,23 +1,26 @@
1
- from grobid_client.grobid_client import GrobidClient
2
  import time
 
3
  import gradio as gr
 
 
 
4
  from langchain_community.document_loaders.generic import GenericLoader
5
  from langchain_community.document_loaders.parsers import GrobidParser
6
- from langchain_openai import OpenAIEmbeddings
7
- from langchain_core.vectorstores import InMemoryVectorStore
8
- from langchain_openai import ChatOpenAI
9
- from langchain import hub
10
  from langchain_core.documents import Document
 
 
11
  from langchain_text_splitters import RecursiveCharacterTextSplitter
12
  from langgraph.graph import START, StateGraph
13
  from typing_extensions import List, TypedDict
14
- import xmltodict
 
15
 
16
 
17
  class PaperQA:
18
  def __init__(self):
19
  self.qa_graph = None
20
  self.current_file = None
 
21
 
22
  class State(TypedDict):
23
  question: str
@@ -40,7 +43,6 @@ class PaperQA:
40
  )
41
  dict_information = xmltodict.parse(information[2])
42
  title = dict_information["tei"]["teiHeader"]["fileDesc"]["titleStmt"]["title"]
43
- abstract = dict_information["tei"]["teiHeader"]["profileDesc"]["abstract"]["p"]
44
  return title
45
 
46
  def initiate_graph(self, file):
@@ -93,6 +95,10 @@ class PaperQA:
93
  return "Please upload a PDF file first and wait for it to be loaded!"
94
 
95
  response = self.qa_graph.invoke({"question": question})
 
 
 
 
96
  return response["answer"]
97
 
98
  def slow_echo(self, message, history):
@@ -119,7 +125,7 @@ def main():
119
  label="Status of Upload", value="No Paper Uploaded", interactive=False
120
  )
121
 
122
- chat_interface = gr.ChatInterface(qa_app.slow_echo, type="messages")
123
 
124
  file_input.upload(fn=qa_app.initiate_graph, inputs=file_input, outputs=textbox)
125
 
@@ -127,4 +133,4 @@ def main():
127
 
128
 
129
  if __name__ == "__main__":
130
- main()
 
 
1
  import time
2
+
3
  import gradio as gr
4
+ import xmltodict
5
+ from grobid_client.grobid_client import GrobidClient
6
+ from langchain import hub
7
  from langchain_community.document_loaders.generic import GenericLoader
8
  from langchain_community.document_loaders.parsers import GrobidParser
 
 
 
 
9
  from langchain_core.documents import Document
10
+ from langchain_core.vectorstores import InMemoryVectorStore
11
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
12
  from langchain_text_splitters import RecursiveCharacterTextSplitter
13
  from langgraph.graph import START, StateGraph
14
  from typing_extensions import List, TypedDict
15
+
16
+ from utils import create_scheduler, save_feedback
17
 
18
 
19
  class PaperQA:
20
  def __init__(self):
21
  self.qa_graph = None
22
  self.current_file = None
23
+ self.scheduler, self.feedback_file = create_scheduler()
24
 
25
  class State(TypedDict):
26
  question: str
 
43
  )
44
  dict_information = xmltodict.parse(information[2])
45
  title = dict_information["tei"]["teiHeader"]["fileDesc"]["titleStmt"]["title"]
 
46
  return title
47
 
48
  def initiate_graph(self, file):
 
95
  return "Please upload a PDF file first and wait for it to be loaded!"
96
 
97
  response = self.qa_graph.invoke({"question": question})
98
+ if response["answer"] != "Please upload a PDF file first!":
99
+ save_feedback(
100
+ self.scheduler, self.feedback_file, question, response["answer"]
101
+ )
102
  return response["answer"]
103
 
104
  def slow_echo(self, message, history):
 
125
  label="Status of Upload", value="No Paper Uploaded", interactive=False
126
  )
127
 
128
+ gr.ChatInterface(qa_app.slow_echo, type="messages")
129
 
130
  file_input.upload(fn=qa_app.initiate_graph, inputs=file_input, outputs=textbox)
131
 
 
133
 
134
 
135
  if __name__ == "__main__":
136
+ main()
pyproject.toml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [tool.ruff.lint]
2
+ select = ["E4", "E7", "E9", "F", "I", "T10", "CPY"]
requirements.txt CHANGED
@@ -8,4 +8,5 @@ langchain-community
8
  langgraph
9
  langchain-core
10
  xmltodict
11
- lxml
 
 
8
  langgraph
9
  langchain-core
10
  xmltodict
11
+ lxml
12
+ pre-commit
utils.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import uuid
3
+ from pathlib import Path
4
+
5
+ from huggingface_hub import CommitScheduler
6
+
7
+
8
def create_scheduler():
    """Set up periodic uploads of recorded Q&A files to the dataset repo.

    Returns:
        tuple[CommitScheduler, Path]: the scheduler that pushes the watched
        folder every 5 minutes, and a unique per-session JSON Lines file
        path inside that folder for this process to append to.
    """
    # One fresh file per session so concurrent app instances never collide.
    feedback_file = Path("question_answers/") / f"data_{uuid.uuid4()}.json"

    # NOTE(review): repo_id has no namespace prefix — presumably resolved
    # against the authenticated user/org; confirm against deployment config.
    scheduler = CommitScheduler(
        repo_id="paper-extractor-bot-history",
        repo_type="dataset",
        folder_path=feedback_file.parent,
        path_in_repo="data",
        every=5,  # minutes between commits
    )
    return scheduler, feedback_file
22
+
23
+
24
def save_feedback(
    scheduler: CommitScheduler, feedback_file: Path, question: str, answer: str
) -> None:
    """Append one question/answer pair to *feedback_file* as a JSON Lines record.

    Holds the scheduler's lock for the whole write so a scheduled commit
    never uploads a half-written line.
    """
    record = json.dumps({"input": question, "answer": answer})
    with scheduler.lock, feedback_file.open("a") as handle:
        handle.write(record)
        handle.write("\n")