Spaces:

Tristan
/

dadc

Runtime error

App Files Community

Tristan Thrush committed on Jul 23, 2022

Commit

3f9ceca

•

1 Parent(s): ac14940

simplified pushing to hub

Browse files

Files changed (1) hide show

app.py +30 -29

app.py CHANGED Viewed

@@ -11,8 +11,8 @@ from huggingface_hub import Repository
 from dotenv import load_dotenv
 from pathlib import Path
 import json
-from filelock import FileLock
 from utils import force_git_push
 # These variables are for storing the mturk HITs in a Hugging Face dataset.
 if Path(".env").is_file():
@@ -26,6 +26,25 @@ repo = Repository(
     local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
 )
 # Now let's run the app!
 pipe = pipeline("sentiment-analysis")
@@ -80,6 +99,14 @@ with demo:
             toggle_final_submit_preview = gr.update(visible=done)
             toggle_final_submit = gr.update(visible=False)
         return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
     # Input fields
@@ -96,32 +123,6 @@ with demo:
     with gr.Column(visible=False) as final_submit_preview:
         submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
-    # Store the HIT data into a Hugging Face dataset.
-    # The HIT is also stored and logged on mturk when post_hit_js is run below.
-    # This _store_in_huggingface_dataset function just demonstrates how easy it is
-    # to automatically create a Hugging Face dataset from mturk.
-    def _store_in_huggingface_dataset(state):
-        lock = FileLock(DATA_FILE + ".lock")
-        lock.acquire()
-        try:
-            with open(DATA_FILE, "a") as jsonlfile:
-                json_data_with_assignment_id =\
-                    [json.dumps(dict({"assignmentId": state["assignmentId"]}, **datum)) for datum in state["data"]]
-                jsonlfile.write("\n".join(json_data_with_assignment_id) + "\n")
-            if repo.is_repo_clean():
-                logger.info("Repo currently clean.  Ignoring push_to_hub")
-                return None
-            repo.git_add(auto_lfs_track=True)
-            repo.git_commit("Auto commit by space")
-            if FORCE_PUSH == "yes":
-                force_git_push(repo)
-            else:
-                repo.git_push()
-        finally:
-            lock.release()
-        return state
     # Button event handlers
     get_window_location_search_js = """
         function(text_input, label_input, state, dummy) {
@@ -157,7 +158,7 @@ with demo:
         """
     submit_hit_button.click(
-        _store_in_huggingface_dataset,
         inputs=[state],
         outputs=[state],
         _js=post_hit_js,
@@ -173,7 +174,7 @@ with demo:
         """
     submit_hit_button_preview.click(
-        _store_in_huggingface_dataset,
         inputs=[state],
         outputs=[state],
         _js=refresh_app_js,

 from dotenv import load_dotenv
 from pathlib import Path
 import json
 from utils import force_git_push
+import threading
 # These variables are for storing the mturk HITs in a Hugging Face dataset.
 if Path(".env").is_file():
     local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
 )
+# This function pushes the HIT data written in data.jsonl to our Hugging Face
+# dataset every minute. Adjust the frequency to suit your needs.
+def asynchronous_push(f_stop):
+    if repo.is_repo_clean():
+        print("Repo currently clean. Ignoring push_to_hub")
+    else:
+        repo.git_add(auto_lfs_track=True)
+        repo.git_commit("Auto commit by space")
+        if FORCE_PUSH == "yes":
+            force_git_push(repo)
+        else:
+            repo.git_push()
+    if not f_stop.is_set():
+        # call again in 60 seconds
+        threading.Timer(60, asynchronous_push, [f_stop]).start()
+f_stop = threading.Event()
+asynchronous_push(f_stop)
 # Now let's run the app!
 pipe = pipeline("sentiment-analysis")
             toggle_final_submit_preview = gr.update(visible=done)
             toggle_final_submit = gr.update(visible=False)
+        if state["cnt"] == total_cnt:
+            # Write the HIT data to our local dataset because the person has
+            # submitted everything now.
+            with open(DATA_FILE, "a") as jsonlfile:
+                json_data_with_assignment_id =\
+                    [json.dumps(dict({"assignmentId": state["assignmentId"]}, **datum)) for datum in state["data"]]
+                jsonlfile.write("\n".join(json_data_with_assignment_id) + "\n")
         return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
     # Input fields
     with gr.Column(visible=False) as final_submit_preview:
         submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
     # Button event handlers
     get_window_location_search_js = """
         function(text_input, label_input, state, dummy) {
         """
     submit_hit_button.click(
+        lambda state: state,
         inputs=[state],
         outputs=[state],
         _js=post_hit_js,
         """
     submit_hit_button_preview.click(
+        lambda state: state,
         inputs=[state],
         outputs=[state],
         _js=refresh_app_js,