Tristan Thrush
committed on
Commit
•
3f9ceca
1
Parent(s):
ac14940
simplified pushing to hub
Browse files
app.py
CHANGED
@@ -11,8 +11,8 @@ from huggingface_hub import Repository
|
|
11 |
from dotenv import load_dotenv
|
12 |
from pathlib import Path
|
13 |
import json
|
14 |
-
from filelock import FileLock
|
15 |
from utils import force_git_push
|
|
|
16 |
|
17 |
# These variables are for storing the mturk HITs in a Hugging Face dataset.
|
18 |
if Path(".env").is_file():
|
@@ -26,6 +26,25 @@ repo = Repository(
|
|
26 |
local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
|
27 |
)
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
# Now let's run the app!
|
30 |
pipe = pipeline("sentiment-analysis")
|
31 |
|
@@ -80,6 +99,14 @@ with demo:
|
|
80 |
toggle_final_submit_preview = gr.update(visible=done)
|
81 |
toggle_final_submit = gr.update(visible=False)
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
|
84 |
|
85 |
# Input fields
|
@@ -96,32 +123,6 @@ with demo:
|
|
96 |
with gr.Column(visible=False) as final_submit_preview:
|
97 |
submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
|
98 |
|
99 |
-
# Store the HIT data into a Hugging Face dataset.
|
100 |
-
# The HIT is also stored and logged on mturk when post_hit_js is run below.
|
101 |
-
# This _store_in_huggingface_dataset function just demonstrates how easy it is
|
102 |
-
# to automatically create a Hugging Face dataset from mturk.
|
103 |
-
def _store_in_huggingface_dataset(state):
    """Append the HIT records in ``state`` to DATA_FILE and push the repo.

    Each entry of ``state["data"]`` is merged with the assignment id,
    serialised as one JSON line, and appended to DATA_FILE. The local
    repository is then committed and pushed (force-pushed when FORCE_PUSH
    is "yes"). The whole sequence runs while holding a file lock on
    ``DATA_FILE + ".lock"``.

    Returns:
        ``state`` after a commit and push; ``None`` when the repository
        reports no changes to commit.
    """
    # The context manager acquires the lock on entry and releases it on
    # every exit path, including the early return and any exception.
    with FileLock(DATA_FILE + ".lock"):
        with open(DATA_FILE, "a") as jsonlfile:
            serialized_rows = []
            for datum in state["data"]:
                record = dict({"assignmentId": state["assignmentId"]}, **datum)
                serialized_rows.append(json.dumps(record))
            # One write for the whole batch, terminated by a newline.
            jsonlfile.write("\n".join(serialized_rows) + "\n")

        if repo.is_repo_clean():
            logger.info("Repo currently clean. Ignoring push_to_hub")
            # NOTE(review): this path yields None rather than `state` -
            # confirm that is what the caller expects.
            return None

        repo.git_add(auto_lfs_track=True)
        repo.git_commit("Auto commit by space")
        if FORCE_PUSH != "yes":
            repo.git_push()
        else:
            force_git_push(repo)

    return state
|
124 |
-
|
125 |
# Button event handlers
|
126 |
get_window_location_search_js = """
|
127 |
function(text_input, label_input, state, dummy) {
|
@@ -157,7 +158,7 @@ with demo:
|
|
157 |
"""
|
158 |
|
159 |
submit_hit_button.click(
|
160 |
-
|
161 |
inputs=[state],
|
162 |
outputs=[state],
|
163 |
_js=post_hit_js,
|
@@ -173,7 +174,7 @@ with demo:
|
|
173 |
"""
|
174 |
|
175 |
submit_hit_button_preview.click(
|
176 |
-
|
177 |
inputs=[state],
|
178 |
outputs=[state],
|
179 |
_js=refresh_app_js,
|
|
|
11 |
from dotenv import load_dotenv
|
12 |
from pathlib import Path
|
13 |
import json
|
|
|
14 |
from utils import force_git_push
|
15 |
+
import threading
|
16 |
|
17 |
# These variables are for storing the mturk HITs in a Hugging Face dataset.
|
18 |
if Path(".env").is_file():
|
|
|
26 |
local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
|
27 |
)
|
28 |
|
29 |
+
# This function pushes the HIT data written in data.jsonl to our Hugging Face
|
30 |
+
# dataset every minute. Adjust the frequency to suit your needs.
|
31 |
+
def asynchronous_push(f_stop):
    """Commit and push pending changes in ``repo``, then schedule the next run.

    When the repository has uncommitted changes they are added, committed
    and pushed (force-pushed when FORCE_PUSH is "yes"). Afterwards, unless
    ``f_stop`` is set, a ``threading.Timer`` is started to call this
    function again in 60 seconds.

    Args:
        f_stop: Event-like object; once ``f_stop.is_set()`` returns true,
            no further run is scheduled.
    """
    try:
        if repo.is_repo_clean():
            print("Repo currently clean. Ignoring push_to_hub")
        else:
            repo.git_add(auto_lfs_track=True)
            repo.git_commit("Auto commit by space")
            if FORCE_PUSH == "yes":
                force_git_push(repo)
            else:
                repo.git_push()
    except Exception as err:
        # A failed add/commit/push must not end the cycle: without this
        # handler the exception would propagate before the timer below is
        # re-armed, and no later push would ever be attempted.
        print(f"Push to hub failed; will retry on the next run: {err!r}")

    if not f_stop.is_set():
        # call again in 60 seconds
        threading.Timer(60, asynchronous_push, [f_stop]).start()
|
44 |
+
|
45 |
+
f_stop = threading.Event()
|
46 |
+
asynchronous_push(f_stop)
|
47 |
+
|
48 |
# Now let's run the app!
|
49 |
pipe = pipeline("sentiment-analysis")
|
50 |
|
|
|
99 |
toggle_final_submit_preview = gr.update(visible=done)
|
100 |
toggle_final_submit = gr.update(visible=False)
|
101 |
|
102 |
+
if state["cnt"] == total_cnt:
|
103 |
+
# Write the HIT data to our local dataset because the person has
|
104 |
+
# submitted everything now.
|
105 |
+
with open(DATA_FILE, "a") as jsonlfile:
|
106 |
+
json_data_with_assignment_id =\
|
107 |
+
[json.dumps(dict({"assignmentId": state["assignmentId"]}, **datum)) for datum in state["data"]]
|
108 |
+
jsonlfile.write("\n".join(json_data_with_assignment_id) + "\n")
|
109 |
+
|
110 |
return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
|
111 |
|
112 |
# Input fields
|
|
|
123 |
with gr.Column(visible=False) as final_submit_preview:
|
124 |
submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
# Button event handlers
|
127 |
get_window_location_search_js = """
|
128 |
function(text_input, label_input, state, dummy) {
|
|
|
158 |
"""
|
159 |
|
160 |
submit_hit_button.click(
|
161 |
+
lambda state: state,
|
162 |
inputs=[state],
|
163 |
outputs=[state],
|
164 |
_js=post_hit_js,
|
|
|
174 |
"""
|
175 |
|
176 |
submit_hit_button_preview.click(
|
177 |
+
lambda state: state,
|
178 |
inputs=[state],
|
179 |
outputs=[state],
|
180 |
_js=refresh_app_js,
|