Tristan Thrush committed on
Commit
3f9ceca
1 Parent(s): ac14940

simplified pushing to hub

Browse files
Files changed (1) hide show
  1. app.py +30 -29
app.py CHANGED
@@ -11,8 +11,8 @@ from huggingface_hub import Repository
11
  from dotenv import load_dotenv
12
  from pathlib import Path
13
  import json
14
- from filelock import FileLock
15
  from utils import force_git_push
 
16
 
17
  # These variables are for storing the mturk HITs in a Hugging Face dataset.
18
  if Path(".env").is_file():
@@ -26,6 +26,25 @@ repo = Repository(
26
  local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
27
  )
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # Now let's run the app!
30
  pipe = pipeline("sentiment-analysis")
31
 
@@ -80,6 +99,14 @@ with demo:
80
  toggle_final_submit_preview = gr.update(visible=done)
81
  toggle_final_submit = gr.update(visible=False)
82
 
 
 
 
 
 
 
 
 
83
  return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
84
 
85
  # Input fields
@@ -96,32 +123,6 @@ with demo:
96
  with gr.Column(visible=False) as final_submit_preview:
97
  submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
98
 
99
- # Store the HIT data into a Hugging Face dataset.
100
- # The HIT is also stored and logged on mturk when post_hit_js is run below.
101
- # This _store_in_huggingface_dataset function just demonstrates how easy it is
102
- # to automatically create a Hugging Face dataset from mturk.
103
- def _store_in_huggingface_dataset(state):
104
- lock = FileLock(DATA_FILE + ".lock")
105
- lock.acquire()
106
- try:
107
- with open(DATA_FILE, "a") as jsonlfile:
108
- json_data_with_assignment_id =\
109
- [json.dumps(dict({"assignmentId": state["assignmentId"]}, **datum)) for datum in state["data"]]
110
- jsonlfile.write("\n".join(json_data_with_assignment_id) + "\n")
111
-
112
- if repo.is_repo_clean():
113
- logger.info("Repo currently clean. Ignoring push_to_hub")
114
- return None
115
- repo.git_add(auto_lfs_track=True)
116
- repo.git_commit("Auto commit by space")
117
- if FORCE_PUSH == "yes":
118
- force_git_push(repo)
119
- else:
120
- repo.git_push()
121
- finally:
122
- lock.release()
123
- return state
124
-
125
  # Button event handlers
126
  get_window_location_search_js = """
127
  function(text_input, label_input, state, dummy) {
@@ -157,7 +158,7 @@ with demo:
157
  """
158
 
159
  submit_hit_button.click(
160
- _store_in_huggingface_dataset,
161
  inputs=[state],
162
  outputs=[state],
163
  _js=post_hit_js,
@@ -173,7 +174,7 @@ with demo:
173
  """
174
 
175
  submit_hit_button_preview.click(
176
- _store_in_huggingface_dataset,
177
  inputs=[state],
178
  outputs=[state],
179
  _js=refresh_app_js,
 
11
  from dotenv import load_dotenv
12
  from pathlib import Path
13
  import json
 
14
  from utils import force_git_push
15
+ import threading
16
 
17
  # These variables are for storing the mturk HITs in a Hugging Face dataset.
18
  if Path(".env").is_file():
 
26
  local_dir="data", clone_from=DATASET_REPO_URL, use_auth_token=HF_TOKEN
27
  )
28
 
29
+ # This function pushes the HIT data written in data.jsonl to our Hugging Face
30
+ # dataset every minute. Adjust the frequency to suit your needs.
31
+ def asynchronous_push(f_stop):
32
+ if repo.is_repo_clean():
33
+ print("Repo currently clean. Ignoring push_to_hub")
34
+ else:
35
+ repo.git_add(auto_lfs_track=True)
36
+ repo.git_commit("Auto commit by space")
37
+ if FORCE_PUSH == "yes":
38
+ force_git_push(repo)
39
+ else:
40
+ repo.git_push()
41
+ if not f_stop.is_set():
42
+ # call again in 60 seconds
43
+ threading.Timer(60, asynchronous_push, [f_stop]).start()
44
+
45
+ f_stop = threading.Event()
46
+ asynchronous_push(f_stop)
47
+
48
  # Now let's run the app!
49
  pipe = pipeline("sentiment-analysis")
50
 
 
99
  toggle_final_submit_preview = gr.update(visible=done)
100
  toggle_final_submit = gr.update(visible=False)
101
 
102
+ if state["cnt"] == total_cnt:
103
+ # Write the HIT data to our local dataset because the person has
104
+ # submitted everything now.
105
+ with open(DATA_FILE, "a") as jsonlfile:
106
+ json_data_with_assignment_id =\
107
+ [json.dumps(dict({"assignmentId": state["assignmentId"]}, **datum)) for datum in state["data"]]
108
+ jsonlfile.write("\n".join(json_data_with_assignment_id) + "\n")
109
+
110
  return pred_confidences, ret, state, toggle_example_submit, toggle_final_submit, toggle_final_submit_preview, new_state_md, dummy
111
 
112
  # Input fields
 
123
  with gr.Column(visible=False) as final_submit_preview:
124
  submit_hit_button_preview = gr.Button("Submit Work (preview mode; no mturk HIT credit)")
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  # Button event handlers
127
  get_window_location_search_js = """
128
  function(text_input, label_input, state, dummy) {
 
158
  """
159
 
160
  submit_hit_button.click(
161
+ lambda state: state,
162
  inputs=[state],
163
  outputs=[state],
164
  _js=post_hit_js,
 
174
  """
175
 
176
  submit_hit_button_preview.click(
177
+ lambda state: state,
178
  inputs=[state],
179
  outputs=[state],
180
  _js=refresh_app_js,