Ben Burtenshaw committed on
Commit
dfd3683
1 Parent(s): 32014a1

lose codeless version

Browse files
Files changed (3) hide show
  1. defaults.py +1 -1
  2. hub.py +23 -1
  3. pages/3_🌱 Generate Dataset.py +29 -10
defaults.py CHANGED
@@ -3,7 +3,7 @@ import json
3
 
4
  SEED_DATA_PATH = "seed_data.json"
5
  PIPELINE_PATH = "pipeline.yaml"
6
- REMOTE_CODE_PATHS = ["defaults.py", "domain.py", "pipeline.py", "requirements.txt"]
7
  DIBT_PARENT_APP_URL = "https://argilla-domain-specific-datasets-welcome.hf.space/"
8
  N_PERSPECTIVES = 5
9
  N_TOPICS = 5
 
3
 
4
  SEED_DATA_PATH = "seed_data.json"
5
  PIPELINE_PATH = "pipeline.yaml"
6
+ REMOTE_CODE_PATHS = ["requirements.txt"]
7
  DIBT_PARENT_APP_URL = "https://argilla-domain-specific-datasets-welcome.hf.space/"
8
  N_PERSPECTIVES = 5
9
  N_TOPICS = 5
hub.py CHANGED
@@ -94,7 +94,7 @@ def push_pipeline_to_hub(
94
  # upload the pipeline to the hub
95
  hf_api.upload_file(
96
  path_or_fileobj=pipeline_path,
97
- path_in_repo="pipeline.yaml",
98
  token=hub_token,
99
  repo_id=repo_id,
100
  repo_type="dataset",
@@ -127,3 +127,25 @@ def push_argilla_dataset_to_hub(
127
  feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
128
  local_dataset = feedback_dataset.pull()
129
  local_dataset.push_to_huggingface(repo_id=repo_id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  # upload the pipeline to the hub
95
  hf_api.upload_file(
96
  path_or_fileobj=pipeline_path,
97
+ path_in_repo="pipeline.py",
98
  token=hub_token,
99
  repo_id=repo_id,
100
  repo_type="dataset",
 
127
  feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
128
  local_dataset = feedback_dataset.pull()
129
  local_dataset.push_to_huggingface(repo_id=repo_id)
130
+
131
+
132
+ def push_pipeline_params(
133
+ pipeline_params,
134
+ hub_username,
135
+ hub_token: str,
136
+ project_name,
137
+ ):
138
+ repo_id = f"{hub_username}/{project_name}"
139
+ temp_path = mktemp()
140
+ with open(temp_path, "w") as f:
141
+ json.dump(pipeline_params, f)
142
+ # upload the pipeline to the hub
143
+ hf_api.upload_file(
144
+ path_or_fileobj=temp_path,
145
+ path_in_repo="pipeline_params.json",
146
+ token=hub_token,
147
+ repo_id=repo_id,
148
+ repo_type="dataset",
149
+ )
150
+
151
+ print(f"Pipeline params uploaded to {repo_id}")
pages/3_🌱 Generate Dataset.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
 
3
  from defaults import ARGILLA_URL
 
4
  from utils import project_sidebar
5
 
6
  st.set_page_config(
@@ -90,6 +91,25 @@ if all(
90
  argilla_dataset_name,
91
  ]
92
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  st.markdown(
94
  "To run the pipeline locally, you need to have the `distilabel` library installed. You can install it using the following command:"
95
  )
@@ -106,19 +126,18 @@ if all(
106
 
107
  st.code(
108
  f"""
109
- # Clone the project and install the requirements
110
  git clone https://huggingface.co/datasets/{hub_username}/{project_name}
111
  cd {project_name}
112
  pip install -r requirements.txt
113
-
114
- # Run the pipeline
115
- python pipeline.py
116
- --argilla-api-key {argilla_api_key}
117
- --argilla-api-url {argilla_url}
118
- --argilla-dataset-name {argilla_dataset_name}
119
- --endpoint-base-url {base_url}
120
- --hub-token {st.session_state["hub_token"]}
121
- """,
122
  language="bash",
123
  )
124
  st.markdown(
 
1
  import streamlit as st
2
 
3
  from defaults import ARGILLA_URL
4
+ from hub import push_pipeline_params, push_pipeline_to_hub
5
  from utils import project_sidebar
6
 
7
  st.set_page_config(
 
91
  argilla_dataset_name,
92
  ]
93
  ):
94
+ push_pipeline_params(
95
+ pipeline_params={
96
+ "argilla_api_key": argilla_api_key,
97
+ "argilla_api_url": argilla_url,
98
+ "argilla_dataset_name": argilla_dataset_name,
99
+ "endpoint_base_url": base_url,
100
+ },
101
+ hub_username=hub_username,
102
+ hub_token=hub_token,
103
+ project_name=project_name,
104
+ )
105
+
106
+ push_pipeline_to_hub(
107
+ pipeline_path="pipeline.py",
108
+ hub_username=hub_username,
109
+ hub_token=hub_token,
110
+ project_name=project_name,
111
+ )
112
+
113
  st.markdown(
114
  "To run the pipeline locally, you need to have the `distilabel` library installed. You can install it using the following command:"
115
  )
 
126
 
127
  st.code(
128
  f"""
 
129
  git clone https://huggingface.co/datasets/{hub_username}/{project_name}
130
  cd {project_name}
131
  pip install -r requirements.txt
132
+ """
133
+ )
134
+
135
+ st.markdown("Finally, you can run the pipeline using the following command:")
136
+
137
+ st.code(
138
+ """
139
+ huggingface-cli login
140
+ python pipeline.py""",
141
  language="bash",
142
  )
143
  st.markdown(