Ben Burtenshaw
committed on
Commit
•
9675a52
1
Parent(s):
3379fc5
fix push from argilla
Browse files
- hub.py +7 -2
- pages/3_🌱 Generate Dataset.py +1 -1
- pages/4_🔍 Review Generated Data.py +1 -0
- project_config.json +1 -1
- requirements.txt +1 -4
- seed_data.json +10 -30
hub.py
CHANGED
@@ -121,12 +121,17 @@ def pull_seed_data_from_repo(repo_id, hub_token):
|
|
121 |
|
122 |
|
123 |
def push_argilla_dataset_to_hub(
|
124 |
-
name: str,
|
|
|
|
|
|
|
|
|
|
|
125 |
):
|
126 |
rg.init(api_url=url, api_key=api_key)
|
127 |
feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
|
128 |
local_dataset = feedback_dataset.pull()
|
129 |
-
local_dataset.push_to_huggingface(repo_id=repo_id)
|
130 |
|
131 |
|
132 |
def push_pipeline_params(
|
|
|
121 |
|
122 |
|
123 |
def push_argilla_dataset_to_hub(
|
124 |
+
name: str,
|
125 |
+
repo_id: str,
|
126 |
+
url: str,
|
127 |
+
api_key: str,
|
128 |
+
hub_token: str,
|
129 |
+
workspace: str = "admin",
|
130 |
):
|
131 |
rg.init(api_url=url, api_key=api_key)
|
132 |
feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
|
133 |
local_dataset = feedback_dataset.pull()
|
134 |
+
local_dataset.push_to_huggingface(repo_id=repo_id, token=hub_token)
|
135 |
|
136 |
|
137 |
def push_pipeline_params(
|
pages/3_🌱 Generate Dataset.py
CHANGED
@@ -207,7 +207,7 @@ if all(
|
|
207 |
st.code(
|
208 |
"""
|
209 |
git clone https://github.com/huggingface/data-is-better-together
|
210 |
-
cd data-is-better-together/domain-specific-datasets/
|
211 |
pip install -r requirements.txt
|
212 |
huggingface-cli login
|
213 |
""",
|
|
|
207 |
st.code(
|
208 |
"""
|
209 |
git clone https://github.com/huggingface/data-is-better-together
|
210 |
+
cd data-is-better-together/domain-specific-datasets/distilabel_pipelines
|
211 |
pip install -r requirements.txt
|
212 |
huggingface-cli login
|
213 |
""",
|
pages/4_🔍 Review Generated Data.py
CHANGED
@@ -44,5 +44,6 @@ if st.button("π Publish the generated dataset"):
|
|
44 |
url=argilla_url,
|
45 |
api_key=argilla_api_key,
|
46 |
workspace="admin",
|
|
|
47 |
)
|
48 |
st.success("The generated dataset has been published to the Hub.")
|
|
|
44 |
url=argilla_url,
|
45 |
api_key=argilla_api_key,
|
46 |
workspace="admin",
|
47 |
+
hub_token=st.session_state["hub_token"],
|
48 |
)
|
49 |
st.success("The generated dataset has been published to the Hub.")
|
project_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"project_name": "
|
|
|
1 |
+
{"project_name": "bicycle_maintenance", "argilla_space_repo_id": "burtenshaw/bicycle_maintenance_argilla_space", "project_space_repo_id": "burtenshaw/bicycle_maintenance_config_space", "dataset_repo_id": "burtenshaw/bicycle_maintenance"}
|
requirements.txt
CHANGED
@@ -1,8 +1,5 @@
|
|
1 |
datasets
|
2 |
python_dotenv
|
3 |
-
sentence_transformers
|
4 |
streamlit
|
5 |
huggingface_hub
|
6 |
-
|
7 |
-
argilla
|
8 |
-
git+https://github.com/argilla-io/distilabel.git
|
|
|
1 |
datasets
|
2 |
python_dotenv
|
|
|
3 |
streamlit
|
4 |
huggingface_hub
|
5 |
+
argilla
|
|
|
|
seed_data.json
CHANGED
@@ -1,40 +1,20 @@
|
|
1 |
{
|
2 |
-
"domain": "
|
3 |
"perspectives": [
|
4 |
-
"
|
5 |
-
"Agribusiness",
|
6 |
-
"Permaculture",
|
7 |
-
"Agroforestery",
|
8 |
-
"Conventional Farming"
|
9 |
],
|
10 |
"topics": [
|
11 |
-
"
|
12 |
-
"economic growth",
|
13 |
-
"land",
|
14 |
-
"resources",
|
15 |
-
"efficiency"
|
16 |
],
|
17 |
"examples": [
|
18 |
{
|
19 |
-
"question": "
|
20 |
-
"answer": "
|
21 |
-
},
|
22 |
-
{
|
23 |
-
"question": "Compare the environmental footprint of small-scale, local farming versus large-scale, industrial agriculture.",
|
24 |
-
"answer": "Industrial agriculture typically emphasizes high-output, monoculture farming reliant on synthetic fertilizers and pesticides, which, as Horrigan, Lawrence, and Walker (2002) argue, leads to greater greenhouse gas emissions, higher energy use, and more water consumption compared to small-scale farming. In contrast, small-scale farms often employ diverse cropping systems and lower chemical inputs, resulting in a smaller environmental footprint. Pimentel et al. (2005) note that small-scale farms tend to have higher yields per unit area when environmental and sustainability factors are integrated into farming practices."
|
25 |
-
},
|
26 |
-
{
|
27 |
-
"question": "Analyze the economic implications of transitioning from conventional to organic farming.",
|
28 |
-
"answer": "Transitioning from conventional to organic farming involves significant changes in farm management, input use, and market engagement. Crowder and Reganold (2015) present evidence that organic farms often yield smaller outputs initially but achieve higher profitability due to premium prices, lower input costs, and improved soil health over time. However, this transition requires upfront investments in knowledge and infrastructure, which can be economically challenging for some farmers, as noted by Seufert and Ramankutty (2017)."
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"question": "Analyze the social, economic and environnmental impacts of land consolidation vs small-scale farmers.",
|
32 |
-
"answer": "Land consolidation has been associated with increased agricultural productivity but also with negative social and environmental impacts. Larger land holdings typically lead to monocultures, which reduce biodiversity and increase vulnerability to pests and diseases, as highlighted by Li et al. (2017). Economically, while consolidation can lead to economies of scale and potential gains in gross margins, it often displaces rural populations, exacerbating poverty and reducing local food diversity (Sutherland et al., 2015)."
|
33 |
-
},
|
34 |
-
{
|
35 |
-
"question": "Investigate the relationship between land ownership patterns, agricultural productivity and environment sustainability. ",
|
36 |
-
"answer": "Land ownership patterns critically influence agricultural productivity and sustainability. Secure land tenure supports investments in long-term improvements such as soil conservation and water management, which are pivotal for sustainable outcomes. Studies by Barrett et al. (2010) demonstrate that fragmented land ownership often results in inefficient resource use and higher transaction costs, which can detract from sustainability goals."
|
37 |
}
|
38 |
],
|
39 |
-
"domain_expert_prompt": "You
|
|
|
|
|
|
|
|
|
40 |
}
|
|
|
1 |
{
|
2 |
+
"domain": "Bicycle maintenance",
|
3 |
"perspectives": [
|
4 |
+
"Professional cycling"
|
|
|
|
|
|
|
|
|
5 |
],
|
6 |
"topics": [
|
7 |
+
"punctures"
|
|
|
|
|
|
|
|
|
8 |
],
|
9 |
"examples": [
|
10 |
{
|
11 |
+
"question": "",
|
12 |
+
"answer": ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
}
|
14 |
],
|
15 |
+
"domain_expert_prompt": "You are an experienced bicycle mechanic with extensive knowledge of various types of bicycles, their components, and common issues. You provide clear, concise, and accurate advice on bicycle maintenance, repairs, and upgrades. You have a deep understanding of bicycle mechanics, materials, and tools, and you are able to explain complex concepts in a way that is easy for users to understand. You are patient, friendly, and always willing to help users with their bicycle-related questions.",
|
16 |
+
"application_instruction": "You are an AI assistant than generates queries around the domain of Bicycle maintenance.\n Your should not expect basic but profound questions from your users.\n The queries should reflect a diversxamity of vision and economic positions and political positions.\n The queries may know about different methods of Bicycle maintenance.\n The queries can be positioned politically, economically, socially, or practically.\n Also take into account the impact of diverse causes on diverse domains.\n- Question: \n- Answer: \n",
|
17 |
+
"seed_terms": [
|
18 |
+
"punctures from a Professional cycling perspective"
|
19 |
+
]
|
20 |
}
|