Ben Burtenshaw committed on
Commit
9675a52
1 Parent(s): 3379fc5

fix push from argilla

Browse files
hub.py CHANGED
@@ -121,12 +121,17 @@ def pull_seed_data_from_repo(repo_id, hub_token):
121
 
122
 
123
  def push_argilla_dataset_to_hub(
124
- name: str, repo_id: str, url: str, api_key: str, workspace: str = "admin"
 
 
 
 
 
125
  ):
126
  rg.init(api_url=url, api_key=api_key)
127
  feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
128
  local_dataset = feedback_dataset.pull()
129
- local_dataset.push_to_huggingface(repo_id=repo_id)
130
 
131
 
132
  def push_pipeline_params(
 
121
 
122
 
123
  def push_argilla_dataset_to_hub(
124
+ name: str,
125
+ repo_id: str,
126
+ url: str,
127
+ api_key: str,
128
+ hub_token: str,
129
+ workspace: str = "admin",
130
  ):
131
  rg.init(api_url=url, api_key=api_key)
132
  feedback_dataset = rg.FeedbackDataset.from_argilla(name=name, workspace=workspace)
133
  local_dataset = feedback_dataset.pull()
134
+ local_dataset.push_to_huggingface(repo_id=repo_id, token=hub_token)
135
 
136
 
137
  def push_pipeline_params(
pages/3_🌱 Generate Dataset.py CHANGED
@@ -207,7 +207,7 @@ if all(
207
  st.code(
208
  """
209
  git clone https://github.com/huggingface/data-is-better-together
210
- cd data-is-better-together/domain-specific-datasets/pipelines
211
  pip install -r requirements.txt
212
  huggingface-cli login
213
  """,
 
207
  st.code(
208
  """
209
  git clone https://github.com/huggingface/data-is-better-together
210
+ cd data-is-better-together/domain-specific-datasets/distilabel_pipelines
211
  pip install -r requirements.txt
212
  huggingface-cli login
213
  """,
pages/4_🔍 Review Generated Data.py CHANGED
@@ -44,5 +44,6 @@ if st.button("🚀 Publish the generated dataset"):
44
  url=argilla_url,
45
  api_key=argilla_api_key,
46
  workspace="admin",
 
47
  )
48
  st.success("The generated dataset has been published to the Hub.")
 
44
  url=argilla_url,
45
  api_key=argilla_api_key,
46
  workspace="admin",
47
+ hub_token=st.session_state["hub_token"],
48
  )
49
  st.success("The generated dataset has been published to the Hub.")
project_config.json CHANGED
@@ -1 +1 @@
1
- {"project_name": "DEFAULT_DOMAIN", "argilla_space_repo_id": "burtenshaw/domain_test_4_argilla_space", "project_space_repo_id": "burtenshaw/domain_test_4_config_space", "dataset_repo_id": "burtenshaw/domain_test_4"}
 
1
+ {"project_name": "bicycle_maintenance", "argilla_space_repo_id": "burtenshaw/bicycle_maintenance_argilla_space", "project_space_repo_id": "burtenshaw/bicycle_maintenance_config_space", "dataset_repo_id": "burtenshaw/bicycle_maintenance"}
requirements.txt CHANGED
@@ -1,8 +1,5 @@
1
  datasets
2
  python_dotenv
3
- sentence_transformers
4
  streamlit
5
  huggingface_hub
6
- mistralai
7
- argilla
8
- git+https://github.com/argilla-io/distilabel.git
 
1
  datasets
2
  python_dotenv
 
3
  streamlit
4
  huggingface_hub
5
+ argilla
 
 
seed_data.json CHANGED
@@ -1,40 +1,20 @@
1
  {
2
- "domain": "farming",
3
  "perspectives": [
4
- "Family Farming",
5
- "Agribusiness",
6
- "Permaculture",
7
- "Agroforestery",
8
- "Conventional Farming"
9
  ],
10
  "topics": [
11
- "animal welfare",
12
- "economic growth",
13
- "land",
14
- "resources",
15
- "efficiency"
16
  ],
17
  "examples": [
18
  {
19
- "question": "Compare and contrast the environmental footprint of industrial and small-scale farming.",
20
- "answer": "Regenerative agriculture practices aim to restore soil health through methods that increase soil organic matter, enhance microbial activity, and improve soil structure. These practices include no-till farming, cover cropping, diverse crop rotations, and integrated livestock management. According to LaCanne and Lundgren (2018), soil health improves due to increased biodiversity and organic matter, enhancing its water retention and nutrient efficiency. Moreover, Jones (2012) in \"Soil carbon & organic farming\" reports that these practices significantly elevate biodiversity, both above and below the soil surface, promoting resilient ecosystems and agroecological balances."
21
- },
22
- {
23
- "question": "Compare the environmental footprint of small-scale, local farming versus large-scale, industrial agriculture.",
24
- "answer": "Industrial agriculture typically emphasizes high-output, monoculture farming reliant on synthetic fertilizers and pesticides, which, as Horrigan, Lawrence, and Walker (2002) argue, leads to greater greenhouse gas emissions, higher energy use, and more water consumption compared to small-scale farming. In contrast, small-scale farms often employ diverse cropping systems and lower chemical inputs, resulting in a smaller environmental footprint. Pimentel et al. (2005) note that small-scale farms tend to have higher yields per unit area when environmental and sustainability factors are integrated into farming practices."
25
- },
26
- {
27
- "question": "Analyze the economic implications of transitioning from conventional to organic farming.",
28
- "answer": "Transitioning from conventional to organic farming involves significant changes in farm management, input use, and market engagement. Crowder and Reganold (2015) present evidence that organic farms often yield smaller outputs initially but achieve higher profitability due to premium prices, lower input costs, and improved soil health over time. However, this transition requires upfront investments in knowledge and infrastructure, which can be economically challenging for some farmers, as noted by Seufert and Ramankutty (2017)."
29
- },
30
- {
31
- "question": "Analyze the social, economic and environnmental impacts of land consolidation vs small-scale farmers.",
32
- "answer": "Land consolidation has been associated with increased agricultural productivity but also with negative social and environmental impacts. Larger land holdings typically lead to monocultures, which reduce biodiversity and increase vulnerability to pests and diseases, as highlighted by Li et al. (2017). Economically, while consolidation can lead to economies of scale and potential gains in gross margins, it often displaces rural populations, exacerbating poverty and reducing local food diversity (Sutherland et al., 2015)."
33
- },
34
- {
35
- "question": "Investigate the relationship between land ownership patterns, agricultural productivity and environment sustainability. ",
36
- "answer": "Land ownership patterns critically influence agricultural productivity and sustainability. Secure land tenure supports investments in long-term improvements such as soil conservation and water management, which are pivotal for sustainable outcomes. Studies by Barrett et al. (2010) demonstrate that fragmented land ownership often results in inefficient resource use and higher transaction costs, which can detract from sustainability goals."
37
  }
38
  ],
39
- "domain_expert_prompt": "You will be asked about family farming and agribusiness related topics, from different perspectives.\n Your answer should be logical and supported by facts, don't fabricate arguments. \n Try to gather a diverse point of view taking into account current theories in agronomy, biology, economics, anthropology and ecology."
 
 
 
 
40
  }
 
1
  {
2
+ "domain": "Bicycle maintenance",
3
  "perspectives": [
4
+ "Professional cycling"
 
 
 
 
5
  ],
6
  "topics": [
7
+ "punctures"
 
 
 
 
8
  ],
9
  "examples": [
10
  {
11
+ "question": "",
12
+ "answer": ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  }
14
  ],
15
+ "domain_expert_prompt": "You are an experienced bicycle mechanic with extensive knowledge of various types of bicycles, their components, and common issues. You provide clear, concise, and accurate advice on bicycle maintenance, repairs, and upgrades. You have a deep understanding of bicycle mechanics, materials, and tools, and you are able to explain complex concepts in a way that is easy for users to understand. You are patient, friendly, and always willing to help users with their bicycle-related questions.",
16
+ "application_instruction": "You are an AI assistant that generates queries around the domain of Bicycle maintenance.\n You should not expect basic but profound questions from your users.\n The queries should reflect a diversity of vision and economic positions and political positions.\n The queries may know about different methods of Bicycle maintenance.\n The queries can be positioned politically, economically, socially, or practically.\n Also take into account the impact of diverse causes on diverse domains.\n- Question: \n- Answer: \n",
17
+ "seed_terms": [
18
+ "punctures from a Professional cycling perspective"
19
+ ]
20
  }