dhruv-anand-aintech committed on
Commit
c2e8d51
1 Parent(s): 9494f79

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from huggingface_hub import HfApi, HfFolder
3
+ from datasets import load_dataset
4
+
5
+ # Function to fetch dataset names for typeahead (autocomplete)
6
def fetch_dataset_names(query, limit=50):
    """Return Hub dataset ids that contain ``query`` (case-insensitive).

    The search term is forwarded to the Hub through
    ``HfApi.list_datasets(search=...)`` so that only candidate entries are
    transferred, rather than enumerating every dataset on the Hub and
    filtering locally each time the typeahead input changes.

    Args:
        query: Text typed by the user. Surrounding whitespace is ignored;
            ``None`` is treated like an empty string.
        limit: Maximum number of datasets requested from the Hub. Pass
            ``None`` to request every match.

    Returns:
        A list of dataset id strings. Empty when ``query`` is blank.
    """
    needle = (query or "").strip()
    if not needle:
        # A blank query would otherwise match every dataset on the Hub.
        return []

    api = HfApi()
    folded = needle.lower()
    candidates = api.list_datasets(search=needle, limit=limit)
    # Re-check locally so every returned id is guaranteed to contain the
    # query text, regardless of how the server-side search matched.
    return [d.id for d in candidates if folded in d.id.lower()]
11
+
12
+ # Function to create a new dataset
13
def create_sampled_dataset(dataset_name, num_rows, user_token):
    """Write a random sample of a dataset's ``train`` split to a CSV file.

    The sample is saved as ``sampled_dataset.csv`` in the current working
    directory. Uploading the sample to the user's account is not implemented
    yet, so ``user_token`` is currently unused and the return value is a
    placeholder string.

    Args:
        dataset_name: Identifier passed to ``load_dataset``.
        num_rows: Number of rows to sample; must be a positive whole number
            no larger than the size of the ``train`` split.
        user_token: HuggingFace access token (reserved for the upload step).

    Returns:
        The placeholder string ``"URL_to_new_dataset"``.

    Raises:
        ValueError: If ``num_rows`` is not a positive whole number, if the
            dataset has no ``train`` split, or if more rows are requested
            than the split contains.
    """
    # Validate the request before triggering a potentially large download.
    try:
        requested = int(num_rows)
    except (TypeError, ValueError, OverflowError) as err:
        raise ValueError(
            f"num_rows must be a positive whole number, got {num_rows!r}"
        ) from err
    if requested != num_rows or requested < 1:
        raise ValueError(
            f"num_rows must be a positive whole number, got {num_rows!r}"
        )

    # Load the dataset
    dataset = load_dataset(dataset_name)
    if 'train' not in dataset:
        raise ValueError(
            f"Dataset {dataset_name!r} has no 'train' split; "
            f"available splits: {list(dataset)}"
        )
    train_split = dataset['train']

    available = len(train_split)
    if requested > available:
        raise ValueError(
            f"Requested {requested} rows but the 'train' split of "
            f"{dataset_name!r} only has {available}"
        )

    # Sample the dataset: shuffle, then keep the first `requested` rows.
    sampled_dataset = train_split.shuffle().select(range(requested))

    # Save the sampled dataset to a file (modify this as needed)
    sampled_dataset.to_csv('sampled_dataset.csv')

    # TODO: upload the sample to the user's HuggingFace account. This needs
    # the Hub API to create a new dataset repo with `user_token`; see the
    # HuggingFace Hub documentation for details.

    return "URL_to_new_dataset"  # This should be the URL to the newly created dataset
28
+
29
+ # Main app
30
def main():
    """Render the dataset sampler UI and handle the create-sample action.

    Collects an access token, a dataset chosen from search results, and a
    row count, then calls ``create_sampled_dataset`` when the button is
    pressed and reports the outcome on the page.
    """
    st.title("HuggingFace Dataset Sampler")

    # User authentication. The token is a credential, so mask it instead of
    # echoing it on screen.
    user_token = st.text_input(
        "Enter your HuggingFace token for authentication",
        type="password",
    )

    # Dataset input with typeahead: the select box is only shown once the
    # user has typed something to search for.
    dataset_query = st.text_input("Enter Dataset Name")
    selected_dataset = None
    if dataset_query:
        dataset_names = fetch_dataset_names(dataset_query)
        selected_dataset = st.selectbox("Select Dataset", options=dataset_names)

    # Number of rows input
    num_rows = st.number_input("Enter number of rows to sample", min_value=1, step=1)

    # Nothing more to do until the button is pressed.
    if not st.button("Create Sampled Dataset"):
        return

    if not (user_token and selected_dataset and num_rows):
        st.error("Please fill in all required fields.")
        return

    try:
        # Create the sampled dataset and get its URL
        dataset_url = create_sampled_dataset(selected_dataset, num_rows, user_token)
    except Exception as e:
        # Top-level UI boundary: show any failure to the user on the page.
        st.error(f"Error: {e}")
    else:
        st.success(f"Dataset created successfully! Find it here: {dataset_url}")


if __name__ == "__main__":
    main()