# Page-header residue from the capture ("Spaces: Sleeping"); not part of the program.
import os
import shutil

import streamlit as st
from datasets import load_dataset, concatenate_datasets
@st.cache_resource
def load_and_combine_datasets():
    """Load the three source datasets and concatenate them into one.

    The ``train`` split of each Hugging Face Hub repository listed below is
    loaded, and the splits are concatenated in that order.

    The result is cached with ``st.cache_resource`` because Streamlit re-runs
    the whole script on every widget interaction; without the cache each
    button click or text edit would reload all three datasets.

    Returns:
        The dataset produced by ``concatenate_datasets``.

    NOTE(review): ``concatenate_datasets`` expects its inputs to have
    compatible features. These three repositories look unrelated, so confirm
    that their columns line up (or project them onto a shared schema) --
    otherwise this call will raise.
    """
    dataset_ids = (
        "flytech/python-codes-25k",
        "andfanilo/streamlit-issues",
        "sai-lohith/streamlit_docs",
    )
    train_splits = [
        load_dataset(dataset_id, split="train") for dataset_id in dataset_ids
    ]
    return concatenate_datasets(train_splits)
def save_combined_dataset(combined_dataset, dataset_name):
    """Persist ``combined_dataset`` to disk and report where it went.

    Args:
        combined_dataset: Object exposing ``save_to_disk(path)``.
        dataset_name: Path handed to ``save_to_disk`` and echoed to the page.
    """
    target_path = dataset_name
    combined_dataset.save_to_disk(target_path)

    # Report only after the save call has returned, so a failed save never
    # produces a misleading confirmation on the page.
    st.write(f"Dataset saved at: {target_path}")
def _is_valid_dataset_name(dataset_name):
    """Return True if ``dataset_name`` is a plain, non-empty directory name."""
    if not dataset_name or dataset_name in {".", ".."}:
        return False
    # A name that differs from its own basename contains a path separator,
    # which would let page input write outside the working directory.
    return os.path.basename(dataset_name) == dataset_name


def main():
    """Render the viewer page: preview, save, and download the combined dataset.

    The page shows the first ten rows of the combined dataset, a single name
    field, and two buttons:

    * "Save Combined Dataset" writes the dataset under the entered name.
    * "Download Combined Dataset" writes it, zips the result, and offers the
      archive through a browser download button together with its size.
    """
    st.title("Combined Dataset Viewer")

    combined_dataset = load_and_combine_datasets()
    st.write("Subset of Combined Dataset:", combined_dataset[:10])

    # The name field must live outside the button branches: st.button is True
    # only for the single rerun triggered by the click, so a widget created
    # inside that branch disappears (with its value) on the next rerun and the
    # save/download code could never see a typed name.
    dataset_name = st.text_input("Enter a name for the combined dataset:").strip()

    save_clicked = st.button("Save Combined Dataset")
    download_clicked = st.button("Download Combined Dataset")
    if not (save_clicked or download_clicked):
        return

    if not _is_valid_dataset_name(dataset_name):
        st.warning(
            "Enter a dataset name without path separators before saving "
            "or downloading."
        )
        return

    if save_clicked:
        save_combined_dataset(combined_dataset, dataset_name)
        st.success(f"Combined dataset saved as '{dataset_name}'!")

    if download_clicked:
        save_combined_dataset(combined_dataset, dataset_name)
        dataset_dir = os.path.join(os.getcwd(), dataset_name)

        # save_to_disk produces a directory, which a browser cannot download
        # and whose os.path.getsize is not the size of its contents. Zip it
        # next to the directory and serve/measure the archive instead.
        archive_path = shutil.make_archive(dataset_dir, "zip", root_dir=dataset_dir)
        with open(archive_path, "rb") as archive_file:
            archive_bytes = archive_file.read()

        st.download_button(
            label=f"Download the combined dataset: {dataset_name}.zip",
            data=archive_bytes,
            file_name=f"{dataset_name}.zip",
            mime="application/zip",
        )
        filesize = len(archive_bytes) / (1024 * 1024)  # Size in MB
        st.write(f"File Size: {filesize:.2f} MB")
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()