Spaces:
Running
Running
import streamlit as st | |
import pandas as pd | |
import json | |
def render_dataset_preview(dataset, dataset_type): | |
""" | |
Renders a preview of the dataset with pagination options. | |
Args: | |
dataset: The dataset to preview (pandas DataFrame) | |
dataset_type: The type of dataset (csv, json, etc.) | |
""" | |
if dataset is None: | |
st.warning("No dataset to preview.") | |
return | |
st.markdown(f"<h3>Dataset Preview: {st.session_state.dataset_name}</h3>", unsafe_allow_html=True) | |
# Show basic info | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
st.metric("Rows", f"{dataset.shape[0]:,}") | |
with col2: | |
st.metric("Columns", f"{dataset.shape[1]:,}") | |
with col3: | |
st.metric("Type", dataset_type.upper()) | |
# Preview options | |
col1, col2 = st.columns([1, 3]) | |
with col1: | |
num_rows = st.number_input("Rows to display", min_value=5, max_value=100, value=10, step=5) | |
with col2: | |
preview_mode = st.radio("Preview mode", ["Head", "Tail", "Sample"], horizontal=True) | |
# Display dataset preview | |
st.markdown("<div class='dataset-preview'>", unsafe_allow_html=True) | |
if preview_mode == "Head": | |
st.dataframe(dataset.head(num_rows), use_container_width=True) | |
elif preview_mode == "Tail": | |
st.dataframe(dataset.tail(num_rows), use_container_width=True) | |
else: # Sample | |
st.dataframe(dataset.sample(min(num_rows, len(dataset))), use_container_width=True) | |
st.markdown("</div>", unsafe_allow_html=True) | |
# Show dataset schema | |
with st.expander("Dataset Schema"): | |
col1, col2 = st.columns(2) | |
with col1: | |
st.markdown("**Column Types**") | |
type_df = pd.DataFrame({ | |
'Column': dataset.dtypes.index, | |
'Type': dataset.dtypes.values.astype(str) | |
}) | |
st.dataframe(type_df, use_container_width=True) | |
with col2: | |
st.markdown("**Missing Values**") | |
missing_df = pd.DataFrame({ | |
'Column': dataset.columns, | |
'Missing': dataset.isna().sum().values, | |
'Percentage': dataset.isna().sum().values / len(dataset) * 100 | |
}) | |
st.dataframe(missing_df.style.format({ | |
'Percentage': '{:.2f}%' | |
}), use_container_width=True) | |
# Raw data | |
with st.expander("Raw Data (First 5 records)"): | |
if dataset_type == 'csv': | |
st.code(dataset.head(5).to_csv(index=False), language="text") | |
else: # json or jsonl | |
st.code(dataset.head(5).to_json(orient='records', indent=2), language="json") | |