import pandas as pd
import streamlit as st

# Set page configuration with a title and wide layout
st.set_page_config(page_title="Umami Data Processing", layout="wide")

# Display the main title of the application
st.title("Umami Data Processing")
st.subheader('Import CSV exports from Umami Cloud into a self-hosted Umami instance')

# Provide a link to a tutorial for more detailed instructions
st.markdown("""
For detailed instructions on how to use this tool, please visit [my blog](https://blog.closex.org/posts/29bdb155/) or the [YouTube video]().
""")
# Widget to upload the CSV file
uploaded_file = st.file_uploader("Choose an Umami Cloud CSV file", type=['csv'])

# Text input for the new website ID
new_website_id = st.text_input("Enter the new website ID:")
if uploaded_file is not None and new_website_id:
    # Load the CSV file
    df = pd.read_csv(uploaded_file)

    # Update the website_id column with the user-provided website ID
    df['website_id'] = new_website_id

    # Define the columns for the website_event table
    website_event_columns = [
        'event_id', 'website_id', 'session_id', 'created_at', 'url_path',
        'url_query', 'referrer_path', 'referrer_query', 'referrer_domain',
        'page_title', 'event_type', 'event_name', 'visit_id'
    ]

    # Create DataFrame for website_event data
    df_website_event = df[website_event_columns]
    st.download_button(label="Download Website Event CSV", data=df_website_event.to_csv(index=False), file_name='website_event.csv', mime='text/csv')

    # Define the columns for the session table
    session_columns = [
        'session_id', 'website_id', 'hostname', 'browser', 'os', 'device',
        'screen', 'language', 'country', 'subdivision1', 'subdivision2',
        'city', 'created_at'
    ]

    # Create DataFrame for session data
    df_session = df[session_columns]
    st.download_button(label="Download Session CSV", data=df_session.to_csv(index=False), file_name='session.csv', mime='text/csv')

    # Versions of both files with duplicate rows removed
    df_website_event_nodup = df[website_event_columns].drop_duplicates(subset='event_id')
    df_session_nodup = df[session_columns].drop_duplicates(subset='session_id')
    st.text(f"Versions with duplicates removed ({len(df_website_event) - len(df_website_event_nodup)} events removed, {len(df_session) - len(df_session_nodup)} session entries removed):")

    # Download button for website_event data without duplicates
    st.download_button(label="Download Website Event CSV (No Duplicates)", data=df_website_event_nodup.to_csv(index=False), file_name='website_event_no_duplicates.csv', mime='text/csv')

    # Download button for session data without duplicates
    st.download_button(label="Download Session CSV (No Duplicates)", data=df_session_nodup.to_csv(index=False), file_name='session_no_duplicates.csv', mime='text/csv')

    st.success("Successfully generated CSV files")