import pandas as pd
import streamlit as st

# Set page configuration with a title and wide layout
st.set_page_config(page_title="Umami Data Processing", layout="wide")

# Display the main title of the application
st.title("Umami Data Processing")
st.subheader('Import CSV exports from Umami Cloud into a self-hosted Umami instance')

# Provide a link to a tutorial for more detailed instructions
st.markdown("""
For detailed instructions on how to use this tool, please visit [my blog](https://blog.closex.org/posts/29bdb155/) or the [YouTube video]().
""")
# Widget to upload the CSV file
uploaded_file = st.file_uploader("Choose an Umami Cloud CSV file", type=['csv'])

# Text input for the new website ID
new_website_id = st.text_input("Enter the new website ID:")
if uploaded_file is not None and new_website_id:
    # Load the CSV file
    df = pd.read_csv(uploaded_file)

    # Update the website_id column with the user-provided website ID
    df['website_id'] = new_website_id

    # Define the columns for the website_event table
    website_event_columns = [
        'event_id', 'website_id', 'session_id', 'created_at', 'url_path',
        'url_query', 'referrer_path', 'referrer_query', 'referrer_domain',
        'page_title', 'event_type', 'event_name', 'visit_id'
    ]

    # Create DataFrame for website_event data
    df_website_event = df[website_event_columns]
    st.download_button(label="Download Website Event CSV", data=df_website_event.to_csv(index=False), file_name='website_event.csv', mime='text/csv')

    # Define the columns for the session table
    session_columns = [
        'session_id', 'website_id', 'hostname', 'browser', 'os', 'device',
        'screen', 'language', 'country', 'subdivision1', 'subdivision2',
        'city', 'created_at'
    ]

    # Create DataFrame for session data
    df_session = df[session_columns]
    st.download_button(label="Download Session CSV", data=df_session.to_csv(index=False), file_name='session.csv', mime='text/csv')

    # Versions of both files with duplicate rows removed
    df_website_event_nodup = df[website_event_columns].drop_duplicates(subset='event_id')
    df_session_nodup = df[session_columns].drop_duplicates(subset='session_id')
    st.text(f"Versions with duplicates removed ({len(df_website_event) - len(df_website_event_nodup)} events removed, {len(df_session) - len(df_session_nodup)} session entries removed):")

    # Download button for website_event data without duplicates
    st.download_button(label="Download Website Event CSV (No Duplicates)", data=df_website_event_nodup.to_csv(index=False), file_name='website_event_no_duplicates.csv', mime='text/csv')

    # Download button for session data without duplicates
    st.download_button(label="Download Session CSV (No Duplicates)", data=df_session_nodup.to_csv(index=False), file_name='session_no_duplicates.csv', mime='text/csv')

    st.success("Successfully generated CSV files")