def load_model():
    """Load the DINOv2 ViT-S/14 model from torch.hub and put it in eval mode.

    The model is moved to the CUDA device when ``torch.cuda.is_available()``
    is true, otherwise to the CPU.

    NOTE(review): nothing in this function caches the result, so every call
    goes through ``torch.hub.load`` again. If the intent is to load the model
    only once per Streamlit process, this presumably wants to be wrapped with
    ``st.cache_resource`` -- confirm.

    Returns:
        The model object returned by ``torch.hub.load``.
    """
    model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
    model.eval()
    model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    return model


def load_index(index_path):
    """Read a FAISS index from disk.

    NOTE(review): no caching happens here; each call re-reads the file via
    ``faiss.read_index``. Presumably meant to be wrapped with
    ``st.cache_resource`` -- confirm.

    Args:
        index_path: Path of the serialized index, passed to
            ``faiss.read_index``.

    Returns:
        The index object returned by ``faiss.read_index``.
    """
    return faiss.read_index(index_path)


def distance_to_similarity(distances, temp=1e-4):
    """Convert each row of distances into softmax attribution weights.

    Every row ``d`` is mapped to ``softmax(-d / temp)``: the smallest
    distance receives the largest weight and each row sums to 1. This is
    mathematically the same as ``softmax((d.max() - d) / temp)``, but the
    logits are shifted so that the largest one is 0 before exponentiating.
    Without that shift, small temperatures make ``exp`` overflow to ``inf``
    and the result degenerates to NaN.

    The input is not modified; a new array is returned.

    Args:
        distances: NumPy array (or array-like) iterated along its first
            axis; each entry along that axis is normalized independently.
        temp: Softmax temperature. Smaller magnitudes give a more peaked
            distribution. Must be non-zero.

    Returns:
        A new array with the same shape as ``distances``. A floating-point
        input dtype is preserved; any other dtype yields ``float64``.

    Raises:
        ValueError: If ``temp`` is zero.
    """
    if temp == 0:
        raise ValueError("temp must be non-zero")

    dist = np.asarray(distances)
    if np.issubdtype(dist.dtype, np.floating):
        out_dtype = dist.dtype
    else:
        out_dtype = np.float64
    similarities = np.empty(dist.shape, dtype=out_dtype)

    for row_idx, row in enumerate(dist):
        # Work in float64 so the division by a tiny temperature stays exact
        # enough even when the index returns float32 distances.
        logits = -np.asarray(row, dtype=np.float64) / temp
        # Shift so the largest logit is 0: exp() can then only underflow
        # (harmlessly, to 0) and never overflow.
        logits = logits - logits.max()
        weights = np.exp(logits)
        similarities[row_idx] = weights / weights.sum()
    return similarities


def calculate_rewards(subscription, num_generations, author_share, ro_share,
                      num_users_k, similarities):
    """Compute formatted reward figures for each neighbor of the first query.

    Args:
        subscription: Monthly subscription price per user, in euros.
        num_generations: Generations per user per month; the subscription is
            divided by this to get the price paid per generation.
        author_share: Percentage (0-100) of revenue allocated to authors.
        ro_share: Percentage (0-100) of revenue allocated to right owners.
        num_users_k: Number of subscribers, in thousands.
        similarities: Indexable whose first element is an iterable of
            attribution weights; only ``similarities[0]`` is used.

    Returns:
        A list with one dict per weight in ``similarities[0]``. All values
        are pre-formatted strings: ``paid_per_month`` and ``paid_per_gen``
        in euros, ``aro_share`` and ``training_data_reward`` in euro cents,
        ``attribution`` as a percentage, and ``author_month_reward`` /
        ``ro_month_reward`` in euros.

    Raises:
        ZeroDivisionError: If ``num_generations`` is zero.
    """
    num_users = num_users_k * 1000
    paid_per_gen = subscription / num_generations
    # Combined author + right-owner slice of one generation, in euro cents.
    aro_share = paid_per_gen * (author_share + ro_share) / 100 * 100

    # Monthly payout (euros) per unit of attribution weight. Computed
    # directly from each party's own share so that no division by
    # (author_share + ro_share) is needed; that sum may legitimately be 0.
    author_month_per_sim = paid_per_gen * num_users * author_share / 100
    ro_month_per_sim = paid_per_gen * num_users * ro_share / 100

    # These two strings do not depend on the neighbor, so format them once.
    paid_per_month_text = f"{subscription:.0f}€"
    paid_per_gen_text = f"{paid_per_gen:.2f}€"
    aro_share_text = f"{aro_share:.2f}c€"

    rewards = []
    for sim in similarities[0]:
        training_data_reward = aro_share * sim
        author_month_reward = author_month_per_sim * sim
        ro_month_reward = ro_month_per_sim * sim
        rewards.append({
            'paid_per_month': paid_per_month_text,
            'paid_per_gen': paid_per_gen_text,
            'aro_share': aro_share_text,
            'attribution': f"{sim*100:.0f}%",
            'training_data_reward': f"{training_data_reward:.2f}c€",
            'author_month_reward': f"{author_month_reward:.0f}€",
            'ro_month_reward': f"{ro_month_reward:.0f}€",
        })
    return rewards
""") with st.expander("Learn more about how it works"): st.markdown(""" ### How it works 1. Select or upload an image that represents AI-generated content 2. The system finds similar images that might have influenced the generation in a database of 10M images (OpenImages) 3. Based on your parameters, it calculates potential rewards for: - Original image authors - Rights owners (e.g., stock photo companies, galleries) ### Key assumptions - Attribution scores indicate the level of influence of training images - Rewards are distributed based on subscription revenue - Calculations use simplified models and are for demonstration purposes ### Use cases - Explore fair compensation models for AI training data - Simulate different revenue sharing scenarios - Understand the relationship between model training and attribution """) # Load model and index model = load_model() index = load_index("data/openimages_index.bin") with open("data/openimages_urls.txt", "r") as f: urls = f.readlines() # Sidebar controls for reward simulation with st.sidebar: st.subheader("Reward Simulation Parameters") subscription = st.number_input( "User Monthly Subscription (€/month)", min_value=1.0, max_value=100.0, value=12.0, help="Monthly subscription fee for users. Examples:\n" "- Adobe Firefly: starts at $4.99\n" "- Midjourney: starts at $10\n" "- DALL·E 3: included with ChatGPT Plus ($20)\n" "- Getty Edify: €45 for 25 generations (includes legal protection)" ) num_generations = st.number_input( "Number of Image Generations per Month", min_value=1, max_value=1000, value=60, help="Number of generations done by one user on average per month.\n" "Examples:\n" "- Adobe Firefly basic plan: 100 generations/month\n" "- Midjourney basic plan: 200 generations/month" ) author_share = st.number_input( "Authors share (%)", min_value=0.0, max_value=100.0, value=5.0, help="Percentage of subscription allocated to authors. 
Typical examples:\n" "- Printed books: 5-15%\n" "- Music (performers + songwriters): 15-30% of net revenues\n" "- Stock photography: 15-45% of revenues" ) ro_share = st.number_input( "Right Owners Share (%)", min_value=0.0, max_value=100.0, value=10.0, help="Percentage of subscription allocated to right owners. Examples:\n" "- Music CMOs (ASCAP, BMI, SACEM): 2-8% of net revenues\n" "- Image stocks: retain 55-85% of revenues" ) num_users_k = st.number_input( "Subscribers (in thousands)", min_value=1, max_value=10000, value=500, help="Number of paid users in thousands.\n" "Note: Exact figures aren't public, but Midjourney is estimated\n" "to have 2 to 5 million paying subscribers" ) num_neighbors = st.slider( "Number of similar images", min_value=1, max_value=20, value=8, help="Number of most similar images to display and calculate rewards for" ) # Display default images in a row st.subheader("Select an image to attribute rewards to similar images") cols = st.columns(3) default_images = { "Image 1": "assets/1.jpg", "Image 2": "assets/2.jpg", "Image 3": "assets/3.jpg" } # Create session state for storing the selected image if it doesn't exist if 'query_image' not in st.session_state: st.session_state.query_image = Image.open("assets/1.jpg").convert('RGB') # Display the three default images as buttons for idx, (col, (name, path)) in enumerate(zip(cols, default_images.items())): img = Image.open(path).convert('RGB') col.image(img, caption=name, width=200, use_container_width=True) # Center the button using columns button_cols = col.columns([1, 2, 1]) # Create 3 sub-columns with 1:2:1 ratio if button_cols[1].button(f"Select {name}", use_container_width=True): # Place button in middle column st.session_state.query_image = img # Upload option below st.subheader("Or upload your own image:") uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"]) if uploaded_file is not None: st.session_state.query_image = Image.open(uploaded_file).convert('RGB') 
# Display selected image st.subheader("You query image:") st.image(st.session_state.query_image, caption="Selected Image", width=300) # Get features and search features = get_ft(model, st.session_state.query_image) distances, indices = get_topk(index, features, topk=num_neighbors) similarities = distance_to_similarity(distances, temp=1e-5) # Calculate rewards rewards = calculate_rewards(subscription, num_generations, author_share, ro_share, num_users_k, similarities) # Display results in a table st.subheader("Similar Images and Rewards") for i in range(num_neighbors): img_idx = indices[0][i] reward = rewards[i] cols = st.columns(4) # Column 1: Similar image cols[0].image(urls[img_idx], caption=f"Similar Image {i+1}", width=150) # Column 2: Basic metrics cols[1].markdown( """