# Streamlit app: compare the embedding of a reference text against each chunk
# of a (possibly long) first text, and report the per-chunk cosine similarity.
import streamlit as st
import google.generativeai as genai
import numpy as np

# Configure Gemini API (key is read from Streamlit's secrets store).
genai.configure(api_key=st.secrets["GEMINI_API_KEY"])

st.title("Text Embedding Similarity Test")


def split_into_chunks(text, chunk_size=500):
    """Split text into consecutive chunks of at most ``chunk_size`` characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings covering ``text`` in order. The last chunk may be
        shorter than ``chunk_size``; an empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    # Guard explicitly: range() with step 0 raises an obscure error and a
    # negative step would silently produce no chunks at all.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    return [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]


def get_embedding(text):
    """Return the embedding for a single text chunk.

    Calls ``genai.embed_content`` with the ``models/text-embedding-004`` model
    and returns the value stored under the ``'embedding'`` key of the response.
    """
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=text,
    )
    return response['embedding']


def cosine_similarity(vec1, vec2):
    """Compute the cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence accepted by ``np.asarray``).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The cosine of the angle between the vectors as a float, or ``0.0``
        when either vector has zero norm (the similarity is undefined there,
        and dividing would yield NaN and break the max-score selection).
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)


# Text input areas
col1, col2 = st.columns(2)
with col1:
    input_text1 = st.text_area(
        "Enter your first text:",
        height=200,
        placeholder="Type or paste your first text here...",
    )
with col2:
    input_text2 = st.text_area(
        "Enter text to compare:",
        height=200,
        placeholder="Type or paste text to compare...",
    )

if st.button("Run Similarity Test"):
    if not input_text1.strip() or not input_text2.strip():
        st.warning("Please enter text in both input fields.")
    else:
        with st.spinner("Analyzing texts..."):
            # Top-level UI boundary: surface any failure (API, network, ...)
            # to the user instead of crashing the app.
            try:
                # Process first text into chunks
                chunks = split_into_chunks(input_text1)
                if len(chunks) > 1:
                    st.info(f"Split first text into {len(chunks)} chunks")

                # Generate embeddings for all chunks
                embeddings = [get_embedding(chunk) for chunk in chunks]

                # Generate embedding for comparison text
                compare_embedding = get_embedding(input_text2)

                # Calculate similarities
                similarities = [
                    cosine_similarity(emb, compare_embedding)
                    for emb in embeddings
                ]
                # Single pass argmax; ties resolve to the first chunk.
                max_index = max(
                    range(len(similarities)),
                    key=similarities.__getitem__,
                )
                max_score = similarities[max_index]

                # Display results
                st.subheader("📊 Similarity Results")
                st.write(f"**Highest similarity score:** {max_score:.4f}")

                st.subheader("🧩 Most Similar Chunk")
                st.write(chunks[max_index])

                st.subheader("📈 All Chunk Similarities")
                for i, (chunk, score) in enumerate(zip(chunks, similarities)):
                    st.write(f"Chunk {i+1} ({len(chunk)} chars): {score:.4f}")
                    with st.expander(f"View chunk {i+1}"):
                        st.write(chunk)
            except Exception as e:
                st.error(f"Error processing texts: {str(e)}")