atles-echo / app.py
spartan8806's picture
Upload 3 files
73baae2 verified
"""
ATLES-ECHO - Semantic Embedding Service
A Hugging Face Space for generating embeddings using the ATLES Champion model.
"""
import gradio as gr
from sentence_transformers import SentenceTransformer
import numpy as np
# Load the ATLES Champion embedding model once, at import time, so every
# handler below shares the same module-level `model` instance.
print("Loading ATLES Champion Embedding model...")
_MODEL_ID = "spartan8806/atles-champion-embedding"
model = SentenceTransformer(_MODEL_ID)
# Report the embedding size the loaded model says it produces.
_embedding_dim = model.get_sentence_embedding_dimension()
print(f"Model loaded! Dimension: {_embedding_dim}")
def generate_embedding(text: str) -> dict:
    """Embed a single piece of text with the module-level ``model``.

    Args:
        text: The text to embed.

    Returns:
        For falsy or whitespace-only input, a dict with an ``error`` message
        and ``embedding`` / ``dimension`` set to ``None``. Otherwise a dict
        with ``text_preview`` (the input cut to 100 characters plus ``"..."``
        when longer), ``dimension`` (length of the vector),
        ``embedding_preview`` (first 10 values) and ``embedding_full``
        (all values as a list).
    """
    is_blank = not text or not text.strip()
    if is_blank:
        return {"error": "Please enter some text", "embedding": None, "dimension": None}

    # Ask the model for a normalized embedding of the input.
    vector = model.encode(text, normalize_embeddings=True)

    if len(text) > 100:
        preview = text[:100] + "..."
    else:
        preview = text

    payload = {}
    payload["text_preview"] = preview
    payload["dimension"] = len(vector)
    payload["embedding_preview"] = vector[:10].tolist()  # First 10 values
    payload["embedding_full"] = vector.tolist()
    return payload
def compare_texts(text1: str, text2: str) -> dict:
    """Score the semantic similarity of two texts.

    Both texts are encoded in one ``model.encode`` call with
    ``normalize_embeddings=True``. The similarity is the dot product of the
    two resulting vectors, which is the cosine similarity when the vectors
    are unit length.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        If either text is ``None``, empty or whitespace-only, a dict with an
        ``error`` message and ``similarity`` set to ``None``. Otherwise a dict
        with ``text1_preview`` / ``text2_preview`` (inputs cut to 50
        characters plus ``"..."`` when longer), ``similarity`` (rounded to 4
        decimals), ``similarity_percent`` (one-decimal percentage string) and
        ``interpretation`` (label from ``get_similarity_interpretation``).
    """
    # Guard against None as well as blank strings, the same way
    # generate_embedding does; calling .strip() on None would raise
    # AttributeError instead of returning the friendly error payload.
    if not text1 or not text2 or not text1.strip() or not text2.strip():
        return {"error": "Please enter both texts", "similarity": None}

    # Generate embeddings for both texts in a single batch.
    embeddings = model.encode([text1, text2], normalize_embeddings=True)

    # Calculate cosine similarity (dot product of the normalized vectors).
    similarity = float(np.dot(embeddings[0], embeddings[1]))

    return {
        "text1_preview": text1[:50] + "..." if len(text1) > 50 else text1,
        "text2_preview": text2[:50] + "..." if len(text2) > 50 else text2,
        "similarity": round(similarity, 4),
        "similarity_percent": f"{similarity * 100:.1f}%",
        "interpretation": get_similarity_interpretation(similarity),
    }
def get_similarity_interpretation(score: float) -> str:
    """Map a similarity score to a human-readable label.

    Args:
        score: Similarity score to interpret.

    Returns:
        The label of the first band whose lower bound the score reaches
        (0.9, 0.7, 0.5, 0.3, checked in that order), or the
        "Different topics" label when the score is below every bound.
    """
    # Lower bounds are checked from highest to lowest, so the first match wins.
    # The emoji prefixes were mis-encoded (UTF-8 bytes decoded with the wrong
    # code page) and are restored here to the intended characters.
    bands = (
        (0.9, "🟢 Nearly identical meaning"),
        (0.7, "🟡 Very similar"),
        (0.5, "🟠 Somewhat related"),
        (0.3, "🔴 Loosely related"),
    )
    for lower_bound, label in bands:
        if score >= lower_bound:
            return label
    return "⚫ Different topics"
def batch_embed(texts: str) -> dict:
    """Generate embeddings for multiple texts (one per line).

    Args:
        texts: Newline-separated texts. Each line is stripped and lines that
            are empty after stripping are ignored. ``None`` is treated like
            an empty string.

    Returns:
        If no non-blank line is present, or more than 10 are, a dict with an
        ``error`` message and ``embeddings`` set to ``None``. Otherwise a dict
        with ``count`` (number of texts), ``dimension`` (length of the first
        vector) and ``results``: one entry per text holding its 1-based
        ``index``, the ``text`` cut to 50 characters plus ``"..."`` when
        longer, and ``embedding_preview`` (first 5 values).
    """
    # `texts or ""` keeps a None input from crashing on .split(); it then
    # falls through to the normal "no lines" error below.
    lines = [line.strip() for line in (texts or "").split("\n") if line.strip()]

    if not lines:
        return {"error": "Please enter at least one text (one per line)", "embeddings": None}
    if len(lines) > 10:
        return {"error": "Maximum 10 texts at a time", "embeddings": None}

    # Generate embeddings for all lines in one batch.
    embeddings = model.encode(lines, normalize_embeddings=True)

    results = [
        {
            "index": position,
            "text": text[:50] + "..." if len(text) > 50 else text,
            "embedding_preview": emb[:5].tolist(),
        }
        for position, (text, emb) in enumerate(zip(lines, embeddings), start=1)
    ]

    return {
        "count": len(lines),
        "dimension": len(embeddings[0]),
        "results": results,
    }
# Create Gradio interface: a header, three tabs (single embedding, pairwise
# similarity, batch embedding) and a footer.
# The emoji in the labels/Markdown were mis-encoded and are restored here.
# The multi-line Markdown literals are kept flush-left on purpose so the text
# passed to gr.Markdown is otherwise unchanged.
with gr.Blocks(
    title="ATLES-ECHO Embedding Service",
    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="cyan"),
) as demo:
    gr.Markdown("""
# 🧠 ATLES-ECHO Embedding Service
Generate high-quality semantic embeddings using the **ATLES Champion** model.
- **Model**: [spartan8806/atles-champion-embedding](https://huggingface.co/spartan8806/atles-champion-embedding)
- **Dimension**: 768
- **Top-10 MTEB Performance**: Pearson 0.8445, Spearman 0.8374
""")

    with gr.Tabs():
        # Tab 1: Single Embedding
        with gr.TabItem("🔤 Single Embedding"):
            gr.Markdown("Generate an embedding for a single piece of text.")
            with gr.Row():
                with gr.Column():
                    single_input = gr.Textbox(
                        label="Input Text",
                        placeholder="Enter text to embed...",
                        lines=3,
                    )
                    single_btn = gr.Button("Generate Embedding", variant="primary")
                with gr.Column():
                    single_output = gr.JSON(label="Embedding Result")
            # Button click sends the textbox value to generate_embedding and
            # shows the returned dict in the JSON panel.
            single_btn.click(
                fn=generate_embedding,
                inputs=single_input,
                outputs=single_output,
            )

        # Tab 2: Compare Texts
        with gr.TabItem("⚖️ Compare Similarity"):
            gr.Markdown("Compare the semantic similarity between two texts.")
            with gr.Row():
                text1_input = gr.Textbox(label="Text 1", placeholder="First text...", lines=2)
                text2_input = gr.Textbox(label="Text 2", placeholder="Second text...", lines=2)
            compare_btn = gr.Button("Compare Similarity", variant="primary")
            compare_output = gr.JSON(label="Similarity Result")
            # Both textboxes are passed, in order, as text1 and text2.
            compare_btn.click(
                fn=compare_texts,
                inputs=[text1_input, text2_input],
                outputs=compare_output,
            )

        # Tab 3: Batch Embedding
        with gr.TabItem("📦 Batch Embed"):
            gr.Markdown("Generate embeddings for multiple texts (one per line, max 10).")
            with gr.Row():
                with gr.Column():
                    batch_input = gr.Textbox(
                        label="Texts (one per line)",
                        placeholder="Text 1\nText 2\nText 3...",
                        lines=6,
                    )
                    batch_btn = gr.Button("Generate Batch Embeddings", variant="primary")
                with gr.Column():
                    batch_output = gr.JSON(label="Batch Results")
            # The whole textbox content is handed to batch_embed, which
            # splits it into lines itself.
            batch_btn.click(
                fn=batch_embed,
                inputs=batch_input,
                outputs=batch_output,
            )

    gr.Markdown("""
---
### About ATLES-ECHO
ATLES-ECHO is the semantic memory core of the ATLES ecosystem - your AI digital twin that learns from your digital life.
**Features:**
- 🧠 High-quality semantic embeddings (768 dimensions)
- ⚡ Fast inference with normalized vectors
- 🎯 Top-10 MTEB benchmark performance
- 🔒 Built for the ATLES privacy-first ecosystem
[View Model Card](https://huggingface.co/spartan8806/atles-champion-embedding) | [ATLES GitHub](https://github.com/spartan8806)
""")

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()