File size: 1,151 Bytes
fce83cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import streamlit as st
from transformers import pipeline
from sklearn.cluster import KMeans
import numpy as np

# Mock data: a sample board of sixteen words, four per theme.
_FRUITS = ("apple", "banana", "cherry", "date")
_VEHICLES = ("car", "truck", "bus", "bicycle")
_COLORS = ("red", "blue", "green", "yellow")
_PETS = ("cat", "dog", "rabbit", "hamster")

# Flattened in theme order so the list matches the layout above.
mock_words = [*_FRUITS, *_VEHICLES, *_COLORS, *_PETS]

# Embedding model
# Streamlit re-executes this script from the top on every user interaction,
# so building the pipeline directly at module level would reload the model
# on each rerun. Wrapping the construction in st.cache_resource builds it
# once and hands the same pipeline object back on later reruns.
@st.cache_resource
def _load_embedder():
    """Build the DistilBERT feature-extraction pipeline."""
    return pipeline('feature-extraction', model='distilbert-base-uncased')


embedder = _load_embedder()

def embed_words(words):
    """Return an array holding one averaged embedding per entry in ``words``.

    ``words`` is passed to the module-level ``embedder`` pipeline in a single
    call. For each result, the first element is averaged along axis 0 and
    the averaged vectors are collected into one NumPy array.
    """
    pooled = []
    for output in embedder(words):
        # presumably output[0] is (tokens, hidden) for one word, so the mean
        # over axis 0 pools the token vectors -- TODO confirm against the
        # pipeline's output format.
        token_vectors = output[0]
        pooled.append(np.mean(token_vectors, axis=0))
    return np.array(pooled)

def cluster_words(words, n_clusters=4):
    """Group words into clusters by running K-means on their embeddings.

    Args:
        words: Words to cluster; they are embedded with ``embed_words``.
        n_clusters: Number of groups to produce. Defaults to 4, the value
            that used to be hard-coded here.

    Returns:
        A dict mapping every cluster index in ``range(n_clusters)`` to the
        list of words assigned to that cluster, in input order. Every index
        is present in the dict, even if no word was assigned to it.
    """
    embeddings = embed_words(words)
    # random_state is fixed so repeated runs use the same seed.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(embeddings)
    clusters = {i: [] for i in range(n_clusters)}
    for word, label in zip(words, kmeans.labels_):
        # labels_ holds NumPy integers; convert so lookups use plain int keys.
        clusters[int(label)].append(word)
    return clusters

def main():
    """Render the page: a title plus a button that displays the word groups.

    When the button is pressed, ``mock_words`` is clustered with
    ``cluster_words`` and each group is written out on its own line,
    numbered from 1.
    """
    st.title("NYT Connections Solver")

    # Nothing more to draw until the button has been pressed.
    if not st.button("Generate Clusters"):
        return

    for i, words in cluster_words(mock_words).items():
        st.write(f"Group {i+1}: {', '.join(words)}")

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()