Update app.py
app.py  CHANGED
@@ -2,11 +2,7 @@ import streamlit as st
 from sentence_transformers import SentenceTransformer, util
 from sklearn.decomposition import LatentDirichletAllocation
 from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.manifold import TSNE
 from langdetect import detect, DetectorFactory
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
 
 st.set_page_config(page_title="Multilingual Text Analysis System", layout="wide")
 
@@ -56,18 +52,6 @@ def detect_language(text):
     except:
         return "unknown"
 
-@st.cache_data
-def tsne_visualization(embeddings, words):
-    if len(words) < 3:  # Not enough words for t-SNE
-        return pd.DataFrame({'word': words})
-
-    perplexity = min(30, len(words) - 1)
-    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
-    embeddings_2d = tsne.fit_transform(embeddings)
-    df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
-    df['word'] = words
-    return df
-
 st.title("Multilingual Text Analysis System")
 user_input = st.text_area("Enter your text here:")
 
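For context, the removed tsne_visualization helper reduces word embeddings to two dimensions with t-SNE, clamping perplexity to stay below the sample count and falling back to a word-only frame when fewer than three words are given. A minimal standalone sketch of the same idea follows; the st.cache_data decorator is dropped, and the multilingual MiniLM model name is an assumption, since the diff does not show which SentenceTransformer the app loads.

import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.manifold import TSNE

def tsne_visualization(embeddings, words):
    if len(words) < 3:                      # t-SNE needs at least a few samples
        return pd.DataFrame({'word': words})
    perplexity = min(30, len(words) - 1)    # sklearn requires perplexity < n_samples
    tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
    embeddings_2d = tsne.fit_transform(embeddings)
    df = pd.DataFrame(embeddings_2d, columns=['x', 'y'])
    df['word'] = words
    return df

# Assumed model; any SentenceTransformer that returns an (n_words, dim) array works here.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
words = ['cat', 'dog', 'gato', 'perro', 'chat', 'chien']
print(tsne_visualization(model.encode(words), words))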
@@ -86,17 +70,8 @@ if st.button("Analyze"):
         embeddings = embedding_agent.get_embeddings(words)
         st.success("Word Embeddings Generated.")
 
-
-
-        if 'x' in tsne_df.columns:
-            fig, ax = plt.subplots()
-            ax.scatter(tsne_df['x'], tsne_df['y'])
-            for i, word in enumerate(tsne_df['word']):
-                ax.annotate(word, (tsne_df['x'][i], tsne_df['y'][i]))
-            st.pyplot(fig)
-        else:
-            st.write("Word list (not enough words for t-SNE visualization):")
-            st.write(", ".join(words))
+        st.write("Words in the input:")
+        st.write(", ".join(words))
 
     if len(words) > 1:
         with st.spinner("Extracting topics..."):
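The block deleted in the last hunk reads from tsne_df, whose producing call is not legible in this diff; presumably it was built by the tsne_visualization helper removed above, though that wiring is an assumption. A self-contained sketch of the plotting step that disappears with this change, with toy coordinates standing in for real t-SNE output so it runs on its own:

import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st

# Assumed wiring; the assignment of tsne_df is not visible in the diff:
#   tsne_df = tsne_visualization(embeddings, words)
tsne_df = pd.DataFrame({'x': [0.1, 1.3, -0.7],
                        'y': [0.4, -0.2, 1.1],
                        'word': ['cat', 'dog', 'bird']})   # toy stand-in data

fig, ax = plt.subplots()
ax.scatter(tsne_df['x'], tsne_df['y'])
for i, word in enumerate(tsne_df['word']):
    ax.annotate(word, (tsne_df['x'][i], tsne_df['y'][i]))  # label each point with its word
st.pyplot(fig)

After this commit the embeddings step keeps only the plain word listing, which is consistent with dropping the TSNE, numpy, matplotlib, and pandas imports in the first hunk.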