import streamlit as st
from duckduckgo_search import ddg
import pandas as pd
from sentence_transformers import SentenceTransformer
import umap.umap_ as umap
import numpy as np
import sys
import plotly.express as px
# The search bar: the text typed here is used as the DuckDuckGo query below.
keywords = st.text_input('Enter your search', 'The future of AI')

# Result column whose text is embedded ('body' is used here; 'title' is the
# other text column returned with each result).
to_display = 'body'

# Fetch up to 500 results and put them in a table, one row per result.
md = ddg(keywords, region='wt-wt', safesearch='Moderate', time='y', max_results=500)
md = pd.DataFrame(md)

# A query with no hits yields an empty frame without the expected columns;
# stop here with a message instead of failing with a KeyError further down.
if md.empty or to_display not in md.columns:
    st.warning('No search results found. Try a different search.')
    st.stop()

# Load the model ('all-mpnet-base-v2' is an alternative model name).
print("running sentence embeddings...")
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)
sentence_embeddings = model.encode(md[to_display].tolist(), show_progress_bar=True)
sentence_embeddings = pd.DataFrame(sentence_embeddings)

# Reduce dimensionality: project the embeddings down to two plot coordinates.
print("reducing dimensionality...")
reducer = umap.UMAP(metric='cosine')
dimr = reducer.fit_transform(sentence_embeddings)
dimr = pd.DataFrame(dimr, columns=['umap1', 'umap2'])

# Merge the data together. drop=True keeps the rows aligned by position without
# adding two identically named 'index' columns to the combined frame.
dat = pd.concat([dimr.reset_index(drop=True), md.reset_index(drop=True)], axis=1)

# Get it ready for plotting: wrap long text at 30 characters and turn the line
# breaks into '<br>' tags so the hover labels are rendered on multiple lines.
# The vectorised .str.replace leaves missing values untouched instead of raising.
for text_column in ('title', 'body'):
    dat[text_column] = (
        dat[text_column].str.wrap(30).str.replace('\n', '<br>', regex=False)
    )

# Visualize
fig = px.scatter(
    dat,
    x='umap1',
    y='umap2',
    hover_data=['title', 'body'],
    title='Context similarity map of results',
)
st.plotly_chart(fig, use_container_width=True)

# A table under the plot
st.dataframe(dat)