Spaces:
Runtime error
Runtime error
File size: 2,149 Bytes
a808917 7325550 0074cb1 7325550 b60de54 ed172d3 6808b8a e642bfc 680895d 3246fea 7325550 3246fea 7325550 0df1ea2 7325550 1a64589 ed172d3 1a64589 7325550 7aa8bfe 7325550 3246fea 3b45f6b 1a64589 3b45f6b 3246fea 7aa8bfe ed172d3 8ef69e0 c231297 8ef69e0 3b45f6b b7043f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import streamlit as st
from duckduckgo_search import ddg
import pandas as pd
from sentence_transformers import SentenceTransformer
import umap.umap_ as umap
import numpy as np
import sys
import plotly.express as px
import re
# The search bar
keywords = st.text_input('Enter your search', 'How to use ChatGPT')

# Alternative kept for reference: take the keywords from the command line
# instead of the text box.
# print("searching for: " + ' '.join(sys.argv[1:]) + "...")
# keywords = ' '.join(sys.argv[1:])

to_display = 'body' # Sometimes this is title

# A blank query cannot be searched; tell the user and end this script run
# instead of sending an empty request.
if not keywords.strip():
    st.warning('Please enter a search term.')
    st.stop()

# Run the web search, requesting up to 500 results.
md = ddg(keywords, region='wt-wt', safesearch='Moderate', time='y', max_results=500)

# An empty (or missing) result would become a DataFrame with no 'body'
# column, and the embedding step below would fail with a KeyError. Stop
# here with a readable message instead.
if not md:
    st.warning('No results found. Try a different search.')
    st.stop()

# One row per search hit.
md = pd.DataFrame(md)
# Embed the 'body' text of every search hit with a sentence-transformer model.
# NOTE(review): Streamlit re-executes the script on each interaction, so the
# model is constructed again on every run; consider caching it.
print("running sentence embeddings...")
# Alternative model, kept for reference:
# model_name = 'all-mpnet-base-v2'
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

# One embedding per text; the result is stored as a DataFrame with one row
# per search hit.
body_texts = md['body'].tolist()
sentence_embeddings = pd.DataFrame(
    model.encode(body_texts, show_progress_bar=True)
)
# Project the embeddings onto two plot coordinates with UMAP, using the
# cosine metric between embeddings.
print("reducing dimensionality...")
reducer = umap.UMAP(metric='cosine')
umap_coords = reducer.fit_transform(sentence_embeddings)

# Name the two output columns so the plot can refer to them.
dimr = pd.DataFrame(data=umap_coords, columns=['umap1', 'umap2'])
# Column names of interest; not referenced by the code visible in this file.
columns = ['title', 'href', 'body']

# Merge the search results and their 2-D coordinates side by side.
# drop=True discards the old index of each frame instead of turning it into
# an 'index' column, so rows are aligned by position and no redundant
# 'index' column ends up in the table shown to the user.
dat = pd.concat(
    [md.reset_index(drop=True), dimr.reset_index(drop=True)],
    axis=1,
)

# Defensive: if both inputs ever share a column name, keep the first one.
dat = dat.loc[:, ~dat.columns.duplicated()]
# Get it ready for plotting.
# The hover text is formatted on a copy so that `dat` keeps the original
# text for the table below. Wrapping in place and later turning '<br>' back
# into spaces is lossy: wrapping splits words longer than the width, and any
# literal '<br>' in the source text would be rewritten.
plot_dat = dat.copy()
for text_col in ('title', 'body'):
    # Wrap to 30 characters per line and use '<br>' as the line break in the
    # hover label. The .str accessor passes missing values through instead
    # of raising.
    plot_dat[text_col] = (
        plot_dat[text_col]
        .str.wrap(30)
        .str.replace('\n', '<br>', regex=False)
    )

# Visualize
fig = px.scatter(
    plot_dat,
    x='umap1',
    y='umap2',
    hover_data=['title', 'body'],
    title='Context similarity map of results',
)

# Make the font a little bigger
fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
    )
)

# Show the figure
st.plotly_chart(fig, use_container_width=True)

# Instructions
st.caption('Click on the table and press ctrl+f (or command+f for mac) to search it')

# Place a table under the plot, showing the unmodified result text
st.dataframe(dat)
|