Spaces:

tjburns
/

duckduckgo_2d_search

Runtime error

App Files Files Community

Tyler Burns commited on Feb 1, 2023

Commit

aa47d0d

•

1 Parent(s): d141533

changed the baseline result

Browse files

Files changed (2) hide show

app.py +1 -1
flycheck_app.py +0 -138

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ np.random.seed(42)
 use_keywords = False
 # The search bar
-keywords = st.text_input('Enter your search', 'How to use ChatGPT')
 to_display = 'body' # Sometimes this is title
 md = ddg(keywords, region='wt-wt', safesearch='Moderate', time='y', max_results=500)

 use_keywords = False
 # The search bar
+keywords = st.text_input('Enter your search', 'AI news')
 to_display = 'body' # Sometimes this is title
 md = ddg(keywords, region='wt-wt', safesearch='Moderate', time='y', max_results=500)

flycheck_app.py DELETED Viewed

@@ -1,138 +0,0 @@
-import streamlit as st
-from duckduckgo_search import ddg
-import pandas as pd
-from sentence_transformers import SentenceTransformer
-import umap.umap_ as umap
-import numpy as np
-import sys
-import plotly.express as px
-import re
-import sklearn.cluster as cluster
-import nltk
-from nltk.stem import WordNetLemmatizer
-from keybert import KeyBERT
-nltk.download('punkt')
-nltk.download('omw-1.4')
-nltk.download('wordnet')
-# Set a seed
-np.random.seed(42)
-# Temp, for keywords
-use_keywords = False
-# The search bar
-keywords = st.text_input('Enter your search', 'How to use ChatGPT')
-to_display = 'body' # Sometimes this is title
-md = ddg(keywords, region='wt-wt', safesearch='Moderate', time='y', max_results=500)
-md = pd.DataFrame(md)
-# Load the model
-print("running sentence embeddings...")
-# model_name = 'all-mpnet-base-v2'
-model_name = 'all-MiniLM-L6-v2'
-model = SentenceTransformer(model_name)
-sentence_embeddings = model.encode(md['title'].tolist(), show_progress_bar = True)
-sentence_embeddings = pd.DataFrame(sentence_embeddings)
-# Reduce dimensionality
-print("reducing dimensionality...")
-reducer = umap.UMAP(metric = 'cosine')
-dimr = reducer.fit_transform(sentence_embeddings)
-dimr = pd.DataFrame(dimr, columns = ['umap1', 'umap2'])
-columns = ['title', 'href', 'body']
-# Clustering
-labels = cluster.KMeans(n_clusters=5).fit_predict(dimr[['umap1', 'umap2']])
-dimr['cluster'] = labels
-# Make the coloring easier on the eyes
-dimr['cluster'] = dimr['cluster'].astype('category')
-# Now we can search cluster in the table
-dimr['cluster'] = ['cluster ' + str(x) for x in dimr['cluster']]
-# Merge the data together
-dat = pd.concat([md.reset_index(), dimr.reset_index()], axis = 1)
-# The keywords
-if use_keywords:
-# Add keywords to the clusters
-# Create WordNetLemmatizer object
-    print('extracting keywords per cluster...')
-    wnl = WordNetLemmatizer()
-    kw_model = KeyBERT()
-    keywords_df = []
-    for i in np.unique(dat['cluster']):
-        curr = dat[dat['cluster'] == i]
-        text =  ' '.join(curr['body'])
-        # Lemmatization
-        text = nltk.word_tokenize(text)
-        text = [wnl.lemmatize(i) for i in text]
-        text = ' '.join(text)
-        # Keyword extraction
-        TR_keywords = kw_model.extract_keywords(text)
-        keywords_df.append(TR_keywords[0:10])
-    keywords_df = pd.DataFrame(keywords_df)
-    keywords_df['cluster'] = np.unique(dimr['cluster'])
-    keywords_df.columns = ['keyword1', 'keyword2', 'keyword3', 'keyword4', 'keyword5', 'cluster']
-# Get the keyword data into the dataframe
-    dat = dat.merge(keywords_df) # This messes up the index, so we need to reset it
-    dat = dat.reset_index(drop = True)
-# handle duplicate index columns
-dat = dat.loc[:,~dat.columns.duplicated()]
-# Get it ready for plotting
-dat['title'] = dat.title.str.wrap(30).apply(lambda x: x.replace('\n', '<br>'))
-dat['body'] = dat.body.str.wrap(30).apply(lambda x: x.replace('\n', '<br>'))
-# Visualize the data
-fig = px.scatter(dat, x = 'umap1', y = 'umap2', hover_data = ['title', 'body'], color = 'cluster', title = 'Context similarity map of results')
-# Make the font a little bigger
-fig.update_layout(
-    hoverlabel=dict(
-        bgcolor="white",
-        font_size=16
-    )
-)
-# x and y are same size
-fig.update_yaxes(
-    scaleanchor="x",
-    scaleratio=1,
-  )
-# Show the figure
-st.plotly_chart(fig, use_container_width=True)
-# Remove <br> in the text for the table
-dat['title'] = [re.sub('<br>', ' ', i) for i in dat['title']]
-dat['body'] = [re.sub('<br>', ' ', i) for i in dat['body']]
-# Instructions
-st.caption('Use ctrl+f (or command+f for mac) to search the table')
-# remove irrelevant columns from dat
-dat = dat.drop(columns=['index', 'umap1', 'umap2'])
-# Make the link clickable
-# pandas display options
-pd.set_option('display.max_colwidth', -1)
-def make_clickable(url, text):
-    return f'<a target="_blank" href="{url}">{text}</a>'
-dat['href'] = dat['href'].apply(make_clickable, args = ('Click here',))
-st.write(dat.to_html(escape = False), unsafe_allow_html = True)