# Streamlit dashboard that recommends summit sessions for a free-text query.
#
# The user's query is embedded with a sentence-transformers model and compared
# (cosine similarity) against pre-computed session-abstract embeddings loaded
# from "abstract-embed.txt"; the best matches are listed with their metadata
# from "sessions.csv".
#
# Requires: pip install -U sentence-transformers

import gzip
import json
import logging
import os
import random
import sys
import tarfile
from collections import defaultdict
from datetime import datetime
from shutil import copyfile
from urllib.error import URLError

import altair as alt
import numpy as np
import pandas as pd
import streamlit as st
import tqdm
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Sentence-embedding model used to encode the query.
# NOTE(review): this must be the same model that produced the rows of
# "abstract-embed.txt" (otherwise dimensions/semantics will not match);
# the id below is the one referenced in the original source -- confirm.
MODEL_NAME = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"

# Number of best-matching sessions to display.
TOP_K = 3

# Set up title and introduction.
st.title(" Your top 3 Important Sessions")
st.markdown("This application is a dashboard for displaying your top 3 Sessions at the Nashville summit")

# Load document embeddings (np.loadtxt with dtype=float yields float64 values).
doc_emb = np.loadtxt("abstract-embed.txt", dtype=float)

# Load session data.
df = pd.read_csv(
    "sessions.csv",
    usecols=[
        'Unique ID',
        'Name',
        'Description',
        'Activity Code',
        'Start Time',
        'End Time',
        'Location Name',
    ],
)

# Front-end elements of the web page.
html_temp = """

Sentence Similarity App for Nashville Analytic Summit 2022

"""


def main():
    """Render the page and, for a non-empty query, the most similar sessions.

    Reads the module-level ``df`` (session metadata) and ``doc_emb``
    (session embeddings), asks the user for a query, scores every session
    by cosine similarity to the query embedding and writes the ``TOP_K``
    highest-scoring sessions to the page.
    """
    # Display the front-end banner.
    st.markdown(html_temp, unsafe_allow_html=True)

    query = st.text_input("Enter your query: ")
    if not query:
        # Nothing to score yet; also avoids loading the model needlessly.
        return

    # A text-classification pipeline returns label/score dicts, not an
    # embedding, so the query is encoded with a SentenceTransformer instead.
    model = SentenceTransformer(MODEL_NAME)

    # encode() returns float32 by default; cast to float64 so the dtype
    # matches doc_emb when both are turned into tensors for the matmul.
    query_emb = model.encode(query).astype(float)

    # Cosine similarity between the query and every session embedding,
    # converted to plain Python floats for sorting and formatting.
    scores = util.pytorch_cos_sim(query_emb, doc_emb)[0].cpu().tolist()

    # Combine descriptions and scores with the other session attributes.
    # zip() stops at the shortest input, so this presumes one embedding row
    # per CSV row, in the same order -- TODO confirm.
    doc_score_pairs = zip(
        df["Description"],
        scores,
        df["Name"],
        df["Start Time"],
        df["End Time"],
        df["Location Name"],
    )

    # Sort by score in decreasing order and keep the first TOP_K.
    best_matches = sorted(doc_score_pairs, key=lambda pair: pair[1], reverse=True)[:TOP_K]

    # Shown on the page (print() would only reach the server console).
    st.write(f" Your top {TOP_K} most similar sessions in the Summit:")

    # Output session recommendations.
    for doc, score, title, start_time, end_time, location in best_matches:
        st.write(f"Score: {score:f}")
        st.write(f"Title: {title}")
        st.write(f"Abstract: {doc}")
        st.write(f"Location: {location}")
        st.write(f"From {start_time} to {end_time}")
        st.write('\n')


if __name__ == "__main__":
    main()