# Summit_app_demo / app.py
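"""Streamlit app for the Nashville Analytics Summit 2022.

Given a free-text query, the app embeds it with a sentence-transformers model,
compares it against precomputed session-abstract embeddings, and displays the
three most similar sessions (title, abstract, location, and time).
"""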
import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer, util
# Set up the title and description
st.title("Your Top 3 Important Sessions")
st.markdown("This application is a dashboard for displaying your top 3 sessions at the Nashville Analytics Summit.")

# Load precomputed document (abstract) embeddings as float32 so they match the
# dtype of the query embedding produced by SentenceTransformer.encode
doc_emb = np.loadtxt("abstract-embed.txt", dtype=np.float32)

# Load session data
df = pd.read_csv(
    "sessions.csv",
    usecols=["Unique ID", "Name", "Description", "Activity Code", "Start Time", "End Time", "Location Name"],
)
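# NOTE: minimal sketch (not part of the original app) of how the precomputed
# embeddings in abstract-embed.txt could be regenerated from sessions.csv,
# assuming they were produced with the same multi-qa-MiniLM-L6-cos-v1 model
# that main() loads below. It is provided for reference only and never called.
def build_doc_embeddings(output_path="abstract-embed.txt"):
    model = SentenceTransformer("sentence-transformers/multi-qa-MiniLM-L6-cos-v1")
    # encode() on a list of strings returns a 2-D numpy array (n_sessions x 384)
    embeddings = model.encode(list(df["Description"]))
    np.savetxt(output_path, embeddings)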
# Front-end banner for the web page
html_temp = """
<div style="background-color:lightblue;padding:13px">
    <h1 style="color:white;text-align:center;">Sentence Similarity App for the Nashville Analytics Summit 2022</h1>
</div>
"""
def main():
    # Display the front-end banner
    st.markdown(html_temp, unsafe_allow_html=True)

    # Get session attributes from the dataframe
    docs = list(df["Description"])
    titles = list(df["Name"])
    start_times = list(df["Start Time"])
    end_times = list(df["End Time"])
    locations = list(df["Location Name"])

    # Load the sentence-embedding model used for the precomputed abstract embeddings
    model = SentenceTransformer("sentence-transformers/multi-qa-MiniLM-L6-cos-v1")

    # Query
    query = st.text_input("Enter your query: ")
    if query:
        # Encode the query into the same embedding space as the documents
        query_emb = model.encode(query)

        # Compute cosine similarity between the query and all document embeddings
        scores = util.pytorch_cos_sim(query_emb, doc_emb)[0].cpu().tolist()

        # Combine docs & scores with the other session attributes
        doc_score_pairs = list(zip(docs, scores, titles, start_times, end_times, locations))

        # Number of results to return
        top_k = 3
        st.write(f"Your top {top_k} most similar sessions in the Summit:")

        # Sort the results in decreasing order of similarity and keep the first top_k
        doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

        # Output session recommendations
        for doc, score, title, start_time, end_time, location in doc_score_pairs[:top_k]:
            st.write("Score: %f" % score)
            st.write("Title: %s" % title)
            st.write("Abstract: %s" % doc)
            st.write("Location: %s" % location)
            st.write(f"From {start_time} to {end_time}")
            st.write("\n")


if __name__ == "__main__":
    main()
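# Usage note: with abstract-embed.txt and sessions.csv in the working
# directory, the app can be launched with `streamlit run app.py`.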