File size: 3,230 Bytes
28be794
 
3454357
846c9b9
f482a30
bc5676d
f482a30
3454357
f4547e9
 
f482a30
 
f4547e9
 
 
 
 
 
f482a30
f4547e9
f482a30
f4547e9
 
72c7204
28be794
 
 
3454357
7b14736
1c00bf5
2733741
48e5161
3454357
 
 
 
 
 
 
 
1c00bf5
3454357
 
35b1251
 
 
 
 
 
 
 
 
 
28be794
a203666
8cc1051
35b1251
 
28be794
35b1251
 
 
28be794
f96df1d
846c9b9
35b1251
6143265
 
 
 
 
35b1251
 
 
6143265
28be794
35b1251
 
28be794
35b1251
28be794
6143265
35b1251
28be794
 
35b1251
 
28be794
35b1251
 
 
 
 
3454357
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import streamlit as st
import pandas as pd
import altair as alt
from transformers import pipeline
#!pip install -U sentence-transformers
from sentence_transformers import SentenceTransformer, util
#from sentence_transformers import SentenceTransformer
import numpy as np
import sys
import json
#from torch.utils.data import DataLoader
#from sentence_transformers import SentenceTransformer, LoggingHandler, util, models, evaluation, losses, InputExample
import logging
from datetime import datetime
import gzip
import os
import tarfile
from collections import defaultdict
#from torch.utils.data import IterableDataset
import tqdm
#from torch.utils.data import Dataset
import random
from shutil import copyfile
from urllib.error import URLError

# Page header: title plus a one-line description of the dashboard.
st.title(" Your top 3 Important Sessions")
st.markdown(
    "This application is a dashboard for displaying your top 3 Sessions at the Nashville summit"
)

# Document embeddings, parsed from a plain-text file as floats.
# NOTE(review): presumably one row per session in sessions.csv, in the same
# order -- confirm against whatever produced this file.
doc_emb = np.loadtxt("abstract-embed.txt", dtype=float)

# Session metadata; only the columns listed here are read from the CSV.
df = pd.read_csv(
    "sessions.csv",
    usecols=[
        'Unique ID',
        'Name',
        'Description',
        'Activity Code',
        'Start Time',
        'End Time',
        'Location Name',
    ],
)

# HTML banner that main() renders at the top of the page. Built from explicit
# pieces so the leading newline and trailing spaces of the markup are visible.
html_temp = (
    "\n"
    '<div style ="background-color:lightblue;padding:13px">\n'
    '<h1 style ="color:white;text-align:center;">'
    "Sentence Similarity App for Nashville Analytic Summit 2022</h1>\n"
    "</div>\n"
    "        "
)
def main():
    """Render the page and list the sessions most similar to the user's query.

    Shows the HTML banner, reads a free-text query from a text input, embeds
    it with a sentence-embedding model, scores it against the module-level
    ``doc_emb`` matrix with cosine similarity, and writes the three
    best-scoring sessions (score, title, abstract, location, time span) to
    the page. Nothing is scored or written while the query is empty.
    """
    # display the front end aspect
    st.markdown(html_temp, unsafe_allow_html=True)

    # Get attributes from dataframe
    docs = list(df["Description"])
    titles = list(df["Name"])
    start_times = list(df["Start Time"])
    end_times = list(df["End Time"])
    locations = list(df["Location Name"])

    # Load the embedding model. A text-classification pipeline returns a list
    # of label/score dicts, not a vector, so it cannot be used to embed the
    # query; a SentenceTransformer is required here.
    # NOTE(review): presumably abstract-embed.txt was produced with this same
    # model -- confirm, otherwise the similarity scores are meaningless.
    model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')

    query = st.text_input("Enter your query: ")
    if not query:
        return

    # encode() returns a NumPy array by default; cast it to float64 so its
    # dtype matches doc_emb, which was loaded with dtype=float.
    query_emb = model.encode(query).astype(float)

    # Cosine similarity between the query and every document embedding;
    # row 0 is the single query. Convert to plain floats for sorting/printing.
    scores = util.pytorch_cos_sim(query_emb, doc_emb)[0].tolist()

    # Combine docs & scores with other attributes
    doc_score_pairs = zip(docs, scores, titles, start_times, end_times, locations)

    # top_k results to return
    top_k = 3

    # Goes to the server console, not to the rendered page.
    print(" Your top", top_k, "most similar sessions in the Summit:")

    # Sort the results in decreasing order of score and keep the first top_k
    best_matches = sorted(doc_score_pairs, key=lambda pair: pair[1], reverse=True)[:top_k]

    # Output presentation recommendations
    for doc, score, title, start_time, end_time, location in best_matches:
        st.write(f"Score: {score:f}")
        st.write(f"Title: {title}")
        st.write(f"Abstract: {doc}")
        st.write(f"Location: {location}")
        st.write(f"From {start_time} to {end_time}")
        st.write('\n')


if __name__ == "__main__":
    main()