# Summit_app_demo / app.py
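"""Streamlit app for the Nashville Analytics Summit 2022.

Given a free-text query, the app embeds it with a sentence-transformers model,
compares it against precomputed session-abstract embeddings, and displays the
three most similar sessions (title, abstract, location, and time).
"""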
import numpy as np
import pandas as pd
import streamlit as st
from sentence_transformers import SentenceTransformer, util
# Set up the title and description
st.title("Your Top 3 Important Sessions")
st.markdown("This application is a dashboard for displaying your top 3 sessions at the Nashville Analytics Summit.")

# Load precomputed document (abstract) embeddings as float32 so they match the
# dtype of the query embedding produced by SentenceTransformer.encode
doc_emb = np.loadtxt("abstract-embed.txt", dtype=np.float32)

# Load session data
df = pd.read_csv(
    "sessions.csv",
    usecols=["Unique ID", "Name", "Description", "Activity Code", "Start Time", "End Time", "Location Name"],
)
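# NOTE: minimal sketch (not part of the original app) of how the precomputed
# embeddings in abstract-embed.txt could be regenerated from sessions.csv,
# assuming they were produced with the same multi-qa-MiniLM-L6-cos-v1 model
# that main() loads below. It is provided for reference only and never called.
def build_doc_embeddings(output_path="abstract-embed.txt"):
    model = SentenceTransformer("sentence-transformers/multi-qa-MiniLM-L6-cos-v1")
    # encode() on a list of strings returns a 2-D numpy array (n_sessions x 384)
    embeddings = model.encode(list(df["Description"]))
    np.savetxt(output_path, embeddings)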
# Front-end banner for the web page
html_temp = """
<div style="background-color:lightblue;padding:13px">
    <h1 style="color:white;text-align:center;">Sentence Similarity App for the Nashville Analytics Summit 2022</h1>
</div>
"""
def main():
    # Display the front-end banner
    st.markdown(html_temp, unsafe_allow_html=True)

    # Get session attributes from the dataframe
    docs = list(df["Description"])
    titles = list(df["Name"])
    start_times = list(df["Start Time"])
    end_times = list(df["End Time"])
    locations = list(df["Location Name"])

    # Load the sentence-embedding model used for the precomputed abstract embeddings
    model = SentenceTransformer("sentence-transformers/multi-qa-MiniLM-L6-cos-v1")

    # Query
    query = st.text_input("Enter your query: ")
    if query:
        # Encode the query into the same embedding space as the documents
        query_emb = model.encode(query)

        # Compute cosine similarity between the query and all document embeddings
        scores = util.pytorch_cos_sim(query_emb, doc_emb)[0].cpu().tolist()

        # Combine docs & scores with the other session attributes
        doc_score_pairs = list(zip(docs, scores, titles, start_times, end_times, locations))

        # Number of results to return
        top_k = 3
        st.write(f"Your top {top_k} most similar sessions in the Summit:")

        # Sort the results in decreasing order of similarity and keep the first top_k
        doc_score_pairs = sorted(doc_score_pairs, key=lambda x: x[1], reverse=True)

        # Output session recommendations
        for doc, score, title, start_time, end_time, location in doc_score_pairs[:top_k]:
            st.write("Score: %f" % score)
            st.write("Title: %s" % title)
            st.write("Abstract: %s" % doc)
            st.write("Location: %s" % location)
            st.write(f"From {start_time} to {end_time}")
            st.write("\n")


if __name__ == "__main__":
    main()
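# Usage note: with abstract-embed.txt and sessions.csv in the working
# directory, the app can be launched with `streamlit run app.py`.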