Spaces:

hacpdsae2023
/

shortstorynetwork

Runtime error

App Files Files Community

shortstorynetwork / app.py

hacpdsae2023

Fix data into bar char

88cd79e about 1 year ago

raw

history blame contribute delete

4.72 kB

	import streamlit as st
	from datasets import load_dataset
	import networkx as nx
	import numpy as np
	import pandas as pd
	# Load TinyStories from the Hugging Face Hub; the rest of the script reads
	# rows through dataset['train'].
	dataset = load_dataset("roneneldan/TinyStories")

	# Page title and introduction.
	st.markdown('# Short Stories, networks and connections')
	st.markdown('In this example we consider the semantic similarity between short stories generatited by GenAI.')
	st.markdown('We study the relationshis between the stories using a network. The laplacian connectivity provides inights about the closeness of the graph')

	st.markdown('## Short Stories')
	# The link target must be an absolute URL: a bare "owner/name" is resolved
	# relative to the app's own page and leads nowhere.
	st.markdown('We are using a sample fo the [TinyStories](https://huggingface.co/datasets/roneneldan/TinyStories) dataset from roneneldan work')

	# Show one sample story in red. The text comes from an external dataset and
	# is rendered with unsafe_allow_html=True, so HTML metacharacters are
	# escaped first; otherwise markup inside a story would be interpreted by
	# the browser instead of displayed.
	import html
	text_text = dataset['train'][10]['text']
	sample_html = html.escape(text_text.replace('\n', ' '), quote=False)
	st.markdown("<span style='color:red'>" + sample_html + "</span>", unsafe_allow_html=True)

	# Similarity cut-off used further down to decide which stories are linked.
	st.markdown('The threshold changes the level of connectivity in the network. The reange is from 0 (less similar) to 1 (completely similar)')
	threshhold = st.slider('Threshhold',0.0,1.0,step=0.1)

	#-------------------------------------------------------------
	#-------------------------------------------------------------

	from sentence_transformers import SentenceTransformer, util
	# Pre-trained sentence-embedding model.
	model = SentenceTransformer('all-MiniLM-L6-v2')

	# First ten training rows. Each row is kept as the dataset's own record
	# (indexed with ['text'] elsewhere in the script), so `sentences` stays a
	# list of rows rather than a list of strings.
	sentences = [dataset['train'][ii] for ii in range(10)]

	# Compute embeddings from the plain story strings: `encode` is documented
	# to take strings, so the text is extracted explicitly instead of handing
	# it the row records.
	story_texts = [row['text'] for row in sentences]
	embeddings = model.encode(story_texts, convert_to_tensor=True)

	# Pairwise cosine similarity between every pair of stories.
	cosine_scores = util.cos_sim(embeddings, embeddings)

	# Move the similarity matrix to host memory before converting it:
	# `.numpy()` raises for a tensor stored on an accelerator, while `.cpu()`
	# simply returns the tensor when it is already on the CPU.
	similarity = cosine_scores.cpu().numpy()

	# Symmetric adjacency matrix of pairwise scores with a zero diagonal.
	# Mirroring the strictly-lower triangle reproduces what the element-wise
	# double loop produced (A[i][j] = A[j][i] = score[i][j] for j < i).
	lower_triangle = np.tril(similarity, k=-1).astype(float)
	A = lower_triangle + lower_triangle.T

	# Unweighted graph: an edge wherever the similarity exceeds the slider value.
	# NOTE(review): the thresholded matrix still contains the diagonal (each
	# story compared with itself), so nodes get self-loops whenever that entry
	# exceeds the threshold -- confirm whether this is intended.
	G = nx.from_numpy_array(similarity > threshhold)

	st.markdown('We can visualize the similarity between the shorts stories as a network. It the similarity is greater than the threshold, the two nodes are conencted')


	#-------------------------------------------------------------
	#-------------------------------------------------------------
	# ego_graph.py
	# An example of how to plot a node's ego network
	# (egonet). This indirectly showcases slightly more involved
	# interoperability between streamlit-agraph and networkx.

	# An egonet can be created from (almost) any network (graph),
	# and exemplifies the concept of a subnetwork (subgraph):
	# A node's egonet is the (sub)network comprised of the focal node
	# and all the nodes to which it is adjacent. The edges included
	# in the egonet are those whose endpoints are both among the
	# aforementioned nodes.

	# Use the following command to launch the app
	# streamlit run <path-to-script>.py

	# standard library dependencies
	from operator import itemgetter

	# external dependencies
	import networkx as nx
	from streamlit_agraph import agraph, Node, Edge, Config

	# Find the node with the largest degree; its egonet is what gets drawn.
	# `G.degree` iterates as (node, degree) pairs. max() returns the first pair
	# holding the largest degree, which is the same pick a stable descending
	# sort would put first, without sorting every node.
	most_connected_node = max(G.degree, key=itemgetter(1))
	hub_node, degree = most_connected_node

	# Create egonet for the focal node
	hub_ego = nx.ego_graph(G, hub_node)

	# Now create the equivalent Node and Edge lists.
	# Each node carries its story text as the title and a short "node_<i>" label.
	nodes = [
	    Node(title=str(sentences[i]['text']), id=i, label='node_'+str(i), size=20)
	    for i in hub_ego.nodes
	]
	# The egonet is the subgraph induced by its nodes, so its own edge list is
	# exactly the set of edges of G with both endpoints inside it. Self-loops
	# are skipped: G is built from a thresholded similarity matrix that
	# includes the diagonal, and a story being similar to itself is not a
	# connection worth drawing.
	edges = [
	    Edge(source=i, target=j, type="CURVE_SMOOTH")
	    for (i, j) in hub_ego.edges
	    if i != j
	]


	# Rendering options for the graph component.
	config = Config(width=500,
	                height=500,
	                directed=True,
	                nodeHighlightBehavior=False,
	                highlightColor="#F7A7A6", # or "blue"
	                collapsible=False,
	                node={'labelProperty':'label'},
	                # **kwargs e.g. node_size=1000 or node_color="blue"
	                )

	# Draw the egonet in the page.
	return_value = agraph(nodes=nodes,
	                      edges=edges,
	                      config=config)

	st.markdown('The Laplacian centrality is a measure of closeness')

	# Compute the centrality once and reuse it for both the text dump and the
	# chart. When no edge joins two distinct nodes (high threshold) the graph
	# has nothing to normalise by -- presumably yielding NaN values or a
	# ZeroDivisionError depending on the networkx version; confirm -- so every
	# node is reported as 0.0 in that case instead.
	if any(u != v for u, v in G.edges):
	    d_lc = nx.laplacian_centrality(G)
	else:
	    d_lc = {node: 0.0 for node in G}
	st.write(str(d_lc))

	# One row per node. Building the frame from named columns keeps node ids
	# as integers; stacking keys and values into one array would coerce them
	# to floats.
	df_lc = pd.DataFrame({
	    'node': list(d_lc),
	    'laplacian_centrality': list(d_lc.values()),
	})
	st.bar_chart(df_lc,x='node',y='laplacian_centrality')