Spaces:
Runtime error
Runtime error
File size: 4,716 Bytes
3503538 101b319 1154940 8f37445 3503538 1db9e9a 8f37445 c491d37 0037ea6 4b87d72 2df8c8c 1db9e9a 9884e92 1bce5cd 090671b 6517c43 da97d65 1fdb11f b6c7b40 090671b 6517c43 b4b1fd9 090671b 6517c43 090671b f0ca479 755eb55 f0ca479 090671b 6517c43 e0b660a 665a998 090671b f0ca479 1bce5cd d1cba40 1db9e9a 090671b 2df8c8c f0ca479 2df8c8c 02a8d5f 2df8c8c d0f7403 1db9e9a 9cba540 be053e4 60e7380 88cd79e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import streamlit as st
from datasets import load_dataset
import networkx as nx
import numpy as np
import pandas as pd
# Load the TinyStories dataset and render the app introduction.
dataset = load_dataset("roneneldan/TinyStories")

st.markdown('# Short Stories, networks and connections')
st.markdown('In this example we consider the semantic similarity between short stories generated by GenAI.')
st.markdown('We study the relationships between the stories using a network. The Laplacian connectivity provides insights about the closeness of the graph')
st.markdown('## Short Stories')
st.markdown('We are using a sample of the [TinyStories](roneneldan/TinyStories) dataset from roneneldan work')

# Show one sample story (train index 10) in red, with newlines flattened to spaces.
text_text = dataset['train'][10]['text']
st.markdown("<span style='color:red'>" + text_text.replace('\n', ' ') + "</span>", unsafe_allow_html=True)

# Similarity threshold for connecting stories in the network:
# 0 connects almost everything, 1 requires (near-)identical embeddings.
st.markdown('The threshold changes the level of connectivity in the network. The range is from 0 (less similar) to 1 (completely similar)')
threshhold = st.slider('Threshold', 0.0, 1.0, step=0.1)
#-------------------------------------------------------------
# Semantic similarity between stories
#-------------------------------------------------------------
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

# First 10 items of the training split.  Each item is a dict with a
# 'text' key; downstream code indexes sentences[i]['text'], so keep the dicts.
sentences = [dataset['train'][ii] for ii in range(10)]

# Encode the story *texts* (not the raw dicts) into sentence embeddings.
embeddings = model.encode([s['text'] for s in sentences], convert_to_tensor=True)

# Pairwise cosine similarities, shape (10, 10); the diagonal is
# self-similarity (1.0) and must not become edges in the graph.
cosine_scores = util.cos_sim(embeddings, embeddings)

# Symmetric adjacency matrix of similarities with a zero diagonal,
# so thresholding it cannot create self-loops.
A = np.zeros((len(sentences), len(sentences)))
for i in range(len(sentences)):
    for j in range(i):
        A[i][j] = cosine_scores[i][j]
        A[j][i] = cosine_scores[i][j]

# Connect two stories when their similarity exceeds the slider threshold.
# (Thresholding A, not cosine_scores, keeps the diagonal out of the graph.)
G = nx.from_numpy_array(A > threshhold)
st.markdown('We can visualize the similarity between the short stories as a network. If the similarity is greater than the threshold, the two nodes are connected')
#-------------------------------------------------------------
# Ego network (egonet) visualisation
#-------------------------------------------------------------
# A node's egonet is the subgraph comprised of the focal node and all
# nodes adjacent to it.  Here we draw the egonet of the best-connected
# story in the similarity graph G built above.
# (Adapted from the streamlit-agraph ego_graph.py example; run the app
# with: streamlit run <path-to-script>.py)

# standard library dependencies
from operator import itemgetter
# external dependencies
import networkx as nx
from streamlit_agraph import agraph, Node, Edge, Config

# Barabasi-Albert parameters kept from the original example (unused here:
# the similarity graph G is visualised instead of a synthetic graph).
n = 2000
m = 2
#G = nx.generators.barabasi_albert_graph(n, m, seed=2023)

# Focal node: the node with the largest degree.  max() with itemgetter
# unpacks the (node, degree) pair directly, so most_connected_node is the
# node label itself (the original kept the whole tuple and passed it to
# G.degree, which is wrong).
most_connected_node, degree = max(G.degree, key=itemgetter(1))

# Egonet of the focal node.
hub_ego = nx.ego_graph(G, most_connected_node)

# Convert to streamlit-agraph Node/Edge lists; node tooltips show the story text.
nodes = [Node(title=str(sentences[i]['text']), id=i, label='node_' + str(i), size=20)
         for i in hub_ego.nodes]
edges = [Edge(source=i, target=j, type="CURVE_SMOOTH")
         for (i, j) in G.edges
         if i in hub_ego.nodes and j in hub_ego.nodes]
# Drawing options for the egonet, then render it with streamlit-agraph.
config = Config(
    width=500,
    height=500,
    directed=True,
    nodeHighlightBehavior=False,
    highlightColor="#F7A7A6",  # or "blue"
    collapsible=False,
    node={'labelProperty': 'label'},
    # **kwargs e.g. node_size=1000 or node_color="blue"
)
return_value = agraph(nodes=nodes, edges=edges, config=config)
st.markdown('The Laplacian centrality is a measure of closeness')

# Compute the centrality dict once and reuse it (the original called
# nx.laplacian_centrality(G) twice, doubling the work).
d_lc = nx.laplacian_centrality(G)
st.write(str(d_lc))

# Bar chart of centrality per node.  Build the frame directly from the
# dict so the centrality values stay numeric instead of being upcast
# through a single-dtype transposed array.
df_lc = pd.DataFrame({'node': list(d_lc.keys()),
                      'laplacian_centrality': list(d_lc.values())})
st.bar_chart(df_lc, x='node', y='laplacian_centrality')