Polo123 committed on
Commit
bed7d75
1 Parent(s): c06d327

Create logic.py

Browse files
Files changed (1) hide show
  1. logic.py +146 -0
logic.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #from google.colab import userdata
2
+ import kuzu
3
+ import logging
4
+ import sys
5
+ import os
6
+ from llama_index.graph_stores.kuzu import KuzuGraphStore
7
+ from llama_index.core import (
8
+ SimpleDirectoryReader,
9
+ ServiceContext,
10
+ KnowledgeGraphIndex,
11
+ )
12
+ from llama_index.readers.web import SimpleWebPageReader
13
+
14
+
15
+ from llama_index.llms.openai import OpenAI
16
+ from IPython.display import Markdown, display
17
+ from llama_index.core.storage.storage_context import StorageContext
18
+
19
+ from pyvis.network import Network
20
+ import pandas as pd
21
+ import numpy as np
22
+ import plotly.express as px
23
+ import umap
24
+
25
def get_index(links):
    """Build a Kuzu-backed knowledge-graph index from a list of web pages.

    Args:
        links: URLs passed to ``SimpleWebPageReader.load_data``.

    Returns:
        The ``KnowledgeGraphIndex`` built from the downloaded documents,
        with embeddings included.

    Raises:
        RuntimeError: If no OpenAI API key can be found, neither in the
            ``OPENAI_API_KEY`` environment variable nor in the Colab
            ``userdata`` secret named ``oai``.
    """
    # Resolve the API key before doing any expensive work.  An already
    # exported OPENAI_API_KEY wins; otherwise fall back to the Colab secret.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        # The module-level ``google.colab`` import is commented out, so the
        # helper is imported lazily here and only when it is really needed.
        try:
            from google.colab import userdata
        except ImportError as err:
            raise RuntimeError(
                "OPENAI_API_KEY is not set and google.colab.userdata is "
                "not available to provide the 'oai' secret"
            ) from err
        api_key = userdata.get('oai')
        if not api_key:
            raise RuntimeError(
                "OPENAI_API_KEY is not set and the Colab secret 'oai' is empty"
            )
        os.environ["OPENAI_API_KEY"] = api_key

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    # Graph triplets are stored in the on-disk Kuzu database "kg1".
    db = kuzu.Database("kg1")
    graph_store = KuzuGraphStore(db)

    documents = SimpleWebPageReader(html_to_text=True).load_data(links)

    # Pass the resolved key explicitly; an empty string here would override
    # the environment variable and make every request unauthenticated.
    llm = OpenAI(temperature=0, model="gpt-3.5-turbo", api_key=api_key)
    service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)

    storage_context = StorageContext.from_defaults(graph_store=graph_store)

    # NOTE: can take a while!
    index = KnowledgeGraphIndex.from_documents(
        documents=documents,
        max_triplets_per_chunk=5,
        storage_context=storage_context,
        service_context=service_context,
        show_progress=True,
        include_embeddings=True,
    )

    return index
51
+
52
def get_network_graph(index):
    """Render the index's knowledge graph as an interactive pyvis HTML page.

    The index is converted to a NetworkX graph, loaded into a directed
    pyvis ``Network`` and written to ``kuzugraph_draw3.html``.

    Args:
        index: Object exposing ``get_networkx_graph()``.

    Returns:
        None. The only result is the HTML file.
    """
    output_html = "kuzugraph_draw3.html"

    nx_graph = index.get_networkx_graph()
    pyvis_net = Network(notebook=True, cdn_resources="in_line", directed=True)
    pyvis_net.from_nx(nx_graph)

    # Both calls target the same file, exactly as before.
    pyvis_net.show(output_html)
    pyvis_net.save_graph(output_html)
58
+
59
+
60
def get_embeddings(index):
    """Return the index's stored embeddings as a pandas Series.

    Args:
        index: Object whose ``index_struct.to_dict()`` returns a mapping
            containing an ``'embedding_dict'`` entry (label -> vector).

    Returns:
        A Series named ``'embedding_dict'`` whose index holds the labels
        and whose values are the embedding vectors.  Entries without an
        embedding are dropped; an empty mapping yields an empty Series.

    Raises:
        KeyError: If the struct has no ``'embedding_dict'`` entry.
    """
    struct = index.index_struct.to_dict()

    # Build the Series straight from the embedding mapping.  Going through a
    # DataFrame of the whole struct made the result depend on every other
    # field being coercible into an aligned column.
    embedding_map = struct['embedding_dict'] or {}
    embeddings = pd.Series(embedding_map, name='embedding_dict', dtype=object)
    return embeddings.dropna()
65
+
66
+
67
def get_visualize_embeddings(embedding_series, n_neighbors=15, min_dist=0.1, n_components=2):
    """Reduce embeddings with UMAP and return a Plotly scatter figure.

    Args:
        embedding_series: pandas Series whose values are equal-length
            embedding vectors; its index supplies the hover label.
        n_neighbors: Forwarded to ``umap.UMAP``.
        min_dist: Forwarded to ``umap.UMAP``.
        n_components: Number of UMAP output dimensions (at least 2).

    Returns:
        A Plotly figure: a 2-D scatter when ``n_components`` is 2, otherwise
        a 3-D scatter of the first three UMAP dimensions.

    Raises:
        ValueError: If ``n_components`` is below 2 or the series is empty.
    """
    if n_components < 2:
        raise ValueError("n_components must be at least 2 to draw a scatter plot")
    if len(embedding_series) == 0:
        raise ValueError("embedding_series is empty; nothing to visualize")

    # Stack the vectors into a 2-D array.  This avoids the integer lookup
    # ``embedding_series[0]``, which is a label lookup on a labelled Series.
    vectors = np.asarray(embedding_series.tolist(), dtype=float)

    # Perform UMAP dimensionality reduction (fixed seed for reproducibility).
    umap_embedded = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        random_state=42,
    ).fit_transform(vectors)

    # One column per requested component, so any n_components >= 2 works.
    axis_names = [f'UMAP Dimension {i + 1}' for i in range(n_components)]
    umap_df = pd.DataFrame(umap_embedded, columns=axis_names)
    umap_df['Label'] = embedding_series.index

    title = 'UMAP Visualization of Embeddings'
    if n_components == 2:
        return px.scatter(
            umap_df,
            x=axis_names[0],
            y=axis_names[1],
            hover_data=['Label'],
            title=title,
        )

    # With three or more components, show the first three in 3-D.
    return px.scatter_3d(
        umap_df,
        x=axis_names[0],
        y=axis_names[1],
        z=axis_names[2],
        hover_data=['Label'],
        title=title,
    )
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+