# logic.py — knowledge-graph index helpers (updated by Polo123, commit 6b06a97)
import kuzu
import logging
import sys
import os
#import llama_index
from llama_index.graph_stores import KuzuGraphStore
from llama_index import (
SimpleDirectoryReader,
ServiceContext,
KnowledgeGraphIndex,
)
from llama_index.readers import SimpleWebPageReader
from llama_index.indices.loading import load_index_from_storage
from llama_index.llms import OpenAI
from IPython.display import Markdown, display
from llama_index.storage.storage_context import StorageContext
from pyvis.network import Network
import pandas as pd
import numpy as np
import plotly.express as px
import umap
def make_dir():
    """Ensure the local ``data`` directory exists.

    Uses ``os.makedirs`` with ``exist_ok=True`` so repeated calls are
    idempotent and there is no check-then-create race (the original
    exists()/mkdir pair could raise if the directory appeared between
    the two calls).
    """
    os.makedirs("data", exist_ok=True)
def save_uploadedfile(uploadedfile):
    """Write an uploaded file object into the ``data`` directory.

    ``uploadedfile`` is expected to expose ``.name`` and ``.getbuffer()``
    (the interface of a Streamlit ``UploadedFile``).  The ``data``
    directory must already exist (see ``make_dir``).
    """
    target_path = os.path.join("data", uploadedfile.name)
    with open(target_path, "wb") as out_file:
        out_file.write(uploadedfile.getbuffer())
def load_index(token, name):
    """Reload a previously persisted knowledge-graph index from disk.

    Parameters
    ----------
    token : str
        OpenAI API key; also exported via the ``OPENAI_API_KEY``
        environment variable for libraries that read it implicitly.
    name : str
        Base directory containing the ``kg`` Kuzu database and the
        ``storage`` persistence folder written by the ``get_index*``
        builders.

    Returns
    -------
    The rehydrated ``KnowledgeGraphIndex``.
    """
    os.environ["OPENAI_API_KEY"] = token
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)

    database = kuzu.Database(name + "/kg")
    kuzu_store = KuzuGraphStore(database)

    chat_llm = OpenAI(temperature=0, model="gpt-3.5-turbo", api_key=token)
    context = ServiceContext.from_defaults(llm=chat_llm, chunk_size=512)
    storage = StorageContext.from_defaults(
        graph_store=kuzu_store, persist_dir=name + "/storage"
    )
    return load_index_from_storage(
        storage_context=storage, service_context=context
    )
def get_index_pdf(token, name):
    """Build a knowledge-graph index from documents under ``./data`` and persist it.

    Parameters
    ----------
    token : str
        OpenAI API key; also exported via ``OPENAI_API_KEY``.
    name : str
        Base directory to create for the Kuzu database (``name/kg``)
        and the index persistence folder (``name/storage``).

    Returns
    -------
    The freshly built ``KnowledgeGraphIndex``.
    """
    documents = SimpleDirectoryReader("./data").load_data()
    print(documents)  # debug: show loaded documents (original printed this twice)
    # exist_ok=True tolerates re-runs; the original os.mkdir raised
    # FileExistsError when the directory was already present.
    os.makedirs(name, exist_ok=True)
    os.environ["OPENAI_API_KEY"] = token
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    db = kuzu.Database(name + "/kg")
    graph_store = KuzuGraphStore(db)
    llm = OpenAI(temperature=0, model="gpt-3.5-turbo", api_key=token)
    service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)
    storage_context = StorageContext.from_defaults(graph_store=graph_store)
    # NOTE: triple extraction calls the LLM per chunk — this can take a while.
    index = KnowledgeGraphIndex.from_documents(
        documents=documents,
        max_triplets_per_chunk=2,
        storage_context=storage_context,
        service_context=service_context,
        show_progress=True,
        include_embeddings=True,
    )
    index.storage_context.persist(name + "/storage")
    return index
def get_index(links, token, name):
    """Build a knowledge-graph index from a list of web pages and persist it.

    Parameters
    ----------
    links : list[str]
        URLs to fetch; each page is converted to text before indexing.
    token : str
        OpenAI API key; also exported via ``OPENAI_API_KEY``.
    name : str
        Base directory to create for the Kuzu database (``name/kg``)
        and the index persistence folder (``name/storage``).

    Returns
    -------
    The freshly built ``KnowledgeGraphIndex``.
    """
    # exist_ok=True tolerates re-runs; the original os.mkdir raised
    # FileExistsError when the directory was already present.
    os.makedirs(name, exist_ok=True)
    os.environ["OPENAI_API_KEY"] = token
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    db = kuzu.Database(name + "/kg")
    graph_store = KuzuGraphStore(db)
    documents = SimpleWebPageReader(html_to_text=True).load_data(links)
    llm = OpenAI(temperature=0, model="gpt-3.5-turbo", api_key=token)
    service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)
    storage_context = StorageContext.from_defaults(graph_store=graph_store)
    # NOTE: triple extraction calls the LLM per chunk — this can take a while.
    index = KnowledgeGraphIndex.from_documents(
        documents=documents,
        max_triplets_per_chunk=2,
        storage_context=storage_context,
        service_context=service_context,
        show_progress=True,
        include_embeddings=True,
    )
    index.storage_context.persist(name + "/storage")
    return index
def get_network_graph(index):
    """Render the index's triplet graph to ``kuzugraph_draw3.html``.

    Converts the index to a NetworkX graph, loads it into a directed
    pyvis network, and saves the interactive HTML visualization to the
    current working directory.
    """
    nx_graph = index.get_networkx_graph()
    visualization = Network(directed=True)
    visualization.from_nx(nx_graph)
    visualization.save_graph("kuzugraph_draw3.html")
def get_embeddings(index):
    """Return the index's per-node embeddings as a pandas Series.

    Reads the serialized index structure, takes its ``embedding_dict``
    column, and drops entries with no embedding.
    """
    struct_dict = index.index_struct.to_dict()
    frame = pd.DataFrame.from_dict(struct_dict)
    return frame["embedding_dict"].dropna()
def get_visualize_embeddings(embedding_series, n_neighbors=15, min_dist=0.1, n_components=2):
    """Project embeddings to 2-D with UMAP and return a Plotly scatter figure.

    Parameters
    ----------
    embedding_series : pandas.Series
        Series of embedding vectors (as produced by ``get_embeddings``),
        indexed by node label.
    n_neighbors, min_dist, n_components :
        UMAP hyperparameters, forwarded unchanged.

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter plot of the 2-D UMAP projection with node labels on hover.
    """
    # BUG FIX: embedding_series[0] is label-based lookup and breaks (or hits
    # the deprecated positional fallback) when the Series index is node
    # labels rather than integers; .iloc[0] is the correct positional access.
    dim_count = len(embedding_series.iloc[0])
    embedding_df = pd.DataFrame(
        embedding_series.tolist(),
        columns=[f'dim_{i+1}' for i in range(dim_count)],
    )
    # Perform UMAP dimensionality reduction (random_state pinned for
    # reproducible layouts).
    umap_embedded = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=n_components,
        random_state=42,
    ).fit_transform(embedding_df.values)
    umap_df = pd.DataFrame(umap_embedded, columns=['UMAP Dimension 1', 'UMAP Dimension 2'])
    umap_df['Label'] = embedding_series.index
    # Plot the UMAP embedding using Plotly Express
    fig = px.scatter(umap_df, x='UMAP Dimension 1', y='UMAP Dimension 2', hover_data=['Label'], title='UMAP Visualization of Embeddings')
    return fig
def query_model(index, user_query):
    """Answer ``user_query`` against the knowledge-graph index.

    Builds a hybrid (keyword + embedding) query engine over the index,
    summarizing the top-5 matches with tree summarization, and returns
    the response text.
    """
    engine = index.as_query_engine(
        include_text=True,
        response_mode="tree_summarize",
        embedding_mode="hybrid",
        similarity_top_k=5,
    )
    result = engine.query(user_query)
    return result.response