eaglelandsonce committed on
Commit
5794470
·
verified ·
1 Parent(s): 97aae78

Update pages/21_GraphRag.py

Browse files
Files changed (1) hide show
  1. pages/21_GraphRag.py +70 -60
pages/21_GraphRag.py CHANGED
@@ -1,60 +1,70 @@
1
- import torch
2
- from transformers import AutoTokenizer, AutoModel
3
- from sentence_transformers import SentenceTransformer
4
- import networkx as nx
5
- import matplotlib.pyplot as plt
6
-
7
- # Load pre-trained model and tokenizer
8
- model_name = "bert-base-uncased"
9
- tokenizer = AutoTokenizer.from_pretrained(model_name)
10
- model = AutoModel.from_pretrained(model_name)
11
-
12
- # Function to get embeddings
13
- def get_embeddings(texts):
14
- inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
15
- with torch.no_grad():
16
- outputs = model(**inputs)
17
- return outputs.last_hidden_state[:, 0, :].numpy()
18
-
19
- # Sample data (replace with your own data import)
20
- documents = [
21
- "The quick brown fox jumps over the lazy dog.",
22
- "A journey of a thousand miles begins with a single step.",
23
- "To be or not to be, that is the question.",
24
- "All that glitters is not gold.",
25
- ]
26
-
27
- # Get embeddings for documents
28
- embeddings = get_embeddings(documents)
29
-
30
- # Create graph
31
- G = nx.Graph()
32
-
33
- # Add nodes and edges based on cosine similarity
34
- threshold = 0.5 # Adjust this threshold as needed
35
- for i in range(len(documents)):
36
- G.add_node(i, text=documents[i])
37
- for j in range(i+1, len(documents)):
38
- similarity = torch.cosine_similarity(torch.tensor(embeddings[i]), torch.tensor(embeddings[j]), dim=0)
39
- if similarity > threshold:
40
- G.add_edge(i, j, weight=similarity.item())
41
-
42
- # Visualize the graph
43
- pos = nx.spring_layout(G)
44
- nx.draw(G, pos, with_labels=True, node_color='lightblue', node_size=500, font_size=8, font_weight='bold')
45
- edge_labels = nx.get_edge_attributes(G, 'weight')
46
- nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
47
- plt.title("Document Similarity Graph")
48
- plt.show()
49
-
50
- # Example of querying the graph
51
- query = "What is the meaning of life?"
52
- query_embedding = get_embeddings([query])[0]
53
-
54
- # Find most similar document
55
- similarities = [torch.cosine_similarity(torch.tensor(query_embedding), torch.tensor(emb), dim=0) for emb in embeddings]
56
- most_similar_idx = max(range(len(similarities)), key=similarities.__getitem__)
57
-
58
- print(f"Most similar document to the query: {documents[most_similar_idx]}")
59
-
60
- # You can extend this to implement more complex graph-based retrieval algorithms
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import sys
3
+ import subprocess
4
+ import importlib
5
+
6
+ st.title("GraphRAG Module Explorer")
7
+
8
+ # Function to install a package
9
+ def install_package(package):
10
+ subprocess.check_call([sys.executable, "-m", "pip", "install", package])
11
+
12
+ # Check and install required packages
13
+ required_packages = ['graphrag', 'sentence_transformers']
14
+ for package in required_packages:
15
+ try:
16
+ importlib.import_module(package)
17
+ except ImportError:
18
+ st.write(f"Installing {package}...")
19
+ install_package(package)
20
+ st.write(f"{package} installed successfully.")
21
+
22
+ # Now try to import graphrag
23
+ try:
24
+ import graphrag
25
+ import inspect
26
+
27
+ # Display all attributes and functions in the graphrag module
28
+ st.header("GraphRAG Module Contents")
29
+ graphrag_contents = dir(graphrag)
30
+
31
+ for item in graphrag_contents:
32
+ attr = getattr(graphrag, item)
33
+ st.subheader(f"{item}")
34
+ st.write(f"Type: {type(attr)}")
35
+
36
+ if inspect.isclass(attr):
37
+ st.write("Class Methods:")
38
+ for name, method in inspect.getmembers(attr, predicate=inspect.isfunction):
39
+ st.write(f"- {name}")
40
+ st.write(f" Signature: {inspect.signature(method)}")
41
+ st.write(f" Docstring: {method.__doc__}")
42
+
43
+ elif inspect.isfunction(attr):
44
+ st.write("Function:")
45
+ st.write(f"Signature: {inspect.signature(attr)}")
46
+ st.write(f"Docstring: {attr.__doc__}")
47
+
48
+ elif isinstance(attr, (int, float, str, bool)):
49
+ st.write(f"Value: {attr}")
50
+
51
+ st.write("---")
52
+
53
+ # Display the module's docstring if available
54
+ if graphrag.__doc__:
55
+ st.header("GraphRAG Module Documentation")
56
+ st.write(graphrag.__doc__)
57
+
58
+ st.header("Next Steps")
59
+ st.write("""
60
+ Based on the information above, we need to determine:
61
+ 1. How to create a graph representation of text using graphrag.
62
+ 2. How to process this graph representation for analysis.
63
+ 3. Whether graphrag provides any built-in analysis tools or if we need to integrate it with other libraries.
64
+
65
+ Please review the module contents and let me know which components seem most relevant for our text analysis task.
66
+ """)
67
+
68
+ except Exception as e:
69
+ st.error(f"An error occurred while exploring the graphrag module: {str(e)}")
70
+ st.write("Please check the installation of graphrag and its dependencies, and try running the app again.")