zrguo committed · Commit 96fdb2f · 1 Parent(s): e6b8d67

Update RAGAnything related

Files changed:
- README.md (+89 -34)
- examples/modalprocessors_example.py (+10 -5)
- examples/raganything_example.py (+10 -7)
README.md
CHANGED
@@ -1159,40 +1159,95 @@ LightRAG now seamlessly integrates with [RAG-Anything](https://github.com/HKUDS/RAG-Anything)
 pip install raganything
 ```
 2. Process multimodal documents:
-[34 deleted lines of the previous example are not rendered in this view]
+```python
+import asyncio
+from raganything import RAGAnything
+from lightrag import LightRAG
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+import os
+
+async def load_existing_lightrag():
+    # First, create or load an existing LightRAG instance
+    lightrag_working_dir = "./existing_lightrag_storage"
+
+    # Check if a previous LightRAG instance exists
+    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
+        print("✅ Found existing LightRAG instance, loading...")
+    else:
+        print("❌ No existing LightRAG instance found, will create a new one")
+
+    # Create/load the LightRAG instance with your configuration
+    lightrag_instance = LightRAG(
+        working_dir=lightrag_working_dir,
+        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
+            "gpt-4o-mini",
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key="your-api-key",
+            **kwargs,
+        ),
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key="your-api-key",
+                base_url="your-base-url",  # optional; omit to use the default OpenAI endpoint
+            ),
+        ),
+    )
+
+    # Initialize storage (this will load existing data if available)
+    await lightrag_instance.initialize_storages()
+
+    # Now initialize RAGAnything with the existing LightRAG instance
+    rag = RAGAnything(
+        lightrag=lightrag_instance,  # Pass the existing LightRAG instance
+        # Only a vision model is needed for multimodal processing
+        vision_model_func=lambda prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs: openai_complete_if_cache(
+            "gpt-4o",
+            "",
+            system_prompt=None,
+            history_messages=[],
+            messages=[
+                # Include the system message only when a system prompt is given
+                *([{"role": "system", "content": system_prompt}] if system_prompt else []),
+                {"role": "user", "content": [
+                    {"type": "text", "text": prompt},
+                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}},
+                ]},
+            ],
+            api_key="your-api-key",
+            **kwargs,
+        ) if image_data else openai_complete_if_cache(
+            "gpt-4o-mini",
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key="your-api-key",
+            **kwargs,
+        ),
+        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
+    )
+
+    # Query the existing knowledge base
+    result = await rag.query_with_multimodal(
+        "What data has been processed in this LightRAG instance?",
+        mode="hybrid",
+    )
+    print("Query result:", result)
+
+    # Add new multimodal documents to the existing LightRAG instance
+    await rag.process_document_complete(
+        file_path="path/to/new/multimodal_document.pdf",
+        output_dir="./output",
+    )
+
+if __name__ == "__main__":
+    asyncio.run(load_existing_lightrag())
+```
 
 For detailed documentation and advanced usage, please refer to the [RAG-Anything repository](https://github.com/HKUDS/RAG-Anything).
 
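Since RAGAnything reuses the existing LightRAG instance rather than building its own, plain text queries can also go straight through LightRAG. A minimal sketch, assuming LightRAG's standard `aquery`/`QueryParam` query API (not part of this commit):

```python
from lightrag import LightRAG, QueryParam

async def text_only_query(lightrag_instance: LightRAG) -> str:
    # Query the shared knowledge base without going through the multimodal layer
    return await lightrag_instance.aquery(
        "Summarize the documents ingested so far",  # placeholder question
        param=QueryParam(mode="hybrid"),  # same retrieval mode used above
    )
```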
examples/modalprocessors_example.py
CHANGED
@@ -9,6 +9,7 @@ import argparse
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from lightrag import LightRAG
+from lightrag.utils import EmbeddingFunc
 from raganything.modalprocessors import (
     ImageModalProcessor,
     TableModalProcessor,
@@ -165,11 +166,15 @@ async def process_equation_example(lightrag: LightRAG, llm_model_func):
 async def initialize_rag(api_key: str, base_url: str = None):
     rag = LightRAG(
         working_dir=WORKING_DIR,
-        embedding_func=
-        [4 more deleted lines not rendered in this view]
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
         ),
         llm_model_func=lambda prompt,
         system_prompt=None,
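The added `EmbeddingFunc` wrapper pairs a declared vector dimension and token budget with the callable that produces the embeddings. A small offline sanity check, assuming `EmbeddingFunc` is awaitable and simply delegates to `func`; the `fake_embed` stub is a hypothetical stand-in for `openai_embed`:

```python
import asyncio
import numpy as np
from lightrag.utils import EmbeddingFunc

# Deterministic stub standing in for openai_embed so the wiring can be
# exercised without an API key (hypothetical helper, not part of this commit)
async def fake_embed(texts: list[str]) -> np.ndarray:
    return np.zeros((len(texts), 3072))

embedding_func = EmbeddingFunc(
    embedding_dim=3072,
    max_token_size=8192,
    func=fake_embed,
)

async def main():
    vectors = await embedding_func(["hello", "world"])
    # The declared dimension should match what the callable actually returns
    assert vectors.shape == (2, embedding_func.embedding_dim)

asyncio.run(main())
```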
examples/raganything_example.py
CHANGED
@@ -12,6 +12,7 @@ import os
 import argparse
 import asyncio
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
 from raganything.raganything import RAGAnything
 
 
@@ -89,14 +90,16 @@ async def process_with_rag(
             base_url=base_url,
             **kwargs,
         ),
-        embedding_func=
-        [4 more deleted lines not rendered in this view]
+        embedding_func=EmbeddingFunc(
+            embedding_dim=3072,
+            max_token_size=8192,
+            func=lambda texts: openai_embed(
+                texts,
+                model="text-embedding-3-large",
+                api_key=api_key,
+                base_url=base_url,
+            ),
         ),
-        embedding_dim=3072,
-        max_token_size=8192,
     )
 
     # Process document
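With the embedding function wired the same way as in the README snippet, the example's end-to-end flow reduces to the two `RAGAnything` calls shown in the README diff above. A condensed sketch; the file path and question are placeholders:

```python
from raganything.raganything import RAGAnything

async def round_trip(rag: RAGAnything) -> None:
    # Parse the document and ingest its text and multimodal content
    await rag.process_document_complete(
        file_path="path/to/document.pdf",  # placeholder path
        output_dir="./output",
    )
    # Ask a question over the freshly ingested content
    answer = await rag.query_with_multimodal(
        "What are the main findings of this document?",  # placeholder question
        mode="hybrid",
    )
    print(answer)
```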