File size: 6,693 Bytes
60e1dab d21ad66 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 60e1dab 120e951 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
"""
Example of directly using modal processors
This example demonstrates how to use LightRAG's modal processors directly without going through MinerU.
"""
import asyncio
import argparse
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag import LightRAG
from raganything.modalprocessors import (
ImageModalProcessor,
TableModalProcessor,
EquationModalProcessor,
)
# Default working directory: passed to LightRAG as `working_dir` and used as
# the default value of the `--working-dir` command-line option.
WORKING_DIR = "./rag_storage"
def get_llm_model_func(api_key: str, base_url: str = None):
    """Build a text-completion callable bound to the given API credentials.

    Args:
        api_key: API key forwarded to ``openai_complete_if_cache``.
        base_url: Optional API base URL, forwarded unchanged (``None`` is
            passed through as-is).

    Returns:
        A callable ``(prompt, system_prompt=None, history_messages=None,
        **kwargs)`` that forwards its arguments to
        ``openai_complete_if_cache`` with the ``"gpt-4o-mini"`` model and
        returns that call's result without awaiting it.
    """

    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # A `[]` default would be one list object shared by every call;
        # default to None and hand the backend a fresh list each time.
        if history_messages is None:
            history_messages = []
        return openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    return llm_model_func
def get_vision_model_func(api_key: str, base_url: str = None):
    """Build a completion callable that can optionally attach an image.

    Args:
        api_key: API key forwarded to ``openai_complete_if_cache``.
        base_url: Optional API base URL, forwarded unchanged.

    Returns:
        A callable ``(prompt, system_prompt=None, history_messages=None,
        image_data=None, **kwargs)``:

        * With a truthy ``image_data`` it calls ``openai_complete_if_cache``
          with the ``"gpt-4o"`` model and an explicit ``messages`` list holding
          the optional system message and one user message made of the prompt
          text plus the image. ``history_messages`` is not forwarded on this
          path (an empty list is sent).
        * Otherwise it behaves like a plain text completion on
          ``"gpt-4o-mini"``.

        In both cases the backend call's result is returned without being
        awaited.
    """

    def vision_model_func(
        prompt,
        system_prompt=None,
        history_messages=None,
        image_data=None,
        **kwargs,
    ):
        if not image_data:
            # Text-only request: forward everything to the small model.
            # A fresh list replaces the former shared `[]` default.
            return openai_complete_if_cache(
                "gpt-4o-mini",
                prompt,
                system_prompt=system_prompt,
                history_messages=[] if history_messages is None else history_messages,
                api_key=api_key,
                base_url=base_url,
                **kwargs,
            )

        # Only include the system message when there is one; a literal None
        # entry in `messages` is not a valid chat message.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        # image_data is interpolated into a base64 JPEG data
                        # URL, so it is presumably a base64 string - confirm
                        # against the caller.
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_data}"
                        },
                    },
                ],
            }
        )

        # The prompt and system prompt travel inside `messages`, so the
        # positional prompt and system_prompt arguments are left empty.
        return openai_complete_if_cache(
            "gpt-4o",
            "",
            system_prompt=None,
            history_messages=[],
            messages=messages,
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    return vision_model_func
async def process_image_example(lightrag: LightRAG, vision_model_func):
    """Run the image modal processor on a sample payload and print the result.

    Args:
        lightrag: LightRAG instance handed to ``ImageModalProcessor``.
        vision_model_func: Callable passed as the processor's
            ``modal_caption_func``.
    """
    processor = ImageModalProcessor(
        modal_caption_func=vision_model_func,
        lightrag=lightrag,
    )

    # Sample payload: an image path with its caption and footnote lists.
    sample_image = dict(
        img_path="image.jpg",
        img_caption=["Example image caption"],
        img_footnote=["Example image footnote"],
    )

    result = await processor.process_multimodal_content(
        modal_content=sample_image,
        content_type="image",
        file_path="image_example.jpg",
        entity_name="Example Image",
    )
    description, entity_info = result

    report = (
        "Image Processing Results:",
        f"Description: {description}",
        f"Entity Info: {entity_info}",
    )
    for line in report:
        print(line)
async def process_table_example(lightrag: LightRAG, llm_model_func):
    """Run the table modal processor on a sample table and print the result.

    Args:
        lightrag: LightRAG instance handed to ``TableModalProcessor``.
        llm_model_func: Callable passed as the processor's
            ``modal_caption_func``.
    """
    processor = TableModalProcessor(
        modal_caption_func=llm_model_func,
        lightrag=lightrag,
    )

    # Markdown table body, kept with its leading and trailing newline.
    table_body = (
        "\n"
        "| Name | Age | Occupation |\n"
        "|------|-----|------------|\n"
        "| John | 25 | Engineer |\n"
        "| Mary | 30 | Designer |\n"
    )
    sample_table = dict(
        table_body=table_body,
        table_caption=["Employee Information Table"],
        table_footnote=["Data updated as of 2024"],
    )

    result = await processor.process_multimodal_content(
        modal_content=sample_table,
        content_type="table",
        file_path="table_example.md",
        entity_name="Employee Table",
    )
    description, entity_info = result

    report = (
        "\nTable Processing Results:",
        f"Description: {description}",
        f"Entity Info: {entity_info}",
    )
    for line in report:
        print(line)
async def process_equation_example(lightrag: LightRAG, llm_model_func):
    """Run the equation modal processor on a sample equation and print the result.

    Args:
        lightrag: LightRAG instance handed to ``EquationModalProcessor``.
        llm_model_func: Callable passed as the processor's
            ``modal_caption_func``.
    """
    processor = EquationModalProcessor(
        modal_caption_func=llm_model_func,
        lightrag=lightrag,
    )

    # Sample payload: the equation text and the format label it is given in.
    sample_equation = dict(text="E = mc^2", text_format="LaTeX")

    result = await processor.process_multimodal_content(
        modal_content=sample_equation,
        content_type="equation",
        file_path="equation_example.txt",
        entity_name="Mass-Energy Equivalence",
    )
    description, entity_info = result

    report = (
        "\nEquation Processing Results:",
        f"Description: {description}",
        f"Entity Info: {entity_info}",
    )
    for line in report:
        print(line)
async def initialize_rag(api_key: str, base_url: str = None):
    """Create a LightRAG instance backed by OpenAI models and initialize it.

    Args:
        api_key: API key used for both the embedding and completion calls.
        base_url: Optional API base URL, forwarded unchanged to both calls.

    Returns:
        The ``LightRAG`` instance, after ``initialize_storages()`` and
        ``initialize_pipeline_status()`` have been awaited.
    """
    # Function-scope import keeps this block self-contained; `lightrag` is
    # already a dependency of this file.
    from lightrag.utils import EmbeddingFunc

    def embed(texts):
        # Returns the un-awaited result of openai_embed, as the former
        # lambda did.
        return openai_embed(
            texts,
            model="text-embedding-3-large",
            api_key=api_key,
            base_url=base_url,
        )

    rag = LightRAG(
        working_dir=WORKING_DIR,
        # LightRAG's documented pattern wraps custom embedding callables in
        # EmbeddingFunc so the embedding dimension travels with the function;
        # a bare lambda carries no `embedding_dim`.
        # 3072 is the default output size of text-embedding-3-large.
        embedding_func=EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=embed,
        ),
        # Reuse the shared factory rather than duplicating its lambda here.
        llm_model_func=get_llm_model_func(api_key, base_url),
    )

    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag
def main():
    """Parse command-line arguments and run the modal-processor examples.

    Options:
        --api-key: required API key.
        --base-url: optional API base URL.
        --working-dir / -w: working directory, defaulting to ``WORKING_DIR``.
    """
    global WORKING_DIR

    parser = argparse.ArgumentParser(description="Modal Processors Example")
    parser.add_argument("--api-key", required=True, help="OpenAI API key")
    parser.add_argument("--base-url", help="Optional base URL for API")
    parser.add_argument(
        "--working-dir", "-w", default=WORKING_DIR, help="Working directory path"
    )
    args = parser.parse_args()

    # initialize_rag() reads the module-level WORKING_DIR when it runs, so
    # publish the CLI choice there; otherwise --working-dir would be parsed
    # but silently ignored.
    WORKING_DIR = args.working_dir

    # Run examples
    asyncio.run(main_async(args.api_key, args.base_url))
async def main_async(api_key: str, base_url: str = None):
    """Initialize LightRAG, then run the image, table and equation examples.

    Args:
        api_key: API key forwarded to the RAG setup and both model factories.
        base_url: Optional API base URL forwarded alongside the key.
    """
    rag = await initialize_rag(api_key, base_url)

    # One text-only model callable and one image-capable callable.
    text_model = get_llm_model_func(api_key, base_url)
    vision_model = get_vision_model_func(api_key, base_url)

    await process_image_example(rag, vision_model)
    # The table and equation examples both use the text-only model.
    for run_example in (process_table_example, process_equation_example):
        await run_example(rag, text_model)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()
|