Commit
·
d6fd606
1
Parent(s):
f353888
Enrich README.md for postgres usage, make some change to cater python version<12
Browse files- README.md +1 -0
- examples/copy_postgres_llm_cache_to_json.py +66 -0
- lightrag/kg/postgres_impl.py +14 -1
README.md
CHANGED
|
@@ -360,6 +360,7 @@ see test_neo4j.py for a working example.
|
|
| 360 |
### Using PostgreSQL for Storage
|
| 361 |
For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE).
|
| 362 |
* PostgreSQL is lightweight,the whole binary distribution including all necessary plugins can be zipped to 40MB: Ref to [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0) as it is easy to install for Linux/Mac.
|
|
|
|
| 363 |
* How to start? Ref to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
|
| 364 |
* Create index for AGE example: (Change below `dickens` to your graph name if necessary)
|
| 365 |
```
|
|
|
|
| 360 |
### Using PostgreSQL for Storage
|
| 361 |
For production level scenarios you will most likely want to leverage an enterprise solution. PostgreSQL can provide a one-stop solution for you as KV store, VectorDB (pgvector) and GraphDB (apache AGE).
|
| 362 |
* PostgreSQL is lightweight,the whole binary distribution including all necessary plugins can be zipped to 40MB: Ref to [Windows Release](https://github.com/ShanGor/apache-age-windows/releases/tag/PG17%2Fv1.5.0-rc0) as it is easy to install for Linux/Mac.
|
| 363 |
+
* If you prefer docker, please start with this image if you are a beginner to avoid hiccups (DO read the overview): https://hub.docker.com/r/shangor/postgres-for-rag
|
| 364 |
* How to start? Ref to: [examples/lightrag_zhipu_postgres_demo.py](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_zhipu_postgres_demo.py)
|
| 365 |
* Create index for AGE example: (Change below `dickens` to your graph name if necessary)
|
| 366 |
```
|
examples/copy_postgres_llm_cache_to_json.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import logging
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
from lightrag.kg.postgres_impl import PostgreSQLDB, PGKVStorage
|
| 7 |
+
from lightrag.storage import JsonKVStorage
|
| 8 |
+
|
| 9 |
+
load_dotenv()
|
| 10 |
+
ROOT_DIR = os.environ.get("ROOT_DIR")
|
| 11 |
+
WORKING_DIR = f"{ROOT_DIR}/dickens-pg"
|
| 12 |
+
|
| 13 |
+
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
| 14 |
+
|
| 15 |
+
if not os.path.exists(WORKING_DIR):
|
| 16 |
+
os.mkdir(WORKING_DIR)
|
| 17 |
+
|
| 18 |
+
# AGE
|
| 19 |
+
os.environ["AGE_GRAPH_NAME"] = "chinese"
|
| 20 |
+
|
| 21 |
+
postgres_db = PostgreSQLDB(
|
| 22 |
+
config={
|
| 23 |
+
"host": "localhost",
|
| 24 |
+
"port": 15432,
|
| 25 |
+
"user": "rag",
|
| 26 |
+
"password": "rag",
|
| 27 |
+
"database": "r1",
|
| 28 |
+
}
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
async def main():
|
| 33 |
+
await postgres_db.initdb()
|
| 34 |
+
|
| 35 |
+
from_llm_response_cache = PGKVStorage(
|
| 36 |
+
namespace="llm_response_cache",
|
| 37 |
+
global_config={"embedding_batch_num": 6},
|
| 38 |
+
embedding_func=None,
|
| 39 |
+
db=postgres_db,
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
to_llm_response_cache = JsonKVStorage(
|
| 43 |
+
namespace="llm_response_cache",
|
| 44 |
+
global_config={"working_dir": WORKING_DIR},
|
| 45 |
+
embedding_func=None,
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
kv = {}
|
| 49 |
+
for c_id in await from_llm_response_cache.all_keys():
|
| 50 |
+
print(f"Copying {c_id}")
|
| 51 |
+
workspace = c_id["workspace"]
|
| 52 |
+
mode = c_id["mode"]
|
| 53 |
+
_id = c_id["id"]
|
| 54 |
+
postgres_db.workspace = workspace
|
| 55 |
+
obj = await from_llm_response_cache.get_by_mode_and_id(mode, _id)
|
| 56 |
+
if mode not in kv:
|
| 57 |
+
kv[mode] = {}
|
| 58 |
+
kv[mode][_id] = obj[_id]
|
| 59 |
+
print(f"Object {obj}")
|
| 60 |
+
await to_llm_response_cache.upsert(kv)
|
| 61 |
+
await to_llm_response_cache.index_done_callback()
|
| 62 |
+
print("Mission accomplished!")
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
if __name__ == "__main__":
|
| 66 |
+
asyncio.run(main())
|
lightrag/kg/postgres_impl.py
CHANGED
|
@@ -231,6 +231,16 @@ class PGKVStorage(BaseKVStorage):
|
|
| 231 |
else:
|
| 232 |
return None
|
| 233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
async def filter_keys(self, keys: List[str]) -> Set[str]:
|
| 235 |
"""Filter out duplicated content"""
|
| 236 |
sql = SQL_TEMPLATES["filter_keys"].format(
|
|
@@ -412,7 +422,10 @@ class PGDocStatusStorage(DocStatusStorage):
|
|
| 412 |
|
| 413 |
async def filter_keys(self, data: list[str]) -> set[str]:
|
| 414 |
"""Return keys that don't exist in storage"""
|
| 415 |
-
|
|
|
|
|
|
|
|
|
|
| 416 |
result = await self.db.query(sql, {"workspace": self.db.workspace}, True)
|
| 417 |
# The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
|
| 418 |
if result is None:
|
|
|
|
| 231 |
else:
|
| 232 |
return None
|
| 233 |
|
| 234 |
+
async def all_keys(self) -> list[dict]:
|
| 235 |
+
if "llm_response_cache" == self.namespace:
|
| 236 |
+
sql = "select workspace,mode,id from lightrag_llm_cache"
|
| 237 |
+
res = await self.db.query(sql, multirows=True)
|
| 238 |
+
return res
|
| 239 |
+
else:
|
| 240 |
+
logger.error(
|
| 241 |
+
f"all_keys is only implemented for llm_response_cache, not for {self.namespace}"
|
| 242 |
+
)
|
| 243 |
+
|
| 244 |
async def filter_keys(self, keys: List[str]) -> Set[str]:
|
| 245 |
"""Filter out duplicated content"""
|
| 246 |
sql = SQL_TEMPLATES["filter_keys"].format(
|
|
|
|
| 422 |
|
| 423 |
async def filter_keys(self, data: list[str]) -> set[str]:
|
| 424 |
"""Return keys that don't exist in storage"""
|
| 425 |
+
keys = ",".join([f"'{_id}'" for _id in data])
|
| 426 |
+
sql = (
|
| 427 |
+
f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace=$1 AND id IN ({keys})"
|
| 428 |
+
)
|
| 429 |
result = await self.db.query(sql, {"workspace": self.db.workspace}, True)
|
| 430 |
# The result is like [{'id': 'id1'}, {'id': 'id2'}, ...].
|
| 431 |
if result is None:
|