yangdx
		
	committed on
		
		
					Commit 
							
							·
						
						6c5483b
	
1
								Parent(s):
							
							f28e08e
								
Improve logging message clarity by including max tokens info for truncation
Browse files
- lightrag/operate.py +15 -8
    	
        lightrag/operate.py
    CHANGED
    
    | @@ -642,7 +642,6 @@ async def kg_query( | |
| 642 | 
             
                    history=history_context,
         | 
| 643 | 
             
                )
         | 
| 644 |  | 
| 645 | 
            -
             | 
| 646 | 
             
                if query_param.only_need_prompt:
         | 
| 647 | 
             
                    return sys_prompt
         | 
| 648 |  | 
| @@ -901,7 +900,7 @@ async def mix_kg_vector_query( | |
| 901 | 
             
                            formatted_chunks.append(chunk_text)
         | 
| 902 |  | 
| 903 | 
             
                        logger.info(
         | 
| 904 | 
            -
                            f"Truncate  | 
| 905 | 
             
                        )
         | 
| 906 | 
             
                        return "\n--New Chunk--\n".join(formatted_chunks)
         | 
| 907 | 
             
                    except Exception as e:
         | 
| @@ -1244,7 +1243,7 @@ async def _find_most_related_text_unit_from_entities( | |
| 1244 | 
             
                )
         | 
| 1245 |  | 
| 1246 | 
             
                logger.info(
         | 
| 1247 | 
            -
                    f"Truncate  | 
| 1248 | 
             
                )
         | 
| 1249 |  | 
| 1250 | 
             
                all_text_units = [t["data"] for t in all_text_units]
         | 
| @@ -1289,7 +1288,9 @@ async def _find_most_related_edges_from_entities( | |
| 1289 | 
             
                    max_token_size=query_param.max_token_for_global_context,
         | 
| 1290 | 
             
                )
         | 
| 1291 |  | 
| 1292 | 
            -
                logger.info( | 
|  | |
|  | |
| 1293 |  | 
| 1294 | 
             
                return all_edges_data
         | 
| 1295 |  | 
| @@ -1344,7 +1345,9 @@ async def _get_edge_data( | |
| 1344 | 
             
                    key=lambda x: x["description"],
         | 
| 1345 | 
             
                    max_token_size=query_param.max_token_for_global_context,
         | 
| 1346 | 
             
                )
         | 
| 1347 | 
            -
                logger.info( | 
|  | |
|  | |
| 1348 |  | 
| 1349 | 
             
                use_entities, use_text_units = await asyncio.gather(
         | 
| 1350 | 
             
                    _find_most_related_entities_from_relationships(
         | 
| @@ -1450,7 +1453,9 @@ async def _find_most_related_entities_from_relationships( | |
| 1450 | 
             
                    key=lambda x: x["description"],
         | 
| 1451 | 
             
                    max_token_size=query_param.max_token_for_local_context,
         | 
| 1452 | 
             
                )
         | 
| 1453 | 
            -
                logger.info( | 
|  | |
|  | |
| 1454 |  | 
| 1455 | 
             
                return node_datas
         | 
| 1456 |  | 
| @@ -1507,7 +1512,7 @@ async def _find_related_text_unit_from_relationships( | |
| 1507 | 
             
                )
         | 
| 1508 |  | 
| 1509 | 
             
                logger.info(
         | 
| 1510 | 
            -
                    f"Truncate  | 
| 1511 | 
             
                )
         | 
| 1512 |  | 
| 1513 | 
             
                all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
         | 
| @@ -1577,7 +1582,9 @@ async def naive_query( | |
| 1577 | 
             
                    logger.warning("No chunks left after truncation")
         | 
| 1578 | 
             
                    return PROMPTS["fail_response"]
         | 
| 1579 |  | 
| 1580 | 
            -
                logger.info( | 
|  | |
|  | |
| 1581 |  | 
| 1582 | 
             
                section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
         | 
| 1583 |  | 
|  | |
| 642 | 
             
                    history=history_context,
         | 
| 643 | 
             
                )
         | 
| 644 |  | 
|  | |
| 645 | 
             
                if query_param.only_need_prompt:
         | 
| 646 | 
             
                    return sys_prompt
         | 
| 647 |  | 
|  | |
| 900 | 
             
                            formatted_chunks.append(chunk_text)
         | 
| 901 |  | 
| 902 | 
             
                        logger.info(
         | 
| 903 | 
            +
                            f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
         | 
| 904 | 
             
                        )
         | 
| 905 | 
             
                        return "\n--New Chunk--\n".join(formatted_chunks)
         | 
| 906 | 
             
                    except Exception as e:
         | 
|  | |
| 1243 | 
             
                )
         | 
| 1244 |  | 
| 1245 | 
             
                logger.info(
         | 
| 1246 | 
            +
                    f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
         | 
| 1247 | 
             
                )
         | 
| 1248 |  | 
| 1249 | 
             
                all_text_units = [t["data"] for t in all_text_units]
         | 
|  | |
| 1288 | 
             
                    max_token_size=query_param.max_token_for_global_context,
         | 
| 1289 | 
             
                )
         | 
| 1290 |  | 
| 1291 | 
            +
                logger.info(
         | 
| 1292 | 
            +
                    f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
         | 
| 1293 | 
            +
                )
         | 
| 1294 |  | 
| 1295 | 
             
                return all_edges_data
         | 
| 1296 |  | 
|  | |
| 1345 | 
             
                    key=lambda x: x["description"],
         | 
| 1346 | 
             
                    max_token_size=query_param.max_token_for_global_context,
         | 
| 1347 | 
             
                )
         | 
| 1348 | 
            +
                logger.info(
         | 
| 1349 | 
            +
                    f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})"
         | 
| 1350 | 
            +
                )
         | 
| 1351 |  | 
| 1352 | 
             
                use_entities, use_text_units = await asyncio.gather(
         | 
| 1353 | 
             
                    _find_most_related_entities_from_relationships(
         | 
|  | |
| 1453 | 
             
                    key=lambda x: x["description"],
         | 
| 1454 | 
             
                    max_token_size=query_param.max_token_for_local_context,
         | 
| 1455 | 
             
                )
         | 
| 1456 | 
            +
                logger.info(
         | 
| 1457 | 
            +
                    f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
         | 
| 1458 | 
            +
                )
         | 
| 1459 |  | 
| 1460 | 
             
                return node_datas
         | 
| 1461 |  | 
|  | |
| 1512 | 
             
                )
         | 
| 1513 |  | 
| 1514 | 
             
                logger.info(
         | 
| 1515 | 
            +
                    f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
         | 
| 1516 | 
             
                )
         | 
| 1517 |  | 
| 1518 | 
             
                all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
         | 
|  | |
| 1582 | 
             
                    logger.warning("No chunks left after truncation")
         | 
| 1583 | 
             
                    return PROMPTS["fail_response"]
         | 
| 1584 |  | 
| 1585 | 
            +
                logger.info(
         | 
| 1586 | 
            +
                    f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
         | 
| 1587 | 
            +
                )
         | 
| 1588 |  | 
| 1589 | 
             
                section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
         | 
| 1590 |  | 
