Spaces:
Paused
Paused
#
#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
""" | |
Reference: | |
- [graphrag](https://github.com/microsoft/graphrag) | |
""" | |
import html | |
import re | |
from collections.abc import Callable | |
from typing import Any | |
# Type of an error-handling callback: it is called with three positional
# arguments (an optional exception, an optional string and an optional dict)
# and returns nothing.
# NOTE(review): the string is presumably the formatted stack trace and the dict
# extra error details, as in the graphrag reference - confirm against callers.
ErrorHandlerFn = Callable[[BaseException | None, str | None, dict | None], None]
def perform_variable_replacements( | |
input: str, history: list[dict]=[], variables: dict | None ={} | |
) -> str: | |
"""Perform variable replacements on the input string and in a chat log.""" | |
result = input | |
def replace_all(input: str) -> str: | |
result = input | |
if variables: | |
for entry in variables: | |
result = result.replace(f"{{{entry}}}", variables[entry]) | |
return result | |
result = replace_all(result) | |
for i in range(len(history)): | |
entry = history[i] | |
if entry.get("role") == "system": | |
history[i]["content"] = replace_all(entry.get("content") or "") | |
return result | |
def clean_str(input: Any) -> str:
    """Return a sanitized copy of a string; pass any other value through.

    For string input, surrounding whitespace is stripped, HTML character
    references are unescaped, and then every double quote and every control
    character (U+0000-U+001F and U+007F-U+009F) is removed. Non-string input
    is returned unchanged.
    """
    if isinstance(input, str):
        unescaped = html.unescape(input.strip())
        # Character class taken from:
        # https://stackoverflow.com/questions/4324790/removing-control-characters-from-a-string-in-python
        return re.sub(r"[\"\x00-\x1f\x7f-\x9f]", "", unescaped)

    # Not a string: hand the value back untouched.
    return input
def dict_has_keys_with_types(
    data: dict, expected_fields: list[tuple[str, type]]
) -> bool:
    """Check that ``data`` holds every expected key with a value of the expected type.

    Each item of ``expected_fields`` is a ``(key, type)`` pair. The check uses
    ``isinstance``, so instances of subclasses are accepted. An empty
    ``expected_fields`` list yields ``True``.
    """
    # all() stops at the first pair that is missing or has the wrong type.
    return all(
        key in data and isinstance(data[key], expected_type)
        for key, expected_type in expected_fields
    )