Spaces:
Running
Running
| """ | |
| URL处理辅助函数 | |
| 提供URL解析和路径提取功能,用于导航验证中的域名无关匹配。 | |
| """ | |
| from urllib.parse import urlparse | |
def extract_url_path(url: str) -> str:
    """Return the domain-independent part of *url*.

    Scheme and host are discarded so that two URLs pointing at the same page
    on different domains (for example after a redirect) compare equal.

    Args:
        url: Full URL string.

    Returns:
        Path, then ``;params`` (if any), then ``?query`` (if any), then
        ``#fragment`` (if any), e.g. ``"/apps/drive/123?param=value#section"``.
        An empty string is returned when *url* is empty or cannot be parsed.

    Examples:
        >>> extract_url_path("https://ai.studio/apps/drive/123?param=value")
        '/apps/drive/123?param=value'
        >>> extract_url_path("https://aistudio.google.com/apps/drive/123")
        '/apps/drive/123'
        >>> extract_url_path("https://example.com/path")
        '/path'
    """
    if not url:
        return ""

    try:
        parsed = urlparse(url)
        pieces = [parsed.path]
        # urlparse() splits ";params" off the last path segment; put it back
        # so URLs that differ only in path parameters do not compare equal.
        if parsed.params:
            pieces.append(";" + parsed.params)
        if parsed.query:
            pieces.append("?" + parsed.query)
        if parsed.fragment:
            pieces.append("#" + parsed.fragment)
        return "".join(pieces)
    except Exception:
        # Best effort: a malformed URL (or non-string input) yields "".
        return ""
def mask_path_for_logging(path: str) -> str:
    """Mask the drive ID in a URL path so it is safe to write to logs.

    Masking rules:
        1. For ``/apps/drive/<ID>`` paths whose ID is longer than 8
           characters, keep the first 4 and last 4 characters of the ID and
           replace the middle with ``***``.
        2. Any other path is returned unchanged.

    A trailing ``?query`` and/or ``#fragment`` is left untouched and is not
    counted as part of the ID.

    Args:
        path: URL path string, optionally followed by a query string and/or
            a fragment.

    Returns:
        The masked path, or an empty string when *path* is empty.

    Examples:
        >>> mask_path_for_logging("/apps/drive/abcdef123456")
        '/apps/drive/abcd***3456'
        >>> mask_path_for_logging("/apps/drive/xyz789")
        '/apps/drive/xyz789'
        >>> mask_path_for_logging("/other/path")
        '/other/path'
    """
    if not path:
        return ""

    if not path.startswith("/apps/drive/"):
        return path

    # Separate the query/fragment suffix first, so it neither inflates the
    # ID length nor ends up sliced into the masked ID.
    cut = len(path)
    for separator in ("?", "#"):
        position = path.find(separator)
        if position != -1:
            cut = min(cut, position)
    bare_path, suffix = path[:cut], path[cut:]

    # The prefix check guarantees at least ['', 'apps', 'drive', <ID>].
    segments = bare_path.split("/")
    drive_id = segments[3]
    if len(drive_id) <= 8:
        # Too short to mask meaningfully; return the input as-is.
        return path

    segments[3] = f"{drive_id[:4]}***{drive_id[-4:]}"
    return "/".join(segments) + suffix
def mask_url_for_logging(url: str) -> str:
    """Mask the drive ID in a URL so it is safe to write to logs.

    Masking rules:
        1. For URLs whose path is ``/apps/drive/<ID>`` with an ID longer than
           8 characters, keep the first 4 and last 4 characters of the ID and
           replace the middle with ``***``.
        2. Any other URL is returned unchanged.

    Scheme, host, path parameters, query and fragment are preserved; only
    the ID segment of the path is rewritten.

    Args:
        url: Full URL string. Relative (``/apps/drive/...``) and
            protocol-relative (``//host/apps/drive/...``) forms are accepted.

    Returns:
        The masked URL, an empty string when *url* is empty, or *url* itself
        when it cannot be parsed.

    Examples:
        >>> mask_url_for_logging("https://ai.studio/apps/drive/abcdef123456")
        'https://ai.studio/apps/drive/abcd***3456'
        >>> mask_url_for_logging("https://aistudio.google.com/apps/drive/xyz789")
        'https://aistudio.google.com/apps/drive/xyz789'
        >>> mask_url_for_logging("https://example.com/other/path")
        'https://example.com/other/path'
    """
    if not url:
        return ""

    try:
        parsed = urlparse(url)
        if not parsed.path.startswith("/apps/drive/"):
            return url

        # The prefix check guarantees at least ['', 'apps', 'drive', <ID>].
        segments = parsed.path.split("/")
        drive_id = segments[3]
        if len(drive_id) <= 8:
            # Too short to mask meaningfully; return the input as-is.
            return url

        segments[3] = f"{drive_id[:4]}***{drive_id[-4:]}"
        # Rebuild through the parse result rather than by hand: this keeps
        # ";params" and does not emit a stray "://" when the scheme or host
        # is absent.
        return parsed._replace(path="/".join(segments)).geturl()
    except Exception:
        # Logging helpers must never raise; fall back to the raw input.
        return url