AIStudioBuildWS / utils /url_helper.py
hkfires's picture
fix(browser): mask URLs in navigation logs and clean messages
2011b89 verified
"""
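URL helper functions.
Provides URL parsing and path extraction for domain-independent matching
during navigation checks, plus masking of /apps/drive/ IDs for log output.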
"""
from urllib.parse import urlparse
def extract_url_path(url: str) -> str:
    """
    Return the host-independent part of a URL.

    The scheme and domain are discarded so that two URLs pointing at the
    same page on different hosts (e.g. after a domain redirect) compare
    equal. Everything after the host is kept: path, path parameters
    (``;params``), query string and fragment.

    Args:
        url: Full URL string.

    Returns:
        Path + parameters + query + fragment, for example
        "/apps/drive/123?param=value#section". An empty string is returned
        when ``url`` is empty or cannot be parsed.

    Examples:
        >>> extract_url_path("https://ai.studio/apps/drive/123?param=value")
        '/apps/drive/123?param=value'
        >>> extract_url_path("https://aistudio.google.com/apps/drive/123")
        '/apps/drive/123'
        >>> extract_url_path("https://example.com/path")
        '/path'
        >>> extract_url_path("https://example.com/a/b;v=1?x=2#f")
        '/a/b;v=1?x=2#f'
    """
    if not url:
        return ""

    try:
        parsed = urlparse(url)
        pieces = [parsed.path]
        # urlparse() splits ";params" off the last path segment; put it back
        # so URLs that differ only in path parameters do not compare equal.
        if parsed.params:
            pieces.append(';' + parsed.params)
        if parsed.query:
            pieces.append('?' + parsed.query)
        if parsed.fragment:
            pieces.append('#' + parsed.fragment)
        return ''.join(pieces)
    except Exception:
        # Best effort: an unparseable URL yields an empty string rather
        # than an error for the caller.
        return ""
def mask_path_for_logging(path: str) -> str:
    """
    Mask the ID in a /apps/drive/<ID> path so it can be written to logs.

    Masking rules:
        1. For paths of the form /apps/drive/<ID>, an ID longer than
           8 characters keeps its first 4 and last 4 characters and has the
           middle replaced by "***".
        2. Any other path (or an ID of 8 characters or fewer) is returned
           unchanged.

    A trailing query string or fragment is not treated as part of the ID;
    it is split off before masking and appended back unchanged.

    Args:
        path: URL path string, optionally followed by "?query" and/or
            "#fragment".

    Returns:
        The masked path string, or "" when ``path`` is empty.

    Examples:
        >>> mask_path_for_logging("/apps/drive/abcdef123456")
        '/apps/drive/abcd***3456'
        >>> mask_path_for_logging("/apps/drive/xyz789")
        '/apps/drive/xyz789'
        >>> mask_path_for_logging("/other/path")
        '/other/path'
        >>> mask_path_for_logging("/apps/drive/abcdef123456?param=value")
        '/apps/drive/abcd***3456?param=value'
    """
    if not path:
        return ""

    if not path.startswith('/apps/drive/'):
        return path

    # Find where the pure path ends (first '?' or '#'), so that a query or
    # fragment is never counted as part of the ID segment.
    delimiter_positions = [pos for pos in (path.find('?'), path.find('#')) if pos != -1]
    cut = min(delimiter_positions, default=len(path))
    pure_path, suffix = path[:cut], path[cut:]

    # The prefix check above guarantees at least ['', 'apps', 'drive', ID].
    segments = pure_path.split('/')
    drive_id = segments[3]
    if len(drive_id) <= 8:
        # Too short to mask in a meaningful way; return the input untouched.
        return path

    segments[3] = f"{drive_id[:4]}***{drive_id[-4:]}"
    return '/'.join(segments) + suffix
def mask_url_for_logging(url: str) -> str:
    """
    Mask the ID in a /apps/drive/<ID> URL so it can be written to logs.

    Masking rules:
        1. When the URL path has the form /apps/drive/<ID> and the ID is
           longer than 8 characters, the ID keeps its first 4 and last 4
           characters and has the middle replaced by "***".
        2. Any other URL (or an ID of 8 characters or fewer) is returned
           unchanged.

    Only the path component is rewritten; scheme, host, path parameters,
    query and fragment are carried over from the parsed URL. A URL without
    a scheme or host (e.g. "/apps/drive/<ID>") is masked without inventing
    a "://" prefix.

    Args:
        url: Full URL string.

    Returns:
        The masked URL string, "" when ``url`` is empty, or the original
        ``url`` when it cannot be parsed.

    Examples:
        >>> mask_url_for_logging("https://ai.studio/apps/drive/abcdef123456")
        'https://ai.studio/apps/drive/abcd***3456'
        >>> mask_url_for_logging("https://aistudio.google.com/apps/drive/xyz789")
        'https://aistudio.google.com/apps/drive/xyz789'
        >>> mask_url_for_logging("https://example.com/other/path")
        'https://example.com/other/path'
    """
    if not url:
        return ""

    try:
        parsed = urlparse(url)
        if not parsed.path.startswith('/apps/drive/'):
            return url

        # The prefix check above guarantees at least ['', 'apps', 'drive', ID].
        segments = parsed.path.split('/')
        drive_id = segments[3]
        if len(drive_id) <= 8:
            # Too short to mask in a meaningful way; return the input untouched.
            return url

        segments[3] = f"{drive_id[:4]}***{drive_id[-4:]}"
        # Swap only the path and let urllib reassemble the URL, so every
        # other component is preserved and no empty "://" is emitted for
        # scheme-less input.
        return parsed._replace(path='/'.join(segments)).geturl()
    except Exception:
        # Best effort: if the URL cannot be parsed, return it as given.
        return url