TranslateMarkdown / markdown_utils.py
Hisab Cloud
Upload 5 files
08b7f89 verified
raw
history blame contribute delete
No virus
1.7 kB
import os
import re
def parse_markdown_files(file_paths):
    """
    Reads markdown files so their content can be translated.

    Each file is opened as UTF-8 text and read in full.

    :param file_paths: List of markdown file paths. A single path (``str``,
        ``bytes`` or ``os.PathLike``) is also accepted and treated as a
        one-element list.
    :return: List of dictionaries, one per file in input order, with the keys
        ``'filename'`` (the path exactly as given) and ``'content'`` (the
        full text of the file)
    :raises OSError: If a file cannot be opened or read
    :raises UnicodeDecodeError: If a file is not valid UTF-8
    """
    # A bare string is itself iterable; without this guard every character
    # of the path would be opened as if it were a separate file.
    if isinstance(file_paths, (str, bytes, os.PathLike)):
        file_paths = [file_paths]

    parsed_files = []
    for path in file_paths:
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        parsed_files.append({'filename': path, 'content': content})
    return parsed_files
# Fenced code blocks: everything between a pair of triple backticks.
_CODE_BLOCK_RE = re.compile(r'```.*?```', re.DOTALL)
# HTML comments and real tags only. Requiring a letter after '<' (optionally
# preceded by '/' or '!') keeps prose such as "1 < 2 and 3 > 2" intact.
_HTML_TAG_RE = re.compile(r'<!--.*?-->|<[/!]?[A-Za-z][^<>]*>', re.DOTALL)
# Markdown links and images; group 1 is the visible text / alt text.
_LINK_RE = re.compile(r'!?\[(.*?)\]\(.*?\)')
# One or more blank (whitespace-only) lines separate paragraphs.
_PARAGRAPH_BREAK_RE = re.compile(r'\n\s*\n')


def extract_translatable_text(content):
    """
    Extracts translatable text from markdown content.

    Fenced code blocks and HTML tags/comments are removed. Markdown links and
    images are reduced to their visible text (the URL is dropped, the link
    text or alt text is kept). The remainder is split into paragraphs on
    blank lines.

    :param content: Markdown content; ``None`` or an empty string yields an
        empty list
    :return: List of non-empty translatable text segments, each stripped of
        surrounding whitespace, in document order
    """
    if not content:
        return []

    content = _CODE_BLOCK_RE.sub('', content)
    content = _HTML_TAG_RE.sub('', content)
    # Keep the human-readable label; only the link target is untranslatable.
    content = _LINK_RE.sub(r'\1', content)

    # Headers and paragraphs are both delimited by blank lines.
    segments = (part.strip() for part in _PARAGRAPH_BREAK_RE.split(content))
    return [segment for segment in segments if segment]
def save_translated_files(translated_files):
    """
    Saves translated files to the local machine.

    Each entry is written as UTF-8 text to its ``'filename'`` path,
    overwriting any existing file. Missing parent directories are created.

    :param translated_files: List of translated file data; each item is a
        dictionary with the keys ``'filename'`` (destination path) and
        ``'content'`` (text to write)
    :raises KeyError: If an item lacks ``'filename'`` or ``'content'``
    :raises OSError: If a directory or file cannot be created or written
    """
    for entry in translated_files:
        target = entry['filename']
        directory = os.path.dirname(target)
        # dirname() is '' for a bare filename (i.e. the current directory),
        # and os.makedirs('') raises, so only create a real directory part.
        # exist_ok avoids the check-then-create race of os.path.exists().
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(target, 'w', encoding='utf-8') as f:
            f.write(entry['content'])