# Spaces: Runtime error
import os | |
import re | |
def parse_markdown_files(file_paths):
    """
    Reads each markdown file and pairs its path with its full text.

    Files are opened one at a time, in the order given, as UTF-8 text.

    :param file_paths: List of markdown file paths
    :return: List of dictionaries, each with 'filename' (the path as passed in)
             and 'content' (the entire file text)
    """
    def _load(md_path):
        # One record per file; the handle is closed before the next file opens.
        with open(md_path, 'r', encoding='utf-8') as handle:
            return {'filename': md_path, 'content': handle.read()}

    return [_load(md_path) for md_path in file_paths]
def extract_translatable_text(content):
    """
    Extracts translatable text from markdown content.

    Fenced code blocks, HTML tags, and markdown links/images are stripped out
    first; the remaining text is split into segments on blank lines.

    :param content: Markdown content
    :return: List of translatable text segments (whitespace-trimmed, empty
             segments dropped)
    """
    code_block_pattern = re.compile(r'```.*?```', re.DOTALL)
    # NOTE: this removes any span from '<' to the next '>', even across lines;
    # only the tags themselves are dropped, text between tags is kept.
    html_block_pattern = re.compile(r'<.*?>', re.DOTALL)
    # The optional leading '!' makes image syntax ![alt](src) disappear as a
    # whole instead of leaving a stray '!' behind as a bogus segment.
    url_pattern = re.compile(r'!?\[.*?\]\(.*?\)')

    # Remove code blocks, HTML tags, and links/images, in that order.
    for pattern in (code_block_pattern, html_block_pattern, url_pattern):
        content = pattern.sub('', content)

    # Paragraphs and headers are separated by one or more blank lines.
    paragraphs = re.split(r'\n\s*\n', content)
    trimmed = (para.strip() for para in paragraphs)
    return [para for para in trimmed if para]
def save_translated_files(translated_files):
    """
    Saves translated files to the local machine.

    Each entry is written as UTF-8 text to the path in its 'filename' key;
    missing parent directories are created first.

    :param translated_files: List of dictionaries with 'filename' (destination
                             path) and 'content' (text to write)
    """
    for entry in translated_files:
        target_path = entry['filename']
        directory = os.path.dirname(target_path)
        # A bare filename has an empty dirname, and os.makedirs('') raises,
        # so only create directories when the path actually has one.
        if directory:
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(directory, exist_ok=True)
        with open(target_path, 'w', encoding='utf-8') as f:
            f.write(entry['content'])