Spaces:
Running
Running
| # ppt_objects.py | |
| from pptx import Presentation | |
| from pptx.enum.text import PP_ALIGN, MSO_ANCHOR | |
| from pptx.enum.shapes import MSO_SHAPE_TYPE | |
| import xml.etree.ElementTree as ET | |
| from pptx.util import Pt | |
| from pptx.dml.color import RGBColor | |
| import re | |
| import json | |
| from pymongo import MongoClient | |
| from gridfs import GridFS | |
| import json | |
| import xml.etree.ElementTree as ET | |
| from io import BytesIO | |
def apply_group_properties_recursive(shape, shape_index, parent_element):
    """Apply saved properties to the tables and text shapes inside a group.

    Args:
        shape: Shape to process. Anything whose ``shape_type`` is not
            ``MSO_SHAPE_TYPE.GROUP`` is ignored.
        shape_index: Index of ``shape`` within its parent; used to find the
            ``group_element`` carrying the same ``shape_index`` attribute.
        parent_element: XML element searched for that ``group_element``.

    Sub-shapes are matched to ``table_element`` / ``text_element`` nodes by
    their position inside the group. Their ``properties`` child holds a JSON
    document that is decoded and handed to ``apply_table_properties`` or
    ``apply_shape_properties``. Decoding problems are printed, not raised.
    """
    if shape.shape_type != MSO_SHAPE_TYPE.GROUP:
        return

    group_element = parent_element.find(
        f".//group_element[@shape_index='{shape_index}']"
    )
    if group_element is None:
        return

    for i, sub_shape in enumerate(shape.shapes):
        # Handles nested groups; returns immediately for any other shape.
        apply_group_properties_recursive(sub_shape, i, group_element)

        # Apply properties for sub-shapes WITHIN the group, based on their type.
        if sub_shape.shape_type == MSO_SHAPE_TYPE.TABLE:
            table_element = group_element.find(
                f".//table_element[@shape_index='{i}']"
            )
            # Compare with None explicitly: the truth value of an Element
            # depends on its child count and testing it is deprecated.
            if table_element is None:
                continue
            props_element = table_element.find("properties")
            if props_element is not None and props_element.text:
                try:
                    table_data = json.loads(props_element.text)
                    apply_table_properties(sub_shape.table, table_data)
                except (json.JSONDecodeError, KeyError) as e:
                    print(f"Error applying table properties (in group): {str(e)}")
        elif hasattr(sub_shape, "text_frame") and sub_shape.text_frame:
            text_element = group_element.find(
                f".//text_element[@shape_index='{i}']"
            )
            if text_element is None:
                continue
            props_element = text_element.find("properties")
            if props_element is not None and props_element.text:
                try:
                    shape_data = json.loads(props_element.text)
                    apply_shape_properties(sub_shape, shape_data)
                except (json.JSONDecodeError, KeyError) as e:
                    print(f"Error applying shape properties (in group): {str(e)}")
def get_alignment_value(alignment_str):
    """Convert a stored alignment string to a ``PP_ALIGN`` member.

    The leading run of letters is looked up case-insensitively, so values
    with trailing characters such as ``"CENTER (2)"`` are accepted. A dotted
    qualifier in front of the name (``"PP_ALIGN.CENTER"``) and leading
    whitespace are skipped.

    Args:
        alignment_str: Alignment text, or ``None`` when no alignment was
            saved.

    Returns:
        The matching ``PP_ALIGN`` member, or ``None`` when the input is not a
        string or does not name one of center/left/right/justify.
    """
    # Saved properties hold None for "no explicit alignment"; re.match would
    # raise TypeError on it, so answer None up front.
    if not isinstance(alignment_str, str):
        return None

    match = re.match(r"\s*(?:\w+\.)?([A-Za-z]+)", alignment_str)
    if match is None:
        return None

    alignment_map = {
        'center': PP_ALIGN.CENTER,
        'left': PP_ALIGN.LEFT,
        'right': PP_ALIGN.RIGHT,
        'justify': PP_ALIGN.JUSTIFY,
    }
    return alignment_map.get(match.group(1).lower())
def get_vertical_anchor(value):
    """Convert a stored vertical-anchor string to an ``MSO_ANCHOR`` member.

    Only the first whitespace-separated token is considered, compared
    case-insensitively, so ``"MIDDLE (4)"`` maps to ``MSO_ANCHOR.MIDDLE``.
    A dotted qualifier (``"MSO_ANCHOR.MIDDLE"``) is stripped first.

    Args:
        value: Anchor text, or ``None`` / empty when nothing was saved.

    Returns:
        ``MSO_ANCHOR.TOP``, ``MIDDLE`` or ``BOTTOM``; ``MSO_ANCHOR.TOP`` for
        anything unrecognised, including ``None`` and blank strings.
    """
    mapping = {
        "TOP": MSO_ANCHOR.TOP,
        "MIDDLE": MSO_ANCHOR.MIDDLE,
        "BOTTOM": MSO_ANCHOR.BOTTOM,
    }
    # None (no anchor saved) and blank strings used to raise
    # AttributeError / IndexError; fall back to the same default as unknown
    # names instead.
    if not isinstance(value, str):
        return MSO_ANCHOR.TOP
    tokens = value.upper().split()
    if not tokens:
        return MSO_ANCHOR.TOP
    name = tokens[0].rsplit(".", 1)[-1]
    return mapping.get(name, MSO_ANCHOR.TOP)
def get_table_properties(table):
    """Describe a table as plain data: its size plus per-cell formatting.

    Returns:
        A dict with ``rows`` and ``cols`` counts and ``cells``, a list of
        rows where each entry is a dict describing one cell: stripped text,
        the four margins, the vertical anchor as a string (or ``None``), and
        font size/name/colour and alignment. Font details come from the
        first run of the first paragraph only; ``bold`` and ``italic`` keys
        are added only when such a run exists.
    """
    summary = {
        'rows': len(table.rows),
        'cols': len(table.columns),
        'cells': []
    }

    for table_row in table.rows:
        described_cells = []
        for cell in table_row.cells:
            anchor = cell.vertical_anchor
            info = {
                'text': cell.text.strip(),
                'font_size': None,
                'font_name': None,
                'alignment': None,
                'margin_left': cell.margin_left,
                'margin_right': cell.margin_right,
                'margin_top': cell.margin_top,
                'margin_bottom': cell.margin_bottom,
                'vertical_anchor': str(anchor) if anchor else None,
                'font_color': None
            }

            paragraphs = cell.text_frame.paragraphs
            if paragraphs:
                first_paragraph = paragraphs[0]
                runs = first_paragraph.runs
                if runs:
                    font = runs[0].font

                    size = getattr(font, 'size', None)
                    if size is not None:
                        info['font_size'] = size.pt
                    if hasattr(font, 'name'):
                        info['font_name'] = font.name
                    if hasattr(font, 'bold'):
                        info['bold'] = font.bold
                    if hasattr(font, 'italic'):
                        info['italic'] = font.italic

                    # Only an explicit RGB colour is recorded; colour objects
                    # without a readable ``rgb`` attribute are skipped.
                    color = getattr(font, 'color', None)
                    rgb = getattr(color, 'rgb', None) if color is not None else None
                    if rgb is not None:
                        info['font_color'] = str(rgb)

                if hasattr(first_paragraph, 'alignment'):
                    alignment = first_paragraph.alignment
                    info['alignment'] = f"{alignment}" if alignment else None

            described_cells.append(info)
        summary['cells'].append(described_cells)

    return summary
def _describe_line_spacing(line_spacing):
    """Turn a truthy paragraph ``line_spacing`` into a rule/value dict."""
    if hasattr(line_spacing, 'pt'):
        # Length-like object (same ``.pt`` accessor as font sizes): store
        # points so the value survives JSON and can be passed back to Pt().
        return {'rule': "EXACTLY", 'value': line_spacing.pt}
    if isinstance(line_spacing, (int, float)):
        # Heuristic kept from the original code: a bare number above 10 is
        # taken to be a point size rather than a multiple of line height.
        rule = "EXACTLY" if line_spacing > 10 else "MULTIPLE"
        return {'rule': rule, 'value': line_spacing}
    return {'rule': "UNKNOWN", 'value': None}


def get_shape_properties(shape):
    """Extract geometry, text and basic text formatting from a shape.

    Args:
        shape: Shape exposing ``width``, ``height``, ``left`` and ``top``,
            and optionally ``text`` / ``text_frame``.

    Returns:
        A dict with the stripped text, the geometry, and font size (points),
        font name, bold, italic, RGB colour string, alignment, paragraph
        spacing (points) and ``line_spacing_info`` (``rule`` / ``value``).
        Font details are read from the first run of each paragraph; when a
        shape has several paragraphs, later ones overwrite earlier values.
    """
    shape_data = {
        'text': '',
        'font_size': None,
        'font_name': None,
        'alignment': None,
        'width': shape.width,
        'height': shape.height,
        'left': shape.left,
        'top': shape.top,
        'bold': None,
        'italic': None,
        'line_spacing_info': {
            'rule': None,
            'value': None
        },
        'space_before': None,
        'space_after': None,
        'font_color': None
    }

    if hasattr(shape, "text"):
        shape_data['text'] = shape.text.strip()

    if not hasattr(shape, 'text_frame'):
        return shape_data

    for paragraph in shape.text_frame.paragraphs:
        if paragraph.runs:
            # Assuming properties are mostly consistent in the first run.
            run = paragraph.runs[0]
            if hasattr(run.font, 'size') and run.font.size is not None:
                shape_data['font_size'] = run.font.size.pt
            if hasattr(run.font, 'name'):
                shape_data['font_name'] = run.font.name
            if hasattr(run.font, 'bold'):
                shape_data['bold'] = run.font.bold
            if hasattr(run.font, 'italic'):
                shape_data['italic'] = run.font.italic
            if (hasattr(run.font, 'color') and
                    run.font.color is not None and
                    hasattr(run.font.color, 'rgb') and
                    run.font.color.rgb is not None):
                shape_data['font_color'] = str(run.font.color.rgb)

        if hasattr(paragraph, 'alignment') and paragraph.alignment is not None:
            shape_data['alignment'] = str(paragraph.alignment).split('.')[-1]
        if hasattr(paragraph, 'space_before'):
            shape_data['space_before'] = (
                paragraph.space_before.pt if paragraph.space_before else None
            )
        if hasattr(paragraph, 'space_after'):
            shape_data['space_after'] = (
                paragraph.space_after.pt if paragraph.space_after else None
            )
        if hasattr(paragraph, 'line_spacing') and paragraph.line_spacing:
            # Previously an exact spacing was recorded with value None, so it
            # could never be re-applied; the helper now keeps its point size.
            shape_data['line_spacing_info'] = _describe_line_spacing(
                paragraph.line_spacing
            )

    return shape_data
def apply_shape_properties(shape, shape_data, font_scale=0.9):
    """Apply saved geometry, text and formatting to a shape.

    The shape's text is replaced by a single run in its first paragraph.

    Args:
        shape: Target shape; must expose ``text`` and ``text_frame``.
        shape_data: Dict in the layout produced by ``get_shape_properties``.
            Every key is optional; absent or ``None`` entries are skipped
            (the font name falls back to ``"Arial"``).
        font_scale: Factor applied to the saved font size. Defaults to the
            previously hard-coded 0.9.

    Any exception is caught and printed so that one bad shape does not stop
    the caller's loop.
    """
    try:
        for attr in ('width', 'height', 'left', 'top'):
            value = shape_data.get(attr)
            if value is not None:
                setattr(shape, attr, value)

        shape.text = ""
        paragraph = shape.text_frame.paragraphs[0]
        run = paragraph.add_run()
        run.text = shape_data.get('text') or ""

        font_size = shape_data.get('font_size')
        if font_size:
            run.font.size = Pt(font_size * font_scale)

        run.font.name = shape_data.get('font_name') or "Arial"

        font_color = shape_data.get('font_color')
        if font_color:
            run.font.color.rgb = RGBColor.from_string(font_color)

        if shape_data.get('bold') is not None:
            run.font.bold = shape_data['bold']
        if shape_data.get('italic') is not None:
            run.font.italic = shape_data['italic']

        alignment = shape_data.get('alignment')
        if alignment:
            paragraph.alignment = get_alignment_value(alignment)

        line_spacing_info = shape_data.get('line_spacing_info') or {}
        line_spacing_rule = line_spacing_info.get('rule')
        line_spacing_value = line_spacing_info.get('value')
        if line_spacing_rule and line_spacing_value is not None:
            if line_spacing_rule in ("EXACTLY", "AT_LEAST"):
                paragraph.line_spacing = Pt(line_spacing_value)
            elif line_spacing_rule == "MULTIPLE":
                paragraph.line_spacing = line_spacing_value
            else:
                print(f"⚠️ Unknown line spacing rule: {line_spacing_rule}")

        # get_shape_properties stores paragraph spacing in points (``.pt``),
        # so convert back to a length instead of assigning the bare number.
        space_before = shape_data.get('space_before')
        if space_before:
            paragraph.space_before = Pt(space_before)
        space_after = shape_data.get('space_after')
        if space_after:
            paragraph.space_after = Pt(space_after)
    except Exception as e:
        print(f"Error applying shape properties: {str(e)}")
def apply_table_properties(table, table_data, font_scale=0.9):
    """Apply saved per-cell properties to a PowerPoint table.

    Args:
        table: Target table; its rows and cells are walked in order.
        table_data: Dict whose ``'cells'`` entry is a list of rows of cell
            dicts, in the layout produced by ``get_table_properties``.
        font_scale: Factor applied to the saved font size. Defaults to the
            previously hard-coded 0.9.

    Each cell's text is replaced by a single run in its first paragraph.
    Entries that are absent or ``None`` (font size, colour, alignment,
    vertical anchor) are skipped rather than raising, so one unset field no
    longer prevents the rest of the cell from being written. Errors are
    caught per cell and printed.
    """
    for row_idx, row in enumerate(table.rows):
        for col_idx, cell in enumerate(row.cells):
            try:
                cell_data = table_data['cells'][row_idx][col_idx]

                # Margins.
                cell.margin_left = cell_data.get('margin_left', 0)
                cell.margin_right = cell_data.get('margin_right', 0)
                cell.margin_top = cell_data.get('margin_top', 0)
                cell.margin_bottom = cell_data.get('margin_bottom', 0)

                # Vertical anchor (mapped by name, no eval). The key is
                # present with None when no anchor was saved; leave the cell
                # untouched in that case.
                anchor = cell_data.get('vertical_anchor')
                if anchor is not None:
                    cell.vertical_anchor = get_vertical_anchor(anchor)

                # Clear the old content and write the new text.
                cell.text = ""
                paragraph = cell.text_frame.paragraphs[0]
                run = paragraph.add_run()
                run.text = cell_data.get('text') or ""

                # Font size, scaled by font_scale.
                font_size = cell_data.get('font_size')
                if font_size:
                    run.font.size = Pt(font_size * font_scale)

                # Font name; falls back to Arial like apply_shape_properties.
                run.font.name = cell_data.get('font_name') or 'Arial'

                # Font colour.
                font_color = cell_data.get('font_color')
                if font_color:
                    run.font.color.rgb = RGBColor.from_string(font_color)

                # Bold and italic.
                run.font.bold = cell_data.get('bold', False)
                run.font.italic = cell_data.get('italic', False)

                # Paragraph alignment.
                alignment = cell_data.get('alignment')
                if alignment:
                    paragraph.alignment = get_alignment_value(alignment)
            except Exception as e:
                print(f"Lỗi khi thiết lập thuộc tính ô [{row_idx}, {col_idx}]: {str(e)}")
def get_file_from_mongodb(db_name, collection_name, file_id):
    """Download a file from MongoDB GridFS into memory.

    Args:
        db_name: Name of the database holding the GridFS collection.
        collection_name: GridFS collection (bucket) name.
        file_id: Identifier of the stored file, passed to ``GridFS.get``.

    Returns:
        A ``BytesIO`` holding the complete file content.

    The connection string is taken from the ``MONGODB_URI`` environment
    variable when it is set.
    """
    import os  # local import so this function does not depend on module imports

    # SECURITY NOTE(review): the fallback below embeds database credentials
    # in source control. It is kept only so existing deployments keep
    # working; set MONGODB_URI, rotate this password and delete the literal.
    uri = os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
    )
    # The context manager closes the client, which was previously left open
    # on every call. The file is read fully before the connection is closed.
    with MongoClient(uri) as client:
        fs = GridFS(client[db_name], collection_name)
        file_data = fs.get(file_id)
        return BytesIO(file_data.read())
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
    """Store a file in MongoDB GridFS and return its id.

    Args:
        db_name: Name of the database holding the GridFS collection.
        collection_name: GridFS collection (bucket) name.
        file_name: Name recorded as the GridFS ``filename``.
        file_data: Content handed to ``GridFS.put`` (the caller in this
            module passes a ``BytesIO``).

    Returns:
        The id returned by ``GridFS.put``.

    NOTE(review): this module defines ``save_file_to_mongodb`` a second time
    further down; that later definition replaces this one at import time.
    """
    import os  # local import so this function does not depend on module imports

    # SECURITY NOTE(review): the fallback below embeds database credentials
    # in source control. It is kept only so existing deployments keep
    # working; set MONGODB_URI, rotate this password and delete the literal.
    uri = os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
    )
    # The context manager closes the client, which was previously left open
    # on every call.
    with MongoClient(uri) as client:
        fs = GridFS(client[db_name], collection_name)
        return fs.put(file_data, filename=file_name)
def _load_saved_properties(parent_element, tag, shape_index):
    """Return the decoded JSON properties saved for a shape, or None if absent."""
    element = parent_element.find(f".//{tag}[@shape_index='{shape_index}']")
    if element is None:
        return None
    props_element = element.find("properties")
    if props_element is None or not props_element.text:
        return None
    return json.loads(props_element.text)


def create_translated_ppt(db_name, original_ppt_id, translated_xml_id, output_collection,
                          output_filename="translated_presentation.pptx"):
    """Build the translated presentation from MongoDB and store it back.

    The original presentation is read from the ``root_file`` collection and
    the translated XML from ``final_xml``. For every slide that has a
    matching ``slide`` element (``number`` attribute, 1-based), the saved
    properties of each group, table and text shape are applied.

    Args:
        db_name: Database holding the input and output collections.
        original_ppt_id: GridFS id of the original presentation.
        translated_xml_id: GridFS id of the translated XML document.
        output_collection: GridFS collection receiving the result.
        output_filename: File name recorded for the saved presentation.

    Returns:
        The GridFS id of the saved presentation, or ``None`` if anything
        failed (the error is printed).
    """
    try:
        # Fetch both inputs from MongoDB.
        original_ppt_io = get_file_from_mongodb(db_name, "root_file", original_ppt_id)
        translated_xml_io = get_file_from_mongodb(db_name, "final_xml", translated_xml_id)

        # Load the original presentation and the translated XML.
        prs = Presentation(original_ppt_io)
        tree = ET.parse(translated_xml_io)
        root = tree.getroot()

        # Apply the translation slide by slide.
        for slide_number, slide in enumerate(prs.slides, 1):
            xml_slide = root.find(f".//slide[@number='{slide_number}']")
            if xml_slide is None:
                continue

            # NOTE(review): lookups use a descendant search (".//"), so an
            # element nested inside a group_element with the same shape_index
            # could be matched for a top-level shape. Confirm against the XML
            # layout before tightening to a direct-child search.
            for shape_index, shape in enumerate(slide.shapes):
                if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
                    apply_group_properties_recursive(shape, shape_index, xml_slide)
                elif shape.shape_type == MSO_SHAPE_TYPE.TABLE:
                    try:
                        table_data = _load_saved_properties(
                            xml_slide, "table_element", shape_index
                        )
                        if table_data is not None:
                            apply_table_properties(shape.table, table_data)
                    except Exception as e:
                        print(f"Error applying table properties: {str(e)}")
                elif hasattr(shape, "text"):
                    try:
                        shape_data = _load_saved_properties(
                            xml_slide, "text_element", shape_index
                        )
                        if shape_data is not None:
                            apply_shape_properties(shape, shape_data)
                    except Exception as e:
                        print(f"Error applying shape properties: {str(e)}")

        # Serialise the presentation and store it in MongoDB.
        output_io = BytesIO()
        prs.save(output_io)
        output_io.seek(0)  # rewind so the upload reads from the start
        file_id = save_file_to_mongodb(db_name, output_collection, output_filename, output_io)
        print(f"Translated PowerPoint saved to MongoDB with ID: {file_id}")
        return file_id
    except Exception as e:
        print(f"Error creating translated PowerPoint: {str(e)}")
        return None
def save_file_to_mongodb(db_name, collection_name, file_name, file_data):
    """Store a file in MongoDB GridFS and return its id.

    Args:
        db_name: Name of the database holding the GridFS collection.
        collection_name: GridFS collection (bucket) name.
        file_name: Name recorded as the GridFS ``filename``.
        file_data: Content handed to ``GridFS.put`` (the caller in this
            module passes a ``BytesIO``).

    Returns:
        The id returned by ``GridFS.put``.

    NOTE(review): this repeats a function of the same name defined earlier
    in the module. Being the later definition, this is the one in effect
    once the module is loaded; the earlier copy can be removed.
    """
    import os  # local import so this function does not depend on module imports

    # SECURITY NOTE(review): the fallback below embeds database credentials
    # in source control. It is kept only so existing deployments keep
    # working; set MONGODB_URI, rotate this password and delete the literal.
    uri = os.environ.get(
        "MONGODB_URI",
        "mongodb+srv://admin:1highbar456@cluster0.equkm.mongodb.net/?retryWrites=true&w=majority&appName=Cluster0",
    )
    # The context manager closes the client, which was previously left open
    # on every call.
    with MongoClient(uri) as client:
        fs = GridFS(client[db_name], collection_name)
        return fs.put(file_data, filename=file_name)