Spaces:
Sleeping
Sleeping
| """ | |
| Meeting note parsers for extracting structured data from markdown files. | |
| """ | |
| from pathlib import Path | |
| from typing import List, Optional | |
| from datetime import datetime | |
| from pydantic import BaseModel, Field | |
| import re | |
class ActionItem(BaseModel):
    """A single task recorded in the action-item list of a meeting note."""

    # Free-text description of the work to be done.
    task: str
    # Name of the person the task is assigned to, if one was given.
    assignee: Optional[str] = Field(default=None)
    # Due date as free text; typed as a string, not a datetime.
    deadline: Optional[str] = Field(default=None)
    # Whether the task is marked as done.
    completed: bool = Field(default=False)
class MeetingNote(BaseModel):
    """Structured view of one meeting note file."""

    # Project the note belongs to.
    project_name: str
    # Meeting title.
    title: str
    # Meeting date, or None when no date is available.
    date: Optional[datetime] = Field(default=None)
    # Names of the people who attended.
    participants: List[str] = Field(default_factory=list)
    # Free-form discussion text, or None when there is none.
    discussion: Optional[str] = Field(default=None)
    # Decisions that were recorded.
    decisions: List[str] = Field(default_factory=list)
    # Tasks that were recorded.
    action_items: List[ActionItem] = Field(default_factory=list)
    # Blockers / issues that were recorded.
    blockers: List[str] = Field(default_factory=list)
    # Location of the source file, stored as a string.
    file_path: str
class MeetingParser:
    """Parser for markdown meeting notes.

    The parser keeps no state: every method is a static method, so each one
    can be called either on the class (``MeetingParser.parse(...)``) or on an
    instance (``MeetingParser().parse(...)``).
    """

    # Formats tried in order by parse_date; the first one that matches wins,
    # so an ambiguous value such as "01/02/2025" is read day-first.
    _DATE_FORMATS = (
        "%Y-%m-%d",
        "%d/%m/%Y",
        "%m/%d/%Y",
        "%B %d, %Y",
        "%b %d, %Y",
        "%Y/%m/%d",
    )

    # Checkbox markers: empty, ticked with x/X, or ticked with a check mark.
    _CHECKBOX_RE = re.compile(r'\[[ xX✓✔]\]')

    # "Name: rest of the task" prefix (letters and whitespace only).
    _ASSIGNEE_RE = re.compile(r'^([A-Za-z\s]+):\s*(.+)$')

    # Deadline patterns, tried in order. The parenthesised form comes first
    # so that "(by 2025-01-20)" is removed together with its parentheses
    # instead of leaving "()" behind in the task text.
    _DEADLINE_RES = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'\(by\s+([^)]+)\)',
            r'\bby\s+([A-Za-z]+\s+\d{1,2}(?:,\s+\d{4})?)',
            r'\bby\s+(\d{4}-\d{2}-\d{2})',
        )
    )

    # Heading keywords -> section, checked in order (first hit wins).
    _SECTION_KEYWORDS = (
        (('discussion', 'notes'), 'discussion'),
        (('decision',), 'decisions'),
        (('action', 'todo', 'task'), 'action_items'),
        (('blocker', 'issue'), 'blockers'),
    )

    @staticmethod
    def parse_date(date_str: str) -> Optional[datetime]:
        """Parse a date written in one of the supported formats.

        Args:
            date_str: Date text; surrounding whitespace is ignored.

        Returns:
            The parsed ``datetime``, or ``None`` when no format matches.
        """
        candidate = date_str.strip()
        for fmt in MeetingParser._DATE_FORMATS:
            try:
                return datetime.strptime(candidate, fmt)
            except ValueError:
                continue
        return None

    @staticmethod
    def parse_action_item(line: str) -> "Optional[ActionItem]":
        """Parse one action-item line.

        Recognised shapes include::

            - [ ] Task
            - [x] Task
            - [ ] Alice: Task by Jan 20
            - [x] Bob: Task (by 2025-01-20)

        Args:
            line: A single line from an action-item list.

        Returns:
            The parsed ``ActionItem``, or ``None`` when nothing is left after
            the checkbox and bullet markers are removed.
        """
        completed = "[x]" in line.lower() or "[✓]" in line or "[✔]" in line

        # Drop the checkbox, then the list bullet in front of it.
        text = MeetingParser._CHECKBOX_RE.sub('', line).strip()
        # A '*' bullet is only removed when followed by whitespace, so bold
        # markup such as "**Alice**" at the start of the task is left alone.
        text = re.sub(r'^\*\s+', '', text)
        text = text.lstrip('- ').strip()
        if not text:
            return None

        # Optional "Assignee: task" prefix.
        assignee = None
        assignee_match = MeetingParser._ASSIGNEE_RE.match(text)
        if assignee_match:
            assignee = assignee_match.group(1).strip()
            text = assignee_match.group(2).strip()

        # Optional deadline; only the matched span is cut out of the task.
        deadline = None
        for deadline_re in MeetingParser._DEADLINE_RES:
            deadline_match = deadline_re.search(text)
            if deadline_match:
                deadline = deadline_match.group(1).strip()
                before = text[:deadline_match.start()].rstrip()
                after = text[deadline_match.end():].lstrip()
                text = f"{before} {after}".strip()
                break

        return ActionItem(
            task=text,
            assignee=assignee,
            deadline=deadline,
            completed=completed,
        )

    @staticmethod
    def _section_for(heading: str) -> str:
        """Map the text of a ``## `` heading to an internal section name."""
        name = heading.strip().lower()
        for keywords, section in MeetingParser._SECTION_KEYWORDS:
            if any(keyword in name for keyword in keywords):
                return section
        # Unrecognised headings are collected as discussion text.
        return 'discussion'

    @staticmethod
    def _bullet_text(line: str) -> Optional[str]:
        """Return the text of a ``-``/``*`` bullet, or None if there is none."""
        if not line.startswith(('-', '*')):
            return None
        # An empty result (e.g. a "---" rule) is reported as "no bullet".
        return line.lstrip('-*').strip() or None

    @staticmethod
    def parse(file_path: Path, project_name: str) -> "Optional[MeetingNote]":
        """Parse a markdown meeting note file.

        Args:
            file_path: Path of the markdown file to read (UTF-8).
            project_name: Project name stored on the resulting note.

        Returns:
            The parsed ``MeetingNote``, or ``None`` when ``file_path`` is not
            an existing regular file.
        """
        # is_file() rather than exists(): a directory cannot be read as text.
        if not file_path.is_file():
            return None

        content = file_path.read_text(encoding='utf-8')

        # The file name is the fallback title until a "# " heading is seen.
        title = file_path.stem.replace('-', ' ').replace('_', ' ').title()
        date = None
        participants = []
        discussion = []
        decisions = []
        action_items = []
        blockers = []
        current_section = None

        for raw_line in content.split('\n'):
            text = raw_line.strip()
            if not text:
                continue

            # Title heading, optionally written as "# Meeting: X".
            if text.startswith('# '):
                title = text[2:].strip()
                if title.lower().startswith('meeting:'):
                    title = title[len('meeting:'):].strip()
                continue

            # Metadata lines.
            lowered = text.lower()
            if lowered.startswith('date:'):
                parsed = MeetingParser.parse_date(text[len('date:'):])
                # An unparseable value must not wipe out a date already found.
                if parsed is not None:
                    date = parsed
                continue
            if lowered.startswith('participants:'):
                names = text[len('participants:'):].split(',')
                # Skip blanks so "Participants:" alone does not yield [''].
                participants = [name.strip() for name in names if name.strip()]
                continue

            # Section headings switch where the following lines are stored.
            if text.startswith('## '):
                current_section = MeetingParser._section_for(text[3:])
                continue

            # Content lines go to the current section; lines before the first
            # section heading are ignored.
            if current_section == 'discussion':
                discussion.append(text)
            elif current_section == 'action_items':
                if '[' in text:
                    action_item = MeetingParser.parse_action_item(text)
                    if action_item is not None:
                        action_items.append(action_item)
            elif current_section == 'decisions':
                entry = MeetingParser._bullet_text(text)
                if entry is not None:
                    decisions.append(entry)
            elif current_section == 'blockers':
                entry = MeetingParser._bullet_text(text)
                if entry is not None:
                    blockers.append(entry)

        return MeetingNote(
            project_name=project_name,
            title=title,
            date=date,
            participants=participants,
            discussion='\n'.join(discussion) if discussion else None,
            decisions=decisions,
            action_items=action_items,
            blockers=blockers,
            file_path=str(file_path),
        )
def load_meetings_from_directory(data_dir: Path) -> "List[MeetingNote]":
    """Load all meeting notes from a directory structure.

    Expected layout: ``data_dir/<project_name>/meetings/*.md``. The name of
    each project directory is used as the project name of its notes.

    Args:
        data_dir: Root directory that contains one sub-directory per project.

    Returns:
        The notes that could be parsed, ordered by project directory and then
        by file name. Empty when ``data_dir`` is not an existing directory.
    """
    meetings = []

    # is_dir() rather than exists(): iterdir() fails on a regular file.
    if not data_dir.is_dir():
        return meetings

    # Sorted traversal keeps the result order independent of the order in
    # which the filesystem happens to enumerate entries.
    for project_dir in sorted(data_dir.iterdir()):
        if not project_dir.is_dir():
            continue

        meetings_dir = project_dir / "meetings"
        if not meetings_dir.is_dir():
            continue

        for meeting_file in sorted(meetings_dir.glob("*.md")):
            meeting = MeetingParser.parse(meeting_file, project_dir.name)
            if meeting is not None:
                meetings.append(meeting)

    return meetings