"""Gradio app that pulls action items (who / what / by when) out of meeting text."""

import re

import datefinder  # noqa: F401  (imported by the original file; currently unused)
import gradio as gr


class AdvancedActionItemExtractor:
    """Regex-based extractor of ``assignee / action / deadline`` triples.

    Each entry of ``self.patterns`` is a dict with:

    * ``'pattern'``  - the regular expression source,
    * ``'groups'``   - the meaning of each capture group, in order,
    * ``'assignee'`` - (optional) fixed assignee used when the pattern has no
      assignee capture group.
    """

    # A capitalised word. Deliberately case-sensitive: matching it
    # case-insensitively turns ordinary words ("report to Bob by ...",
    # "needs to ... by ...") into bogus assignees.
    _NAME = r'(\b[A-Z][a-z]+\b)'

    # "<action> by <deadline>", where the deadline stops at a period, at
    # " and", or at the end of the sentence. Only the keywords are matched
    # case-insensitively, via scoped inline flags.
    _TAIL = r'\s+([^\.]+?)\s+(?i:by)\s+([^\.]+?)(?=\.|\s+(?i:and)|\s*$)'

    def __init__(self):
        self.patterns = self._build_patterns()

    def _build_patterns(self):
        """Return the ordered list of pattern configurations."""
        named = ['assignee', 'action', 'deadline']
        return [
            # "Mike will <action> by <deadline>"
            {'pattern': self._NAME + r'\s+(?i:will)' + self._TAIL, 'groups': named},
            # "Mike: <action> by <deadline>"
            {'pattern': self._NAME + r':' + self._TAIL, 'groups': named},
            # "Mike to <action> by <deadline>"
            {'pattern': self._NAME + r'\s+(?i:to)' + self._TAIL, 'groups': named},
            # "Mike should <action> by <deadline>"
            {'pattern': self._NAME + r'\s+(?i:should)' + self._TAIL, 'groups': named},
            # "I will <action> by <deadline>" - the assignee is whoever is speaking.
            {
                'pattern': r'\b(?i:I)\s+(?i:will)' + self._TAIL,
                'groups': ['action', 'deadline'],
                'assignee': 'Current Speaker',
            },
        ]

    def extract(self, text):
        """Return a list of action-item dicts found in *text*.

        Args:
            text: Meeting transcript. Anything that is not a non-empty ``str``
                yields an empty list.

        Returns:
            A list of dicts with keys ``'assignee'``, ``'action'`` and
            ``'deadline'``. Items are ordered sentence by sentence and, within
            a sentence, by pattern order.
        """
        if not text or not isinstance(text, str):
            return []

        # Collapse every whitespace run so the patterns never span a newline.
        cleaned_text = re.sub(r'\s+', ' ', text)

        action_items = []
        for sentence in re.split(r'[.!?]+', cleaned_text):
            sentence = sentence.strip()
            if not sentence:
                # Trailing punctuation leaves empty fragments; nothing to match.
                continue
            for pattern_config in self.patterns:
                # No global IGNORECASE here: the patterns scope it to keywords.
                for match in re.finditer(pattern_config['pattern'], sentence):
                    action_item = self._parse_match(match, pattern_config, sentence)
                    if action_item:
                        action_items.append(action_item)
        return action_items

    def _parse_match(self, match, pattern_config, sentence):
        """Turn one regex match into an action-item dict.

        Args:
            match: The ``re.Match`` produced by ``pattern_config['pattern']``.
            pattern_config: The pattern entry that produced *match*.
            sentence: The sentence the match came from (unused; kept for
                interface compatibility).

        Returns:
            A dict with keys ``'assignee'``, ``'action'`` and ``'deadline'``.
        """
        action_item = {
            # Fixed assignee (e.g. 'Current Speaker') if the pattern defines one.
            'assignee': pattern_config.get('assignee'),
            'action': None,
            'deadline': None,
        }
        # zip stops at the shorter sequence, so surplus group names are ignored.
        for group_name, value in zip(pattern_config['groups'], match.groups()):
            if group_name == 'assignee':
                action_item['assignee'] = value.title()
            elif group_name == 'action':
                action_item['action'] = value.strip()
            elif group_name == 'deadline':
                action_item['deadline'] = value.strip()
        return action_item

    def format_output(self, action_items):
        """Render *action_items* as a numbered, human-readable list."""
        if not action_items:
            return "ACTION ITEMS:\nNo action items found."

        output_lines = ["ACTION ITEMS:"]
        for i, item in enumerate(action_items, 1):
            deadline = item['deadline'] or 'TBD'
            output_lines.append(f"{i}. {item['assignee']}: {item['action']} by {deadline}")
        return "\n".join(output_lines)


def extract_action_items(text):
    """Gradio callback: extract action items from *text* and format them."""
    extractor = AdvancedActionItemExtractor()
    action_items = extractor.extract(text)
    return extractor.format_output(action_items)


demo = gr.Interface(
    fn=extract_action_items,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Paste meeting transcript here...\nExample: Mike will set up Flask project by Oct 5.",
    ),
    outputs=gr.Textbox(lines=10, label="Extracted Action Items"),
    title="Action Item & Deadline Extractor",
    description="Extract action items, assignees, and deadlines from meeting transcripts",
)

if __name__ == "__main__":
    demo.launch()