Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import re | |
| import datefinder | |
class AdvancedActionItemExtractor:
    """Regex-based extractor of action items from meeting-style text.

    Each action item is a dict with the keys ``'assignee'``, ``'action'``
    and ``'deadline'``. Items are recognised by a fixed, ordered list of
    patterns of the shape ``<Name> will|to|should <action> by <deadline>``,
    ``<Name>: <action> by <deadline>`` and ``I will <action> by <deadline>``.
    """

    def __init__(self):
        # Ordered list of pattern configs; earlier entries take priority
        # when two patterns match overlapping text (see ``extract``).
        self.patterns = self._build_patterns()

    def _build_patterns(self):
        """Return the ordered list of pattern configurations.

        Each config has a ``'pattern'`` regex string and a ``'groups'`` list
        naming the regex capture groups in order. A config may also carry a
        fixed ``'assignee'`` used when the pattern has no assignee group.
        """
        return [
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b):\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+to\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+should\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            {'pattern': r'\bI\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['action', 'deadline'], 'assignee': 'Current Speaker'},
        ]

    def extract(self, text):
        """Extract action items from ``text``.

        The text is whitespace-normalised and split into sentences on runs
        of ``.``, ``!`` and ``?``. Every pattern is then searched in every
        sentence. Within one sentence, a match that overlaps text already
        claimed by an earlier (higher-priority) pattern is discarded, so a
        single clause is never reported twice (e.g. "Mike will talk to Sara
        by Friday" no longer also yields a bogus "Talk: Sara" item from the
        ``to`` pattern). Items are returned in the order they appear in the
        text.

        Args:
            text: The transcript to scan.

        Returns:
            A list of action-item dicts; empty when ``text`` is falsy or is
            not a ``str``.
        """
        if not text or not isinstance(text, str):
            return []

        cleaned_text = re.sub(r'\s+', ' ', text)
        action_items = []

        for sentence in re.split(r'[.!?]+', cleaned_text):
            sentence = sentence.strip()
            if not sentence:
                continue

            # (start, end, item) for every accepted match in this sentence.
            claimed = []
            for pattern_config in self.patterns:
                # NOTE: re.IGNORECASE also relaxes the ``[A-Z][a-z]+`` name
                # check, so any word can be captured as an assignee; the
                # overlap filter below keeps that from producing duplicates.
                for match in re.finditer(pattern_config['pattern'], sentence, re.IGNORECASE):
                    start, end = match.span()
                    overlaps = any(
                        start < taken_end and taken_start < end
                        for taken_start, taken_end, _ in claimed
                    )
                    if overlaps:
                        continue
                    action_item = self._parse_match(match, pattern_config, sentence)
                    if action_item:
                        claimed.append((start, end, action_item))

            # Report items in the order they occur in the sentence rather
            # than in pattern-priority order.
            claimed.sort(key=lambda entry: entry[0])
            action_items.extend(item for _, _, item in claimed)

        return action_items

    def _parse_match(self, match, pattern_config, sentence):
        """Build an action-item dict from one regex match.

        Args:
            match: The ``re.Match`` produced by ``pattern_config['pattern']``.
            pattern_config: The config dict the match came from.
            sentence: The sentence that was searched (currently unused).

        Returns:
            A dict with ``'assignee'``, ``'action'`` and ``'deadline'`` keys.
            Captured assignees are title-cased; action and deadline are
            stripped of surrounding whitespace.
        """
        groups = match.groups()
        action_item = {'assignee': None, 'action': None, 'deadline': None}

        # A fixed assignee from the config applies when the pattern has no
        # assignee capture group (the "I will ..." pattern).
        if 'assignee' in pattern_config:
            action_item['assignee'] = pattern_config['assignee']

        for i, group_name in enumerate(pattern_config['groups']):
            if i < len(groups):
                if group_name == 'assignee':
                    action_item['assignee'] = groups[i].title()
                elif group_name == 'action':
                    action_item['action'] = groups[i].strip()
                elif group_name == 'deadline':
                    action_item['deadline'] = groups[i].strip()
        return action_item

    def format_output(self, action_items):
        """Render action items as a numbered, human-readable list.

        Args:
            action_items: A list of dicts as returned by ``extract``.

        Returns:
            A string starting with ``"ACTION ITEMS:"`` followed by one
            numbered line per item, or a "No action items found." notice
            when the list is empty. A missing deadline is shown as ``TBD``.
        """
        if not action_items:
            return "ACTION ITEMS:\nNo action items found."

        output_lines = ["ACTION ITEMS:"]
        for i, item in enumerate(action_items, 1):
            deadline = item['deadline'] if item['deadline'] else 'TBD'
            output_lines.append(f"{i}. {item['assignee']}: {item['action']} by {deadline}")
        return "\n".join(output_lines)
def extract_action_items(text):
    """Extract action items from ``text`` and return them as display text.

    Builds a fresh ``AdvancedActionItemExtractor``, runs its ``extract`` on
    the input, and returns the result of its ``format_output``.
    """
    item_extractor = AdvancedActionItemExtractor()
    return item_extractor.format_output(item_extractor.extract(text))
# Input component: multi-line box where the transcript is pasted.
transcript_box = gr.Textbox(
    lines=5,
    placeholder="Paste meeting transcript here...\nExample: Mike will set up Flask project by Oct 5.",
)

# Output component: shows the formatted action-item list.
results_box = gr.Textbox(lines=10, label="Extracted Action Items")

# Gradio interface wiring the transcript box to extract_action_items.
demo = gr.Interface(
    fn=extract_action_items,
    inputs=transcript_box,
    outputs=results_box,
    title="Action Item & Deadline Extractor",
    description="Extract action items, assignees, and deadlines from meeting transcripts",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()