Saswat84's picture
Upload folder using huggingface_hub
3cf54df verified
import gradio as gr
import re
import datefinder
class AdvancedActionItemExtractor:
    """Rule-based extractor of action items from free-form meeting text.

    Each action item is a dict with the keys ``'assignee'``, ``'action'``
    and ``'deadline'``.  Items are recognised by a fixed, ordered list of
    regular expressions (see ``_build_patterns``) that are applied
    case-insensitively to each sentence of the input.
    """

    def __init__(self):
        # Ordered list of pattern configurations; earlier entries win when
        # two patterns would claim overlapping text in the same sentence.
        self.patterns = self._build_patterns()

    def _build_patterns(self):
        """Return the ordered list of pattern configurations.

        Each configuration is a dict with:

        * ``'pattern'``  -- regex source string.
        * ``'groups'``   -- the field name captured by each regex group,
          in group order.
        * ``'assignee'`` -- optional fixed assignee used when the pattern
          itself captures none (the "I will ..." form).
        """
        return [
            # "<Name> will <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "<Name>: <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b):\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "<Name> to <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+to\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "<Name> should <action> by <deadline>"
            {'pattern': r'(\b[A-Z][a-z]+\b)\s+should\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['assignee', 'action', 'deadline']},
            # "I will <action> by <deadline>" -- no name to capture.
            {'pattern': r'\bI\s+will\s+([^\.]+?)\s+by\s+([^\.]+?)(?=\.|\s+and|\s*$)', 'groups': ['action', 'deadline'], 'assignee': 'Current Speaker'},
        ]

    def extract(self, text):
        """Extract action items from ``text``.

        The text is whitespace-normalised, split into sentences on runs of
        ``.``, ``!`` or ``?``, and every pattern is searched in every
        sentence.  Within one sentence a match is dropped when it overlaps
        text already claimed by an earlier match, so that e.g.
        "Mike will send the report to Bob by Friday" yields only Mike's
        item and not an additional bogus "Report: Bob by Friday" produced
        by the "<Name> to ... by ..." pattern.

        Args:
            text: Transcript text.  Anything falsy or not a ``str`` yields
                an empty result.

        Returns:
            A list of action-item dicts, grouped by sentence and, within a
            sentence, ordered by pattern priority.
        """
        if not text or not isinstance(text, str):
            return []

        cleaned_text = re.sub(r'\s+', ' ', text)
        action_items = []

        for sentence in re.split(r'[.!?]+', cleaned_text):
            sentence = sentence.strip()
            if not sentence:
                # Splitting leaves empty fragments (e.g. after the final
                # full stop); nothing can match in them.
                continue

            # (start, end) spans of matches already accepted in this sentence.
            claimed_spans = []
            for pattern_config in self.patterns:
                # IGNORECASE lets lowercase names and keywords match; the
                # assignee is re-capitalised in _parse_match.
                for match in re.finditer(pattern_config['pattern'], sentence, re.IGNORECASE):
                    start, end = match.span()
                    overlaps_claimed = any(
                        start < claimed_end and claimed_start < end
                        for claimed_start, claimed_end in claimed_spans
                    )
                    if overlaps_claimed:
                        continue
                    action_item = self._parse_match(match, pattern_config, sentence)
                    if action_item:
                        claimed_spans.append((start, end))
                        action_items.append(action_item)

        return action_items

    def _parse_match(self, match, pattern_config, sentence):
        """Turn one regex match into an action-item dict.

        Args:
            match: The ``re.Match`` produced by ``pattern_config['pattern']``.
            pattern_config: The configuration dict the match came from.
            sentence: The sentence that was searched (currently unused;
                kept for interface compatibility).

        Returns:
            A dict with ``'assignee'`` (title-cased capture, or the
            pattern's fixed assignee, or ``None``), ``'action'`` and
            ``'deadline'`` (both stripped of surrounding whitespace).
        """
        action_item = {
            'assignee': pattern_config.get('assignee'),
            'action': None,
            'deadline': None,
        }
        # Group names and captured values line up positionally.
        for group_name, value in zip(pattern_config['groups'], match.groups()):
            if value is None:
                continue
            if group_name == 'assignee':
                action_item['assignee'] = value.title()
            elif group_name in ('action', 'deadline'):
                action_item[group_name] = value.strip()
        return action_item

    def format_output(self, action_items):
        """Render action items as a numbered, human-readable list.

        Args:
            action_items: List of dicts as returned by ``extract``.

        Returns:
            A string starting with ``"ACTION ITEMS:"`` followed by one
            numbered line per item, or a "No action items found." notice
            when the list is empty.  A missing deadline is shown as "TBD".
        """
        if not action_items:
            return "ACTION ITEMS:\nNo action items found."

        output_lines = ["ACTION ITEMS:"]
        for i, item in enumerate(action_items, 1):
            deadline = item['deadline'] or 'TBD'
            output_lines.append(f"{i}. {item['assignee']}: {item['action']} by {deadline}")
        return "\n".join(output_lines)
def extract_action_items(text):
    """Run the extractor on ``text`` and return the formatted report string.

    A fresh ``AdvancedActionItemExtractor`` is created on every call; its
    ``extract`` result is passed straight to ``format_output``.
    """
    item_extractor = AdvancedActionItemExtractor()
    return item_extractor.format_output(item_extractor.extract(text))
# Gradio UI: one multi-line text input wired to extract_action_items, with
# the formatted report shown in a read-out textbox.
demo = gr.Interface(
    title="Action Item & Deadline Extractor",
    description="Extract action items, assignees, and deadlines from meeting transcripts",
    fn=extract_action_items,
    inputs=gr.Textbox(
        lines=5,
        placeholder="Paste meeting transcript here...\nExample: Mike will set up Flask project by Oct 5.",
    ),
    outputs=gr.Textbox(
        lines=10,
        label="Extracted Action Items",
    ),
)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()