Check out the history versions! Google T5 mini LLM

#2
by AryanJh - opened

There are a couple of code additions I removed because I kept running into errors. They are interesting attempts at getting the T5 to communicate event histories in natural language.

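For reference, a minimal sketch of that idea could look something like the snippet below. It assumes a small instruction-tuned checkpoint such as `google/flan-t5-small` (just a stand-in, not necessarily the exact model used here) and simply verbalizes one event's fields as a prompt:

```python
# Hedged sketch: ask a small T5 model to phrase event details as natural language.
# "google/flan-t5-small" and the sample event values are assumptions for illustration.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")


def describe_event(title: str, date: str, location: str, hosts: str) -> str:
    """Turn structured event fields into one conversational sentence."""
    prompt = (
        "Describe this event in one friendly sentence: "
        f"title: {title}; date: {date}; location: {location}; hosted by: {hosts}"
    )
    inputs = tokenizer(prompt, return_tensors="pt")
    output_ids = model.generate(**inputs, max_new_tokens=60)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


print(describe_event(
    "Research Seminar",
    "Friday, March 7",
    "Teams (Online)",
    "Faculty of Mathematics and Science",
))
```

This is only a rough starting point, not the code I removed.
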
There's also this dynamic event matcher function that Claude and I played around with, which seems to work quite well.

I just need to find the time and effort to put it in.
If an idea strikes you, let me know!

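For anyone trying to run the snippet on its own: it expects an `Event` object and a `QueryProcessor` from the earlier RSS-parsing code. A rough stand-in (field names inferred from how the matcher uses them) would be:

```python
# Hypothetical stand-ins so the matcher below is self-contained; the real Event and
# QueryProcessor come from the earlier RSS-feed parsing code.
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List


@dataclass
class Event:
    title: str
    start_time: datetime
    location: str
    hosts: List[str] = field(default_factory=list)
    categories: List[str] = field(default_factory=list)
    link: str = ""


class QueryProcessor:
    def process_query(self, query: str) -> Dict:
        # The real processor extracts the faculty and event type from the query text;
        # this stub just echoes the query back with empty slots.
        return {
            "original_query": query,
            "event_type": None,
            "faculty": None,
        }
```
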
```python
from typing import Dict, List, Tuple

# Fuzzy matching: fuzz.partial_ratio is from fuzzywuzzy (rapidfuzz's fuzz works as a drop-in)
from fuzzywuzzy import fuzz


class DynamicEventMatcher:
    def __init__(self):
        self.known_categories = set()
        self.known_hosts = set()
        self.known_locations = set()
        self.faculty_patterns = {}
        self.category_patterns = {}

    def learn_from_events(self, events: List[Event]):
        """Learn patterns from existing events"""
        # Collect all unique categories, hosts and locations
        for event in events:
            self.known_categories.update(event.categories)
            self.known_hosts.update(event.hosts)
            self.known_locations.add(event.location)

            # Learn faculty associations
            for host in event.hosts:
                for category in event.categories:
                    key = (host, category)
                    if 'faculty' in host.lower():
                        self.faculty_patterns[key] = self.faculty_patterns.get(key, 0) + 1

            # Learn category associations
            for cat1 in event.categories:
                for cat2 in event.categories:
                    if cat1 != cat2:
                        key = (cat1, cat2)
                        self.category_patterns[key] = self.category_patterns.get(key, 0) + 1

    def get_faculty_score(self, event: Event, query: str) -> float:
        """Score faculty relevance using learned patterns"""
        score = 0.0
        query_lower = query.lower()

        # Check direct faculty mentions
        for host in event.hosts:
            if 'faculty' in host.lower():
                # Use fuzzy matching for faculty names
                ratio = fuzz.partial_ratio(query_lower, host.lower())
                if ratio > 80:
                    score += 2.0 * (ratio / 100)

        # Check associated categories
        for category in event.categories:
            for (host, cat), count in self.faculty_patterns.items():
                if category == cat and fuzz.partial_ratio(query_lower, host.lower()) > 80:
                    score += 1.0 * (count / max(self.faculty_patterns.values()))

        return score

    def get_category_score(self, event: Event, query_type: str) -> float:
        """Score category relevance using learned patterns"""
        score = 0.0
        if not query_type:
            return score

        # Direct category match
        for category in event.categories:
            ratio = fuzz.partial_ratio(query_type.lower(), category.lower())
            if ratio > 80:
                score += 1.5 * (ratio / 100)

            # Check associated categories
            for (cat1, cat2), count in self.category_patterns.items():
                if category == cat1 and fuzz.partial_ratio(query_type.lower(), cat2.lower()) > 80:
                    score += 0.5 * (count / max(self.category_patterns.values()))

        return score

    def get_location_score(self, event: Event, query: str) -> float:
        """Score location relevance"""
        score = 0.0
        location_lower = event.location.lower()
        query_lower = query.lower()

        # Online detection
        online_terms = {'online', 'virtual', 'teams', 'zoom'}
        if any(term in query_lower for term in online_terms):
            if any(term in location_lower for term in online_terms):
                score += 1.5

        # In-person detection
        campus_terms = {'room', 'hall', 'building', 'plaza', 'campus'}
        if any(term in query_lower for term in {'in-person', 'campus', 'building'}):
            if any(term in location_lower for term in campus_terms):
                score += 1.5

        return score

    def match_events(self, events: List[Event], query_info: Dict, top_k: int = 3) -> List[Tuple[Event, float]]:
        """Match events using learned patterns"""
        # First, learn from all events
        self.learn_from_events(events)

        scored_events = []
        for event in events:
            # Calculate component scores
            faculty_score = self.get_faculty_score(event, query_info['original_query'])
            category_score = self.get_category_score(event, query_info['event_type'])
            location_score = self.get_location_score(event, query_info['original_query'])

            # Combine scores with weights
            total_score = (faculty_score * 1.5 +
                           category_score * 1.2 +
                           location_score * 1.0)

            if total_score > 0:
                scored_events.append((event, total_score))

        # Sort by score and return top k
        return sorted(scored_events, key=lambda x: x[1], reverse=True)[:top_k]
```

```python
class ImprovedResponseGenerator:
    def format_event_details(self, event: Event, score: float) -> str:
        """Format event details with relevance score"""
        time_str = event.start_time.strftime('%I:%M %p')
        date_str = event.start_time.strftime('%A, %B %d, %Y')

        # Determine location type
        location = event.location
        if any(term in location.lower() for term in ['teams', 'zoom', 'online']):
            location = f"📱 {location} (Online)"
        else:
            location = f"📍 {location}"

        # Format categories with better grouping
        category_groups = self.group_categories(event.categories)

        return f"""
{event.title} {'🌟' * int(min(score, 5))}
📅 {date_str} at {time_str}
{location}
👥 Hosted by: {', '.join(event.hosts)}
{category_groups}
🔗 {event.link}
"""

    def group_categories(self, categories: List[str]) -> str:
        """Group categories by type"""
        groups = {
            'Academic': [],
            'Format': [],
            'Topic': [],
            'Audience': []
        }

        for cat in categories:
            if any(term in cat.lower() for term in ['academic', 'thoughtful', 'research']):
                groups['Academic'].append(cat)
            elif any(term in cat.lower() for term in ['workshop', 'seminar', 'conference']):
                groups['Format'].append(cat)
            elif any(term in cat.lower() for term in ['science', 'technology', 'business']):
                groups['Topic'].append(cat)
            else:
                groups['Audience'].append(cat)

        result = []
        for group, cats in groups.items():
            if cats:
                result.append(f"🏷️ {group}: {' | '.join(cats)}")

        return '\n'.join(result)
```

```python
def process_query_and_respond(query: str, events: List[Event]) -> str:
    """Process query with improved matching and response generation"""
    processor = QueryProcessor()
    matcher = DynamicEventMatcher()
    generator = ImprovedResponseGenerator()

    query_info = processor.process_query(query)
    matched_events = matcher.match_events(events, query_info)

    if not matched_events:
        return f"I couldn't find any events matching your query for {query_info['faculty'] or 'any faculty'} " \
               f"and {query_info['event_type'] or 'any event type'}. Try broadening your search."

    response = "Here are the most relevant events I found:\n"
    for event, score in matched_events:
        response += generator.format_event_details(event, score)

    return response
```

```python
# Test various types of queries
test_queries = [
    # Faculty-specific queries
    "Show me Faculty of Mathematics and Science events",
    "What's happening in Goodman School of Business",

    # Event type queries
    "Are there any research seminars?",
    "Show me workshops and training sessions",

    # Location-specific queries
    "What online events are available?",
    "Show me in-person events at Inniskillin Hall",

    # Mixed queries
    "Online workshops from Faculty of Mathematics and Science",
    "In-person Goodman seminars",

    # Edge cases
    "Events in the Physics department",  # Test department recognition
    "Student club meetings",  # Test category variations
]
```

```python
def run_tests(events: List[Event]):
    """Run test queries and display results"""
    print("Running Event Matcher Tests")
    print("=" * 50)

    # Initialize improved components
    processor = QueryProcessor()
    matcher = DynamicEventMatcher()
    generator = ImprovedResponseGenerator()

    # First, let the matcher learn from all events
    matcher.learn_from_events(events)

    # Print some statistics about what was learned
    print("\nLearned Patterns:")
    print(f"Number of unique categories: {len(matcher.known_categories)}")
    print(f"Number of unique hosts: {len(matcher.known_hosts)}")
    print(f"Number of location types: {len(matcher.known_locations)}")
    print(f"Number of faculty patterns: {len(matcher.faculty_patterns)}")
    print(f"Number of category patterns: {len(matcher.category_patterns)}")
    print("\n" + "=" * 50 + "\n")

    # Run each test query
    for query in test_queries:
        print(f"\nTesting Query: '{query}'")
        print("-" * 50)

        # Process query
        query_info = processor.process_query(query)
        print(f"Processed Query Info: {query_info}")

        # Get matches
        matched_events = matcher.match_events(events, query_info)

        if matched_events:
            print(f"\nFound {len(matched_events)} matching events:")
            for event, score in matched_events:
                print(f"\nRelevance Score: {score:.2f}")
                print(generator.format_event_details(event, score))
        else:
            print("\nNo matching events found.")

        print("\n" + "=" * 50)


# Run the tests with your events
print("Starting test suite...")
run_tests(events)  # 'events' should be your list of parsed events from the RSS feed
```
