# Spaces: Sleeping  (hosting-page status text captured with the file; not part of the program)
from typing import List | |
from dataclasses import asdict | |
import pandas as pd | |
import gradio as gr | |
from DDICourseSearch import DDICourseSearch, CourseSearchResult | |
# Load the courses
df = pd.read_csv('./data/courses.csv')

# Data cleaning, step 1: remove leading and trailing whitespace from string cells.
df = df.replace({r'\s+$': '', r'^\s+': ''}, regex=True)
# Data cleaning, step 2: replace each remaining newline with a space so that
# multi-line text ends up on one line.
df = df.replace(r'\n', ' ', regex=True)
# Drop the column at positional index 4 (the fifth column).
# NOTE(review): the column is selected by position, not by name; confirm that
# index 4 is still the intended column whenever courses.csv changes.
df = df.drop(columns=df.columns[[4]])

# Search engine built over the cleaned course table; used by search_courses().
search_engine = DDICourseSearch(courses_df=df)
def convert_to_dataframe(course_list: List[CourseSearchResult]) -> pd.DataFrame:
    """Turn a list of search results into a DataFrame.

    Each result is converted to a plain dict with ``dataclasses.asdict`` and
    becomes one row; the dataclass field names become the column names.

    Args:
        course_list: Search results (dataclass instances) to tabulate.

    Returns:
        A DataFrame with one row per result, in the same order as the input.
    """
    rows = list(map(asdict, course_list))
    return pd.DataFrame(rows)
# Function to search for courses
def search_courses(query, k, search_type, rerank=True):
    """Search the course table and return the matches as a DataFrame.

    Args:
        query: Free-text search string. ``None``, an empty string, or a
            whitespace-only string all count as "no query".
        k: Number of results to request. The UI's ``gr.Number`` component
            delivers a float by default (and ``None`` when the field is
            cleared), so the value is normalised to an ``int`` here.
        search_type: Search mode forwarded to the engine unchanged (the UI
            offers 'semantic', 'keyword' and 'hybrid').
        rerank: Forwarded to the engine unchanged.

    Returns:
        The full course DataFrame when there is no query; otherwise a
        DataFrame built from the engine's results.
    """
    if not query or not query.strip():
        return df  # Return all courses if the query is empty

    # Fall back to the UI's default count (10) when the number field is empty;
    # otherwise make sure the engine receives an integer, not e.g. 10.0.
    k = 10 if k is None else int(k)

    results = search_engine.search(query, k=k, search_type=search_type, rerank=rerank)
    return convert_to_dataframe(results)
# Gradio interface | |
def search_interface(query):
    """Run a fixed-size search for ``query`` and return the results.

    Convenience wrapper around ``search_courses`` that always asks for five
    results. ``search_type`` is a required argument of ``search_courses``, so
    it is passed explicitly; 'hybrid' matches the default selected in the UI
    dropdown.

    Args:
        query: Free-text search string.

    Returns:
        Whatever ``search_courses`` returns for this query.
    """
    return search_courses(query, k=5, search_type='hybrid')
# NOTE(review): the nesting below is reconstructed (the query box in one row,
# the three search options in a second row); confirm it matches the intended layout.
with gr.Blocks() as demo:
    # Row 1: the free-text query.
    with gr.Row():
        query = gr.Textbox(label="Search Query", placeholder="write a query to find the courses")
    # Row 2: search options.
    with gr.Row():
        search_type = gr.Dropdown(label="Search Type", choices=['semantic', 'keyword', 'hybrid'], value='hybrid')
        # precision=0 makes the component pass an integer to the callback
        # instead of a float such as 10.0.
        k = gr.Number(label="Items Count", value=10, precision=0)
        rerank = gr.Checkbox(value=True, label="Rerank")
    results = gr.Dataframe(label="Search Results")
    search_button = gr.Button("Search", variant='primary')
    # Input order must match search_courses' parameter order.
    search_button.click(fn=search_courses, inputs=[query, k, search_type, rerank], outputs=results)

demo.launch()