# Kaung Myat Htet
# Change the default value of k to 10
# 3247fad
from typing import List
from dataclasses import asdict
import pandas as pd
import gradio as gr
from DDICourseSearch import DDICourseSearch, CourseSearchResult
# Load the courses
df = pd.read_csv('./data/courses.csv')

# Data cleaning: strip trailing and leading whitespace from string cells,
# then replace embedded newlines with a single space.
df = df.replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace(r'\n', ' ', regex=True)

# Drop the fifth column (positional index 4) before handing the table to the
# search engine.
df = df.drop(df.columns[[4]], axis=1)

# Search backend, built once at module load and shared by every request.
search_engine = DDICourseSearch(
    courses_df=df,
)
def convert_to_dataframe(course_list: List[CourseSearchResult]) -> pd.DataFrame:
    """Build a DataFrame with one row per search result.

    Each result is turned into a plain dict with ``dataclasses.asdict``, so
    the dataclass field names become the column names.
    """
    records = list(map(asdict, course_list))
    return pd.DataFrame(records)
# Function to search for courses
def search_courses(query, k, search_type, rerank=True):
    """Search the course table and return the matches as a DataFrame.

    Args:
        query: Free-text search string.
        k: Number of results to request. A numeric UI input may deliver this
            as a float (e.g. ``10.0``), so it is truncated to an ``int``
            before being forwarded. ``None`` is passed through unchanged.
        search_type: Search strategy, forwarded unchanged to the engine.
        rerank: Forwarded unchanged to the engine.

    Returns:
        The full course table ``df`` when ``query`` is empty or only
        whitespace; otherwise the engine's results converted with
        ``convert_to_dataframe``.
    """
    if not query.strip():
        # Nothing to search for: show every course.
        return df

    # NOTE(review): assumes the engine expects an integer count -- confirm
    # against DDICourseSearch.search.
    top_k = int(k) if k is not None else None
    results = search_engine.search(query, k=top_k, search_type=search_type, rerank=rerank)
    return convert_to_dataframe(results)
# Gradio interface
def search_interface(query):
    """Run ``search_courses`` for ``query`` with fixed settings.

    Requests 5 results using the 'hybrid' search type and the default
    ``rerank`` setting of ``search_courses``.
    """
    # search_type is a required argument of search_courses; omitting it raises
    # TypeError. 'hybrid' matches the default selected in the UI dropdown.
    return search_courses(query, k=5, search_type='hybrid')
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost -- confirm the row grouping matches the intended
# layout.
with gr.Blocks() as demo:
    # First row: the free-text query box.
    with gr.Row():
        query = gr.Textbox(label="Search Query", placeholder="write a query to find the courses")
    # Second row: search options.
    with gr.Row():
        search_type = gr.Dropdown(label="Search Type", choices=['semantic', 'keyword', 'hybrid'], value='hybrid')
        # precision=0 rounds the value to a whole number and delivers it as an
        # int, so the result count is never passed on as a float.
        k = gr.Number(label="Items Count", value=10, precision=0)
        rerank = gr.Checkbox(value=True, label="Rerank")
    results = gr.Dataframe(label="Search Results")
    search_button = gr.Button("Search", variant='primary')
    # The input order must match the positional parameters of search_courses.
    search_button.click(fn=search_courses, inputs=[query, k, search_type, rerank], outputs=results)

demo.launch()