Kaung Myat Htet
initialize project
118fce5
raw
history blame
1.7 kB
from typing import List
from dataclasses import asdict
import pandas as pd
import gradio as gr
from DDICourseSearch import DDICourseSearch, CourseSearchResult
# Load the course catalogue from the CSV file shipped with the app.
df = pd.read_csv('./data/courses.csv')

# Data cleaning: strip trailing and leading whitespace from string cells,
# then turn any remaining newline into a single space.
df = df.replace({r'\s+$': '', r'^\s+': ''}, regex=True).replace(r'\n', ' ', regex=True)

# Drop the column at positional index 4 (the fifth column).
# NOTE(review): presumably an unused column - confirm against the CSV schema.
df = df.drop(df.columns[[4]], axis=1)
# Instantiate the course search engine, handing it the cleaned DataFrame.
search_engine = DDICourseSearch(courses_df=df)
def convert_to_dataframe(course_list: List[CourseSearchResult]) -> pd.DataFrame:
    """Build a DataFrame with one row per search result.

    Each ``CourseSearchResult`` is turned into a plain dict with
    ``dataclasses.asdict`` and the resulting dicts are handed to
    ``pandas.DataFrame``.

    Args:
        course_list: Search results to tabulate.

    Returns:
        A DataFrame holding one row for every element of ``course_list``.
    """
    rows = list(map(asdict, course_list))
    return pd.DataFrame(rows)
def search_courses(query, k, search_type, rerank=True):
    """Search the course catalogue and return the hits as a DataFrame.

    Args:
        query: Free-text search query. ``None``, empty or whitespace-only
            queries skip the search and return the full course table.
        k: Number of results to request. Gradio's ``Number`` component
            delivers a float by default (e.g. ``5.0``), so the value is
            truncated to an ``int`` before it reaches the search engine.
            ``None`` is passed through unchanged.
        search_type: Search mode forwarded to the engine (the UI offers
            ``'semantic'``, ``'keyword'`` and ``'hybrid'``).
        rerank: Forwarded to the engine's ``rerank`` argument.

    Returns:
        A ``pandas.DataFrame``: every course when the query is blank,
        otherwise the search results converted by ``convert_to_dataframe``.
    """
    if not query or not query.strip():
        return df  # Blank query: show all courses

    if k is not None:
        k = int(k)  # a result count must be integral, not 5.0

    results = search_engine.search(query, k=k, search_type=search_type, rerank=rerank)
    return convert_to_dataframe(results)
def search_interface(query):
    """Run a search with fixed default settings and return the result table.

    Convenience wrapper around ``search_courses``: requests 5 results in
    ``'hybrid'`` mode (the same default the search-type dropdown uses) and
    leaves reranking at ``search_courses``' own default.

    Args:
        query: Free-text search query.

    Returns:
        Whatever ``search_courses`` returns for these settings.
    """
    # search_type is a required argument of search_courses; omitting it
    # raised TypeError on every call.
    return search_courses(query, k=5, search_type='hybrid')
# Gradio UI: a query box, a row of search options, the result table and a
# button that triggers the search.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation - confirm the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        query = gr.Textbox(label="Search Query", placeholder="write a query to find the courses")
    with gr.Row():
        search_type = gr.Dropdown(label="Search Type", choices=['semantic', 'keyword', 'hybrid'], value='hybrid')
        # precision=0 makes the component hand an int (5, not 5.0) to the callback.
        k = gr.Number(label="Items Count", value=5, precision=0)
        rerank = gr.Checkbox(value=True, label="Rerank")
    results = gr.Dataframe(label="Search Results")
    search_button = gr.Button("Search", variant='primary')
    # Input order must match search_courses(query, k, search_type, rerank).
    search_button.click(fn=search_courses, inputs=[query, k, search_type, rerank], outputs=results)

demo.launch()