# NetsPresso_QA / run_gradio_mrtydi_hybrid.py
# Last commit by geonmin-kim: "Upload folder using huggingface_hub" (d6585f5)
import argparse
import gradio as gr
from search_online import OnlineSearcher
# Number of hits requested from the searcher and passed on to its result
# formatter for every query (see Retrieve).
K = 10
# Default text pre-filled in the query textbox (passed as `default=` to the
# input Textbox of the Gradio interface). It appears to contain a short usage
# note followed by example queries.
# NOTE(review): the non-ASCII text in this literal looks mis-encoded
# (mojibake) -- verify that the file is saved and read as UTF-8.
DEFAULT_QUERY_MRTYDI="""μ‚¬μš©μž μ§ˆμ˜μ™€ κ΄€λ ¨λœ 문단듀을 Wikipedia ν•œκ΅­μ–΄ μ½”νΌμŠ€μ—μ„œ λ°˜ν™˜ν•©λ‹ˆλ‹€.\n
예λ₯Όλ“€μ–΄\n
- 졜초둜 μ „κΈ° μžλ™μ°¨λ₯Ό κ°œλ°œν•œ 기업은 μ–΄λ””μ•Ό?
- μŠ€μΏ λ²„ 닀이빙 잠수 κ°€λŠ₯ κΉŠμ΄λŠ” μ΅œλŒ€ λͺ‡λ―Έν„°μΈκ°€μš”?
- λ…μ κ·œμ œλ²• μƒμ˜ 기업결합심사기쀀은 기업결합을 μ–΄λ–»κ²Œ κ΅¬λΆ„ν•˜μ—¬ κ²½μŸμ œν•œμ„±μ„ νŒλ‹¨ν•˜λ‚˜μš”?
"""
# Manual arguments (FIXME: replace with real command-line parsing).
# `args` must be a Namespace *instance*, not the argparse.Namespace class:
# assigning attributes on the class would leak these settings onto every
# Namespace object created anywhere in the process.
args = argparse.Namespace(
    index_type='hybrid',
    # Two index paths (sparse, dense) joined by a comma; presumably split by
    # OnlineSearcher -- confirm against its implementation.
    index="/root/indexes/mrtydi-korean/sparse,/root/indexes/mrtydi-korean/dense",
    encoder="castorini/mdpr-question-nq",
    device="cuda:0",
    alpha=1000000,
    normalization=True,
    lang_abbr='ko',
)
# Initialize the retrieval bot once at module load so that every call to
# Retrieve reuses the same searcher object.
# NOTE(review): OnlineSearcher is project-local (search_online); presumably it
# loads the indexes and encoder named in `args` -- confirm startup cost there.
print("initialize Mr.tydi retrieval bot")
searcher = OnlineSearcher(args)
def Retrieve(query):
    """Search the module-level ``searcher`` for ``query`` and format the hits.

    Args:
        query: Query text supplied by the Gradio input textbox.

    Returns:
        Whatever ``searcher.print_result`` returns for the retrieved hits
        (presumably display-ready text, since it feeds the output textbox --
        confirm against ``OnlineSearcher``).
    """
    # K is used both as the number of hits requested and as the count handed
    # to the formatter.
    top_hits = searcher.search(query, K)
    return searcher.print_result(top_hits, K)
# Assemble the Gradio demo: one query textbox in, one result textbox out,
# with Retrieve as the request handler.
query_box = gr.inputs.Textbox(
    default=DEFAULT_QUERY_MRTYDI,
    label="질의",
)
# NOTE(review): the output component is built from gr.inputs.Textbox (so that
# it can carry a default placeholder text); confirm this is intended for the
# installed Gradio version.
result_box = gr.inputs.Textbox(
    default="κ²€μƒ‰λœ 문단듀과 점수λ₯Ό 좜λ ₯ν•©λ‹ˆλ‹€",
    label="검색 κ²°κ³Ό",
)
demo = gr.Interface(
    fn=Retrieve,
    inputs=[query_box],
    outputs=[result_box],
    title="Hybrid (term + neural) 검색",
    theme='dark-grass',
    description="νŠΉμ • μ§€μ‹λ² μ΄μŠ€μ— λŒ€ν•΄ ν•™μŠ΅ν•œ 검색 μ‹œμŠ€ν…œμ„ ν…ŒμŠ€νŠΈν•©λ‹ˆλ‹€.\nμ§€μ‹λ² μ΄μŠ€μ— λ§žμΆ”μ–΄ μž¬ν•™μŠ΅μ΄ κ°€λŠ₯ν•˜λ©°, ν˜„μž¬ 데λͺ¨μ—μ„œλŠ” Wikipedia ν•œκ΅­μ–΄ μ½”νΌμŠ€ (2020 dump, mr.Tydi ver, #문단=1496126)에 λŒ€ν•΄ ν•™μŠ΅ν•œ λͺ¨λΈμ„ μ‚¬μš©ν•©λ‹ˆλ‹€. \n\n neural: castorini/mdpr-[passage,question]-nq, term: BM25",
)
# share=True asks Gradio to expose the app through a public share link.
demo.launch(share=True)