"""Gradio demo: look a business up on Google and classify its restaurant type.

The heavy lifting (search, extraction, post-processing, formatting) is done by
helpers imported from the project-local ``sheet`` module; this file only wires
them together and exposes them through a small web UI.
"""

import logging
import os
import tempfile

import gradio as gr
import jieba
import matplotlib.pyplot as plt
import pandas as pd
from dotenv import load_dotenv
from PIL import Image
from wordcloud import WordCloud

from sheet import (
    category2supercategory,
    compose_query,
    extract_results,
    format_output,
    get_condensed_result,
    get_serp,
    postprocess_result,
)

# Warm up jieba at import time so the first request does not pay for loading
# the dictionary.  ``jieba.cut`` is a lazy generator, so merely calling it
# (without consuming the result) would not trigger any loading.
jieba.initialize()

load_dotenv()

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Candidate categories handed to ``extract_results``; empty names are dropped.
classes = [name for name in category2supercategory.keys() if name]


def plot_wordcloud(text: str) -> Image.Image:
    """Render a word cloud of ``text`` and return it as a PIL image.

    The text is segmented with jieba and re-joined with spaces before being
    passed to ``WordCloud.generate``.  If the ``FONT_PATH`` environment
    variable is set, it is passed to ``WordCloud`` as ``font_path``.

    Raises:
        ValueError: raised by ``WordCloud.generate`` when the text contains
            no words to draw.
    """
    font_path = os.getenv("FONT_PATH")
    if font_path is not None:
        wc_generator = WordCloud(font_path=font_path)
    else:
        wc_generator = WordCloud()
    cloud = wc_generator.generate(" ".join(jieba.cut(text)))
    return cloud.to_image()


def format_category(formatted_results) -> str:
    """Build the Markdown block-quote summary shown as the recognition result.

    Args:
        formatted_results: table exposing the columns ``supercategory``,
            ``category``, ``store_name``, ``phone_number`` and
            ``description``; only the first row of each column is used.

    Returns:
        One ``> label:value`` line per field, separated by blank lines.
    """
    labelled_columns = (
        ("大類別", "supercategory"),
        ("小類別", "category"),
        ("商家名稱", "store_name"),
        ("電話", "phone_number"),
        ("描述", "description"),
    )
    return "\n\n".join(
        f"> {label}:{formatted_results[column].values[0]}"
        for label, column in labelled_columns
    )


def do(business_id: str, business_name: str, address: str):
    """Classify one business and return the values for the three UI outputs.

    Args:
        business_id: business registration number typed by the user
            (the UI marks it as optional).
        business_name: name of the store or company.
        address: address of the business.

    Returns:
        A 3-tuple ``(evidence_markdown, wordcloud_image, category_markdown)``
        matching the three output components of the UI.  If the search
        request fails, the first element carries the error message, the image
        is ``None`` and the last element is empty.  The image is also ``None``
        when the evidence text contains nothing to draw.
    """
    google_domain = "google.com.tw"
    gl = "tw"
    lr = "lang_zh-TW"

    query = compose_query(address, business_name)
    try:
        res = get_serp(query, google_domain, gl, lr)
    except Exception as e:
        # UI boundary: report the failure instead of crashing the request.
        # The handler is bound to three outputs, so three values must be
        # returned here as well.
        logger.exception("Search request failed for query %r", query)
        return f"Error: {e}", None, ""

    cond_res = get_condensed_result(res)
    crawled_results = pd.DataFrame(
        [
            {
                "index": 0,
                "business_id": business_id,
                "business_name": business_name,
                "serp": res,
                "evidence": cond_res,
                "address": address,
            }
        ]
    )

    extracted_results = extract_results(crawled_results, classes=classes)
    extracted_results = extracted_results["extracted_results"][
        [
            "business_id",
            "business_name",
            "address",
            "category",
            "evidence",
            "phone_number",
            "description",
            "store_name",
        ]
    ]
    logger.debug(extracted_results["category"])
    print(extracted_results["category"])

    # Give every request its own scratch file: a fixed path under /tmp would
    # be shared by concurrent requests and would be left behind whenever
    # post-processing raises.  The directory is removed on exit either way.
    with tempfile.TemporaryDirectory() as scratch_dir:
        postprocessed_results = postprocess_result(
            extracted_results,
            postprocessed_results_path=os.path.join(
                scratch_dir, "postprocessed_results.joblib"
            ),
            category_hierarchy=category2supercategory,
        )

    formatted_results = format_output(postprocessed_results)
    logger.debug(formatted_results)
    print(formatted_results)

    formatted_output = format_category(formatted_results)
    evidence = formatted_results["formatted_evidence"].values[0]
    try:
        img = plot_wordcloud(evidence)
    except ValueError:
        # WordCloud refuses to draw when there are no words; show no image
        # rather than failing the whole request.
        logger.warning("No words available for the word cloud")
        img = None

    return f"【參考資料】\n{evidence}", img, f"【辨識結果】\n{formatted_output}"


## --- block --- ##
with gr.Blocks() as demo:
    gr.Markdown("🌟 自動分類餐廳型態 🌟")
    with gr.Row():
        inputs = [
            gr.Textbox(label="統一編號", placeholder="輸入八碼數字(optional)"),
            gr.Textbox(label="商家名稱", placeholder="輸入商家或公司名稱"),
            gr.Textbox(label="地址", placeholder="至少輸入縣市,完整地址更好"),
        ]
    with gr.Row():
        outputs = [
            gr.Markdown(label="參考資料(google search)"),
            gr.Image(label="文字雲"),
            gr.Markdown(label="類別"),
        ]
    btn = gr.Button("Submit")
    btn.click(fn=do, inputs=inputs, outputs=outputs)


if __name__ == "__main__":
    # NOTE(review): the fallback credentials are hard-coded and the app is
    # published through a public share link; set DEMO_AUTH_USER and
    # DEMO_AUTH_PASSWORD (e.g. in .env) for any real deployment.
    auth_user = os.getenv("DEMO_AUTH_USER", "kota")
    auth_password = os.getenv("DEMO_AUTH_PASSWORD", "kota")
    demo.launch(share=True, auth=(auth_user, auth_password))