File size: 3,973 Bytes
4925baf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5003e3
4925baf
 
 
 
 
 
 
 
 
 
 
 
 
a5003e3
4925baf
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import os
import logging

import gradio as gr
import pandas as pd
from dotenv import load_dotenv
import jieba
# Warm up jieba at import time so the first request does not pay the
# dictionary-loading cost. jieba.cut() only returns a lazy generator, so
# calling it without consuming the result does not load anything;
# jieba.initialize() performs the load explicitly.
jieba.initialize()
from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt

from sheet import compose_query, get_serp, get_condensed_result, extract_results, postprocess_result, format_output, category2supercategory

# Read variables from a local .env file into the process environment
# (plot_wordcloud looks up FONT_PATH via os.getenv).
load_dotenv()

# Module-level logger, set to emit everything from DEBUG upwards.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


def plot_wordcloud(text):
    """Build a word cloud from ``text`` and return it as an image.

    The text is segmented with ``jieba.cut`` and the resulting tokens are
    joined with single spaces before being passed to ``WordCloud.generate``.
    When the ``FONT_PATH`` environment variable is set, its value is handed
    to ``WordCloud`` as ``font_path``; otherwise the default constructor is
    used.

    Args:
        text: The text to segment and visualise.

    Returns:
        The result of ``to_image()`` on the generated word cloud.
    """
    font_path = os.getenv("FONT_PATH", None)
    cloud = WordCloud() if font_path is None else WordCloud(font_path=font_path)
    segmented_text = " ".join(jieba.cut(text))
    return cloud.generate(segmented_text).to_image()

def format_category(formatted_results):
    """Summarise the first row of ``formatted_results`` as quoted lines.

    For each of the columns ``supercategory``, ``category``, ``store_name``,
    ``phone_number`` and ``description`` (in that order), the first value of
    the column is rendered as ``"> <label>:<value>"``. The five lines are
    joined with blank lines between them.

    Args:
        formatted_results: Table-like object whose columns expose
            ``.values`` (e.g. a pandas DataFrame) with at least one row.

    Returns:
        The formatted multi-line string.
    """
    labelled_columns = (
        ("大類別", "supercategory"),
        ("小類別", "category"),
        ("商家名稱", "store_name"),
        ("電話", "phone_number"),
        ("描述", "description"),
    )
    quoted_lines = []
    for label, column in labelled_columns:
        first_value = formatted_results[column].values[0]
        quoted_lines.append(f"> {label}:{first_value}")
    return "\n\n".join(quoted_lines)

def do( business_id, business_name, address):
    """Search for a business and classify it; handler for the Submit button.

    Composes a search query from the address and business name, fetches the
    search results, and runs them through the extraction, post-processing
    and formatting helpers imported from ``sheet``. The first row of the
    formatted result is then rendered as text and as a word cloud.

    Args:
        business_id: Identifier stored as-is in the crawled record.
        business_name: Business name used to build the search query.
        address: Address used to build the search query.

    Returns:
        A 3-tuple, one element per output component wired to this handler:
        the reference evidence text, the word-cloud image built from that
        evidence, and the classification summary text. If fetching the
        search results fails, ``("Error: <message>", None, "")`` is returned
        so that the number of outputs still matches.
    """
    import tempfile  # local import: only needed for the scratch file below

    google_domain = "google.com.tw"
    gl = 'tw'
    lr = 'lang_zh-TW'

    query = compose_query(address, business_name)
    try:
        res = get_serp( query, google_domain, gl, lr)
    except Exception as e:
        # UI boundary: report the failure in the first output instead of
        # crashing, but keep the traceback in the log. All three outputs must
        # be returned, otherwise the click handler receives too few values.
        logging.getLogger(__name__).exception("get_serp failed for query %r", query)
        return f"Error: {e}", None, ""

    cond_res = get_condensed_result(res)

    crawled_results = pd.DataFrame([{
        "index": 0,
        "business_id": business_id,
        "business_name": business_name,
        "serp": res,
        "evidence": cond_res,
        "address": address,
    }])

    extracted_results = extract_results( crawled_results)
    extracted_results = extracted_results['extracted_results'][
        [
            'business_id', 'business_name', 'address', 'category',
            'evidence', 'phone_number', 'description', 'store_name',
        ]
    ]

    # postprocess_result wants a path for its joblib file. A per-call
    # temporary directory keeps concurrent requests from sharing one fixed
    # /tmp path, and removes the file even if post-processing raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        postprocessed_results = postprocess_result(
            extracted_results,
            postprocessed_results_path=os.path.join(scratch_dir, "postprocessed_results.joblib"),
            category_hierarchy=category2supercategory,
        )

    formatted_results = format_output( postprocessed_results)
    formatted_output = format_category( formatted_results)

    # Only the first row is shown; the evidence text feeds both the text
    # output and the word cloud.
    evidence = formatted_results['formatted_evidence'].values[0]
    img = plot_wordcloud(evidence)
    return f"【參考資料】\n{evidence}", img, f"【辨識結果】\n{formatted_output}"

## --- interface --- ##
# outputs = [gr.Dataframe(row_count = (1, "dynamic"), col_count=(6,"dynamic"), label="output data", interactive=1)]
# demo = gr.Interface(
#         fn=do,
#         inputs=[ "text", "text", "text"],
#         outputs=outputs,
#     )

## --- block --- ##
with gr.Blocks() as demo:
    gr.Markdown("🌟 自動分類餐廳型態 🌟")

    # First row: the three text inputs, in the order `do` takes its arguments
    # (business id, business name, address).
    with gr.Row():
        inputs = [
            gr.Textbox(label="統一編號", placeholder="輸入八碼數字(optional)"),
            gr.Textbox(label="商家名稱", placeholder="輸入商家或公司名稱"),
            gr.Textbox(label="地址", placeholder="至少輸入縣市,完整地址更好"),
        ]

    # Second row: one component per value returned by `do`
    # (evidence text, word-cloud image, classification text).
    with gr.Row():
        outputs = [
            gr.Markdown(label="參考資料(google search)"),
            gr.Image(label="文字雲"),
            gr.Markdown(label="類別"),
        ]

    # Clicking Submit runs `do` on the inputs and fills the outputs.
    btn = gr.Button("Submit")
    btn.click(fn=do, inputs=inputs, outputs=outputs)


if __name__ == "__main__":
    # NOTE(security): share=True publishes the app through a public link, so
    # the login should not be a credential committed to source control.
    # DEMO_AUTH_USERNAME / DEMO_AUTH_PASSWORD (settable in the environment or
    # in .env, which load_dotenv() reads at import time) override the legacy
    # defaults; the defaults are kept only for backward compatibility and
    # should be overridden for any real deployment.
    auth_username = os.getenv("DEMO_AUTH_USERNAME", "kota")
    auth_password = os.getenv("DEMO_AUTH_PASSWORD", "kota")
    demo.launch(share=True, auth=(auth_username, auth_password))