File size: 4,165 Bytes
4925baf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cfd43a
4925baf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cfd43a
4925baf
 
60274d1
 
4925baf
 
 
 
60274d1
 
4925baf
 
 
a5003e3
4925baf
 
 
 
 
 
 
 
 
 
 
 
 
a5003e3
4925baf
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import logging

import gradio as gr
import pandas as pd
from dotenv import load_dotenv
import jieba
jieba.cut('你好')
from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt

from sheet import compose_query, get_serp, get_condensed_result, extract_results, postprocess_result, format_output, category2supercategory

# Load variables from a local .env file (if present) into the environment
# before anything below reads them via os.getenv.
load_dotenv()

# Module-level logger; DEBUG so the logger.debug calls in this file are not
# filtered out at the logger level.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Candidate category labels passed to extract_results: every non-empty key of
# the category -> supercategory mapping.
classes = [label for label in category2supercategory.keys() if len(label) > 0]

def plot_wordcloud(text):
    """Render *text* as a word-cloud image.

    The text is segmented with ``jieba.cut`` and the tokens are joined with
    single spaces before being handed to ``WordCloud.generate``. When the
    ``FONT_PATH`` environment variable is set, its value is passed to
    ``WordCloud`` as ``font_path``; otherwise ``WordCloud`` is built with its
    defaults.

    Args:
        text: The text to visualise.

    Returns:
        The object returned by ``WordCloud.to_image()``.
    """
    font_path = os.getenv("FONT_PATH")
    generator = WordCloud() if font_path is None else WordCloud(font_path=font_path)
    segmented_text = " ".join(jieba.cut(text))
    return generator.generate(segmented_text).to_image()

def format_category(formatted_results):
    """Build the Markdown summary shown in the category output.

    For each of the five reported columns, the value in the first row of
    ``formatted_results`` is rendered as a ``> label:value`` quote line; the
    lines are separated by blank lines.

    Args:
        formatted_results: Table-like object indexable by column name whose
            columns expose ``.values`` (a pandas DataFrame in this file).

    Returns:
        The five quote lines joined with ``"\\n\\n"``.
    """
    # (display label, source column) pairs, in output order.
    labelled_columns = (
        ("大類別", "supercategory"),
        ("小類別", "category"),
        ("商家名稱", "store_name"),
        ("電話", "phone_number"),
        ("描述", "description"),
    )
    quote_lines = []
    for label, column in labelled_columns:
        first_value = formatted_results[column].values[0]
        quote_lines.append(f"> {label}:{first_value}")
    return "\n\n".join(quote_lines)

def do(business_id, business_name, address):
    """Classify one business from its search results (Gradio click handler).

    Composes a query from ``address`` and ``business_name``, fetches the SERP
    via ``get_serp``, wraps the result in a one-row DataFrame and runs it
    through ``extract_results`` -> ``postprocess_result`` -> ``format_output``
    (all imported from ``sheet``). The first row of the formatted result is
    then rendered as Markdown plus a word cloud of its evidence text.

    Args:
        business_id: Stored unchanged in the crawled record.
        business_name: Business name used in the search query.
        address: Address used in the search query.

    Returns:
        A 3-tuple ``(evidence_markdown, wordcloud_image, category_markdown)``,
        one element per Gradio output component. If ``get_serp`` raises, the
        tuple is ``("Error: <message>", None, "")`` so the arity still matches
        the three outputs.
    """
    # Local import: only this function needs a scratch directory.
    import tempfile

    google_domain = "google.com.tw"
    gl = 'tw'
    lr = 'lang_zh-TW'

    query = compose_query(address, business_name)
    try:
        res = get_serp(query, google_domain, gl, lr)
    except Exception as e:
        # The handler is wired to three outputs, so the error path must also
        # return three values; a bare string makes Gradio fail instead of
        # showing the message.
        logger.exception("get_serp failed for query %r", query)
        return f"Error: {e}", None, ""

    cond_res = get_condensed_result(res)

    crawled_results = pd.DataFrame([{
        "index": 0,
        "business_id": business_id,
        "business_name": business_name,
        "serp": res,
        "evidence": cond_res,
        "address": address,
    }])

    extracted_results = extract_results(crawled_results, classes=classes)
    extracted_results = extracted_results['extracted_results'][[
        'business_id', 'business_name', 'address', 'category',
        'evidence', 'phone_number', 'description', 'store_name',
    ]]
    logger.debug(extracted_results['category'])
    print(extracted_results['category'])

    # Use a per-call scratch directory instead of a fixed /tmp path so that
    # concurrent requests cannot clobber each other's file, and so cleanup
    # happens even if postprocess_result raises or never writes the file.
    with tempfile.TemporaryDirectory() as scratch_dir:
        postprocessed_results = postprocess_result(
            extracted_results,
            postprocessed_results_path=os.path.join(scratch_dir, "postprocessed_results.joblib"),
            category_hierarchy=category2supercategory,
        )

    formatted_results = format_output(postprocessed_results)
    logger.debug(formatted_results)
    print(formatted_results)
    formatted_output = format_category(formatted_results)

    # Only the first (and only) row is reported.
    evidence = formatted_results['formatted_evidence'].values[0]
    img = plot_wordcloud(evidence)
    return f"【參考資料】\n{evidence}", img, f"【辨識結果】\n{formatted_output}"

## --- interface --- ##
# outputs = [gr.Dataframe(row_count = (1, "dynamic"), col_count=(6,"dynamic"), label="output data", interactive=1)]
# demo = gr.Interface(
#         fn=do,
#         inputs=[ "text", "text", "text"],
#         outputs=outputs,
#     )

## --- block --- ##
with gr.Blocks() as demo:
    gr.Markdown("🌟 自動分類餐廳型態 🌟")

    # Input row: the three text fields map positionally onto
    # do(business_id, business_name, address).
    with gr.Row():
        inputs = [
            gr.Textbox(label="統一編號", placeholder="輸入八碼數字(optional)"),
            gr.Textbox(label="商家名稱", placeholder="輸入商家或公司名稱"),
            gr.Textbox(label="地址", placeholder="至少輸入縣市,完整地址更好"),
        ]

    # Output row: one component per element of the tuple returned by do().
    with gr.Row():
        outputs = [
            gr.Markdown(label="參考資料(google search)"),
            gr.Image(label="文字雲"),
            gr.Markdown(label="類別"),
        ]

    btn = gr.Button("Submit")
    btn.click(fn=do, inputs=inputs, outputs=outputs)


if __name__ == "__main__":
    # NOTE(security): share=True asks Gradio for a public link, so this login
    # is the only thing protecting the app. Credentials can be supplied via
    # the DEMO_AUTH_USERNAME / DEMO_AUTH_PASSWORD environment variables (or
    # the .env file loaded at import time). The previous hard-coded pair is
    # kept only as a backward-compatible fallback and should be overridden
    # for any real deployment.
    auth_username = os.getenv("DEMO_AUTH_USERNAME", "kota")
    auth_password = os.getenv("DEMO_AUTH_PASSWORD", "kota")
    demo.launch(share=True, auth=(auth_username, auth_password))