Spaces:
Runtime error
Runtime error
File size: 4,165 Bytes
4925baf 7cfd43a 4925baf 7cfd43a 4925baf 60274d1 4925baf 60274d1 4925baf a5003e3 4925baf a5003e3 4925baf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import os
import logging
import gradio as gr
import pandas as pd
from dotenv import load_dotenv
import jieba
# Warm up jieba at import time. jieba.cut() is a generator function, so a call
# whose result is discarded never runs; initialize() loads the dictionary now
# instead of on the first real segmentation.
jieba.initialize()
from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt
from sheet import compose_query, get_serp, get_condensed_result, extract_results, postprocess_result, format_output, category2supercategory
# Load variables from a .env file before anything below reads the environment.
load_dotenv()

# Module-level logger, set to emit everything from DEBUG upwards.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Every non-empty key of the category -> supercategory mapping.
classes = [label for label in category2supercategory.keys() if len(label) > 0]
def plot_wordcloud(text):
    """Render a word cloud image for ``text``.

    The text is segmented with ``jieba.cut`` and the tokens are joined with
    single spaces before being passed to ``WordCloud.generate``. When the
    ``FONT_PATH`` environment variable is set it is forwarded to ``WordCloud``
    as ``font_path``; otherwise ``WordCloud`` is built with its defaults.

    Returns:
        Whatever ``to_image()`` yields for the generated cloud.
    """
    font_path = os.getenv("FONT_PATH")
    cloud_options = {} if font_path is None else {"font_path": font_path}
    cloud = WordCloud(**cloud_options)
    tokens = jieba.cut(text)
    return cloud.generate(" ".join(tokens)).to_image()
def format_category(formatted_results):
    """Build the block-quoted summary of the first row of ``formatted_results``.

    Reads the first value of the ``supercategory``, ``category``,
    ``store_name``, ``phone_number`` and ``description`` columns, renders each
    as one ``> label:value`` line, and joins the lines with blank lines.

    Returns:
        The joined summary string.
    """
    # (display label, column name) pairs, in output order.
    labelled_columns = (
        ("大類別", "supercategory"),
        ("小類別", "category"),
        ("商家名稱", "store_name"),
        ("電話", "phone_number"),
        ("描述", "description"),
    )
    quoted_lines = []
    for label, column in labelled_columns:
        first_value = formatted_results[column].values[0]
        quoted_lines.append(f"> {label}:{first_value}")
    return "\n\n".join(quoted_lines)
def do(business_id, business_name, address):
    """Classify one business from its Google search results.

    Composes a query from ``address`` and ``business_name``, fetches the
    search results with Taiwan / Traditional Chinese settings, then runs the
    extract -> postprocess -> format helpers on that single record.

    Args:
        business_id: Identifier stored on the record as ``business_id``.
        business_name: Business name, used to compose the search query.
        address: Business address, used to compose the search query.

    Returns:
        A 3-tuple ``(reference_text, image, category_text)``, one value per
        output component wired to this handler. If fetching the search
        results fails, the tuple is ``("Error: ...", None, "")``.
    """
    import tempfile  # local import: only needed for the scratch directory below

    google_domain = "google.com.tw"
    gl = 'tw'
    lr = 'lang_zh-TW'
    query = compose_query(address, business_name)
    try:
        res = get_serp(query, google_domain, gl, lr)
    except Exception as e:
        # The handler feeds three output components, so the error path must
        # also return three values; a lone string makes Gradio fail instead
        # of showing the message.
        logger.exception("get_serp failed for query %r", query)
        return f"Error: {e}", None, ""

    cond_res = get_condensed_result(res)
    crawled_results = pd.DataFrame([{
        "index": 0,
        "business_id": business_id,
        "business_name": business_name,
        "serp": res,
        "evidence": cond_res,
        "address": address,
    }])

    extracted_results = extract_results(crawled_results, classes=classes)
    extracted_results = extracted_results['extracted_results'][
        ['business_id', 'business_name', 'address', 'category', 'evidence',
         'phone_number', 'description', 'store_name']
    ]
    logger.debug(extracted_results['category'])
    print(extracted_results['category'])

    # Give each call its own scratch directory instead of one fixed /tmp file:
    # concurrent requests no longer share a path, and the directory is removed
    # even if the helper raises or never writes the file.
    # NOTE(review): how postprocess_result uses the path is not visible here.
    with tempfile.TemporaryDirectory() as scratch_dir:
        postprocessed_results = postprocess_result(
            extracted_results,
            postprocessed_results_path=os.path.join(scratch_dir, "postprocessed_results.joblib"),
            category_hierarchy=category2supercategory,
        )

    formatted_results = format_output(postprocessed_results)
    logger.debug(formatted_results)
    print(formatted_results)

    formatted_output = format_category(formatted_results)
    evidence = formatted_results['formatted_evidence'].values[0]
    img = plot_wordcloud(evidence)
    return f"【參考資料】\n{evidence}", img, f"【辨識結果】\n{formatted_output}"
## --- interface --- ##
# outputs = [gr.Dataframe(row_count = (1, "dynamic"), col_count=(6,"dynamic"), label="output data", interactive=1)]
# demo = gr.Interface(
# fn=do,
# inputs=[ "text", "text", "text"],
# outputs=outputs,
# )
## --- block --- ##
with gr.Blocks() as demo:
    gr.Markdown("🌟 自動分類餐廳型態 🌟")

    # First row: the three text inputs, in the order `do` takes its arguments.
    with gr.Row():
        business_id_box = gr.Textbox(label="統一編號", placeholder="輸入八碼數字(optional)")
        business_name_box = gr.Textbox(label="商家名稱", placeholder="輸入商家或公司名稱")
        address_box = gr.Textbox(label="地址", placeholder="至少輸入縣市,完整地址更好")
        inputs = [business_id_box, business_name_box, address_box]

    # Second row: reference text, word cloud image, classification text.
    with gr.Row():
        evidence_view = gr.Markdown(label="參考資料(google search)")
        wordcloud_view = gr.Image(label="文字雲")
        category_view = gr.Markdown(label="類別")
        outputs = [evidence_view, wordcloud_view, category_view]

    # Clicking the button runs `do` on the inputs and fills the outputs.
    btn = gr.Button("Submit")
    btn.click(fn=do, inputs=inputs, outputs=outputs)
if __name__ == "__main__":
    # Login credentials can be overridden through the environment (or a .env
    # file, since load_dotenv() runs at import) rather than living in source.
    # NOTE(review): the fallbacks keep the previous hard-coded values so
    # existing deployments still start; set both variables in production.
    auth_username = os.getenv("GRADIO_USERNAME", "kota")
    auth_password = os.getenv("GRADIO_PASSWORD", "kota")
    demo.launch(share=True, auth=(auth_username, auth_password))
|