seawolf2357 committed on
Commit
76d493c
·
verified ·
1 Parent(s): 45eb115

Update app-backup.py

Browse files
Files changed (1) hide show
  1. app-backup.py +91 -70
app-backup.py CHANGED
@@ -1,9 +1,17 @@
1
  import gradio as gr
2
  import requests
3
  import json
 
4
  from datetime import datetime, timedelta
 
 
5
 
6
- API_KEY = "V38CNn4HXpLtynJQyOeoUensTEYoFy8PBUxKpDqAW1pawT1vfJ2BWtPQ98h6"
 
 
 
 
 
7
 
8
  MAJOR_COUNTRIES = [
9
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
@@ -18,7 +26,7 @@ MAJOR_COUNTRIES = [
18
  "Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
19
  ]
20
 
21
- def search_serphouse(query, country, page=1, num_result=100):
22
  url = "https://api.serphouse.com/serp/live"
23
 
24
  now = datetime.utcnow()
@@ -58,11 +66,8 @@ def search_serphouse(query, country, page=1, num_result=100):
58
 
59
  def format_results_from_raw(results):
60
  try:
61
- # ๋””๋ฒ„๊ทธ ์ •๋ณด ์ƒ๋žต
62
- debug_info = ""
63
-
64
  if isinstance(results, dict) and "error" in results:
65
- return "Error: " + results["error"], ""
66
 
67
  if not isinstance(results, dict):
68
  raise ValueError("๊ฒฐ๊ณผ๊ฐ€ ์‚ฌ์ „ ํ˜•์‹์ด ์•„๋‹™๋‹ˆ๋‹ค.")
@@ -83,10 +88,9 @@ def format_results_from_raw(results):
83
  news_results = []
84
 
85
  if not news_results:
86
- return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", ""
87
 
88
- # ๋‰ด์Šค ๊ฒฐ๊ณผ๋ฅผ ๋ฆฌ์ŠคํŠธ ํ˜•ํƒœ๋กœ ํฌ๋งทํŒ… (์ด๋ฏธ์ง€ ์ธ๋„ค์ผ ํฌํ•จ)
89
- list_output = ""
90
 
91
  for idx, result in enumerate(news_results, 1):
92
  title = result.get("title", "์ œ๋ชฉ ์—†์Œ")
@@ -96,86 +100,103 @@ def format_results_from_raw(results):
96
  time = result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„"))
97
  image_url = result.get("img", result.get("thumbnail", ""))
98
 
99
- # base64๋กœ ์ธ์ฝ”๋”ฉ๋œ ์ด๋ฏธ์ง€๋ฅผ ์ฒ˜๋ฆฌํ•˜์ง€ ์•Š์Œ
100
- if image_url and not image_url.startswith("data:image"):
101
- thumbnail_html = f'<img src="{image_url}" alt="Thumbnail" style="width: 100px; height: auto;">'
102
- else:
103
- thumbnail_html = ''
104
-
105
- # ๋ฆฌ์ŠคํŠธ ํ˜•์‹์˜ ๊ธฐ์‚ฌ (์ด๋ฏธ์ง€ ์ธ๋„ค์ผ ํฌํ•จ)
106
- list_item = f"""
107
- <div style="margin-bottom: 20px;">
108
- <h4>{idx}. <a href="{link}" target="_blank">{title}</a></h4>
109
- <p>{thumbnail_html}</p>
110
- <p>์š”์•ฝ: {snippet}</p>
111
- <p>์ถœ์ฒ˜: {channel} | ์‹œ๊ฐ„: {time}</p>
112
- <hr>
113
- </div>
114
- """
115
- list_output += list_item
116
-
117
- return list_output, ""
118
 
119
  except Exception as e:
120
  error_message = f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
121
- return "Error: " + error_message, ""
122
 
123
  def serphouse_search(query, country):
124
  # ํŽ˜์ด์ง€์™€ ๊ฒฐ๊ณผ ์ˆ˜์˜ ๊ธฐ๋ณธ๊ฐ’์„ ์„ค์ •ํ•ฉ๋‹ˆ๋‹ค.
125
  page = 1
126
- num_result = 100
127
  results = search_serphouse(query, country, page, num_result)
128
- list_output, debug_info = format_results_from_raw(results)
129
- return list_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  css = """
132
  footer {
133
  visibility: hidden;
134
  }
135
- /* '๋‰ด์Šค ๊ฒฐ๊ณผ'์™€ '๋””๋ฒ„๊ทธ ์ •๋ณด' ํƒญ ์ˆจ๊ธฐ๊ธฐ */
136
- #tab-๋‰ด์Šค_๊ฒฐ๊ณผ, #tab-๋””๋ฒ„๊ทธ_์ •๋ณด {
137
- display: none !important;
138
- }
139
- /* 'ํŽ˜์ด์ง€'์™€ '๊ฒฐ๊ณผ ์ˆ˜' ์ž…๋ ฅ ์š”์†Œ ์ˆจ๊ธฐ๊ธฐ */
140
- .slider-container {
141
- display: none !important;
142
- }
143
  """
144
 
145
  # Gradio ์ธํ„ฐํŽ˜์ด์Šค ๊ตฌ์„ฑ
146
- with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค ๊ฒ€์ƒ‰ ์ธํ„ฐํŽ˜์ด์Šค") as iface:
147
- gr.Markdown("## 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค ๊ฒ€์ƒ‰ ์ธํ„ฐํŽ˜์ด์Šค")
148
- gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜์—ฌ 24์‹œ๊ฐ„ ์ด๋‚ด์˜ ๋‰ด์Šค ๊ฒฐ๊ณผ๋ฅผ ๊ฐ€์ ธ์˜ต๋‹ˆ๋‹ค.")
149
 
150
- with gr.Tab("๊ฒ€์ƒ‰"):
151
  with gr.Row():
152
  query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
153
  country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
154
- # 'ํŽ˜์ด์ง€'์™€ '๊ฒฐ๊ณผ ์ˆ˜' ์ž…๋ ฅ ์š”์†Œ ์ œ๊ฑฐ
155
- # with gr.Row():
156
- # page = gr.Slider(1, 10, 1, label="ํŽ˜์ด์ง€")
157
- # num_result = gr.Slider(1, 100, 100, label="๊ฒฐ๊ณผ ์ˆ˜")
158
-
159
- search_button = gr.Button("๊ฒ€์ƒ‰")
160
-
161
- # '๋‰ด์Šค ๊ฒฐ๊ณผ'์™€ '๋””๋ฒ„๊ทธ ์ •๋ณด' ํƒญ ์ œ๊ฑฐ
162
- # with gr.Tab("๋‰ด์Šค ๊ฒฐ๊ณผ"):
163
- # news_output = gr.HTML(label="๋‰ด์Šค ๊ฒฐ๊ณผ")
164
 
165
- with gr.Tab("๋ฆฌ์ŠคํŠธ"):
166
- list_output = gr.HTML(label="๋ฆฌ์ŠคํŠธ ๊ฒฐ๊ณผ") # HTML๋กœ ๋ณ€๊ฒฝ
167
 
168
- # with gr.Tab("๋””๋ฒ„๊ทธ ์ •๋ณด"):
169
- # debug_output = gr.Textbox(label="๋””๋ฒ„๊ทธ ์ •๋ณด", lines=10)
170
-
171
- def search_and_display(query, country):
172
- list_output_text = serphouse_search(query, country)
173
- return {list_output: list_output_text}
174
-
175
- search_button.click(
176
- search_and_display,
177
- inputs=[query, country],
178
- outputs=[list_output]
179
- )
180
-
181
- iface.launch(auth=("gini", "pick"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import requests
3
  import json
4
+ import os
5
  from datetime import datetime, timedelta
6
+ from bs4 import BeautifulSoup # ์›น ํŽ˜์ด์ง€์—์„œ ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•˜๊ธฐ ์œ„ํ•ด ์‚ฌ์šฉ
7
+ from huggingface_hub import InferenceClient # LLM ์‚ฌ์šฉ์„ ์œ„ํ•ด ํ•„์š”
8
 
9
+ # ํ•„์š”ํ•œ ํŒจํ‚ค์ง€ ์„ค์น˜ (ํ•„์š”ํ•œ ๊ฒฝ์šฐ ์ฃผ์„์„ ์ œ๊ฑฐํ•˜๊ณ  ์‹คํ–‰)
10
+ # !pip install bs4 huggingface_hub
11
+
12
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ API ํ‚ค ๊ฐ€์ ธ์˜ค๊ธฐ (API ํ‚ค๋Š” ์•ˆ์ „ํ•˜๊ฒŒ ๊ด€๋ฆฌ๋˜์–ด์•ผ ํ•ฉ๋‹ˆ๋‹ค)
13
+ API_KEY = os.getenv("SERPHOUSE_API_KEY") # ๋ณธ์ธ์˜ SerpHouse API ํ‚ค๋ฅผ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
14
+ HF_TOKEN = os.getenv("HF_TOKEN") # Hugging Face API ํ† ํฐ์„ ํ™˜๊ฒฝ ๋ณ€์ˆ˜๋กœ ์„ค์ •ํ•˜์„ธ์š”.
15
 
16
  MAJOR_COUNTRIES = [
17
  "United States", "United Kingdom", "Canada", "Australia", "Germany",
 
26
  "Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
27
  ]
28
 
29
+ def search_serphouse(query, country, page=1, num_result=10):
30
  url = "https://api.serphouse.com/serp/live"
31
 
32
  now = datetime.utcnow()
 
66
 
67
  def format_results_from_raw(results):
68
  try:
 
 
 
69
  if isinstance(results, dict) and "error" in results:
70
+ return "Error: " + results["error"], []
71
 
72
  if not isinstance(results, dict):
73
  raise ValueError("๊ฒฐ๊ณผ๊ฐ€ ์‚ฌ์ „ ํ˜•์‹์ด ์•„๋‹™๋‹ˆ๋‹ค.")
 
88
  news_results = []
89
 
90
  if not news_results:
91
+ return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []
92
 
93
+ articles = []
 
94
 
95
  for idx, result in enumerate(news_results, 1):
96
  title = result.get("title", "์ œ๋ชฉ ์—†์Œ")
 
100
  time = result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„"))
101
  image_url = result.get("img", result.get("thumbnail", ""))
102
 
103
+ articles.append({
104
+ "title": title,
105
+ "link": link,
106
+ "snippet": snippet,
107
+ "channel": channel,
108
+ "time": time,
109
+ "image_url": image_url
110
+ })
111
+
112
+ return "", articles
 
 
 
 
 
 
 
 
 
113
 
114
  except Exception as e:
115
  error_message = f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
116
+ return "Error: " + error_message, []
117
 
118
def serphouse_search(query, country):
    """Search recent news for *query* in *country* and parse the raw response.

    Returns a ``(error_message, articles)`` tuple: ``error_message`` is an
    empty string on success, and ``articles`` is a list of result dicts
    (empty when the search failed or returned nothing).
    """
    # Fixed defaults: first page, up to 10 results.
    default_page, default_count = 1, 10
    raw_response = search_serphouse(query, country, default_page, default_count)
    return format_results_from_raw(raw_response)
125
+
126
+ # LLM ์„ค์ •
127
+ hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
128
+
129
def summarize_article(url):
    """Fetch the article at *url* and return a Korean summary produced by the LLM.

    Extracts text naively by concatenating all ``<p>`` elements, then asks the
    Hugging Face inference client for a 3-sentence Korean summary.

    Parameters:
        url: Article URL to fetch and summarize.

    Returns:
        The generated summary string, or a Korean error/notice message when the
        page has no extractable text or any step fails (this function never
        raises — errors are folded into the returned string for direct display
        in the UI).
    """
    try:
        # Fetch the page. requests has NO default timeout, so without one a
        # slow or unresponsive host would hang the UI callback indefinitely.
        response = requests.get(url, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Naive extraction: join the text of every paragraph element.
        # NOTE(review): this grabs boilerplate (nav/footers) too, and very long
        # articles may exceed the model's context window — consider truncating.
        text = ' '.join(p.get_text() for p in soup.find_all('p'))
        if not text.strip():
            return "๊ธฐ์‚ฌ ๋‚ด์šฉ์„ ๊ฐ€์ ธ์˜ฌ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."

        # Ask the LLM for a 3-sentence Korean summary of the (English) article.
        prompt = f"๋‹ค์Œ ์˜์–ด ๊ธฐ์‚ฌ๋ฅผ ํ•œ๊ตญ์–ด๋กœ 3๋ฌธ์žฅ์œผ๋กœ ์š”์•ฝํ•˜์„ธ์š”:\n{text}"
        summary = hf_client.text_generation(prompt, max_new_tokens=500)
        return summary
    except Exception as e:
        # Best-effort UI helper: report the failure instead of raising.
        return f"์š”์•ฝ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
146
 
147
# CSS injected into the Blocks app: hide Gradio's default footer.
css = """
footer {
    visibility: hidden;
}
"""

# Gradio interface construction.
with gr.Blocks(css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
    gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 10๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")

    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
        search_button = gr.Button("๊ฒ€์ƒ‰")

    # Mutable registry of per-article component groups created by the handler.
    article_outputs = []

    def search_and_display(query, country):
        # Run the search; serphouse_search returns (error_message, articles).
        error_message, articles = serphouse_search(query, country)
        if error_message:
            # NOTE(review): this gr.update targets no component — the click is
            # wired with outputs=[], so the error is never shown. Confirm
            # whether an error display component was intended.
            return gr.update(visible=True, value=error_message)
        else:
            # Hide previously created outputs.
            # NOTE(review): setting .visible on an existing component object
            # outside of a returned update has no effect on the rendered UI.
            for components in article_outputs:
                for component in components:
                    component.visible = False
            article_outputs.clear()

            # Build one output group per article.
            # FIXME(review): Gradio does not render components instantiated
            # inside an event handler (they must exist at Blocks-build time or
            # use gr.render) — as written, these article cards never appear.
            for article in articles:
                with gr.Column():
                    title = gr.Markdown(f"### [{article['title']}]({article['link']})")
                    image = gr.Image(value=article['image_url'], visible=bool(article['image_url']), shape=(200, 150))
                    snippet = gr.Markdown(f"**์š”์•ฝ:** {article['snippet']}")
                    info = gr.Markdown(f"**์ถœ์ฒ˜:** {article['channel']} | **์‹œ๊ฐ„:** {article['time']}")
                    analyze_button = gr.Button("๋ถ„์„")
                    summary_output = gr.Markdown(visible=False)

                    # NOTE(review): analyze_article closes over summary_output
                    # late-bound — every handler sees the LAST loop iteration's
                    # component. Also, calling .update() and discarding its
                    # return does not update the UI; the handler must RETURN
                    # gr.update(...) for its declared output.
                    def analyze_article(url):
                        summary = summarize_article(url)
                        summary_output.update(value=summary, visible=True)

                    analyze_button.click(analyze_article, inputs=gr.State(article['link']), outputs=summary_output)

                article_outputs.append([title, image, snippet, info, analyze_button, summary_output])

            return gr.update()

    # Wire the search button; see NOTE above — outputs=[] means the handler's
    # return value (including the error update) is discarded.
    search_button.click(
        search_and_display,
        inputs=[query, country],
        outputs=[]
    )

# Launch with HTTP basic auth.
# NOTE(review): credentials are hard-coded in source — consider moving them to
# environment variables like the API keys above.
iface.launch(auth=("gini", "pick"))