seawolf2357 committed on
Commit
cce9aa5
·
verified ·
1 Parent(s): 06af11d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +317 -1
app.py CHANGED
@@ -1 +1,317 @@
1
- ss
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import json
4
+ import os
5
+ from datetime import datetime, timedelta
6
+ from huggingface_hub import InferenceClient
7
+
8
# Credentials come from the environment; each lookup yields None when unset.
API_KEY = os.environ.get("SERPHOUSE_API_KEY")
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.environ.get("HF_TOKEN"),
)
10
+
11
# Country name -> language code used when translating the search query.
# Countries missing from this table are searched with the query as typed.
COUNTRY_LANGUAGES = {
    "South Korea": "ko",
    "Japan": "ja",
    "China": "zh",
    "Russia": "ru",
    "France": "fr",
    "Germany": "de",
    "Spain": "es",
    "Italy": "it",
    "Netherlands": "nl",
    "Portugal": "pt",
    "Thailand": "th",
    "Vietnam": "vi",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "Egypt": "ar",
    "Morocco": "ar",
    "Greece": "el",
    "Poland": "pl",
    "Czech Republic": "cs",
    "Hungary": "hu",
    "Turkey": "tr",
    "Romania": "ro",
    "Bulgaria": "bg",
    "Croatia": "hr",
    "Serbia": "sr",
    "Slovakia": "sk",
    "Slovenia": "sl",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
}

# Countries offered in the UI, in display order.
_COUNTRY_NAMES = (
    "United States",
    "United Kingdom",
    "Canada",
    "Australia",
    "Germany",
    "France",
    "Japan",
    "South Korea",
    "China",
    "India",
    "Brazil",
    "Mexico",
    "Russia",
    "Italy",
    "Spain",
    "Netherlands",
    "Singapore",
    "Hong Kong",
    "Indonesia",
    "Malaysia",
    "Philippines",
    "Thailand",
    "Vietnam",
    "Belgium",
    "Denmark",
    "Finland",
    "Ireland",
    "Norway",
    "Poland",
    "Sweden",
    "Switzerland",
    "Austria",
    "Czech Republic",
    "Greece",
    "Hungary",
    "Portugal",
    "Romania",
    "Turkey",
    "Israel",
    "Saudi Arabia",
    "United Arab Emirates",
    "South Africa",
    "Argentina",
    "Chile",
    "Colombia",
    "Peru",
    "Venezuela",
    "New Zealand",
    "Bangladesh",
    "Pakistan",
    "Egypt",
    "Morocco",
    "Nigeria",
    "Kenya",
    "Ukraine",
    "Croatia",
    "Slovakia",
    "Bulgaria",
    "Serbia",
    "Estonia",
    "Latvia",
    "Lithuania",
    "Slovenia",
    "Luxembourg",
    "Malta",
    "Cyprus",
    "Iceland",
)

# Country name -> value sent as the search location; every entry maps to itself.
COUNTRY_LOCATIONS = {name: name for name in _COUNTRY_NAMES}

MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS)
118
+
119
def translate_query(query, country):
    """Translate ``query`` into the language mapped to ``country``.

    Args:
        query: Search text to translate.
        country: Country name, looked up in ``COUNTRY_LANGUAGES``.

    Returns:
        The stripped text generated by ``hf_client`` when the country has a
        mapped language and the model returned non-blank text. Otherwise
        ``query`` is returned unchanged: unmapped country, blank query, blank
        model output, or any exception raised along the way.
    """
    try:
        target_lang = COUNTRY_LANGUAGES.get(country)
        # Nothing to translate: no language for this country, or no text.
        if target_lang is None or not query or not query.strip():
            return query

        prompt = f"Translate only this text to {target_lang} language without any explanation. Output only translated text: {query}"

        translated = hf_client.text_generation(
            prompt,
            max_new_tokens=50,  # only the translated text is needed, so keep the token budget small
            temperature=0.1,  # low temperature for a more exact translation
        ).strip()

        # A blank generation would turn into an empty search; keep the original.
        if not translated:
            return query

        print(f"Original query: {query}")
        print(f"Translated query: {translated}")
        return translated
    except Exception as e:
        # Translation is best-effort: report the problem and search with the
        # text as typed rather than failing the whole request.
        print(f"Translation error: {str(e)}")
        return query
139
+
140
def search_serphouse(query, country, page=1, num_result=10):
    """Translate ``query`` and run a news search through the SerpHouse live API.

    Args:
        query: Search text as typed by the user.
        country: Country name; selects the translation language (via
            ``translate_query``) and the ``loc`` value (via
            ``COUNTRY_LOCATIONS``, defaulting to "United States").
        page: Result page to request; sent as a string.
        num_result: Number of results to request; sent as a string.

    Returns:
        ``{"results": <decoded JSON>, "translated_query": <str>}`` on success,
        or ``{"error": "Error: ...", "translated_query": <str>}`` when the
        request fails or the body is not valid JSON.
    """
    url = "https://api.serphouse.com/serp/live"
    timeout_seconds = 60  # never block the UI indefinitely on a stalled request

    # Translate the search query.
    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,  # search with the translated text
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": "en",
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": str(num_result),
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}",
    }

    try:
        response = requests.post(
            url, json=payload, headers=headers, timeout=timeout_seconds
        )
        print("Request payload:", json.dumps(payload, indent=2))
        print("Response status:", response.status_code)

        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # decode error does not derive from RequestException.
        return {"error": f"Error: {str(e)}", "translated_query": translated_query}
174
+
175
def format_results_from_raw(response_data):
    """Turn a search response dict into ``(message, articles)``.

    Args:
        response_data: Dict holding either an ``"error"`` entry, or
            ``"results"`` (decoded API JSON) plus ``"translated_query"``.

    Returns:
        A ``(message, articles)`` tuple. ``message`` is empty on success and
        otherwise describes the error, the lack of news results, or a failure
        while reading the response; ``articles`` is then an empty list. Each
        article is a dict with ``index`` (1-based), ``title``, ``link``,
        ``snippet``, ``channel``, ``time``, ``image_url`` and
        ``translated_query``.
    """
    if "error" in response_data:
        error_text = str(response_data["error"])
        # The message may already carry the "Error: " prefix; add it only
        # when missing so it is never shown twice.
        if not error_text.startswith("Error: "):
            error_text = "Error: " + error_text
        return error_text, []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []

        # Each field falls back to an alternate key, then to a placeholder.
        articles = [
            {
                "index": idx,
                "title": result.get("title", "์ œ๋ชฉ ์—†์Œ"),
                "link": result.get("url", result.get("link", "#")),
                "snippet": result.get("snippet", "๋‚ด์šฉ ์—†์Œ"),
                "channel": result.get("channel", result.get("source", "์•Œ ์ˆ˜ ์—†์Œ")),
                "time": result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query,
            }
            for idx, result in enumerate(news_results, 1)
        ]
        return "", articles
    except (AttributeError, KeyError, TypeError) as e:
        # Raised when the response does not have the expected nested
        # dict/list shape; report it instead of crashing the UI callback.
        return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
202
+
203
def serphouse_search(query, country):
    """Search news for ``query`` in ``country`` and return formatted results.

    Returns:
        The ``(message, articles)`` tuple produced by
        ``format_results_from_raw`` for the ``search_serphouse`` response.
    """
    return format_results_from_raw(search_serphouse(query, country))
206
+
207
css = """
footer {visibility: hidden;}
"""

# Gradio interface: a query box, a country selector, and 100 pre-built
# (initially hidden) article slots that each search fills in and reveals.
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
    gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")

    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")

        # Shows the translated search query.
        translated_display = gr.Markdown(visible=True, label="๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด")

        search_button = gr.Button("๊ฒ€์ƒ‰", variant="primary")

    # Not referenced below; the handler gets its tracker through its own
    # ``progress`` default argument.
    progress = gr.Progress()
    status_message = gr.Markdown(visible=False)
    articles_state = gr.State([])

    # One group of widgets per result slot.
    article_components = []
    for i in range(100):
        with gr.Group(visible=False) as article_group:
            title = gr.Markdown()
            image = gr.Image(width=200, height=150)
            snippet = gr.Markdown()
            info = gr.Markdown()

        article_components.append({
            'group': article_group,
            'title': title,
            'image': image,
            'snippet': snippet,
            'info': info,
            'index': i,
        })

    def search_and_display(query, country, articles_state, progress=gr.Progress()):
        """Run a search and build one update per output component.

        Args:
            query: Text from the search box.
            country: Selected country name.
            articles_state: Previous article list; replaced by this search.
            progress: Gradio progress tracker.

        Returns:
            A list ordered like ``search_outputs``: translated-query display,
            error display, five updates (group, title, image, snippet, info)
            per article slot, the new article list, and a final update that
            hides ``status_message``.
        """
        progress(0, desc="๊ฒ€์ƒ‰์–ด ๋ฒˆ์—ญ ์ค‘...")

        # search_serphouse translates the query itself and returns the text
        # it searched with, so reuse that instead of translating twice.
        response_data = search_serphouse(query, country)
        translated_query = response_data.get("translated_query", query)
        if translated_query != query:
            translated_display_text = f"์›๋ณธ ๊ฒ€์ƒ‰์–ด: {query}\n๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด: {translated_query}"
        else:
            translated_display_text = f"๊ฒ€์ƒ‰์–ด: {query}"

        error_message, articles = format_results_from_raw(response_data)

        # Show the translation result.
        outputs = [gr.update(value=translated_display_text, visible=True)]

        def hidden_slot():
            # Updates that hide one article group and leave its widgets alone.
            return [
                gr.update(visible=False), gr.update(), gr.update(),
                gr.update(), gr.update()
            ]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            for _ in article_components:
                outputs.extend(hidden_slot())
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))
            total_articles = len(articles)
            for idx, _ in enumerate(article_components):
                if idx >= total_articles:
                    outputs.extend(hidden_slot())
                    continue

                # Report progress only for real articles so the fraction
                # stays within 0..1 and the counter never passes the total.
                progress((idx + 1) / total_articles, desc=f"๊ฒฐ๊ณผ ํ‘œ์‹œ ์ค‘... {idx + 1}/{total_articles}")
                article = articles[idx]
                image_url = article['image_url']
                # Inline data: URIs and missing images are not displayed.
                if image_url and not image_url.startswith('data:image'):
                    image_update = gr.update(value=image_url, visible=True)
                else:
                    image_update = gr.update(value=None, visible=False)

                outputs.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    image_update,
                    gr.update(value=f"**์š”์•ฝ:** {article['snippet']}"),
                    gr.update(value=f"**์ถœ์ฒ˜:** {article['channel']} | **์‹œ๊ฐ„:** {article['time']}")
                ])
            articles_state = articles

        progress(1.0, desc="์™„๋ฃŒ!")
        outputs.append(articles_state)
        outputs.append(gr.update(visible=False))
        return outputs

    # Receives the error text (second entry of the handler's return list).
    error_display = gr.Markdown(visible=False)

    # Must list components in the same order search_and_display returns them.
    search_outputs = [translated_display, error_display]
    for comp in article_components:
        search_outputs.extend([comp['group'], comp['title'], comp['image'],
                               comp['snippet'], comp['info']])
    search_outputs.extend([articles_state, status_message])

    search_button.click(
        search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

iface.launch()