seawolf2357 committed on
Commit
52196d9
·
verified ·
1 Parent(s): 4e14759

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -138
app.py CHANGED
@@ -8,8 +8,7 @@ from huggingface_hub import InferenceClient
8
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
9
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
10
 
11
-
12
-
13
  COUNTRY_CODES = {
14
  "United States": "US",
15
  "United Kingdom": "GB",
@@ -20,7 +19,7 @@ COUNTRY_CODES = {
20
  "Japan": "JP",
21
  "South Korea": "KR",
22
  "China": "CN",
23
- "Taiwan": "TW", # 대만 추가
24
  "India": "IN",
25
  "Brazil": "BR",
26
  "Mexico": "MX",
@@ -34,125 +33,29 @@ COUNTRY_CODES = {
34
  "Malaysia": "MY",
35
  "Philippines": "PH",
36
  "Thailand": "TH",
37
- "Vietnam": "VN",
38
- "Belgium": "BE",
39
- "Denmark": "DK",
40
- "Finland": "FI",
41
- "Ireland": "IE",
42
- "Norway": "NO",
43
- "Poland": "PL",
44
- "Sweden": "SE",
45
- "Switzerland": "CH",
46
- "Austria": "AT",
47
- "Czech Republic": "CZ",
48
- "Greece": "GR",
49
- "Hungary": "HU",
50
- "Portugal": "PT",
51
- "Romania": "RO",
52
- "Turkey": "TR",
53
- "Israel": "IL",
54
- "Saudi Arabia": "SA",
55
- "United Arab Emirates": "AE",
56
- "South Africa": "ZA",
57
- "Argentina": "AR",
58
- "Chile": "CL",
59
- "Colombia": "CO",
60
- "Peru": "PE",
61
- "Venezuela": "VE",
62
- "New Zealand": "NZ",
63
- "Bangladesh": "BD",
64
- "Pakistan": "PK",
65
- "Egypt": "EG",
66
- "Morocco": "MA",
67
- "Nigeria": "NG",
68
- "Kenya": "KE",
69
- "Ukraine": "UA",
70
- "Croatia": "HR",
71
- "Slovakia": "SK",
72
- "Bulgaria": "BG",
73
- "Serbia": "RS",
74
- "Estonia": "EE",
75
- "Latvia": "LV",
76
- "Lithuania": "LT",
77
- "Slovenia": "SI",
78
- "Luxembourg": "LU",
79
- "Malta": "MT",
80
- "Cyprus": "CY",
81
- "Iceland": "IS"
82
  }
83
 
 
84
 
85
  def is_english(text):
86
- # 영어로만 구성되어 있는지 확인
87
  return all(ord(char) < 128 for char in text.replace(' ', ''))
88
-
89
- COUNTRY_LANGUAGES = {
90
- "South Korea": "ko",
91
- "Japan": "ja",
92
- "China": "zh",
93
- "Taiwan": "zh-tw", # 대만어(번체 중국어) 추가
94
- "Russia": "ru",
95
- "France": "fr",
96
- "Germany": "de",
97
- "Spain": "es",
98
- "Italy": "it",
99
- "Netherlands": "nl",
100
- "Portugal": "pt",
101
- "Thailand": "th",
102
- "Vietnam": "vi",
103
- "Indonesia": "id",
104
- "Malaysia": "ms",
105
- "Saudi Arabia": "ar",
106
- "United Arab Emirates": "ar",
107
- "Egypt": "ar",
108
- "Morocco": "ar",
109
- "Greece": "el",
110
- "Poland": "pl",
111
- "Czech Republic": "cs",
112
- "Hungary": "hu",
113
- "Turkey": "tr",
114
- "Romania": "ro",
115
- "Bulgaria": "bg",
116
- "Croatia": "hr",
117
- "Serbia": "sr",
118
- "Slovakia": "sk",
119
- "Slovenia": "sl",
120
- "Estonia": "et",
121
- "Latvia": "lv",
122
- "Lithuania": "lt",
123
- "Ukraine": "uk",
124
- "Israel": "he",
125
- "Bangladesh": "bn",
126
- "Pakistan": "ur",
127
- "Finland": "fi",
128
- "Denmark": "da",
129
- "Norway": "no",
130
- "Sweden": "sv",
131
- "Iceland": "is",
132
- "Philippines": "fil",
133
- "Brazil": "pt-br",
134
- "Argentina": "es-ar",
135
- "Chile": "es-cl",
136
- "Colombia": "es-co",
137
- "Peru": "es-pe",
138
- "Venezuela": "es-ve"
139
- }
140
 
141
- # 번역 프롬프트 수정
142
  def translate_query(query, country):
143
  try:
 
144
  if is_english(query):
145
  print(f"English query detected, using original: {query}")
146
  return query[:255]
147
 
 
148
  if country == "South Korea":
149
  return query[:255]
150
 
151
- if country in COUNTRY_LANGUAGES:
152
  query = query[:100]
153
- target_lang = COUNTRY_LANGUAGES[country]
154
 
155
- # 번역 프롬프트 개선
156
  prompt = f"""Translate this text to {target_lang} language.
157
  For Japanese, use Kanji and Kana.
158
  For Chinese (China), use Simplified Chinese.
@@ -175,15 +78,10 @@ def translate_query(query, country):
175
  print(f"Translation error: {str(e)}")
176
  return query[:255]
177
 
178
-
179
-
180
- # MAJOR_COUNTRIES 정의 수정
181
- MAJOR_COUNTRIES = list(COUNTRY_CODES.keys()) # COUNTRY_LOCATIONS 대신 COUNTRY_CODES 사용
182
-
183
- # search_serphouse 함수 수정
184
  def search_serphouse(query, country, page=1, num_result=10):
185
  url = "https://api.serphouse.com/serp/live"
186
 
 
187
  translated_query = translate_query(query, country)
188
  print(f"Original query: {query}")
189
  print(f"Translated query: {translated_query}")
@@ -192,14 +90,12 @@ def search_serphouse(query, country, page=1, num_result=10):
192
  "data": {
193
  "q": translated_query,
194
  "domain": "google.com",
195
- "loc": country,
196
- "lang": COUNTRY_LANGUAGES.get(country, "en"),
197
  "device": "desktop",
198
- "serp_type": "web",
199
  "page": "1",
200
- "num": "10",
201
- "verbatim": "0",
202
- "gfilter": "0"
203
  }
204
  }
205
 
@@ -213,7 +109,6 @@ def search_serphouse(query, country, page=1, num_result=10):
213
  response = requests.post(url, json=payload, headers=headers)
214
  print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
215
  print("Response status:", response.status_code)
216
- print("Full response content:", response.text) # 전체 응답 내용 출력
217
 
218
  response.raise_for_status()
219
  return {"results": response.json(), "translated_query": translated_query}
@@ -231,13 +126,12 @@ def format_results_from_raw(response_data):
231
  results = response_data["results"]
232
  translated_query = response_data["translated_query"]
233
 
234
- # 응답 구조 변경에 따른 수정
235
- search_results = results.get('results', {}).get('organic_results', []) # organic_results๋กœ ๋ณ€๊ฒฝ
236
- if not search_results:
237
  return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []
238
 
239
  articles = []
240
- for idx, result in enumerate(search_results, 1):
241
  articles.append({
242
  "index": idx,
243
  "title": result.get("title", "์ œ๋ชฉ ์—†์Œ"),
@@ -250,7 +144,6 @@ def format_results_from_raw(response_data):
250
  })
251
  return "", articles
252
  except Exception as e:
253
- print(f"Format error: {str(e)}") # ๋””๋ฒ„๊น…์„ ์œ„ํ•œ ์—๋Ÿฌ ์ถœ๋ ฅ ์ถ”๊ฐ€
254
  return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
255
 
256
  def serphouse_search(query, country):
@@ -296,9 +189,8 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
296
  })
297
 
298
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
299
- progress(0, desc="๊ฒ€์ƒ‰ ์‹œ์ž‘...")
300
-
301
- # 번역 및 표시 텍스트 처리
302
  translated_query = translate_query(query, country)
303
  if is_english(query):
304
  translated_display_text = f"์˜์–ด ๊ฒ€์ƒ‰์–ด: {query}"
@@ -310,8 +202,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
310
  translated_display_text = f"๊ฒ€์ƒ‰์–ด: {query}"
311
 
312
  progress(0.2, desc="๊ฒ€์ƒ‰ ์ค‘...")
313
-
314
-
315
  response_data = search_serphouse(query, country)
316
  error_message, articles = format_results_from_raw(response_data)
317
 
@@ -354,19 +244,12 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
354
  outputs.append(gr.update(visible=False))
355
  return outputs
356
 
357
- # 검색 버튼 클릭 시 업데이트될 출력 컴포넌트 목록
358
- search_outputs = [translated_display, gr.Markdown(visible=False)] # 번역 결과 표시 컴포넌트
359
  for comp in article_components:
360
- search_outputs.extend([
361
- comp['group'],
362
- comp['title'],
363
- comp['image'],
364
- comp['snippet'],
365
- comp['info']
366
- ])
367
  search_outputs.extend([articles_state, status_message])
368
 
369
- # 검색 버튼 클릭 이벤트 설정
370
  search_button.click(
371
  fn=search_and_display,
372
  inputs=[query, country, articles_state],
 
8
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
9
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
10
 
11
+ # 국가별 코드 매핑
 
12
  COUNTRY_CODES = {
13
  "United States": "US",
14
  "United Kingdom": "GB",
 
19
  "Japan": "JP",
20
  "South Korea": "KR",
21
  "China": "CN",
22
+ "Taiwan": "TW",
23
  "India": "IN",
24
  "Brazil": "BR",
25
  "Mexico": "MX",
 
33
  "Malaysia": "MY",
34
  "Philippines": "PH",
35
  "Thailand": "TH",
36
+ "Vietnam": "VN"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  }
38
 
39
+ MAJOR_COUNTRIES = list(COUNTRY_CODES.keys())
40
 
41
  def is_english(text):
 
42
  return all(ord(char) < 128 for char in text.replace(' ', ''))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
 
44
  def translate_query(query, country):
45
  try:
46
+ # 영어 입력인 경우 번역하지 않음
47
  if is_english(query):
48
  print(f"English query detected, using original: {query}")
49
  return query[:255]
50
 
51
+ # ํ•œ๊ธ€ ์ž…๋ ฅ์ด๊ณ  South Korea๊ฐ€ ์„ ํƒ๋œ ๊ฒฝ์šฐ ๋ฒˆ์—ญํ•˜์ง€ ์•Š์Œ
52
  if country == "South Korea":
53
  return query[:255]
54
 
55
+ if country in COUNTRY_CODES:
56
  query = query[:100]
57
+ target_lang = COUNTRY_CODES[country]
58
 
 
59
  prompt = f"""Translate this text to {target_lang} language.
60
  For Japanese, use Kanji and Kana.
61
  For Chinese (China), use Simplified Chinese.
 
78
  print(f"Translation error: {str(e)}")
79
  return query[:255]
80
 
 
 
 
 
 
 
81
  def search_serphouse(query, country, page=1, num_result=10):
82
  url = "https://api.serphouse.com/serp/live"
83
 
84
+ # 검색어 번역
85
  translated_query = translate_query(query, country)
86
  print(f"Original query: {query}")
87
  print(f"Translated query: {translated_query}")
 
90
  "data": {
91
  "q": translated_query,
92
  "domain": "google.com",
93
+ "country_code": COUNTRY_CODES.get(country, "US"),
94
+ "lang": "en",
95
  "device": "desktop",
96
+ "serp_type": "news",
97
  "page": "1",
98
+ "num": "10"
 
 
99
  }
100
  }
101
 
 
109
  response = requests.post(url, json=payload, headers=headers)
110
  print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
111
  print("Response status:", response.status_code)
 
112
 
113
  response.raise_for_status()
114
  return {"results": response.json(), "translated_query": translated_query}
 
126
  results = response_data["results"]
127
  translated_query = response_data["translated_query"]
128
 
129
+ news_results = results.get('results', {}).get('organic_results', [])
130
+ if not news_results:
 
131
  return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []
132
 
133
  articles = []
134
+ for idx, result in enumerate(news_results, 1):
135
  articles.append({
136
  "index": idx,
137
  "title": result.get("title", "์ œ๋ชฉ ์—†์Œ"),
 
144
  })
145
  return "", articles
146
  except Exception as e:
 
147
  return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
148
 
149
  def serphouse_search(query, country):
 
189
  })
190
 
191
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
192
+ progress(0, desc="๊ฒ€์ƒ‰์–ด ๋ฒˆ์—ญ ์ค‘...")
193
+
 
194
  translated_query = translate_query(query, country)
195
  if is_english(query):
196
  translated_display_text = f"์˜์–ด ๊ฒ€์ƒ‰์–ด: {query}"
 
202
  translated_display_text = f"๊ฒ€์ƒ‰์–ด: {query}"
203
 
204
  progress(0.2, desc="๊ฒ€์ƒ‰ ์ค‘...")
 
 
205
  response_data = search_serphouse(query, country)
206
  error_message, articles = format_results_from_raw(response_data)
207
 
 
244
  outputs.append(gr.update(visible=False))
245
  return outputs
246
 
247
+ search_outputs = [translated_display, gr.Markdown(visible=False)]
 
248
  for comp in article_components:
249
+ search_outputs.extend([comp['group'], comp['title'], comp['image'],
250
+ comp['snippet'], comp['info']])
 
 
 
 
 
251
  search_outputs.extend([articles_state, status_message])
252
 
 
253
  search_button.click(
254
  fn=search_and_display,
255
  inputs=[query, country, articles_state],