Spaces:
openfree
/
Running on CPU Upgrade

seawolf2357 committed on
Commit
1abf4a9
·
verified ·
1 Parent(s): e70c1dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -159
app.py CHANGED
@@ -8,12 +8,11 @@ from huggingface_hub import InferenceClient
8
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
9
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
10
 
11
- # 국가별 언어 코드 매핑 먼저 정의
12
  COUNTRY_LANGUAGES = {
13
  "South Korea": "ko",
14
  "Japan": "ja",
15
  "China": "zh",
16
- "Taiwan": "zh-tw",
17
  "Russia": "ru",
18
  "France": "fr",
19
  "Germany": "de",
@@ -42,153 +41,116 @@ COUNTRY_LANGUAGES = {
42
  "Slovenia": "sl",
43
  "Estonia": "et",
44
  "Latvia": "lv",
45
- "Lithuania": "lt",
46
- "Ukraine": "uk",
47
- "Israel": "he",
48
- "Bangladesh": "bn",
49
- "Pakistan": "ur",
50
- "Finland": "fi",
51
- "Denmark": "da",
52
- "Norway": "no",
53
- "Sweden": "sv",
54
- "Iceland": "is",
55
- "Philippines": "fil",
56
- "Brazil": "pt-br",
57
- "Argentina": "es-ar",
58
- "Chile": "es-cl",
59
- "Colombia": "es-co",
60
- "Peru": "es-pe",
61
- "Venezuela": "es-ve"
62
  }
63
 
64
- COUNTRY_CODES = {
65
- "United States": "US",
66
- "United Kingdom": "GB",
67
- "Canada": "CA",
68
- "Australia": "AU",
69
- "Germany": "DE",
70
- "France": "FR",
71
- "Japan": "JP",
72
- "South Korea": "KR",
73
- "China": "CN",
74
- "Taiwan": "TW",
75
- "India": "IN",
76
- "Brazil": "BR",
77
- "Mexico": "MX",
78
- "Russia": "RU",
79
- "Italy": "IT",
80
- "Spain": "ES",
81
- "Netherlands": "NL",
82
- "Singapore": "SG",
83
- "Hong Kong": "HK",
84
- "Indonesia": "ID",
85
- "Malaysia": "MY",
86
- "Philippines": "PH",
87
- "Thailand": "TH",
88
- "Vietnam": "VN",
89
- "Belgium": "BE",
90
- "Denmark": "DK",
91
- "Finland": "FI",
92
- "Ireland": "IE",
93
- "Norway": "NO",
94
- "Poland": "PL",
95
- "Sweden": "SE",
96
- "Switzerland": "CH",
97
- "Austria": "AT",
98
- "Czech Republic": "CZ",
99
- "Greece": "GR",
100
- "Hungary": "HU",
101
- "Portugal": "PT",
102
- "Romania": "RO",
103
- "Turkey": "TR",
104
- "Israel": "IL",
105
- "Saudi Arabia": "SA",
106
- "United Arab Emirates": "AE",
107
- "South Africa": "ZA",
108
- "Argentina": "AR",
109
- "Chile": "CL",
110
- "Colombia": "CO",
111
- "Peru": "PE",
112
- "Venezuela": "VE",
113
- "New Zealand": "NZ",
114
- "Bangladesh": "BD",
115
- "Pakistan": "PK",
116
- "Egypt": "EG",
117
- "Morocco": "MA",
118
- "Nigeria": "NG",
119
- "Kenya": "KE",
120
- "Ukraine": "UA",
121
- "Croatia": "HR",
122
- "Slovakia": "SK",
123
- "Bulgaria": "BG",
124
- "Serbia": "RS",
125
- "Estonia": "EE",
126
- "Latvia": "LV",
127
- "Lithuania": "LT",
128
- "Slovenia": "SI",
129
- "Luxembourg": "LU",
130
- "Malta": "MT",
131
- "Cyprus": "CY",
132
- "Iceland": "IS"
133
  }
134
 
135
- MAJOR_COUNTRIES = list(COUNTRY_CODES.keys())
136
-
137
- def is_english(text):
138
- return all(ord(char) < 128 for char in text.replace(' ', ''))
139
 
140
  def translate_query(query, country):
141
  try:
142
- # 영어 입력인 경우 번역하지 않음
143
- if is_english(query):
144
- print(f"English query detected, using original: {query}")
145
- return query[:255]
146
-
147
- # ํ•œ๊ธ€ ์ž…๋ ฅ์ด๊ณ  South Korea๊ฐ€ ์„ ํƒ๋œ ๊ฒฝ์šฐ ๋ฒˆ์—ญํ•˜์ง€ ์•Š์Œ
148
- if country == "South Korea":
149
- return query[:255]
150
-
151
- if country in COUNTRY_CODES:
152
- query = query[:100]
153
- target_lang = COUNTRY_CODES[country]
154
-
155
- prompt = f"""Translate this text to {target_lang} language.
156
- For Japanese, use Kanji and Kana.
157
- For Chinese (China), use Simplified Chinese.
158
- For Chinese (Taiwan), use Traditional Chinese.
159
- For Korean, use Hangul.
160
- Only output the translated text without any explanation.
161
- Text to translate: {query}"""
162
 
163
  translated = hf_client.text_generation(
164
  prompt,
165
- max_new_tokens=50,
166
- temperature=0.1
167
  )
168
- translated = translated.strip()[:255]
169
- print(f"Original query: {query}")
170
- print(f"Translated query: {translated}")
171
- return translated
172
- return query[:255]
173
  except Exception as e:
174
  print(f"Translation error: {str(e)}")
175
- return query[:255]
176
 
177
  def search_serphouse(query, country, page=1, num_result=10):
178
  url = "https://api.serphouse.com/serp/live"
179
 
 
 
 
 
 
180
  payload = {
181
  "data": {
182
  "q": query,
183
  "domain": "google.com",
184
- "loc": country, # ๊ตญ๊ฐ€ ์ด๋ฆ„ ์‚ฌ์šฉ
185
- "lang": COUNTRY_LANGUAGES.get(country, "en"),
186
  "device": "desktop",
187
- "serp_type": "web", # web์œผ๋กœ ๋ณ€๊ฒฝ
188
  "page": "1",
189
- "verbatim": "0",
190
- "gfilter": "0",
191
- "num": "10" # num์œผ๋กœ ๋ณ€๊ฒฝ
192
  }
193
  }
194
 
@@ -200,17 +162,13 @@ def search_serphouse(query, country, page=1, num_result=10):
200
 
201
  try:
202
  response = requests.post(url, json=payload, headers=headers)
203
- print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
204
  print("Response status:", response.status_code)
205
- print("Response content:", response.text)
206
 
207
  response.raise_for_status()
208
- return {"results": response.json(), "translated_query": query}
209
  except requests.RequestException as e:
210
- error_msg = f"Error: {str(e)}"
211
- if hasattr(response, 'text'):
212
- error_msg += f"\nResponse content: {response.text}"
213
- return {"error": error_msg, "translated_query": query}
214
 
215
  def format_results_from_raw(response_data):
216
  if "error" in response_data:
@@ -220,26 +178,24 @@ def format_results_from_raw(response_data):
220
  results = response_data["results"]
221
  translated_query = response_data["translated_query"]
222
 
223
- # ์‘๋‹ต ๊ตฌ์กฐ ๋ณ€๊ฒฝ
224
- organic_results = results.get('results', {}).get('organic', [])
225
- if not organic_results:
226
  return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []
227
 
228
  articles = []
229
- for idx, result in enumerate(organic_results, 1):
230
  articles.append({
231
  "index": idx,
232
  "title": result.get("title", "์ œ๋ชฉ ์—†์Œ"),
233
- "link": result.get("link", "#"),
234
  "snippet": result.get("snippet", "๋‚ด์šฉ ์—†์Œ"),
235
- "channel": result.get("source", "์•Œ ์ˆ˜ ์—†์Œ"),
236
- "time": result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„"),
237
- "image_url": result.get("thumbnail", ""),
238
  "translated_query": translated_query
239
  })
240
  return "", articles
241
  except Exception as e:
242
- print(f"Format error: {str(e)}") # ๋””๋ฒ„๊น…์šฉ ์ถœ๋ ฅ ์ถ”๊ฐ€
243
  return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
244
 
245
  def serphouse_search(query, country):
@@ -250,16 +206,17 @@ css = """
250
  footer {visibility: hidden;}
251
  """
252
 
 
253
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
254
- gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ํ•ด๋‹น ๊ตญ๊ฐ€์˜ ์–ธ์–ด๋กœ ๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด๋กœ ๋‰ด์Šค๋ฅผ ๊ฒ€์ƒ‰ํ•ฉ๋‹ˆ๋‹ค.")
255
 
256
  with gr.Column():
257
  with gr.Row():
258
  query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
259
  country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
260
 
261
- # 번역된 검색어 표시 컴포넌트
262
- translated_display = gr.Markdown(visible=True)
263
 
264
  search_button = gr.Button("๊ฒ€์ƒ‰", variant="primary")
265
 
@@ -287,21 +244,15 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
287
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
288
  progress(0, desc="๊ฒ€์ƒ‰์–ด ๋ฒˆ์—ญ ์ค‘...")
289
 
 
290
  translated_query = translate_query(query, country)
291
- if is_english(query):
292
- translated_display_text = f"์˜์–ด ๊ฒ€์ƒ‰์–ด: {query}"
293
- elif country == "South Korea":
294
- translated_display_text = f"๊ฒ€์ƒ‰์–ด: {query}"
295
- elif translated_query != query:
296
- translated_display_text = f"์›๋ณธ ๊ฒ€์ƒ‰์–ด: {query}\n๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด: {translated_query}"
297
- else:
298
- translated_display_text = f"๊ฒ€์ƒ‰์–ด: {query}"
299
 
300
- progress(0.2, desc="๊ฒ€์ƒ‰ ์ค‘...")
301
- response_data = search_serphouse(query, country)
302
- error_message, articles = format_results_from_raw(response_data)
303
 
304
- outputs = [gr.update(value=translated_display_text, visible=True)]
305
 
306
  if error_message:
307
  outputs.append(gr.update(value=error_message, visible=True))
@@ -340,14 +291,14 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
340
  outputs.append(gr.update(visible=False))
341
  return outputs
342
 
343
- search_outputs = [translated_display, gr.Markdown(visible=False)]
344
  for comp in article_components:
345
  search_outputs.extend([comp['group'], comp['title'], comp['image'],
346
  comp['snippet'], comp['info']])
347
  search_outputs.extend([articles_state, status_message])
348
 
349
  search_button.click(
350
- fn=search_and_display,
351
  inputs=[query, country, articles_state],
352
  outputs=search_outputs,
353
  show_progress=True
 
8
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
9
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
10
 
11
+ # 국가별 언어 코드 매핑
12
  COUNTRY_LANGUAGES = {
13
  "South Korea": "ko",
14
  "Japan": "ja",
15
  "China": "zh",
 
16
  "Russia": "ru",
17
  "France": "fr",
18
  "Germany": "de",
 
41
  "Slovenia": "sl",
42
  "Estonia": "et",
43
  "Latvia": "lv",
44
+ "Lithuania": "lt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  }
46
 
47
+ COUNTRY_LOCATIONS = {
48
+ "United States": "United States",
49
+ "United Kingdom": "United Kingdom",
50
+ "Canada": "Canada",
51
+ "Australia": "Australia",
52
+ "Germany": "Germany",
53
+ "France": "France",
54
+ "Japan": "Japan",
55
+ "South Korea": "South Korea",
56
+ "China": "China",
57
+ "India": "India",
58
+ "Brazil": "Brazil",
59
+ "Mexico": "Mexico",
60
+ "Russia": "Russia",
61
+ "Italy": "Italy",
62
+ "Spain": "Spain",
63
+ "Netherlands": "Netherlands",
64
+ "Singapore": "Singapore",
65
+ "Hong Kong": "Hong Kong",
66
+ "Indonesia": "Indonesia",
67
+ "Malaysia": "Malaysia",
68
+ "Philippines": "Philippines",
69
+ "Thailand": "Thailand",
70
+ "Vietnam": "Vietnam",
71
+ "Belgium": "Belgium",
72
+ "Denmark": "Denmark",
73
+ "Finland": "Finland",
74
+ "Ireland": "Ireland",
75
+ "Norway": "Norway",
76
+ "Poland": "Poland",
77
+ "Sweden": "Sweden",
78
+ "Switzerland": "Switzerland",
79
+ "Austria": "Austria",
80
+ "Czech Republic": "Czech Republic",
81
+ "Greece": "Greece",
82
+ "Hungary": "Hungary",
83
+ "Portugal": "Portugal",
84
+ "Romania": "Romania",
85
+ "Turkey": "Turkey",
86
+ "Israel": "Israel",
87
+ "Saudi Arabia": "Saudi Arabia",
88
+ "United Arab Emirates": "United Arab Emirates",
89
+ "South Africa": "South Africa",
90
+ "Argentina": "Argentina",
91
+ "Chile": "Chile",
92
+ "Colombia": "Colombia",
93
+ "Peru": "Peru",
94
+ "Venezuela": "Venezuela",
95
+ "New Zealand": "New Zealand",
96
+ "Bangladesh": "Bangladesh",
97
+ "Pakistan": "Pakistan",
98
+ "Egypt": "Egypt",
99
+ "Morocco": "Morocco",
100
+ "Nigeria": "Nigeria",
101
+ "Kenya": "Kenya",
102
+ "Ukraine": "Ukraine",
103
+ "Croatia": "Croatia",
104
+ "Slovakia": "Slovakia",
105
+ "Bulgaria": "Bulgaria",
106
+ "Serbia": "Serbia",
107
+ "Estonia": "Estonia",
108
+ "Latvia": "Latvia",
109
+ "Lithuania": "Lithuania",
110
+ "Slovenia": "Slovenia",
111
+ "Luxembourg": "Luxembourg",
112
+ "Malta": "Malta",
113
+ "Cyprus": "Cyprus",
114
+ "Iceland": "Iceland"
 
115
  }
116
 
117
+ MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS.keys())
 
 
 
118
 
119
  def translate_query(query, country):
120
  try:
121
+ if country in COUNTRY_LANGUAGES:
122
+ target_lang = COUNTRY_LANGUAGES[country]
123
+ prompt = f"Translate the following English text to {target_lang} language. Only output the translated text without any explanations or quotes: {query}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
  translated = hf_client.text_generation(
126
  prompt,
127
+ max_new_tokens=100,
128
+ temperature=0.3
129
  )
130
+ return translated.strip()
131
+ return query
 
 
 
132
  except Exception as e:
133
  print(f"Translation error: {str(e)}")
134
+ return query
135
 
136
  def search_serphouse(query, country, page=1, num_result=10):
137
  url = "https://api.serphouse.com/serp/live"
138
 
139
+ # 검색어 번역
140
+ translated_query = translate_query(query, country)
141
+ print(f"Original query: {query}")
142
+ print(f"Translated query: {translated_query}")
143
+
144
  payload = {
145
  "data": {
146
  "q": query,
147
  "domain": "google.com",
148
+ "loc": COUNTRY_LOCATIONS.get(country, "United States"),
149
+ "lang": "en",
150
  "device": "desktop",
151
+ "serp_type": "news",
152
  "page": "1",
153
+ "num": "10"
 
 
154
  }
155
  }
156
 
 
162
 
163
  try:
164
  response = requests.post(url, json=payload, headers=headers)
165
+ print("Request payload:", json.dumps(payload, indent=2))
166
  print("Response status:", response.status_code)
 
167
 
168
  response.raise_for_status()
169
+ return {"results": response.json(), "translated_query": translated_query}
170
  except requests.RequestException as e:
171
+ return {"error": f"Error: {str(e)}", "translated_query": query}
 
 
 
172
 
173
  def format_results_from_raw(response_data):
174
  if "error" in response_data:
 
178
  results = response_data["results"]
179
  translated_query = response_data["translated_query"]
180
 
181
+ news_results = results.get('results', {}).get('results', {}).get('news', [])
182
+ if not news_results:
 
183
  return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []
184
 
185
  articles = []
186
+ for idx, result in enumerate(news_results, 1):
187
  articles.append({
188
  "index": idx,
189
  "title": result.get("title", "์ œ๋ชฉ ์—†์Œ"),
190
+ "link": result.get("url", result.get("link", "#")),
191
  "snippet": result.get("snippet", "๋‚ด์šฉ ์—†์Œ"),
192
+ "channel": result.get("channel", result.get("source", "์•Œ ์ˆ˜ ์—†์Œ")),
193
+ "time": result.get("time", result.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„")),
194
+ "image_url": result.get("img", result.get("thumbnail", "")),
195
  "translated_query": translated_query
196
  })
197
  return "", articles
198
  except Exception as e:
 
199
  return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
200
 
201
  def serphouse_search(query, country):
 
206
  footer {visibility: hidden;}
207
  """
208
 
209
+ # Gradio 인터페이스 수정
210
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
211
+ gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ๊ฒ€์ƒ‰์–ด์™€ ์ผ์น˜ํ•˜๋Š” 24์‹œ๊ฐ„ ์ด๋‚ด ๋‰ด์Šค๋ฅผ ์ตœ๋Œ€ 100๊ฐœ ์ถœ๋ ฅํ•ฉ๋‹ˆ๋‹ค.")
212
 
213
  with gr.Column():
214
  with gr.Row():
215
  query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
216
  country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
217
 
218
+ # 번역 결과를 보여주는 컴포넌트 추가
219
+ translated_query_display = gr.Markdown(visible=False)
220
 
221
  search_button = gr.Button("๊ฒ€์ƒ‰", variant="primary")
222
 
 
244
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
245
  progress(0, desc="๊ฒ€์ƒ‰์–ด ๋ฒˆ์—ญ ์ค‘...")
246
 
247
+ # 검색어 번역
248
  translated_query = translate_query(query, country)
249
+ translated_display = f"**์›๋ณธ ๊ฒ€์ƒ‰์–ด:** {query}\n**๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด:** {translated_query}" if translated_query != query else f"**๊ฒ€์ƒ‰์–ด:** {query}"
 
 
 
 
 
 
 
250
 
251
+ progress(0.2, desc="๊ฒ€์ƒ‰ ์‹œ์ž‘...")
252
+ error_message, articles = serphouse_search(query, country)
253
+ progress(0.5, desc="๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘...")
254
 
255
+ outputs = [gr.update(value=translated_display, visible=True)] # ๋ฒˆ์—ญ ๊ฒฐ๊ณผ ํ‘œ์‹œ
256
 
257
  if error_message:
258
  outputs.append(gr.update(value=error_message, visible=True))
 
291
  outputs.append(gr.update(visible=False))
292
  return outputs
293
 
294
+ search_outputs = [translated_query_display, gr.Markdown(visible=False)] # ๋ฒˆ์—ญ ๊ฒฐ๊ณผ ์ปดํฌ๋„ŒํŠธ ์ถ”๊ฐ€
295
  for comp in article_components:
296
  search_outputs.extend([comp['group'], comp['title'], comp['image'],
297
  comp['snippet'], comp['info']])
298
  search_outputs.extend([articles_state, status_message])
299
 
300
  search_button.click(
301
+ search_and_display,
302
  inputs=[query, country, articles_state],
303
  outputs=search_outputs,
304
  show_progress=True