seawolf2357 committed on
Commit
d2cccd5
·
verified ·
1 Parent(s): 54328d4

Update app-backup1.py

Browse files
Files changed (1) hide show
  1. app-backup1.py +137 -36
app-backup1.py CHANGED
@@ -5,48 +5,87 @@ import os
5
  from datetime import datetime, timedelta
6
  from huggingface_hub import InferenceClient
7
 
 
 
8
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
9
- hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
10
 
11
  # ꡭ가별 μ–Έμ–΄ μ½”λ“œ 맀핑
12
  COUNTRY_LANGUAGES = {
13
- "South Korea": "ko",
 
 
 
 
 
 
14
  "Japan": "ja",
 
15
  "China": "zh",
 
 
 
16
  "Russia": "ru",
17
- "France": "fr",
18
- "Germany": "de",
19
- "Spain": "es",
20
  "Italy": "it",
 
21
  "Netherlands": "nl",
22
- "Portugal": "pt",
23
- "Thailand": "th",
24
- "Vietnam": "vi",
25
  "Indonesia": "id",
26
  "Malaysia": "ms",
27
- "Saudi Arabia": "ar",
28
- "United Arab Emirates": "ar",
29
- "Egypt": "ar",
30
- "Morocco": "ar",
31
- "Greece": "el",
 
 
 
32
  "Poland": "pl",
 
 
 
33
  "Czech Republic": "cs",
 
34
  "Hungary": "hu",
35
- "Turkey": "tr",
36
  "Romania": "ro",
37
- "Bulgaria": "bg",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  "Croatia": "hr",
39
- "Serbia": "sr",
40
  "Slovakia": "sk",
41
- "Slovenia": "sl",
 
42
  "Estonia": "et",
43
  "Latvia": "lv",
44
- "Lithuania": "lt"
 
 
 
 
 
45
  }
46
 
47
  COUNTRY_LOCATIONS = {
48
  "United States": "United States",
49
  "United Kingdom": "United Kingdom",
 
50
  "Canada": "Canada",
51
  "Australia": "Australia",
52
  "Germany": "Germany",
@@ -118,24 +157,66 @@ MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS.keys())
118
 
119
  def translate_query(query, country):
120
  try:
 
 
 
 
 
 
121
  if country in COUNTRY_LANGUAGES:
 
 
 
 
 
122
  target_lang = COUNTRY_LANGUAGES[country]
123
- prompt = f"Translate the following English text to {target_lang} language. Only output the translated text without any explanations or quotes: {query}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- translated = hf_client.text_generation(
126
- prompt,
127
- max_new_tokens=100,
128
- temperature=0.3
129
- )
130
- return translated.strip()
131
  return query
 
132
  except Exception as e:
133
- print(f"Translation error: {str(e)}")
134
  return query
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  def search_serphouse(query, country, page=1, num_result=10):
137
  url = "https://api.serphouse.com/serp/live"
138
 
 
 
 
 
 
139
  # 검색어 λ²ˆμ—­
140
  translated_query = translate_query(query, country)
141
  print(f"Original query: {query}")
@@ -143,14 +224,16 @@ def search_serphouse(query, country, page=1, num_result=10):
143
 
144
  payload = {
145
  "data": {
146
- "q": query,
147
  "domain": "google.com",
148
  "loc": COUNTRY_LOCATIONS.get(country, "United States"),
149
- "lang": "en",
150
  "device": "desktop",
151
  "serp_type": "news",
152
  "page": "1",
153
- "num": "10"
 
 
154
  }
155
  }
156
 
@@ -162,7 +245,7 @@ def search_serphouse(query, country, page=1, num_result=10):
162
 
163
  try:
164
  response = requests.post(url, json=payload, headers=headers)
165
- print("Request payload:", json.dumps(payload, indent=2))
166
  print("Response status:", response.status_code)
167
 
168
  response.raise_for_status()
@@ -206,16 +289,19 @@ css = """
206
  footer {visibility: hidden;}
207
  """
208
 
209
- # Gradio μΈν„°νŽ˜μ΄μŠ€ μˆ˜μ •
210
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI μ„œλΉ„μŠ€") as iface:
211
- gr.Markdown("검색어λ₯Ό μž…λ ₯ν•˜κ³  μ›ν•˜λŠ” κ΅­κ°€λ₯Ό μ„ νƒν•˜λ©΄, 검색어와 μΌμΉ˜ν•˜λŠ” 24μ‹œκ°„ 이내 λ‰΄μŠ€λ₯Ό μ΅œλŒ€ 100개 좜λ ₯ν•©λ‹ˆλ‹€.")
212
-
 
213
  with gr.Column():
214
  with gr.Row():
215
  query = gr.Textbox(label="검색어")
216
  country = gr.Dropdown(MAJOR_COUNTRIES, label="κ΅­κ°€", value="South Korea")
217
 
218
- # λ²ˆμ—­ κ²°κ³Όλ₯Ό λ³΄μ—¬μ£ΌλŠ” μ»΄ν¬λ„ŒνŠΈ μΆ”κ°€
 
 
 
219
  translated_query_display = gr.Markdown(visible=False)
220
 
221
  search_button = gr.Button("검색", variant="primary")
@@ -242,6 +328,9 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI μ„œλΉ„μŠ€") as
242
  })
243
 
244
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
 
 
 
245
  progress(0, desc="검색어 λ²ˆμ—­ 쀑...")
246
 
247
  # 검색어 λ²ˆμ—­
@@ -252,7 +341,10 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI μ„œλΉ„μŠ€") as
252
  error_message, articles = serphouse_search(query, country)
253
  progress(0.5, desc="κ²°κ³Ό 처리 쀑...")
254
 
255
- outputs = [gr.update(value=translated_display, visible=True)] # λ²ˆμ—­ κ²°κ³Ό ν‘œμ‹œ
 
 
 
256
 
257
  if error_message:
258
  outputs.append(gr.update(value=error_message, visible=True))
@@ -289,9 +381,18 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI μ„œλΉ„μŠ€") as
289
  progress(1.0, desc="μ™„λ£Œ!")
290
  outputs.append(articles_state)
291
  outputs.append(gr.update(visible=False))
 
 
 
 
 
292
  return outputs
293
 
294
- search_outputs = [translated_query_display, gr.Markdown(visible=False)] # λ²ˆμ—­ κ²°κ³Ό μ»΄ν¬λ„ŒνŠΈ μΆ”κ°€
 
 
 
 
295
  for comp in article_components:
296
  search_outputs.extend([comp['group'], comp['title'], comp['image'],
297
  comp['snippet'], comp['info']])
 
5
  from datetime import datetime, timedelta
6
  from huggingface_hub import InferenceClient
7
 
8
+
9
+
10
  API_KEY = os.getenv("SERPHOUSE_API_KEY")
11
+ # hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
12
 
13
# Per-country Google language codes used for query translation and the
# SERPHouse "lang" parameter. Keys must match COUNTRY_LOCATIONS entries.
COUNTRY_LANGUAGES = {
    # North America / Oceania / UK
    "United States": "en",
    "United Kingdom": "en",
    "Taiwan": "zh-TW",  # Traditional Chinese
    "Canada": "en",
    "Australia": "en",
    # Western Europe
    "Germany": "de",
    "France": "fr",
    # East Asia
    "Japan": "ja",
    "South Korea": "ko",
    "China": "zh",
    # South Asia / Latin America
    "India": "hi",
    "Brazil": "pt",
    "Mexico": "es",
    "Russia": "ru",
    "Italy": "it",
    "Spain": "es",
    "Netherlands": "nl",
    "Singapore": "en",
    "Hong Kong": "zh-HK",
    # Southeast Asia
    "Indonesia": "id",
    "Malaysia": "ms",
    "Philippines": "tl",
    "Thailand": "th",
    "Vietnam": "vi",
    # Northern / Central Europe
    "Belgium": "nl",
    "Denmark": "da",
    "Finland": "fi",
    "Ireland": "en",
    "Norway": "no",
    "Poland": "pl",
    "Sweden": "sv",
    "Switzerland": "de",
    "Austria": "de",
    "Czech Republic": "cs",
    "Greece": "el",
    "Hungary": "hu",
    "Portugal": "pt",
    "Romania": "ro",
    "Turkey": "tr",
    # Middle East / Africa
    "Israel": "he",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "South Africa": "en",
    # South America
    "Argentina": "es",
    "Chile": "es",
    "Colombia": "es",
    "Peru": "es",
    "Venezuela": "es",
    "New Zealand": "en",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Egypt": "ar",
    "Morocco": "ar",
    "Nigeria": "en",
    "Kenya": "sw",
    "Ukraine": "uk",
    # Balkans / Baltics / small EU states
    "Croatia": "hr",
    "Slovakia": "sk",
    "Bulgaria": "bg",
    "Serbia": "sr",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Slovenia": "sl",
    "Luxembourg": "fr",
    "Malta": "mt",
    "Cyprus": "el",
    "Iceland": "is"
}
84
 
85
  COUNTRY_LOCATIONS = {
86
  "United States": "United States",
87
  "United Kingdom": "United Kingdom",
88
+ "Taiwan": "Taiwan", # κ΅­κ°€λͺ… μ‚¬μš©
89
  "Canada": "Canada",
90
  "Australia": "Australia",
91
  "Germany": "Germany",
 
157
 
158
def translate_query(query, country):
    """Translate *query* into the language mapped for *country*.

    Uses the unofficial Google Translate "gtx" endpoint (no API key).

    Returns the translated string, or the original *query* unchanged when:
    - the query is already English (per ``is_english``),
    - *country* is "South Korea" (Korean input is searched verbatim),
    - *country* has no entry in ``COUNTRY_LANGUAGES``, or
    - any error occurs (network failure, unexpected response shape, ...).
    """
    try:
        # English input is searched as-is; no translation needed.
        if is_english(query):
            print(f"μ˜μ–΄ 검색어 감지 - 원본 μ‚¬μš©: {query}")
            return query

        # Only translate for countries we have a language code for.
        if country in COUNTRY_LANGUAGES:
            # Korean input with South Korea selected is used verbatim.
            if country == "South Korea":
                print(f"ν•œκ΅­ 선택 - 원본 μ‚¬μš©: {query}")
                return query

            target_lang = COUNTRY_LANGUAGES[country]
            print(f"λ²ˆμ—­ μ‹œλ„: {query} -> {country}({target_lang})")

            # Unofficial Google Translate endpoint.
            url = "https://translate.googleapis.com/translate_a/single"

            params = {
                "client": "gtx",
                "sl": "auto",       # auto-detect the source language
                "tl": target_lang,  # target language
                "dt": "t",
                "q": query
            }

            # timeout added so a hung connection cannot block the search
            # indefinitely; a timeout falls through to the except handler
            # and the untranslated query is used instead.
            response = requests.get(url, params=params, timeout=10)
            # Response shape: [[[translated, original, ...], ...], ...]
            translated_text = response.json()[0][0][0]

            print(f"λ²ˆμ—­ μ™„λ£Œ: {query} -> {translated_text} ({country})")
            return translated_text

        return query

    except Exception as e:
        # Best-effort: any failure degrades to searching the raw query.
        print(f"λ²ˆμ—­ 였λ₯˜: {str(e)}")
        return query
197
 
198
+ def is_english(text):
199
+ # μ˜μ–΄μ™€ 일반적인 기호만 ν¬ν•¨λœ 경우 True λ°˜ν™˜
200
+ return all(ord(char) < 128 for char in text.replace(' ', '').replace('-', '').replace('_', ''))
201
+
202
+
203
+ def is_korean(text):
204
+ return any('\uAC00' <= char <= '\uD7A3' for char in text)
205
+
206
+ def is_english(text):
207
+ return all(ord(char) < 128 for char in text.replace(' ', ''))
208
+
209
+ def is_korean(text):
210
+ return any('\uAC00' <= char <= '\uD7A3' for char in text)
211
+
212
  def search_serphouse(query, country, page=1, num_result=10):
213
  url = "https://api.serphouse.com/serp/live"
214
 
215
+ # 24μ‹œκ°„ 이내 λ‚ μ§œ λ²”μœ„ μ„€μ •
216
+ now = datetime.utcnow()
217
+ yesterday = now - timedelta(days=1)
218
+ date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"
219
+
220
  # 검색어 λ²ˆμ—­
221
  translated_query = translate_query(query, country)
222
  print(f"Original query: {query}")
 
224
 
225
  payload = {
226
  "data": {
227
+ "q": translated_query, # μ—¬κΈ°λ₯Ό λ²ˆμ—­λœ κ²€μƒ‰μ–΄λ‘œ λ³€κ²½
228
  "domain": "google.com",
229
  "loc": COUNTRY_LOCATIONS.get(country, "United States"),
230
+ "lang": COUNTRY_LANGUAGES.get(country, "en"), # μ–Έμ–΄ μ½”λ“œλ„ ν•΄λ‹Ή κ΅­κ°€ μ–Έμ–΄λ‘œ λ³€κ²½
231
  "device": "desktop",
232
  "serp_type": "news",
233
  "page": "1",
234
+ "num": "10",
235
+ "date_range": date_range,
236
+ "sort_by": "date"
237
  }
238
  }
239
 
 
245
 
246
  try:
247
  response = requests.post(url, json=payload, headers=headers)
248
+ print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False)) # ensure_ascii=False μΆ”κ°€
249
  print("Response status:", response.status_code)
250
 
251
  response.raise_for_status()
 
289
  footer {visibility: hidden;}
290
  """
291
 
 
292
  with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI μ„œλΉ„μŠ€") as iface:
293
+ gr.Markdown("검색어λ₯Ό μž…λ ₯ν•˜κ³  μ›ν•˜λŠ” κ΅­κ°€(67개ꡭ)λ₯Ό μ„ νƒν•˜λ©΄, 검색어와 μΌμΉ˜ν•˜λŠ” 24μ‹œκ°„ 이내 λ‰΄μŠ€λ₯Ό μ΅œλŒ€ 100개 좜λ ₯ν•©λ‹ˆλ‹€.")
294
+ gr.Markdown("κ΅­κ°€ 선택후 검색어에 'ν•œκΈ€'을 μž…λ ₯ν•˜λ©΄ ν˜„μ§€ μ–Έμ–΄λ‘œ λ²ˆμ—­λ˜μ–΄ κ²€μƒ‰ν•©λ‹ˆλ‹€. 예: 'Taiwan' κ΅­κ°€ 선택후 'μ‚Όμ„±' μž…λ ₯μ‹œ 'δΈ‰ζ˜Ÿ'으둜 μžλ™ 검색 ")
295
+
296
  with gr.Column():
297
  with gr.Row():
298
  query = gr.Textbox(label="검색어")
299
  country = gr.Dropdown(MAJOR_COUNTRIES, label="κ΅­κ°€", value="South Korea")
300
 
301
+ # 검색 μƒνƒœ λ©”μ‹œμ§€ μ»΄ν¬λ„ŒνŠΈ μΆ”κ°€
302
+ search_status = gr.Markdown(visible=False)
303
+
304
+ # λ²ˆμ—­ κ²°κ³Όλ₯Ό λ³΄μ—¬μ£ΌλŠ” μ»΄ν¬λ„ŒνŠΈ
305
  translated_query_display = gr.Markdown(visible=False)
306
 
307
  search_button = gr.Button("검색", variant="primary")
 
328
  })
329
 
330
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
331
+ # 검색 μ‹œμž‘ λ©”μ‹œμ§€ ν‘œμ‹œ
332
+ search_status_output = gr.update(value="검색을 μ§„ν–‰μ€‘μž…λ‹ˆλ‹€. μž μ‹œλ§Œ κΈ°λ‹€λ¦¬μ„Έμš”...", visible=True)
333
+
334
  progress(0, desc="검색어 λ²ˆμ—­ 쀑...")
335
 
336
  # 검색어 λ²ˆμ—­
 
341
  error_message, articles = serphouse_search(query, country)
342
  progress(0.5, desc="κ²°κ³Ό 처리 쀑...")
343
 
344
+ outputs = [
345
+ search_status_output, # 검색 μƒνƒœ λ©”μ‹œμ§€
346
+ gr.update(value=translated_display, visible=True) # λ²ˆμ—­ κ²°κ³Ό ν‘œμ‹œ
347
+ ]
348
 
349
  if error_message:
350
  outputs.append(gr.update(value=error_message, visible=True))
 
381
  progress(1.0, desc="μ™„λ£Œ!")
382
  outputs.append(articles_state)
383
  outputs.append(gr.update(visible=False))
384
+
385
+ # 검색 μ™„λ£Œ ν›„ μƒνƒœ λ©”μ‹œμ§€ μˆ¨κΉ€
386
+ search_status_output = gr.update(visible=False)
387
+ outputs[0] = search_status_output
388
+
389
  return outputs
390
 
391
+ search_outputs = [
392
+ search_status, # 검색 μƒνƒœ λ©”μ‹œμ§€ 좜λ ₯ μΆ”κ°€
393
+ translated_query_display,
394
+ gr.Markdown(visible=False)
395
+ ]
396
  for comp in article_components:
397
  search_outputs.extend([comp['group'], comp['title'], comp['image'],
398
  comp['snippet'], comp['info']])