openfree committed on
Commit
eaa16ac
·
verified ·
1 Parent(s): 5c27f2d

Update app-backup-HN.py

Browse files
Files changed (1) hide show
  1. app-backup-HN.py +161 -33
app-backup-HN.py CHANGED
@@ -5,6 +5,11 @@ import os
5
  from datetime import datetime, timedelta
6
  from huggingface_hub import InferenceClient
7
 
 
 
 
 
 
8
  MAX_COUNTRY_RESULTS = 100 # ๊ตญ๊ฐ€๋ณ„ ์ตœ๋Œ€ ๊ฒฐ๊ณผ ์ˆ˜
9
  MAX_GLOBAL_RESULTS = 1000 # ์ „์„ธ๊ณ„ ์ตœ๋Œ€ ๊ฒฐ๊ณผ ์ˆ˜
10
 
@@ -311,6 +316,9 @@ def serphouse_search(query, country):
311
  return format_results_from_raw(response_data)
312
 
313
 
 
 
 
314
  # Hacker News API ๊ด€๋ จ ํ•จ์ˆ˜๋“ค ๋จผ์ € ์ถ”๊ฐ€
315
  def get_hn_item(item_id):
316
  """๊ฐœ๋ณ„ ์•„์ดํ…œ ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ"""
@@ -351,47 +359,170 @@ def format_hn_time(timestamp):
351
  except:
352
  return "Unknown time"
353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  def refresh_hn_stories():
355
- """Hacker News ์Šคํ† ๋ฆฌ ์ƒˆ๋กœ๊ณ ์นจ"""
356
  status_msg = "Hacker News ํฌ์ŠคํŠธ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ์ค‘..."
357
-
358
  outputs = [gr.update(value=status_msg, visible=True)]
359
 
360
- # ๋ชจ๋“  ์ปดํฌ๋„ŒํŠธ ์ดˆ๊ธฐํ™”
361
- for _ in hn_article_components:
362
  outputs.extend([
363
  gr.update(visible=False),
364
  gr.update(),
365
  gr.update()
366
  ])
367
 
 
 
368
  # ์ตœ์‹  ์Šคํ† ๋ฆฌ ๊ฐ€์ ธ์˜ค๊ธฐ
369
  stories = get_recent_stories()
 
370
 
371
- # ๊ฒฐ๊ณผ ์—…๋ฐ์ดํŠธ
372
- outputs = [gr.update(value=f"์ด {len(stories)}๊ฐœ์˜ ํฌ์ŠคํŠธ๋ฅผ ์ฐพ์•˜์Šต๋‹ˆ๋‹ค.", visible=True)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
 
374
  for idx, comp in enumerate(hn_article_components):
375
- if idx < len(stories):
376
- story = stories[idx]
377
- outputs.extend([
378
  gr.update(visible=True),
379
  gr.update(value=f"### [{story.get('title', 'Untitled')}]({story.get('url', '#')})"),
380
- gr.update(value=f"**์ž‘์„ฑ์ž:** {story.get('by', 'unknown')} | **์‹œ๊ฐ„:** {format_hn_time(story.get('time', 0))} | **์ ์ˆ˜:** {story.get('score', 0)} | **๋Œ“๊ธ€:** {len(story.get('kids', []))}๊ฐœ")
 
 
 
 
 
 
381
  ])
382
  else:
383
- outputs.extend([
384
  gr.update(visible=False),
385
  gr.update(),
386
  gr.update()
387
  ])
388
 
389
- return outputs
390
-
391
 
392
  css = """
393
  footer {visibility: hidden;}
394
-
395
  #status_area {
396
  background: rgba(255, 255, 255, 0.9); /* ์•ฝ๊ฐ„ ํˆฌ๋ช…ํ•œ ํฐ์ƒ‰ ๋ฐฐ๊ฒฝ */
397
  padding: 15px;
@@ -399,40 +530,33 @@ footer {visibility: hidden;}
399
  margin-bottom: 20px;
400
  box-shadow: 0 2px 5px rgba(0,0,0,0.1); /* ๋ถ€๋“œ๋Ÿฌ์šด ๊ทธ๋ฆผ์ž ํšจ๊ณผ */
401
  }
402
-
403
  #results_area {
404
  padding: 10px;
405
  margin-top: 10px;
406
  }
407
-
408
  /* ํƒญ ์Šคํƒ€์ผ ๊ฐœ์„  */
409
  .tabs {
410
  border-bottom: 2px solid #ddd !important;
411
  margin-bottom: 20px !important;
412
  }
413
-
414
  .tab-nav {
415
  border-bottom: none !important;
416
  margin-bottom: 0 !important;
417
  }
418
-
419
  .tab-nav button {
420
  font-weight: bold !important;
421
  padding: 10px 20px !important;
422
  }
423
-
424
  .tab-nav button.selected {
425
  border-bottom: 2px solid #1f77b4 !important; /* ์„ ํƒ๋œ ํƒญ ๊ฐ•์กฐ */
426
  color: #1f77b4 !important;
427
  }
428
-
429
  /* ๊ฒ€์ƒ‰ ์ƒํƒœ ๋ฉ”์‹œ์ง€ ์Šคํƒ€์ผ */
430
  #status_area .markdown-text {
431
  font-size: 1.1em;
432
  color: #2c3e50;
433
  padding: 10px 0;
434
  }
435
-
436
  /* ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ปจํ…Œ์ด๋„ˆ ์Šคํƒ€์ผ */
437
  .group {
438
  border: 1px solid #eee;
@@ -441,13 +565,11 @@ footer {visibility: hidden;}
441
  border-radius: 5px;
442
  background: white;
443
  }
444
-
445
  /* ๊ฒ€์ƒ‰ ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
446
  .primary-btn {
447
  background: #1f77b4 !important;
448
  border: none !important;
449
  }
450
-
451
  /* ๊ฒ€์ƒ‰์–ด ์ž…๋ ฅ์ฐฝ ์Šคํƒ€์ผ */
452
  .textbox {
453
  border: 1px solid #ddd !important;
@@ -465,7 +587,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
465
  with gr.Column():
466
  with gr.Row():
467
  query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
468
- country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
469
 
470
  status_message = gr.Markdown("", visible=True)
471
  translated_query_display = gr.Markdown(visible=False)
@@ -525,23 +647,24 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
525
  'index': i,
526
  })
527
 
 
528
  # AI ๋ฆฌํฌํ„ฐ ํƒญ
529
  with gr.Tab("AI ๋ฆฌํฌํ„ฐ"):
530
- gr.Markdown("์ง€๋‚œ 24์‹œ๊ฐ„ ๋™์•ˆ์˜ Hacker News ํฌ์ŠคํŠธ๋ฅผ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค.")
531
-
532
  with gr.Column():
533
  refresh_button = gr.Button("์ƒˆ๋กœ๊ณ ์นจ", variant="primary")
534
  status_message_hn = gr.Markdown("")
535
 
536
  with gr.Column(elem_id="hn_results_area"):
537
  hn_articles_state = gr.State([])
538
-
539
  hn_article_components = []
540
- for i in range(100):
541
  with gr.Group(visible=False) as article_group:
542
  title = gr.Markdown()
543
  info = gr.Markdown()
544
-
545
  hn_article_components.append({
546
  'group': article_group,
547
  'title': title,
@@ -551,9 +674,6 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
551
 
552
 
553
 
554
-
555
-
556
-
557
 
558
  # ๊ธฐ์กด ํ•จ์ˆ˜๋“ค
559
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
@@ -750,4 +870,12 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as
750
  outputs=hn_outputs
751
  )
752
 
753
- iface.launch(auth=("it1","chosun1"))
 
 
 
 
 
 
 
 
 
5
  from datetime import datetime, timedelta
6
  from huggingface_hub import InferenceClient
7
 
8
+ from bs4 import BeautifulSoup
9
+ import concurrent.futures
10
+ import time
11
+ import re
12
+
13
  MAX_COUNTRY_RESULTS = 100 # ๊ตญ๊ฐ€๋ณ„ ์ตœ๋Œ€ ๊ฒฐ๊ณผ ์ˆ˜
14
  MAX_GLOBAL_RESULTS = 1000 # ์ „์„ธ๊ณ„ ์ตœ๋Œ€ ๊ฒฐ๊ณผ ์ˆ˜
15
 
 
316
  return format_results_from_raw(response_data)
317
 
318
 
319
+
320
+
321
+
322
  # Hacker News API ๊ด€๋ จ ํ•จ์ˆ˜๋“ค ๋จผ์ € ์ถ”๊ฐ€
323
  def get_hn_item(item_id):
324
  """๊ฐœ๋ณ„ ์•„์ดํ…œ ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ"""
 
359
  except:
360
  return "Unknown time"
361
 
362
+
363
def clean_text(text):
    """Strip HTML tags from ``text`` and normalise its whitespace.

    Tags are removed *before* whitespace is collapsed; doing it the other
    way round leaves double spaces wherever a tag sat between two spaces.

    Args:
        text: Raw string that may contain HTML markup. ``None`` or an empty
            string yields ``''``.

    Returns:
        The text without tags, with every whitespace run reduced to a single
        space and no leading/trailing whitespace.
    """
    if not text:
        return ''

    without_tags = re.sub(r'<[^>]+>', '', text)
    return re.sub(r'\s+', ' ', without_tags).strip()
368
+
369
def get_article_content(url):
    """Download ``url`` and return the text of its ``<p>`` elements.

    URLs that are empty or contain ``github.com`` / ``twitter.com`` are
    skipped without any request being made.

    Args:
        url: Address of the article to scrape.

    Returns:
        Up to 4000 characters of cleaned paragraph text, or ``None`` when the
        URL is skipped, the request fails or returns an HTTP error status,
        or the page has no paragraph text.
    """
    if not url or 'github.com' in url or 'twitter.com' in url:
        return None

    try:
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        response = requests.get(url, headers=headers, timeout=10)
        # Without this, 4xx/5xx error pages would be scraped and summarised
        # as if they were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Drop elements that never carry article body text.
        for tag in soup(['script', 'style', 'nav', 'footer', 'header']):
            tag.decompose()

        paragraphs = soup.find_all('p')
        text = clean_text(' '.join(p.get_text() for p in paragraphs))

        # Cap the length of the text; an empty result is reported as None so
        # callers get one consistent "no content" value.
        return text[:4000] or None
    except Exception as e:
        # Best-effort scraping: log the failure and report "no content".
        print(f"Scraping error for {url}: {str(e)}")
        return None
391
+
392
def generate_summary(text):
    """Ask the text-generation client for a 2-3 sentence summary of ``text``.

    Args:
        text: Article text to summarise.

    Returns:
        The generated summary with surrounding whitespace removed, or
        ``None`` when ``text`` is empty/blank, the model returns nothing
        usable, or the request raises.
    """
    # Nothing to summarise: avoid a pointless model call.
    if not text or not text.strip():
        return None

    prompt = (
        "๋ฐ˜๋“œ์‹œ ํ•œ๊ธ€(ํ•œ๊ตญ์–ด)๋กœ ์ž‘์„ฑํ•˜๋ผ. Please analyze and summarize the following text in 2-3 sentences.\n"
        "Focus on the main points and key information:\n"
        "Text: {text}\n"
        "\n"
        "Summary:"
    )

    try:
        response = hf_client.text_generation(
            prompt.format(text=text),
            max_new_tokens=500,
            temperature=0.5,
            repetition_penalty=1.2
        )
        # A blank reply is not a summary; report it as None so callers'
        # truthiness checks treat it like any other failure.
        summary = (response or '').strip()
        return summary or None
    except Exception as e:
        print(f"Summary generation error: {str(e)}")
        return None
414
+
415
def process_hn_story(story, progress=None):
    """Scrape, summarise and translate a single Hacker News story.

    Args:
        story: Story dict; its ``'url'`` entry is the page to scrape.
        progress: Accepted for interface compatibility; not used here.

    Returns:
        A ``(story, summary)`` tuple. ``summary`` is ``None`` when the story
        has no URL, when scraping or summarising yields nothing, or when any
        step raises.
    """
    translated = None
    try:
        link = story.get('url')
        # Each stage only runs if the previous one produced something.
        article = get_article_content(link) if link else None
        draft = generate_summary(article) if article else None
        if draft:
            translated = translate_to_korean(draft)
    except Exception as e:
        print(f"Story processing error: {str(e)}")
        translated = None

    return story, translated
436
+
437
def _build_hn_outputs(status_text, processed_stories):
    """Build one complete list of Gradio updates for the Hacker News tab.

    The list starts with the status-message update, followed by three
    updates (group visibility, title, info) per entry of
    ``hn_article_components``. Slots beyond ``processed_stories`` are hidden.
    """
    outputs = [gr.update(value=status_text, visible=True)]

    for idx, _ in enumerate(hn_article_components):
        if idx < len(processed_stories):
            story, summary = processed_stories[idx]
            info = (
                "\n"
                f"**์ž‘์„ฑ์ž:** {story.get('by', 'unknown')} |\n"
                f"**์‹œ๊ฐ„:** {format_hn_time(story.get('time', 0))} |\n"
                f"**์ ์ˆ˜:** {story.get('score', 0)} |\n"
                f"**๋Œ“๊ธ€:** {len(story.get('kids', []))}๊ฐœ\n\n"
                f"**AI ์š”์•ฝ:** {summary}\n"
            )
            outputs.extend([
                gr.update(visible=True),
                gr.update(value=f"### [{story.get('title', 'Untitled')}]({story.get('url', '#')})"),
                gr.update(value=info)
            ])
        else:
            outputs.extend([
                gr.update(visible=False),
                gr.update(),
                gr.update()
            ])

    return outputs


def refresh_hn_stories():
    """Refresh the Hacker News tab, streaming results as they complete.

    This is a generator: it first yields a "loading" state with every article
    slot hidden, then yields an updated state each time a story finishes
    processing, and finally yields the completed state.

    Yields:
        Lists of Gradio updates: one for the status message followed by three
        per entry of ``hn_article_components``.
    """
    status_msg = "Hacker News ํฌ์ŠคํŠธ๋ฅผ ๊ฐ€์ ธ์˜ค๋Š” ์ค‘..."

    # Reset every component before any work starts.
    yield _build_hn_outputs(status_msg, [])

    # Fetch the latest stories; only the first 100 are processed, so the
    # progress total must be the size of that batch, not of the whole feed.
    stories = get_recent_stories() or []
    batch = stories[:100]
    total = len(batch)
    processed_count = 0

    # Stories with a summary, most recently completed first.
    processed_stories = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        futures = [executor.submit(process_hn_story, story) for story in batch]

        for future in concurrent.futures.as_completed(futures):
            story, summary = future.result()
            processed_count += 1

            if summary:
                # Put the newest result at the front of the list.
                processed_stories.insert(0, (story, summary))

            # Emit the results gathered so far.
            yield _build_hn_outputs(
                f"์ฒ˜๋ฆฌ ์ค‘... ({processed_count}/{total})",
                processed_stories
            )

    # Final status update.
    yield _build_hn_outputs(
        f"์ด {len(processed_stories)}๊ฐœ์˜ ํฌ์ŠคํŠธ๊ฐ€ ์ฒ˜๋ฆฌ๋˜์—ˆ์Šต๋‹ˆ๋‹ค.",
        processed_stories
    )
 
523
 
524
  css = """
525
  footer {visibility: hidden;}
 
526
  #status_area {
527
  background: rgba(255, 255, 255, 0.9); /* ์•ฝ๊ฐ„ ํˆฌ๋ช…ํ•œ ํฐ์ƒ‰ ๋ฐฐ๊ฒฝ */
528
  padding: 15px;
 
530
  margin-bottom: 20px;
531
  box-shadow: 0 2px 5px rgba(0,0,0,0.1); /* ๋ถ€๋“œ๋Ÿฌ์šด ๊ทธ๋ฆผ์ž ํšจ๊ณผ */
532
  }
 
533
  #results_area {
534
  padding: 10px;
535
  margin-top: 10px;
536
  }
 
537
  /* ํƒญ ์Šคํƒ€์ผ ๊ฐœ์„  */
538
  .tabs {
539
  border-bottom: 2px solid #ddd !important;
540
  margin-bottom: 20px !important;
541
  }
 
542
  .tab-nav {
543
  border-bottom: none !important;
544
  margin-bottom: 0 !important;
545
  }
 
546
  .tab-nav button {
547
  font-weight: bold !important;
548
  padding: 10px 20px !important;
549
  }
 
550
  .tab-nav button.selected {
551
  border-bottom: 2px solid #1f77b4 !important; /* ์„ ํƒ๋œ ํƒญ ๊ฐ•์กฐ */
552
  color: #1f77b4 !important;
553
  }
 
554
  /* ๊ฒ€์ƒ‰ ์ƒํƒœ ๋ฉ”์‹œ์ง€ ์Šคํƒ€์ผ */
555
  #status_area .markdown-text {
556
  font-size: 1.1em;
557
  color: #2c3e50;
558
  padding: 10px 0;
559
  }
 
560
  /* ๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ ์ปจํ…Œ์ด๋„ˆ ์Šคํƒ€์ผ */
561
  .group {
562
  border: 1px solid #eee;
 
565
  border-radius: 5px;
566
  background: white;
567
  }
 
568
  /* ๊ฒ€์ƒ‰ ๋ฒ„ํŠผ ์Šคํƒ€์ผ */
569
  .primary-btn {
570
  background: #1f77b4 !important;
571
  border: none !important;
572
  }
 
573
  /* ๊ฒ€์ƒ‰์–ด ์ž…๋ ฅ์ฐฝ ์Šคํƒ€์ผ */
574
  .textbox {
575
  border: 1px solid #ddd !important;
 
587
  with gr.Column():
588
  with gr.Row():
589
  query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
590
+ country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="United States")
591
 
592
  status_message = gr.Markdown("", visible=True)
593
  translated_query_display = gr.Markdown(visible=False)
 
647
  'index': i,
648
  })
649
 
650
+
651
  # AI ๋ฆฌํฌํ„ฐ ํƒญ
652
  with gr.Tab("AI ๋ฆฌํฌํ„ฐ"):
653
+ gr.Markdown("์ง€๋‚œ 24์‹œ๊ฐ„ ๋™์•ˆ์˜ Hacker News ํฌ์ŠคํŠธ๋ฅผ AI๊ฐ€ ์š”์•ฝํ•˜์—ฌ ๋ณด์—ฌ์ค๋‹ˆ๋‹ค.")
654
+
655
  with gr.Column():
656
  refresh_button = gr.Button("์ƒˆ๋กœ๊ณ ์นจ", variant="primary")
657
  status_message_hn = gr.Markdown("")
658
 
659
  with gr.Column(elem_id="hn_results_area"):
660
  hn_articles_state = gr.State([])
661
+
662
  hn_article_components = []
663
+ for i in range(100): # ์ƒ์œ„ 20๊ฐœ ํฌ์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌ
664
  with gr.Group(visible=False) as article_group:
665
  title = gr.Markdown()
666
  info = gr.Markdown()
667
+
668
  hn_article_components.append({
669
  'group': article_group,
670
  'title': title,
 
674
 
675
 
676
 
 
 
 
677
 
678
  # ๊ธฐ์กด ํ•จ์ˆ˜๋“ค
679
  def search_and_display(query, country, articles_state, progress=gr.Progress()):
 
870
  outputs=hn_outputs
871
  )
872
 
873
+
874
# Start the Gradio app.
# NOTE(review): the basic-auth username/password below are hard-coded in
# source control; consider loading them from configuration instead.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,  # external sharing disabled
    auth=("it1","chosun1"),
    ssl_verify=False,  # SSL verification disabled (if needed)
    show_error=True  # show error messages
)