Shreyas094 committed on
Commit
1f8184f
1 Parent(s): f1dc47a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -22
app.py CHANGED
@@ -318,12 +318,16 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
318
  "content": clean_content,
319
  "summary": full_summary,
320
  "cleaned_summary": cleaned_summary,
321
- "relevance_score": relevance_score # Ensure this line is present
322
  }
323
  processed_articles.append(processed_article)
324
  except Exception as e:
325
  print(f"Error processing article: {str(e)}")
326
 
 
 
 
 
327
 
328
  if not processed_articles:
329
  return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
@@ -347,7 +351,11 @@ def process_news(query, temperature, top_p, repetition_penalty, news_source):
347
 
348
  # Update news_database for excel export
349
  global news_database
350
- news_database.extend(processed_articles)
 
 
 
 
351
 
352
  return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
353
  except Exception as e:
@@ -454,11 +462,15 @@ def fetch_golomt_bank_news(num_results=10):
454
 
455
  def export_news_to_excel():
456
  global news_database
457
- df = pd.DataFrame(news_database)
458
 
459
- # Use the cleaned summary for the Excel export
460
- df['summary'] = df['cleaned_summary']
461
- df = df.drop(columns=['cleaned_summary']) # Remove the extra column
 
 
 
 
 
462
 
463
  # Ensure relevance_score is present and convert to float
464
  if 'relevance_score' not in df.columns:
@@ -466,28 +478,22 @@ def export_news_to_excel():
466
  else:
467
  df['relevance_score'] = pd.to_numeric(df['relevance_score'], errors='coerce').fillna(0.0)
468
 
 
 
 
 
 
469
  # Reorder columns to put relevance_score after summary
470
  columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
471
- df = df[columns]
 
 
 
472
 
473
  with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
474
  excel_path = tmp.name
475
-
476
- # First, save the DataFrame to Excel
477
  df.to_excel(excel_path, index=False, engine='openpyxl')
478
-
479
- # Then, load the workbook and modify the relevance_score column
480
- wb = load_workbook(excel_path)
481
- ws = wb.active
482
-
483
- for row in dataframe_to_rows(df, index=False, header=True):
484
- ws.append(row)
485
-
486
- # Format the relevance_score column as numbers
487
- for cell in ws['F'][1:]: # Assuming relevance_score is in column F
488
- cell.number_format = '0.00'
489
-
490
- wb.save(excel_path)
491
 
492
  return excel_path
493
 
 
318
  "content": clean_content,
319
  "summary": full_summary,
320
  "cleaned_summary": cleaned_summary,
321
+ "relevance_score": relevance_score
322
  }
323
  processed_articles.append(processed_article)
324
  except Exception as e:
325
  print(f"Error processing article: {str(e)}")
326
 
327
+ # Debug print
328
+ print("Processed articles:")
329
+ for article in processed_articles:
330
+ print(f"Title: {article['title']}, Score: {article['relevance_score']}")
331
 
332
  if not processed_articles:
333
  return f"Failed to process any news articles from {news_source}. Please try again or check the summarization process."
 
351
 
352
  # Update news_database for excel export
353
  global news_database
354
+ news_database = processed_articles # Directly assign the processed articles
355
+
356
+ print("Updated news_database:")
357
+ for article in news_database:
358
+ print(f"Title: {article['title']}, Score: {article['relevance_score']}")
359
 
360
  return f"Processed and added {len(processed_articles)} news articles from {news_source} to the database."
361
  except Exception as e:
 
462
 
463
  def export_news_to_excel():
464
  global news_database
 
465
 
466
+ if not news_database:
467
+ return "No articles to export. Please fetch news first."
468
+
469
+ print("Exporting the following articles:")
470
+ for article in news_database:
471
+ print(f"Title: {article['title']}, Score: {article.get('relevance_score', 'N/A')}")
472
+
473
+ df = pd.DataFrame(news_database)
474
 
475
  # Ensure relevance_score is present and convert to float
476
  if 'relevance_score' not in df.columns:
 
478
  else:
479
  df['relevance_score'] = pd.to_numeric(df['relevance_score'], errors='coerce').fillna(0.0)
480
 
481
+ # Use the cleaned summary for the Excel export
482
+ if 'cleaned_summary' in df.columns:
483
+ df['summary'] = df['cleaned_summary']
484
+ df = df.drop(columns=['cleaned_summary'])
485
+
486
  # Reorder columns to put relevance_score after summary
487
  columns = ['published_date', 'title', 'url', 'content', 'summary', 'relevance_score']
488
+ df = df[[col for col in columns if col in df.columns]]
489
+
490
+ print("Final DataFrame before export:")
491
+ print(df[['title', 'relevance_score']])
492
 
493
  with NamedTemporaryFile(delete=False, suffix='.xlsx') as tmp:
494
  excel_path = tmp.name
 
 
495
  df.to_excel(excel_path, index=False, engine='openpyxl')
496
+ print(f"Excel file saved to: {excel_path}")
 
 
 
 
 
 
 
 
 
 
 
 
497
 
498
  return excel_path
499