Spaces:
Running
Running
File size: 7,622 Bytes
b5fb8d2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
#!/usr/bin/env python3
"""
Setup script for Google Drive RAG system
This script helps you set up Google Drive authentication for the RAG news manager
"""
import os
import json
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# Configuration
# OAuth scope list passed both when loading a cached token and when starting
# a new authorization flow in setup_google_drive_credentials().
SCOPES = ['https://www.googleapis.com/auth/drive.file']
# OAuth client file downloaded from Google Cloud Console; the relative path is
# resolved against the current working directory.
CREDENTIALS_FILE = 'credentials.json'
# Where the authorized-user token is written after authentication and read
# back on later runs (also relative to the current working directory).
TOKEN_FILE = 'token.json'
def setup_google_drive_credentials():
    """Set up Google Drive credentials for local development.

    Steps, each reported on stdout:

    1. Verify that ``CREDENTIALS_FILE`` exists and contains a JSON object;
       print setup instructions if the file is missing.
    2. Load a cached token from ``TOKEN_FILE`` if present.
    3. If there is no usable token, try to refresh it; if that is not
       possible (or fails), run the interactive browser OAuth flow.
    4. Save the resulting token to ``TOKEN_FILE`` (best effort: a failure to
       save only prints a warning).
    5. Build a Drive v3 client and list at most one file as a smoke test.

    Returns:
        bool: True if credentials were obtained and the Drive smoke test
        succeeded, False on any failure along the way.
    """
    print("🔧 Setting up Google Drive credentials for RAG system...")
    print("=" * 60)

    # Check if credentials file exists
    if not os.path.exists(CREDENTIALS_FILE):
        print(f"❌ {CREDENTIALS_FILE} not found!")
        print("\n📋 To get Google Drive credentials:")
        print("1. Go to Google Cloud Console: https://console.cloud.google.com/")
        print("2. Create a new project or select existing one")
        print("3. Enable Google Drive API")
        print("4. Go to 'Credentials' → 'Create Credentials' → 'OAuth 2.0 Client IDs'")
        print("5. Choose 'Desktop application'")
        print("6. Download the JSON file and rename it to 'credentials.json'")
        print("7. Place it in this directory")
        return False
    print(f"✅ Found {CREDENTIALS_FILE}")

    # Load credentials
    try:
        with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
            creds_data = json.load(f)
    except json.JSONDecodeError:
        print("❌ Invalid JSON in credentials file")
        return False
    except (OSError, UnicodeDecodeError) as e:
        print(f"❌ Error reading credentials: {e}")
        return False
    print("✅ Credentials file is valid JSON")

    if not isinstance(creds_data, dict):
        print("❌ Error reading credentials: expected a JSON object at the top level")
        return False

    # Downloaded OAuth client files nest the client settings under an
    # "installed" (desktop) or "web" key; reading them from the top level
    # would always show 'N/A'. Fall back to the top level for flat files.
    client_config = creds_data.get('installed') or creds_data.get('web')
    if not isinstance(client_config, dict):
        client_config = creds_data
    print(f" Client ID: {str(client_config.get('client_id', 'N/A'))[:20]}...")
    print(f" Project ID: {client_config.get('project_id', 'N/A')}")

    # Authenticate
    creds = None

    # Check if token file exists
    if os.path.exists(TOKEN_FILE):
        print(f"✅ Found existing {TOKEN_FILE}")
        try:
            creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
            print("✅ Loaded existing credentials")
        except Exception as e:
            # A corrupt or incompatible token file is not fatal: fall through
            # to a fresh authorization below.
            print(f"⚠️ Error loading existing credentials: {e}")
            creds = None

    # If no valid credentials, get new ones
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            print("🔄 Refreshing expired credentials...")
            try:
                creds.refresh(Request())
                print("✅ Credentials refreshed successfully")
            except Exception as e:
                print(f"❌ Error refreshing credentials: {e}")
                creds = None

        # Re-check validity rather than only "creds is None": a cached token
        # that is invalid but cannot be refreshed (e.g. no refresh token)
        # must also go through the interactive flow instead of being saved
        # and used as-is.
        if not creds or not creds.valid:
            print("🔐 Starting OAuth flow...")
            print(" A browser window will open for authentication")
            print(" Please log in with your Google account and grant permissions")
            try:
                flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
                creds = flow.run_local_server(port=0)
                print("✅ Authentication successful!")
            except Exception as e:
                print(f"❌ Authentication failed: {e}")
                return False

        # Save credentials for next time (best effort; the in-memory
        # credentials are still usable for the test below if this fails).
        try:
            with open(TOKEN_FILE, 'w', encoding='utf-8') as token:
                token.write(creds.to_json())
            print(f"✅ Credentials saved to {TOKEN_FILE}")
        except Exception as e:
            print(f"⚠️ Warning: Could not save credentials: {e}")

    # Test the credentials
    print("\n🧪 Testing Google Drive access...")
    try:
        # Imported lazily so a missing google-api-python-client is reported
        # as a failed access test instead of breaking module import.
        from googleapiclient.discovery import build

        service = build('drive', 'v3', credentials=creds)
        # List files to test access; pageSize=1 keeps the request minimal,
        # so the reported count is at most 1.
        results = service.files().list(pageSize=1, fields="files(id, name)").execute()
        files = results.get('files', [])
        print("✅ Google Drive access successful!")
        print(f" Found {len(files)} file(s) in your Drive")
        if files:
            print(f" Sample file: {files[0]['name']}")
        return True
    except Exception as e:
        print(f"❌ Google Drive access test failed: {e}")
        return False
def test_rag_system():
    """Test the RAG system.

    Imports ``rag_news_manager`` lazily, initializes it, and prints the
    statistics it reports together with Google Drive links for the folder
    and file IDs found in those statistics.

    Returns:
        bool: True if initialization succeeded (even when no statistics
        could be retrieved); False if the module cannot be imported,
        initialization reports failure, or any other exception is raised
        (including a missing key in the statistics mapping).
    """
    print("\n🧪 Testing RAG News Manager...")
    print("=" * 40)
    try:
        from rag_news_manager import initialize_rag_system, get_rag_stats

        if not initialize_rag_system():
            print("❌ RAG system initialization failed")
            return False
        print("✅ RAG system initialized successfully!")

        # Get statistics
        stats = get_rag_stats()
        if not stats:
            # Missing statistics are only a warning; setup still counts as OK.
            print("⚠️ Could not get RAG statistics")
            return True

        print("📊 Current RAG Statistics:")
        print(f" Total entries: {stats['total_entries']}")
        print(f" Real news: {stats['real_count']}")
        print(f" Fake news: {stats['fake_count']}")
        print(f" Average confidence: {stats['avg_confidence']:.1%}")
        print(f" Google Drive folder: {stats['folder_id']}")
        print(f" Google Drive file: {stats['file_id']}")

        # Provide Google Drive links
        if stats['folder_id']:
            folder_url = f"https://drive.google.com/drive/folders/{stats['folder_id']}"
            print(f"\n📁 Google Drive RAG Folder: {folder_url}")
        if stats['file_id']:
            file_url = f"https://drive.google.com/file/d/{stats['file_id']}/view"
            print(f"📄 Google Drive RAG File: {file_url}")
    except ImportError as e:
        print(f"❌ Could not import RAG system: {e}")
        return False
    except Exception as e:
        print(f"❌ RAG system test failed: {e}")
        return False
    return True
def main():
    """Main setup function.

    Runs the credential setup and then the RAG system test, stopping at the
    first step that fails, and prints a summary with next steps on success.

    Returns:
        bool: True if both steps succeeded, False otherwise.
    """
    print("🚀 Google Drive RAG System Setup")
    print("=" * 50)
    print("This script will help you set up Google Drive integration")
    print("for saving high-confidence news for RAG purposes.")
    print()

    # Step 1: Setup credentials
    if not setup_google_drive_credentials():
        print("\n❌ Setup failed at credentials step")
        return False

    # Step 2: Test RAG system
    if not test_rag_system():
        print("\n❌ Setup failed at RAG system test")
        return False

    print("\n🎉 Setup completed successfully!")
    print("=" * 50)
    print("✅ Google Drive credentials configured")
    print("✅ RAG system initialized")
    print("✅ Ready to save high-confidence news!")
    print()
    print("📋 Next steps:")
    print("1. Your app will now automatically save news with 95%+ confidence")
    print("2. Check your Google Drive for the 'Vietnamese_Fake_News_RAG' folder")
    print("3. View saved news in the 'high_confidence_news.json' file")
    print("4. The system will use this data for better RAG analysis")
    return True
if __name__ == "__main__":
    # main() returns True on success and False on failure; turn that into the
    # process exit status so shells and CI can detect a failed setup instead
    # of always seeing exit code 0.
    raise SystemExit(0 if main() else 1)
|