File size: 7,622 Bytes
b5fb8d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/env python3
"""

Setup script for Google Drive RAG system

This script helps you set up Google Drive authentication for the RAG news manager

"""

import os
import json
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

# Configuration
# OAuth scopes requested when loading the cached token and when running the
# interactive OAuth flow (a single Drive scope, "drive.file").
SCOPES = ['https://www.googleapis.com/auth/drive.file']
# OAuth client-secrets JSON downloaded from Google Cloud Console. The path is
# relative, so it is resolved against the current working directory.
CREDENTIALS_FILE = 'credentials.json'
# Where the authorized-user token is cached so later runs can skip the
# browser-based login. Also relative to the current working directory.
TOKEN_FILE = 'token.json'

def setup_google_drive_credentials():
    """Set up Google Drive credentials for local development.

    Steps performed, in order:

    1. Verify that ``CREDENTIALS_FILE`` exists (printing download
       instructions if it does not) and that it contains valid JSON.
    2. Load a cached token from ``TOKEN_FILE`` if present.
    3. Refresh the token if it is expired and refreshable; otherwise run the
       interactive OAuth flow (opens a browser window).
    4. Save the resulting token to ``TOKEN_FILE`` (best effort).
    5. Issue one ``files().list`` call against the Drive v3 API as a smoke
       test.

    Returns:
        bool: ``True`` when credentials were obtained and the Drive API test
        call succeeded, ``False`` on any failure (a message describing the
        failure is printed).
    """
    print("πŸ”§ Setting up Google Drive credentials for RAG system...")
    print("=" * 60)

    # Check if credentials file exists
    if not os.path.exists(CREDENTIALS_FILE):
        print(f"❌ {CREDENTIALS_FILE} not found!")
        print("\nπŸ“‹ To get Google Drive credentials:")
        print("1. Go to Google Cloud Console: https://console.cloud.google.com/")
        print("2. Create a new project or select existing one")
        print("3. Enable Google Drive API")
        print("4. Go to 'Credentials' β†’ 'Create Credentials' β†’ 'OAuth 2.0 Client IDs'")
        print("5. Choose 'Desktop application'")
        print("6. Download the JSON file and rename it to 'credentials.json'")
        print("7. Place it in this directory")
        return False

    print(f"βœ… Found {CREDENTIALS_FILE}")

    # Load credentials (only to validate the file and show a short summary;
    # the OAuth flow below re-reads the file itself).
    try:
        with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
            creds_data = json.load(f)

        print("βœ… Credentials file is valid JSON")

        # Client-secrets files downloaded from Google Cloud Console nest the
        # client fields under an "installed" (desktop) or "web" key. Fall back
        # to the top level for flat files so the summary is never silently
        # "N/A" when the data is actually present.
        client_config = (
            creds_data.get('installed')
            or creds_data.get('web')
            or creds_data
        )
        client_id = str(client_config.get('client_id', 'N/A'))
        print(f"   Client ID: {client_id[:20]}...")
        print(f"   Project ID: {client_config.get('project_id', 'N/A')}")

    except json.JSONDecodeError:
        print("❌ Invalid JSON in credentials file")
        return False
    except Exception as e:
        # Also reached when the JSON is valid but not an object (no .get()).
        print(f"❌ Error reading credentials: {e}")
        return False

    # Authenticate
    creds = None

    # Check if token file exists
    if os.path.exists(TOKEN_FILE):
        print(f"βœ… Found existing {TOKEN_FILE}")
        try:
            creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
            print("βœ… Loaded existing credentials")
        except Exception as e:
            print(f"⚠️ Error loading existing credentials: {e}")
            creds = None

    # If no valid credentials, get new ones
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            print("πŸ”„ Refreshing expired credentials...")
            try:
                creds.refresh(Request())
                print("βœ… Credentials refreshed successfully")
            except Exception as e:
                print(f"❌ Error refreshing credentials: {e}")
                creds = None

        # Run the interactive flow whenever we still lack usable credentials.
        # Checking ``creds.valid`` (not just ``creds``) matters: a cached token
        # that is invalid but cannot be refreshed (e.g. no refresh token) is
        # still a truthy object and would otherwise be saved and used as-is.
        if not creds or not creds.valid:
            print("πŸ” Starting OAuth flow...")
            print("   A browser window will open for authentication")
            print("   Please log in with your Google account and grant permissions")

            try:
                flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
                # port=0 lets the OS pick a free port for the local redirect server.
                creds = flow.run_local_server(port=0)
                print("βœ… Authentication successful!")
            except Exception as e:
                print(f"❌ Authentication failed: {e}")
                return False

        # Save credentials for next time. Failure here is only a warning:
        # the in-memory credentials are still usable for this run.
        try:
            with open(TOKEN_FILE, 'w') as token:
                token.write(creds.to_json())
            print(f"βœ… Credentials saved to {TOKEN_FILE}")
        except Exception as e:
            print(f"⚠️ Warning: Could not save credentials: {e}")

    # Test the credentials
    print("\nπŸ§ͺ Testing Google Drive access...")
    try:
        # Imported lazily so a missing google-api-python-client is reported
        # through the same failure path as an API error.
        from googleapiclient.discovery import build
        service = build('drive', 'v3', credentials=creds)

        # List files to test access (at most one file is requested).
        results = service.files().list(pageSize=1, fields="files(id, name)").execute()
        files = results.get('files', [])

        print("βœ… Google Drive access successful!")
        print(f"   Found {len(files)} file(s) in your Drive")

        if files:
            print(f"   Sample file: {files[0]['name']}")

        return True

    except Exception as e:
        print(f"❌ Google Drive access test failed: {e}")
        return False

def test_rag_system():
    """Smoke-test the RAG news manager and print its current statistics.

    ``rag_news_manager`` is imported inside the function so that a missing
    module is reported as a failed step instead of breaking this script at
    import time.

    Returns:
        bool: ``True`` when the RAG system initialized (even if no statistics
        could be retrieved); ``False`` when the import fails, initialization
        reports failure, or any other exception is raised.
    """
    print("\nπŸ§ͺ Testing RAG News Manager...")
    print("=" * 40)

    try:
        from rag_news_manager import initialize_rag_system, get_rag_stats

        # Guard clause: nothing else to report if initialization fails.
        if not initialize_rag_system():
            print("❌ RAG system initialization failed")
            return False

        print("βœ… RAG system initialized successfully!")

        # Missing statistics are only a warning; the step still succeeds.
        rag_stats = get_rag_stats()
        if not rag_stats:
            print("⚠️ Could not get RAG statistics")
            return True

        print("πŸ“Š Current RAG Statistics:")
        print(f"   Total entries: {rag_stats['total_entries']}")
        print(f"   Real news: {rag_stats['real_count']}")
        print(f"   Fake news: {rag_stats['fake_count']}")
        print(f"   Average confidence: {rag_stats['avg_confidence']:.1%}")
        drive_folder = rag_stats['folder_id']
        print(f"   Google Drive folder: {drive_folder}")
        drive_file = rag_stats['file_id']
        print(f"   Google Drive file: {drive_file}")

        # Print direct Google Drive links for whichever IDs are set.
        if drive_folder:
            print(f"\nπŸ”— Google Drive RAG Folder: https://drive.google.com/drive/folders/{drive_folder}")
        if drive_file:
            print(f"πŸ”— Google Drive RAG File: https://drive.google.com/file/d/{drive_file}/view")

        return True

    except ImportError as import_err:
        print(f"❌ Could not import RAG system: {import_err}")
        return False
    except Exception as err:
        print(f"❌ RAG system test failed: {err}")
        return False

def main():
    """Run the full setup: credentials first, then the RAG system check.

    Returns:
        bool: ``True`` when both steps succeed, ``False`` as soon as one of
        them fails (after printing which step failed).
    """
    divider = "=" * 50

    intro_lines = (
        "πŸš€ Google Drive RAG System Setup",
        divider,
        "This script will help you set up Google Drive integration",
        "for saving high-confidence news for RAG purposes.",
        "",
    )
    for line in intro_lines:
        print(line)

    # Step 1: Setup credentials
    credentials_ok = setup_google_drive_credentials()
    if not credentials_ok:
        print("\n❌ Setup failed at credentials step")
        return False

    # Step 2: Test RAG system
    rag_ok = test_rag_system()
    if not rag_ok:
        print("\n❌ Setup failed at RAG system test")
        return False

    summary_lines = (
        "\nπŸŽ‰ Setup completed successfully!",
        divider,
        "βœ… Google Drive credentials configured",
        "βœ… RAG system initialized",
        "βœ… Ready to save high-confidence news!",
        "",
        "πŸ“‹ Next steps:",
        "1. Your app will now automatically save news with 95%+ confidence",
        "2. Check your Google Drive for the 'Vietnamese_Fake_News_RAG' folder",
        "3. View saved news in the 'high_confidence_news.json' file",
        "4. The system will use this data for better RAG analysis",
    )
    for line in summary_lines:
        print(line)

    return True

if __name__ == "__main__":
    # main() returns True/False; turn that into the process exit status so
    # shells and CI can detect a failed setup (0 = success, 1 = failure).
    raise SystemExit(0 if main() else 1)