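# Spaces: Running
# (The line above appears to be status text from the hosting page that was
# captured together with the source; it is not part of the program.)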
import asyncio
import base64
import logging
import os
import time
import webbrowser

import requests
from fastapi import Depends, FastAPI, HTTPException, Request, WebSocket
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
# Application object and the bearer-token scheme consumed by `get_token`.
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

# Google OAuth client settings (values come from the Google Developer Console).
# Each setting can be overridden with an environment variable of the same
# name; the literals are kept only as fallbacks so existing deployments keep
# working unchanged.
# SECURITY: the client secret below is committed to source and must be treated
# as compromised -- rotate it, provide the new value through the environment,
# and then delete the fallback literal.
GOOGLE_CLIENT_ID = (
    os.environ.get("GOOGLE_CLIENT_ID")
    or "485753721652-5uta3e18va2g6cnkldib2d68q39t4vod.apps.googleusercontent.com"
)
GOOGLE_CLIENT_SECRET = (
    os.environ.get("GOOGLE_CLIENT_SECRET")
    or "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"
)
GOOGLE_REDIRECT_URI = (
    os.environ.get("GOOGLE_REDIRECT_URI")
    or "https://omkar008-receipt-radar-test.hf.space/auth/google"
)
GOOGLE_REDIRECT_URI_hr = (
    os.environ.get("GOOGLE_REDIRECT_URI_hr")
    or "https://receiptradar-0bb387d81174.herokuapp.com/auth/google"
)

# Configure the logger
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
async def login_google():
    """Build the Google OAuth consent URLs and return the Heroku variant.

    Two consent URLs are built from the module-level client settings.  Both
    request the ``openid``, ``profile``, ``email`` and read-only Gmail scopes
    with offline access; they differ only in the redirect URI, and the
    Heroku variant additionally carries ``state=receipts``.

    Side effect: the URL that uses ``GOOGLE_REDIRECT_URI`` is handed to
    ``webbrowser.open``, which targets a browser on the machine running this
    process (so it is only meaningful when the server runs locally).

    NOTE(review): no route decorator is visible on this handler in this
    view -- confirm it is registered with ``app`` elsewhere.

    Returns:
        dict: ``{"url_hr": <consent URL using GOOGLE_REDIRECT_URI_hr>}``.
    """
    # Shared pieces of both URLs, kept in one place so they cannot drift apart.
    scope = "openid%20profile%20email%20https://www.googleapis.com/auth/gmail.readonly"
    base = (
        "https://accounts.google.com/o/oauth2/auth"
        f"?response_type=code&client_id={GOOGLE_CLIENT_ID}"
    )

    oauth_url = (
        f"{base}&redirect_uri={GOOGLE_REDIRECT_URI}"
        f"&scope={scope}&access_type=offline"
    )
    oauth_url_hr = (
        f"{base}&redirect_uri={GOOGLE_REDIRECT_URI_hr}"
        f"&scope={scope}&access_type=offline&state=receipts"
    )

    webbrowser.open(oauth_url)
    return {"url_hr": oauth_url_hr}
async def auth_google(request: Request):
    """Collect Gmail receipt/invoice attachments for a Google access token.

    The JSON request body must carry the OAuth access token under the key
    ``"access_token"``.  With that token the handler:

    1. lists every message matching the receipt/invoice search, following
       ``nextPageToken`` until the listing is exhausted;
    2. fetches each message and inspects the top-level entries of
       ``payload.parts`` (nested parts are not traversed);
    3. downloads every part whose body has an ``attachmentId``.

    NOTE(review): no route decorator is visible on this handler in this
    view -- confirm it is registered with ``app`` elsewhere.

    Returns:
        dict: ``{"attachment_count": int, "attachment_content": dict}`` where
        ``attachment_content`` maps a filename to the attachment data exactly
        as Gmail returned it (still base64url-encoded).  Attachments that
        share a filename overwrite each other in the mapping, so
        ``attachment_count`` can be larger than the number of keys.

    Raises:
        HTTPException: 400 when the body holds no access token; the upstream
            status code when the Gmail message listing is rejected (for
            example an expired token), instead of silently reporting zero
            attachments.
    """
    payload = await request.json()
    access_token = payload.get("access_token") if isinstance(payload, dict) else None
    # The token is a bearer credential: it is deliberately never printed/logged.
    if not access_token:
        raise HTTPException(status_code=400, detail="Authorization code not provided")

    messages_url = "https://www.googleapis.com/gmail/v1/users/me/messages"
    auth_headers = {"Authorization": f"Bearer {access_token}"}
    request_timeout = 30  # seconds; without it a stalled call would hang forever
    jobs_query = "subject:receipt OR subject:receipts OR subject:Invoice OR subject:invoice has:attachment "

    def gmail_get(url, params=None):
        """Blocking GET against the Gmail API with the caller's token."""
        return requests.get(
            url, headers=auth_headers, params=params, timeout=request_timeout
        )

    # `requests` is blocking, so every call runs in the default executor to
    # keep the event loop free for other connections.
    loop = asyncio.get_running_loop()

    messages = []
    page_token = None
    while True:
        # `params` lets requests URL-encode the search expression.
        params = {"q": jobs_query}
        if page_token:
            params["pageToken"] = page_token
        gmail_response = await loop.run_in_executor(
            None, gmail_get, messages_url, params
        )
        if not gmail_response.ok:
            raise HTTPException(
                status_code=gmail_response.status_code,
                detail="Gmail message listing failed",
            )
        gmail_data = gmail_response.json()
        messages.extend(gmail_data.get("messages", []))
        page_token = gmail_data.get("nextPageToken")
        if not page_token:
            break  # No more pages

    attachment_no = 0
    data_new = {}
    for message in messages:
        message_id = message.get("id") if message else None
        if not message_id:
            continue
        logger.debug("Fetching message %s", message_id)
        message_response = await loop.run_in_executor(
            None, gmail_get, f"{messages_url}/{message_id}"
        )
        message_data = message_response.json()

        # Messages without a payload/parts section are skipped (best effort).
        for part in message_data.get("payload", {}).get("parts", []):
            attachment_id = part.get("body", {}).get("attachmentId")
            if not attachment_id:
                continue
            attachment_response = await loop.run_in_executor(
                None,
                gmail_get,
                f"{messages_url}/{message_id}/attachments/{attachment_id}",
            )
            encoded = attachment_response.json().get("data")
            if encoded:
                filename = part.get("filename", "untitled.txt")
                data_new[filename] = encoded
                attachment_no += 1

    return {"attachment_count": attachment_no, "attachment_content": data_new}
async def send_chunked_data(
    websocket: WebSocket,
    filename: str,
    data: str,
    chunk_size: int = 1024,
    delay: float = 0.1,
):
    """Send *data* over *websocket* in consecutive fixed-size slices.

    Every slice goes out as a JSON message
    ``{"filename": filename, "data_chunk": <slice>}``; after the last slice
    the text frame ``"FinishedThisAttachment"`` is sent.  For empty *data*
    only that final frame is sent.

    Args:
        websocket: Connection to write to; must provide awaitable
            ``send_json`` and ``send_text`` methods.
        filename: Name reported alongside every slice.
        data: The string to split and send.
        chunk_size: Number of characters per slice (default 1024).
        delay: Seconds to pause after each slice (default 0.1).

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
    """
    if chunk_size < 1:
        # range() would raise on 0 and silently send nothing for negatives.
        raise ValueError("chunk_size must be a positive integer")

    for start in range(0, len(data), chunk_size):
        await websocket.send_json(
            {"filename": filename, "data_chunk": data[start:start + chunk_size]}
        )
        await asyncio.sleep(delay)
    await websocket.send_text("FinishedThisAttachment")
async def test_websocket(websocket: WebSocket):
    """Stream Gmail receipt attachments to a WebSocket client.

    Protocol, as implemented here:

    1. The connection is accepted and one text frame is read; its content is
       used verbatim as the Google OAuth access token (the consent URL built
       in this file requests the read-only Gmail scope).
    2. Messages matching ``subject:receipt OR subject:receipts
       has:attachment`` are listed and reduced to the first listed message of
       each thread.
    3. For every such message, each top-level payload part that has an
       ``attachmentId`` is downloaded and its still-encoded data is forwarded
       through ``send_chunked_data``.
    4. The text frame ``"CompletedFetchingMessages"`` is sent and the socket
       is closed.

    NOTE(review): no route decorator is visible on this handler in this
    view -- confirm it is registered with ``app`` elsewhere.
    """
    messages_url = "https://www.googleapis.com/gmail/v1/users/me/messages"
    request_timeout = 30  # seconds; without it a stalled call would hang forever

    def fetch_json(url: str, token: str, params=None):
        """Blocking GET with a bearer token; returns the decoded JSON body."""
        response = requests.get(
            url,
            headers={"Authorization": f"Bearer {token}"},
            params=params,
            timeout=request_timeout,
        )
        logger.debug("GET %s -> HTTP %s", url, response.status_code)
        return response.json()

    def get_messages(code: str):
        """Return the first listed message stub of every matching thread."""
        logger.info("entered into the get_messages")
        jobs_query = "subject:receipt OR subject:receipts has:attachment"
        messages = []
        page_token = None
        while True:
            # `params` lets requests URL-encode the search expression.
            params = {"q": jobs_query}
            if page_token:
                params["pageToken"] = page_token
            gmail_data = fetch_json(messages_url, code, params)
            messages.extend(gmail_data.get("messages", []))
            page_token = gmail_data.get("nextPageToken")
            if not page_token:
                break  # No more pages

        # Keep only the first entry seen for each thread.
        unique_thread_ids = set()
        filtered_data_list = []
        for entry in messages:
            thread_id = entry["threadId"]
            if thread_id not in unique_thread_ids:
                unique_thread_ids.add(thread_id)
                filtered_data_list.append(entry)
        logger.debug(
            "%d messages listed, %d after per-thread de-duplication",
            len(messages),
            len(filtered_data_list),
        )
        return filtered_data_list

    async def event_generator(code: str):
        """Download each attachment and push it to the client in chunks."""
        logger.info("entered into the event_generator")
        # `requests` is blocking; running it in the default executor keeps
        # the event loop responsive for other connections.
        loop = asyncio.get_running_loop()
        messages = await loop.run_in_executor(None, get_messages, code)

        for message in messages:
            message_id = message.get("id") if message else None
            if not message_id:
                continue
            message_data = await loop.run_in_executor(
                None, fetch_json, f"{messages_url}/{message_id}", code
            )
            # Messages without a payload/parts section are skipped.
            for part in message_data.get("payload", {}).get("parts", []):
                attachment_id = part.get("body", {}).get("attachmentId")
                if not attachment_id:
                    continue
                attachment_data = await loop.run_in_executor(
                    None,
                    fetch_json,
                    f"{messages_url}/{message_id}/attachments/{attachment_id}",
                    code,
                )
                encoded = attachment_data.get("data")
                if encoded:
                    filename = part.get("filename", "untitled.txt")
                    await send_chunked_data(websocket, filename, encoded)

        await websocket.send_text("CompletedFetchingMessages")

    await websocket.accept()
    logger.info("Hi hi succefull in connecting !!")
    logger.info("Now receiving json!!")
    access_token = await websocket.receive_text()
    # The frame is a bearer credential, so only its size is logged.
    logger.info("Received access token (%d characters)", len(access_token))

    await event_generator(access_token)
    logger.info("Closing connection")
    await websocket.close()
async def get_token(token: str = Depends(oauth2_scheme)):
    """Decode the bearer token provided by the ``oauth2_scheme`` dependency.

    The token is verified as an HS256-signed JWT using
    ``GOOGLE_CLIENT_SECRET`` as the key.

    Args:
        token: Bearer token extracted from the request by ``oauth2_scheme``.

    Returns:
        The decoded claims returned by ``jwt.decode``.

    Raises:
        HTTPException: 401 when the token cannot be decoded or verified, so
            a bad token is reported as an authentication failure rather than
            surfacing as an unhandled server error.
    """
    try:
        return jwt.decode(token, GOOGLE_CLIENT_SECRET, algorithms=["HS256"])
    except JWTError as exc:
        raise HTTPException(
            status_code=401,
            detail="Could not validate credentials",
            headers={"WWW-Authenticate": "Bearer"},
        ) from exc