# TLD1 / app.py
# Fred808's picture
# Update app.py
# d295fbe verified
import os
import asyncio
import logging
from telethon import TelegramClient
from telethon.errors import SessionPasswordNeededError, PhoneCodeInvalidError, AuthKeyError
from huggingface_hub import upload_file
from dotenv import load_dotenv
from flask import Flask, request, render_template, jsonify
import threading
# === Load secrets from .env ===
# load_dotenv() runs first so the lookups below can also see values that
# come from a local .env file.
load_dotenv()

# Every setting is optional at import time: a missing variable yields None and
# the code that needs it checks for that before use.
API_ID = os.environ.get("API_ID")              # Telegram API id (string; converted with int() when the client is built)
API_HASH = os.environ.get("API_HASH")          # Telegram API hash
HF_TOKEN = os.environ.get("HF_TOKEN")          # Hugging Face access token
CHANNEL = os.environ.get("CHANNEL_USERNAME")   # Telegram channel to search
REPO_ID = os.environ.get("DATASET_REPO")       # Hugging Face dataset repo id

# Folder inside the dataset repo that uploaded files are placed under.
DATA_PATH = "telegram_uploads"
# === Logging setup ===
# Log to both upload.log and the console.
# The log file is opened explicitly as UTF-8: the format string and many of
# the messages logged by this module contain non-ASCII characters, which the
# platform's default encoding is not guaranteed to be able to write.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s β€” %(levelname)s β€” %(message)s",
    handlers=[
        logging.FileHandler("upload.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
# === Setup Telegram Client (only if credentials are available) ===
# `client` stays None when either credential is missing or when API_ID is not
# a valid integer; the rest of the module checks `client` before using it.
if not (API_ID and API_HASH):
    client = None
else:
    try:
        client = TelegramClient("my_session", int(API_ID), API_HASH)
    except ValueError as init_error:
        logging.error(f"Failed to initialize Telegram client: {init_error}")
        client = None

# === Ensure download folder exists ===
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs("downloads", exist_ok=True)
# === Upload wrapper ===
def upload_to_dataset(filepath):
    """Upload one local file to the configured Hugging Face dataset repo.

    The file is stored under ``DATA_PATH`` in the repo, keeping only its base
    name.

    Args:
        filepath: Path of the local file to upload.

    Returns:
        A ``(success, message)`` tuple. ``success`` is False when the Hugging
        Face token or repo id is not configured, or when the upload raised;
        ``message`` is a human-readable status line.
    """
    # Guard clause: both the token and the target repo are required.
    if not (HF_TOKEN and REPO_ID):
        return False, "❌ Missing Hugging Face credentials"

    try:
        upload_args = {
            "path_or_fileobj": filepath,
            "path_in_repo": f"{DATA_PATH}/{os.path.basename(filepath)}",
            "repo_id": REPO_ID,
            "repo_type": "dataset",
            "token": HF_TOKEN,
        }
        upload_file(**upload_args)
        logging.info(f"[↑] Uploaded: {filepath}")
        return True, f"βœ… Uploaded: {os.path.basename(filepath)}"
    except Exception as upload_error:
        # Any upload failure is reported to the caller as a status message
        # rather than propagated.
        logging.error(f"[!] Upload failed: {filepath} β€” {upload_error}")
        return False, f"❌ Upload failed: {os.path.basename(filepath)} β€” {upload_error}"
# === Main file processing logic with improved error handling ===
def _parse_search_terms(name_input):
    """Split comma- or newline-separated input into lowercase, non-empty search terms."""
    return [
        term.strip().lower()
        for term in name_input.replace(",", "\n").splitlines()
        if term.strip()
    ]


def _local_file_name(msg):
    """Return a directory-free local file name for a message's attachment."""
    # The name comes from the channel message, not from this app, so keep only
    # its final path component: a name such as "../../x" must not be able to
    # point outside the downloads folder.
    raw_name = (msg.file.name or "").replace("\\", "/")
    name = os.path.basename(raw_name)
    if name in ("", ".", ".."):
        # No usable name: fall back to the message id. `ext` may be None.
        name = f"file_{msg.id}{msg.file.ext or ''}"
    return name


async def process_filenames(name_input):
    """Find channel files matching the requested names, download and upload them.

    Each search term is matched case-insensitively as a substring of the
    attachment file names found in the last 30000 messages of ``CHANNEL``.
    Every distinct matching file name is processed once: it is downloaded to
    ``downloads/`` and passed to ``upload_to_dataset``, unless a file with
    that name already exists locally.

    Args:
        name_input: Search terms separated by commas and/or newlines.

    Returns:
        A newline-joined report with one line per processed file plus a
        "Not found" line for every search term that matched no file name.
        Failures (missing configuration, unauthorized session, Telegram
        errors) are returned as an error string rather than raised.
    """
    if not client:
        return "❌ Error: Telegram client not initialized. Please check your API credentials."
    if not CHANNEL:
        return "❌ Error: Channel username not configured."
    try:
        # Connect only when needed; the client may still be connected from an
        # earlier call.
        if not client.is_connected():
            await client.connect()
        # No interactive login is attempted here, so an unauthorized session
        # is a hard stop.
        if not await client.is_user_authorized():
            return "❌ Error: Telegram client not authorized. This application requires a pre-authenticated session file."

        filenames = _parse_search_terms(name_input)
        results = []
        found = set()      # distinct matching file names, as stored locally
        candidates = []    # (message, file name) pairs to process, in channel order

        # Scan the channel first and keep only the matching messages, so an
        # iteration error is reported before anything is downloaded and
        # non-matching messages are not held in memory.
        try:
            async for msg in client.iter_messages(CHANNEL, limit=30000):
                if not (msg.media and msg.file):
                    continue
                fname = _local_file_name(msg)
                if fname in found:
                    continue
                lowered = fname.lower()
                if any(search in lowered for search in filenames):
                    found.add(fname)
                    candidates.append((msg, fname))
        except Exception as e:
            logging.error(f"Error iterating messages: {e}")
            return f"❌ Error accessing channel messages: {str(e)}"

        loop = asyncio.get_running_loop()
        for msg, fname in candidates:
            path = f"downloads/{fname}"
            if os.path.exists(path):
                results.append(f"⏩ Already exists: {fname}")
                continue
            try:
                await msg.download_media(file=path)
                # upload_to_dataset is a blocking call; run it in the default
                # executor so the event loop is not stalled while it uploads.
                _, msg_text = await loop.run_in_executor(None, upload_to_dataset, path)
                results.append(msg_text)
            except Exception as download_error:
                logging.error(f"Download error for {fname}: {download_error}")
                results.append(f"❌ Download failed: {fname} β€” {str(download_error)}")

        # A term counts as found when it occurs in any matched file name,
        # whether or not that file's download/upload succeeded (failures
        # already have their own line in the report).
        matched_names = [fname.lower() for fname in found]
        for name in filenames:
            if not any(name in matched for matched in matched_names):
                results.append(f"❌ Not found: {name}")
        return "\n".join(results) if results else "❌ No files matched."
    except AuthKeyError:
        logging.error("Auth key error - session may be corrupted")
        return "❌ Error: Session authentication failed. The session file may be corrupted or expired."
    except SessionPasswordNeededError:
        logging.error("Two-factor authentication required")
        return "❌ Error: Two-factor authentication is enabled. This application requires a pre-authenticated session."
    except EOFError as e:
        logging.error(f"EOF Error: {e}")
        return "❌ Error: Connection interrupted. This may be due to network issues or session problems."
    except Exception as e:
        logging.error(f"Error in process_filenames: {e}")
        return f"❌ Error: {str(e)}"
# Shared event loop used by run_async_in_thread; created lazily on first use.
_background_loop = None
_background_loop_lock = threading.Lock()


def _get_background_loop():
    """Return the shared event loop, starting its daemon thread on first use."""
    global _background_loop
    with _background_loop_lock:
        if _background_loop is None or _background_loop.is_closed():
            loop = asyncio.new_event_loop()

            def _serve():
                asyncio.set_event_loop(loop)
                loop.run_forever()

            # Daemon thread: it must never keep the process alive on exit.
            threading.Thread(target=_serve, name="async-worker", daemon=True).start()
            _background_loop = loop
        return _background_loop


def run_async_in_thread(coro):
    """Run a coroutine on a background event loop and return its result.

    All calls share one long-lived event loop running in a dedicated daemon
    thread. Creating and closing a fresh loop per call would leave objects
    that stay alive between calls (such as a connected network client) bound
    to a loop that no longer exists.

    Must not be called from a coroutine running on the background loop
    itself: the call blocks until the result is ready and would deadlock.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.

    Raises:
        Any exception raised by the coroutine is re-raised in the caller.
    """
    future = asyncio.run_coroutine_threadsafe(coro, _get_background_loop())
    # Block the calling (request) thread until the coroutine finishes.
    return future.result()
# === Flask App with explicit template and static folder paths ===
# Resolve both folders against the directory containing this script instead
# of relying on the process's current working directory.
basedir = os.path.abspath(os.path.dirname(__file__))
app = Flask(
    __name__,
    template_folder=os.path.join(basedir, "templates"),
    static_folder=os.path.join(basedir, "static"),
)
@app.route('/')
def index():
    """Serve the main page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/upload', methods=['POST'])
def upload():
    """Handle the upload form: search, download and upload the requested files.

    Reads the ``filenames`` form field and passes it to ``process_filenames``.

    Returns:
        The text report on success; a ``(message, 400)`` tuple when no
        filenames were submitted; a ``(message, 500)`` tuple when the Telegram
        client is not configured or an unexpected error occurs.
    """
    try:
        submitted = request.form.get('filenames', '').strip()
        if submitted and client:
            # process_filenames is a coroutine; run it to completion and hand
            # its report back as the response body.
            return run_async_in_thread(process_filenames(submitted))
        if not submitted:
            return "❌ Error: No filenames provided", 400
        # Filenames were given but the Telegram client could not be created.
        return "❌ Error: Application not configured. Please set up your environment variables with API credentials.", 500
    except Exception as route_error:
        logging.error(f"Error in upload route: {route_error}")
        return f"❌ Error: {str(route_error)}", 500
@app.route('/health')
def health():
    """Return a JSON health report: configuration flags and resolved paths."""
    # True/False per integration, based on whether its settings are present.
    configured = {
        "telegram": bool(client),
        "huggingface": bool(HF_TOKEN and REPO_ID),
        "channel": bool(CHANNEL),
    }
    # Folders the Flask app was created with.
    paths = {
        "basedir": basedir,
        "template_folder": app.template_folder,
        "static_folder": app.static_folder,
    }
    return jsonify({
        "status": "healthy",
        "message": "Hugging Face Uploader is running",
        "configured": configured,
        "paths": paths,
    })
@app.route('/config')
def config():
    """Return JSON stating, per environment variable, whether it is set.

    Only a set/missing marker is reported; the values themselves are never
    included in the response.
    """
    settings = (
        ("API_ID", API_ID),
        ("API_HASH", API_HASH),
        ("HF_TOKEN", HF_TOKEN),
        ("CHANNEL_USERNAME", CHANNEL),
        ("DATASET_REPO", REPO_ID),
    )
    config_status = {
        variable: ("βœ… Set" if value else "❌ Missing")
        for variable, value in settings
    }
    return jsonify(config_status)
@app.route('/debug')
def debug():
    """Return JSON describing the file layout seen by the running process.

    The existence checks and listings use paths relative to the current
    working directory, while ``script_directory`` and the two folder entries
    report what the Flask app was configured with.
    """
    import glob

    current_dir_listing = os.listdir('.')
    has_templates = os.path.exists('templates')
    has_static = os.path.exists('static')

    # Only glob folders that exist; otherwise report an empty list.
    template_files = glob.glob('templates/*') if has_templates else []
    static_files = glob.glob('static/**/*', recursive=True) if has_static else []

    return jsonify({
        "current_directory": os.getcwd(),
        "script_directory": basedir,
        "template_folder": app.template_folder,
        "static_folder": app.static_folder,
        "files_in_current_dir": current_dir_listing,
        "templates_exists": has_templates,
        "static_exists": has_static,
        "templates_files": template_files,
        "static_files": static_files,
        "session_file_exists": os.path.exists('my_session.session'),
    })
@app.route('/session-info')
def session_info():
    """Return JSON about the Telegram session file on disk.

    Reports whether ``my_session.session`` exists in the current working
    directory and its size in bytes (0 when absent). No Telegram connection
    is made. Errors are returned as ``{"error": ...}`` JSON.
    """
    if not client:
        return jsonify({"error": "Client not initialized"})

    session_file = 'my_session.session'
    try:
        # Filesystem-only check; nothing here talks to Telegram.
        present = os.path.exists(session_file)
        size_in_bytes = os.path.getsize(session_file) if present else 0
        return jsonify({
            "session_file_exists": present,
            "client_initialized": bool(client),
            "session_file_size": size_in_bytes,
        })
    except Exception as check_error:
        return jsonify({"error": str(check_error)})
if __name__ == '__main__':
    # Startup banner: print what is configured and where files are looked up
    # before the server starts.
    print("Starting Hugging Face Uploader...")
    print("Configuration status:")
    for label, value in (
        ("API_ID", API_ID),
        ("API_HASH", API_HASH),
        ("HF_TOKEN", HF_TOKEN),
        ("CHANNEL_USERNAME", CHANNEL),
        ("DATASET_REPO", REPO_ID),
    ):
        # Only set/missing is printed, never the value itself.
        print(f" {label}: {'βœ… Set' if value else '❌ Missing'}")

    print("\nPaths:")
    for label, location in (
        ("Base directory", basedir),
        ("Template folder", app.template_folder),
        ("Static folder", app.static_folder),
    ):
        print(f" {label}: {location}")

    print("\nSession info:")
    session_present = os.path.exists('my_session.session')
    print(f" Session file exists: {session_present}")

    print("\n⚠️ IMPORTANT: This application requires a pre-authenticated Telegram session.")
    print(" You must create the session file locally first, then upload it to your Space.")
    print("\nTo configure, set environment variables in your Space settings.")
    print("Visit http://localhost:7860 to use the application.")

    # Use port 7860 for Hugging Face Spaces compatibility
    app.run(host='0.0.0.0', port=7860, debug=False)