|
|
import os |
|
|
import asyncio |
|
|
import logging |
|
|
from telethon import TelegramClient |
|
|
from telethon.errors import SessionPasswordNeededError, PhoneCodeInvalidError, AuthKeyError |
|
|
from huggingface_hub import upload_file |
|
|
from dotenv import load_dotenv |
|
|
from flask import Flask, request, render_template, jsonify |
|
|
import threading |
|
|
|
|
|
|
|
|
# Load settings from a local .env file (when one is present) before the
# environment is read below.
load_dotenv()

# Telegram API credentials; each stays None when its variable is unset.
API_ID = os.environ.get("API_ID")
API_HASH = os.environ.get("API_HASH")

# Telegram channel whose messages are searched for files.
CHANNEL = os.environ.get("CHANNEL_USERNAME")

# Hugging Face access token and the dataset repository that receives uploads.
HF_TOKEN = os.environ.get("HF_TOKEN")
REPO_ID = os.environ.get("DATASET_REPO")

# Folder inside the dataset repository under which uploaded files are stored.
DATA_PATH = "telegram_uploads"
|
|
|
|
|
|
|
|
# Send log records both to upload.log and to the console.
# The log file is opened as UTF-8 explicitly because the format string and
# the messages logged by this module contain non-ASCII characters; relying on
# the platform default encoding could make those records fail to be written.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s — %(levelname)s — %(message)s",
    handlers=[
        logging.FileHandler("upload.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
|
|
|
|
|
|
|
|
# Shared Telegram client. It stays None when the API credentials are missing
# or when building the client raises ValueError (for example a non-numeric
# API_ID).
client = None
if API_ID and API_HASH:
    try:
        client = TelegramClient("my_session", int(API_ID), API_HASH)
    except ValueError as init_error:
        # The assignment above never completed, so client is still None here.
        logging.error(f"Failed to initialize Telegram client: {init_error}")

# Local folder that downloaded files are written to before being uploaded.
os.makedirs("downloads", exist_ok=True)
|
|
|
|
|
|
|
|
def upload_to_dataset(filepath):
    """Upload one local file to the configured Hugging Face dataset repository.

    The file is stored under ``DATA_PATH`` in the repository using only its
    base name.

    Args:
        filepath: Path of the local file to upload.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``True`` only when the
        upload call completed without raising; ``message`` is a
        human-readable status line.
    """
    if not HF_TOKEN or not REPO_ID:
        return False, "❌ Missing Hugging Face credentials"

    filename = os.path.basename(filepath)
    try:
        upload_file(
            path_or_fileobj=filepath,
            path_in_repo=f"{DATA_PATH}/{filename}",
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as e:
        # Any failure is reported to the caller as a status line instead of
        # being raised, so one bad file does not abort a whole batch.
        logging.error(f"[!] Upload failed: {filepath} — {e}")
        return False, f"❌ Upload failed: {filename} — {e}"

    logging.info(f"[✓] Uploaded: {filepath}")
    return True, f"✅ Uploaded: {filename}"
|
|
|
|
|
|
|
|
async def process_filenames(name_input):
    """Find channel files matching the given search terms and upload them.

    Up to 30000 messages of ``CHANNEL`` are scanned. A message's file is
    selected when any search term is a case-insensitive substring of its file
    name. Each selected file is downloaded into ``downloads/`` and passed to
    ``upload_to_dataset``; a file already present locally is reported and
    skipped.

    Args:
        name_input: Search terms separated by commas and/or newlines.

    Returns:
        A newline-joined report with one status line per selected file,
        followed by one "Not found" line for every search term that matched
        no file. On a configuration, authorization or connection problem a
        single error line is returned instead.
    """
    if not client:
        return "❌ Error: Telegram client not initialized. Please check your API credentials."

    if not CHANNEL:
        return "❌ Error: Channel username not configured."

    try:
        if not client.is_connected():
            await client.connect()

        if not await client.is_user_authorized():
            return "❌ Error: Telegram client not authorized. This application requires a pre-authenticated session file."

        # Commas and newlines both separate terms; matching is case-insensitive.
        filenames = [
            term.strip().lower()
            for term in name_input.replace(",", "\n").splitlines()
            if term.strip()
        ]
        if not filenames:
            # Nothing to look for, so skip scanning the channel altogether.
            return "❌ No files matched."

        try:
            messages = [msg async for msg in client.iter_messages(CHANNEL, limit=30000)]
        except Exception as e:
            logging.error(f"Error iterating messages: {e}")
            return f"❌ Error accessing channel messages: {e}"

        results = []
        found = set()  # file names already handled, so duplicates are skipped

        for msg in messages:
            if not (msg.media and msg.file):
                continue

            fname = msg.file.name or f"file_{msg.id}{msg.file.ext or ''}"
            if fname in found:
                continue
            lowered = fname.lower()
            if not any(term in lowered for term in filenames):
                continue
            found.add(fname)

            # The name comes from the channel, so keep only its last path
            # component to ensure the download stays inside downloads/.
            safe_name = os.path.basename(fname) or f"file_{msg.id}"
            path = os.path.join("downloads", safe_name)

            if os.path.exists(path):
                results.append(f"⏩ Already exists: {fname}")
                continue

            try:
                await msg.download_media(file=path)
            except Exception as download_error:
                logging.error(f"Download error for {fname}: {download_error}")
                results.append(f"❌ Download failed: {fname} — {download_error}")
            else:
                _success, msg_text = upload_to_dataset(path)
                results.append(msg_text)

        # A term counts as found when it selected at least one file, whatever
        # the outcome of that file's download/upload; failures already have
        # their own status line and must not also be reported as missing.
        located = [name.lower() for name in found]
        for term in filenames:
            if not any(term in name for name in located):
                results.append(f"❌ Not found: {term}")

        return "\n".join(results) if results else "❌ No files matched."

    except AuthKeyError:
        logging.error("Auth key error - session may be corrupted")
        return "❌ Error: Session authentication failed. The session file may be corrupted or expired."
    except SessionPasswordNeededError:
        logging.error("Two-factor authentication required")
        return "❌ Error: Two-factor authentication is enabled. This application requires a pre-authenticated session."
    except EOFError as e:
        logging.error(f"EOF Error: {e}")
        return "❌ Error: Connection interrupted. This may be due to network issues or session problems."
    except Exception as e:
        logging.error(f"Error in process_filenames: {e}")
        return f"❌ Error: {e}"
|
|
|
|
|
# Event loop shared by every call to run_async_in_thread(); created lazily.
_background_loop = None
_background_loop_lock = threading.Lock()


def _get_background_loop():
    """Return the shared event loop, starting its daemon thread on first use."""
    global _background_loop
    with _background_loop_lock:
        if _background_loop is None or _background_loop.is_closed():
            loop = asyncio.new_event_loop()

            def _serve():
                asyncio.set_event_loop(loop)
                loop.run_forever()

            # Daemon thread: it must never keep the process alive on exit.
            threading.Thread(target=_serve, name="async-worker", daemon=True).start()
            _background_loop = loop
        return _background_loop


def run_async_in_thread(coro):
    """Run a coroutine on a background event loop and return its result.

    All calls share one long-lived loop running in a daemon thread, instead
    of creating and closing a fresh loop per call. Objects created once at
    module level and reused by later coroutines therefore always run on the
    same loop.
    NOTE: Telethon documents that a client's event loop must not change after
    it has connected, which a loop-per-call design would violate.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns. The calling thread blocks until then.

    Raises:
        Any exception raised inside the coroutine is re-raised in the caller.
    """
    future = asyncio.run_coroutine_threadsafe(coro, _get_background_loop())
    return future.result()
|
|
|
|
|
|
|
|
|
|
|
# Absolute directory containing this script. Template and static folders are
# resolved against it rather than against the current working directory.
basedir = os.path.dirname(os.path.abspath(__file__))

app = Flask(
    __name__,
    static_folder=os.path.join(basedir, "static"),
    template_folder=os.path.join(basedir, "templates"),
)
|
|
|
|
|
@app.route('/')
def index():
    """Serve the main page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload():
    """Handle the upload form: search the channel for the requested files.

    Reads the ``filenames`` form field and runs ``process_filenames`` on it
    through ``run_async_in_thread``.

    Returns:
        The plain-text report on success; an error line with status 400 when
        no filenames were submitted; an error line with status 500 when the
        Telegram client is not configured or an unexpected exception occurs.
    """
    try:
        filenames_input = request.form.get('filenames', '').strip()
        if not filenames_input:
            return "❌ Error: No filenames provided", 400

        if not client:
            return "❌ Error: Application not configured. Please set up your environment variables with API credentials.", 500

        return run_async_in_thread(process_filenames(filenames_input))

    except Exception as e:
        # Outermost request boundary: log with traceback and answer with a
        # readable error instead of letting the exception escape the view.
        logging.exception(f"Error in upload route: {e}")
        return f"❌ Error: {e}", 500
|
|
|
|
|
@app.route('/health')
def health():
    """Return a JSON liveness report with configuration flags and paths."""
    configured = {
        "telegram": bool(client),
        "huggingface": bool(HF_TOKEN and REPO_ID),
        "channel": bool(CHANNEL),
    }
    paths = {
        "basedir": basedir,
        "template_folder": app.template_folder,
        "static_folder": app.static_folder,
    }
    return jsonify({
        "status": "healthy",
        "message": "Hugging Face Uploader is running",
        "configured": configured,
        "paths": paths,
    })
|
|
|
|
|
@app.route('/config')
def config():
    """Show configuration status.

    Returns:
        A JSON object mapping each expected environment variable name to
        "✅ Set" or "❌ Missing". The values themselves are never exposed.
    """
    settings = {
        "API_ID": API_ID,
        "API_HASH": API_HASH,
        "HF_TOKEN": HF_TOKEN,
        "CHANNEL_USERNAME": CHANNEL,
        "DATASET_REPO": REPO_ID,
    }
    config_status = {
        name: "✅ Set" if value else "❌ Missing"
        for name, value in settings.items()
    }
    return jsonify(config_status)
|
|
|
|
|
@app.route('/debug')
def debug():
    """Return a JSON snapshot of the file layout for troubleshooting.

    Folder and session checks use paths relative to the current working
    directory, which is reported alongside the script directory.
    """
    import glob

    has_templates = os.path.exists('templates')
    has_static = os.path.exists('static')

    template_listing = glob.glob('templates/*') if has_templates else []
    static_listing = glob.glob('static/**/*', recursive=True) if has_static else []

    return jsonify({
        "current_directory": os.getcwd(),
        "script_directory": basedir,
        "template_folder": app.template_folder,
        "static_folder": app.static_folder,
        "files_in_current_dir": os.listdir('.'),
        "templates_exists": has_templates,
        "static_exists": has_static,
        "templates_files": template_listing,
        "static_files": static_listing,
        "session_file_exists": os.path.exists('my_session.session'),
    })
|
|
|
|
|
@app.route('/session-info')
def session_info():
    """Return JSON describing the Telegram session file on disk.

    Responds with an ``error`` object when the client was never created or
    when inspecting the session file raises.
    """
    if not client:
        return jsonify({"error": "Client not initialized"})

    session_path = 'my_session.session'
    try:
        present = os.path.exists(session_path)
        size_on_disk = os.path.getsize(session_path) if present else 0
        return jsonify({
            "session_file_exists": present,
            "client_initialized": bool(client),
            "session_file_size": size_on_disk,
        })
    except Exception as e:
        return jsonify({"error": str(e)})
|
|
|
|
|
if __name__ == '__main__':
    # Startup banner: report which settings are present (never their values),
    # where templates/static files are looked up, and whether a session file
    # exists, then start the Flask server.
    print("Starting Hugging Face Uploader...")
    print("Configuration status:")
    for setting_name, setting_value in (
        ("API_ID", API_ID),
        ("API_HASH", API_HASH),
        ("HF_TOKEN", HF_TOKEN),
        ("CHANNEL_USERNAME", CHANNEL),
        ("DATASET_REPO", REPO_ID),
    ):
        print(f" {setting_name}: {'✅ Set' if setting_value else '❌ Missing'}")

    print("\nPaths:")
    print(f" Base directory: {basedir}")
    print(f" Template folder: {app.template_folder}")
    print(f" Static folder: {app.static_folder}")

    print("\nSession info:")
    print(f" Session file exists: {os.path.exists('my_session.session')}")

    print("\n⚠️ IMPORTANT: This application requires a pre-authenticated Telegram session.")
    print(" You must create the session file locally first, then upload it to your Space.")
    print("\nTo configure, set environment variables in your Space settings.")
    print("Visit http://localhost:7860 to use the application.")

    # Listen on all interfaces on port 7860 with the debugger disabled.
    app.run(host='0.0.0.0', port=7860, debug=False)
|
|
|
|
|
|