Spaces:
Sleeping
Sleeping
| # app.py | |
| # A Flask API updated to use the proper Flask logger instead of print(). | |
| import os | |
| import pandas as pd | |
| import io | |
| from flask import Flask, request, jsonify | |
| from flask_cors import CORS | |
| from huggingface_hub import HfApi | |
| from datetime import datetime | |
| import logging | |
# --- Initialization ---
app = Flask(__name__)
CORS(app)

# Configure logging
app.logger.setLevel(logging.INFO)
app.logger.info("--- Flask app.py is starting up! ---")

# --- Configuration ---
# All three secrets are required. If any one of them is absent, every setting
# is reset to None so the endpoints can report a configuration error instead
# of the module failing at import time.
app.logger.info("Loading environment variables...")
try:
    HF_TOKEN, AUDIO_DATASET_REPO_ID, TEXT_DATASET_REPO_ID = (
        os.environ[name]
        for name in ("HF_TOKEN", "AUDIO_DATASET_REPO_ID", "TEXT_DATASET_REPO_ID")
    )
except KeyError as missing:
    app.logger.error(f"FATAL ERROR: Missing secret environment variable: {missing}")
    HF_TOKEN = AUDIO_DATASET_REPO_ID = TEXT_DATASET_REPO_ID = None
else:
    app.logger.info("Successfully loaded all required environment variables.")

# --- Hugging Face API Client ---
# `api` stays None when no token is available; the endpoints check for that.
if not HF_TOKEN:
    api = None
    app.logger.warning("Warning: HfApi not initialized because HF_TOKEN is not set.")
else:
    app.logger.info("Initializing HfApi client...")
    api = HfApi(token=HF_TOKEN)
    app.logger.info("HfApi client initialized.")
# --- Helper Function ---
def get_unique_filename():
    """Return a Parquet filename stamped with the current UTC time.

    The name has the form ``data_YYYYMMDD_HHMMSS_ffffff.parquet``. The
    microsecond field makes a collision between two calls unlikely, but it
    is not a hard uniqueness guarantee.
    """
    # Function-scope import: the module itself only imports ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12; the timezone-aware
    # equivalent formats to exactly the same string.
    stamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S_%f')
    return f"data_{stamp}.parquet"
# --- API Endpoints ---
@app.route("/")
def index():
    """Confirm the API is running.

    Registered on ``/`` so the function is actually reachable; without a
    route decorator Flask never dispatches requests to it.

    Returns:
        A short plain-text status message.
    """
    app.logger.info("Request received for / route.")
    return "Hugging Face Data Uploader API is running."
@app.route("/add-audio", methods=["POST"])
def add_audio_data():
    """Upload one audio/transcription pair to the audio dataset as Parquet.

    Expects a JSON object body containing at least the ``audio`` and
    ``transcription`` keys. The whole object becomes a single-row DataFrame,
    is serialized to Parquet in memory, and is uploaded to
    ``AUDIO_DATASET_REPO_ID`` under a name from ``get_unique_filename()``.

    Returns:
        A ``(response, status)`` tuple: 201 on success, 400 when the body is
        not a JSON object with the required keys, and 500 when the Hugging
        Face client is not configured or the conversion/upload fails.
    """
    app.logger.info("Request received for /add-audio route.")
    if not api:
        app.logger.error("API client not available for /add-audio.")
        return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500

    try:
        app.logger.info("Attempting to process /add-audio data...")
        # silent=True returns None for a missing or malformed JSON body
        # instead of raising an HTTP error that the generic handler below
        # would misreport as a 500.
        data = request.get_json(silent=True)
        # Require a JSON object: on a bare string, `in` would be a substring
        # test and could let a non-record payload through.
        if not isinstance(data, dict) or 'audio' not in data or 'transcription' not in data:
            app.logger.warning("Invalid payload received for /add-audio.")
            return jsonify({"error": "Invalid payload. 'audio' and 'transcription' fields are required."}), 400

        app.logger.info("Data validated. Creating DataFrame.")
        df = pd.DataFrame([data])
        buffer = io.BytesIO()
        df.to_parquet(buffer, index=False, engine='pyarrow')
        buffer.seek(0)  # rewind so the upload reads from the start

        app.logger.info(f"Uploading file to audio dataset: {AUDIO_DATASET_REPO_ID}")
        api.upload_file(
            path_or_fileobj=buffer,
            path_in_repo=get_unique_filename(),
            repo_id=AUDIO_DATASET_REPO_ID,
            repo_type="dataset",
            commit_message="Add new audio-transcription pair",
        )
        app.logger.info("File successfully uploaded to audio dataset.")
        return jsonify({"message": "Audio data added successfully."}), 201
    except Exception as e:
        # Top-level boundary for this endpoint: log the traceback and answer
        # with JSON rather than letting Flask render an HTML error page.
        app.logger.exception("---! UNEXPECTED ERROR in /add-audio !---: %s", e)
        # NOTE(review): "details" exposes the raw exception text to the
        # client; kept for backward compatibility with existing callers.
        return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
@app.route("/add-text", methods=["POST"])
def add_text_data():
    """Upload one transcription/summary pair to the text dataset as Parquet.

    Expects a JSON object body containing at least the ``transcription`` and
    ``summary`` keys. The whole object becomes a single-row DataFrame, is
    serialized to Parquet in memory, and is uploaded to
    ``TEXT_DATASET_REPO_ID`` under a name from ``get_unique_filename()``.

    Returns:
        A ``(response, status)`` tuple: 201 on success, 400 when the body is
        not a JSON object with the required keys, and 500 when the Hugging
        Face client is not configured or the conversion/upload fails.
    """
    app.logger.info("Request received for /add-text route.")
    if not api:
        app.logger.error("API client not available for /add-text.")
        return jsonify({"error": "Server is not configured with Hugging Face credentials."}), 500

    try:
        app.logger.info("Attempting to process /add-text data...")
        # silent=True returns None for a missing or malformed JSON body
        # instead of raising an HTTP error that the generic handler below
        # would misreport as a 500.
        data = request.get_json(silent=True)
        # Require a JSON object: on a bare string, `in` would be a substring
        # test and could let a non-record payload through.
        if not isinstance(data, dict) or 'transcription' not in data or 'summary' not in data:
            app.logger.warning("Invalid payload received for /add-text.")
            return jsonify({"error": "Invalid payload. 'transcription' and 'summary' fields are required."}), 400

        app.logger.info("Data validated. Creating DataFrame.")
        df = pd.DataFrame([data])
        buffer = io.BytesIO()
        df.to_parquet(buffer, index=False, engine='pyarrow')
        buffer.seek(0)  # rewind so the upload reads from the start

        app.logger.info(f"Uploading file to text dataset: {TEXT_DATASET_REPO_ID}")
        api.upload_file(
            path_or_fileobj=buffer,
            path_in_repo=get_unique_filename(),
            repo_id=TEXT_DATASET_REPO_ID,
            repo_type="dataset",
            commit_message="Add new transcription-summary pair",
        )
        app.logger.info("File successfully uploaded to text dataset.")
        return jsonify({"message": "Text data added successfully."}), 201
    except Exception as e:
        # Top-level boundary for this endpoint: log the traceback and answer
        # with JSON rather than letting Flask render an HTML error page.
        app.logger.exception("---! UNEXPECTED ERROR in /add-text !---: %s", e)
        # NOTE(review): "details" exposes the raw exception text to the
        # client; kept for backward compatibility with existing callers.
        return jsonify({"error": "An internal error occurred.", "details": str(e)}), 500
# To run on Hugging Face Spaces
if __name__ == "__main__":
    app.logger.info("Starting Flask development server...")
    # Listen on every interface, on port 7860.
    app.run(port=7860, host="0.0.0.0")