Moshe Ofer committed on
Commit
07e5e01
Β·
1 Parent(s): da278a5

Initial commit for Hugging Face Space

Browse files
Files changed (3) hide show
  1. Dockerfile +17 -9
  2. README.md +36 -11
  3. app.py +9 -6
Dockerfile CHANGED
@@ -17,7 +17,13 @@ ENV PYTHONUNBUFFERED=1
17
  ENV EVENTLET_NO_GREENDNS=yes
18
  ENV EVENTLET_THREADPOOL_SIZE=32
19
  ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
20
- ENV GUNICORN_CMD_ARGS="--worker-class eventlet --workers 1 --timeout 300 --keep-alive 65 --log-level debug --access-logfile - --error-logfile -"
 
 
 
 
 
 
21
 
22
  # Copy application files
23
  COPY . /app
@@ -26,18 +32,20 @@ COPY . /app
26
  RUN pip install --no-cache-dir --upgrade pip
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
- # Expose port
 
 
 
 
 
 
 
30
  EXPOSE 7860
31
 
32
- # Modified command to use explicit configuration
33
  CMD ["gunicorn", \
34
- "--worker-class", "eventlet", \
35
- "--workers", "1", \
36
- "--worker-connections", "1000", \
37
- "--timeout", "300", \
38
- "--keep-alive", "65", \
39
  "--bind", "0.0.0.0:7860", \
40
- "--log-level", "debug", \
41
  "--access-logfile", "-", \
42
  "--error-logfile", "-", \
43
  "app:app"]
 
17
  ENV EVENTLET_NO_GREENDNS=yes
18
  ENV EVENTLET_THREADPOOL_SIZE=32
19
  ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
20
+ # Enable non-blocking mode
+ ENV EVENTLET_NONBLOCKING=1
21
+ ENV GUNICORN_TIMEOUT=300
22
+ ENV GUNICORN_WORKER_CLASS=eventlet
23
+ ENV GUNICORN_WORKERS=1
24
+ ENV GUNICORN_WORKER_CONNECTIONS=1000
25
+ ENV GUNICORN_KEEP_ALIVE=65
26
+ ENV GUNICORN_LOG_LEVEL=debug
27
 
28
  # Copy application files
29
  COPY . /app
 
32
  RUN pip install --no-cache-dir --upgrade pip
33
  RUN pip install --no-cache-dir -r requirements.txt
34
 
35
+ # Create gunicorn config file
36
+ RUN echo 'worker_class = "eventlet"' > gunicorn.conf.py && \
37
+ echo 'workers = 1' >> gunicorn.conf.py && \
38
+ echo 'worker_connections = 1000' >> gunicorn.conf.py && \
39
+ echo 'timeout = 300' >> gunicorn.conf.py && \
40
+ echo 'keepalive = 65' >> gunicorn.conf.py && \
41
+ echo 'loglevel = "debug"' >> gunicorn.conf.py
42
+
43
  EXPOSE 7860
44
 
45
+ # Modified command with explicit configuration
46
  CMD ["gunicorn", \
47
+ "--config", "gunicorn.conf.py", \
 
 
 
 
48
  "--bind", "0.0.0.0:7860", \
 
49
  "--access-logfile", "-", \
50
  "--error-logfile", "-", \
51
  "app:app"]
README.md CHANGED
@@ -1,11 +1,36 @@
1
- ---
2
- title: Multi Beam Text Streamer
3
- emoji: πŸ†
4
- colorFrom: green
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- short_description: A generic text streamer that supports beam search and manages multiple beams
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Beam Search Generator with MultiBeamTextStreamer
2
+
3
+ This Hugging Face Space hosts a **Beam Search Generator** web application, powered by a Flask backend and integrated with Hugging Face Transformers. The application provides real-time visualization of beam search generation, offering insights into how language models explore multiple text completion possibilities simultaneously.
4
+
5
+ ## πŸš€ Features
6
+
7
+ - Real-time visualization of active and completed beams.
8
+ - Adjustable generation parameters:
9
+ - Number of beams.
10
+ - Maximum tokens.
11
+ - Generation speed (using delay sliders).
12
+ - Seamless interaction with Hugging Face Transformers.
13
+ - Powered by the custom `MultiBeamTextStreamer` from the Transformers library.
14
+
15
+ ## πŸ“œ How It Works
16
+
17
+ 1. Input a **prompt** in the text box.
18
+ 2. Configure generation settings:
19
+ - Number of beams.
20
+ - Maximum token count.
21
+ - Delay speed.
22
+ 3. Click **Generate** to visualize the beam search process in real-time.
23
+
24
+ ## πŸ› οΈ Setup Instructions
25
+
26
+ ### Prerequisites
27
+
28
+ - Python 3.9+
29
+ - A Hugging Face account (if running the app locally, ensure access to the required models)
30
+
31
+ ### Running Locally
32
+
33
+ 1. **Clone the repository**:
34
+ ```bash
35
+ git clone https://huggingface.co/spaces/<your-username>/<your-space-name>
36
+ cd <your-space-name>
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import eventlet
2
- eventlet.monkey_patch(socket=True, select=True)
3
 
4
  import eventlet.wsgi
5
 
@@ -7,16 +7,18 @@ from flask import Flask, render_template
7
  from flask_socketio import SocketIO
8
  from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
9
  import torch
10
- import time
11
 
12
  app = Flask(__name__)
13
  socketio = SocketIO(
14
  app,
15
- ping_timeout=60,
16
  async_mode='eventlet',
 
 
 
17
  cors_allowed_origins="*",
18
  logger=True,
19
- engineio_logger=True
 
20
  )
21
  # Initialize model and tokenizer
22
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
@@ -47,11 +49,12 @@ class WebSocketBeamStreamer(MultiBeamTextStreamer):
47
  self.beam_texts[beam_idx] = new_text
48
  if self.sleep_time > 0:
49
  eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
50
- # Force immediate emit and wait for confirmation
51
  socketio.emit('beam_update', {
52
  'beam_idx': beam_idx,
53
  'text': new_text
54
- }, callback=lambda: eventlet.sleep(0))
 
55
 
56
  def on_beam_finished(self, final_text: str):
57
  """Send completion notification through websocket"""
 
1
  import eventlet
2
+ eventlet.monkey_patch(socket=True, select=True, thread=True)
3
 
4
  import eventlet.wsgi
5
 
 
7
  from flask_socketio import SocketIO
8
  from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
9
  import torch
 
10
 
11
  app = Flask(__name__)
12
  socketio = SocketIO(
13
  app,
 
14
  async_mode='eventlet',
15
+ message_queue=None, # Explicitly set to None for single-worker setup
16
+ ping_timeout=60,
17
+ ping_interval=25,
18
  cors_allowed_origins="*",
19
  logger=True,
20
+ engineio_logger=True,
21
+ async_handlers=True # Enable async handlers
22
  )
23
  # Initialize model and tokenizer
24
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
 
49
  self.beam_texts[beam_idx] = new_text
50
  if self.sleep_time > 0:
51
  eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
52
+ # Force immediate emit and flush
53
  socketio.emit('beam_update', {
54
  'beam_idx': beam_idx,
55
  'text': new_text
56
+ }, namespace='/', callback=lambda: eventlet.sleep(0))
57
+ socketio.sleep(0) # Force context switch
58
 
59
  def on_beam_finished(self, final_text: str):
60
  """Send completion notification through websocket"""