Moshe Ofer committed on
Commit
07e5e01
Β·
1 Parent(s): da278a5

Initial commit for Hugging Face Space

Browse files
Files changed (3) hide show
  1. Dockerfile +17 -9
  2. README.md +36 -11
  3. app.py +9 -6
Dockerfile CHANGED
@@ -17,7 +17,13 @@ ENV PYTHONUNBUFFERED=1
17
  ENV EVENTLET_NO_GREENDNS=yes
18
  ENV EVENTLET_THREADPOOL_SIZE=32
19
  ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
20
- ENV GUNICORN_CMD_ARGS="--worker-class eventlet --workers 1 --timeout 300 --keep-alive 65 --log-level debug --access-logfile - --error-logfile -"
 
 
 
 
 
 
21
 
22
  # Copy application files
23
  COPY . /app
@@ -26,18 +32,20 @@ COPY . /app
26
  RUN pip install --no-cache-dir --upgrade pip
27
  RUN pip install --no-cache-dir -r requirements.txt
28
 
29
- # Expose port
 
 
 
 
 
 
 
30
  EXPOSE 7860
31
 
32
- # Modified command to use explicit configuration
33
  CMD ["gunicorn", \
34
- "--worker-class", "eventlet", \
35
- "--workers", "1", \
36
- "--worker-connections", "1000", \
37
- "--timeout", "300", \
38
- "--keep-alive", "65", \
39
  "--bind", "0.0.0.0:7860", \
40
- "--log-level", "debug", \
41
  "--access-logfile", "-", \
42
  "--error-logfile", "-", \
43
  "app:app"]
 
17
  ENV EVENTLET_NO_GREENDNS=yes
18
  ENV EVENTLET_THREADPOOL_SIZE=32
19
  ENV EVENTLET_WEBSOCKET_MONITOR_TIMEOUT=60
20
+ # Enable non-blocking mode
+ ENV EVENTLET_NONBLOCKING=1
21
+ ENV GUNICORN_TIMEOUT=300
22
+ ENV GUNICORN_WORKER_CLASS=eventlet
23
+ ENV GUNICORN_WORKERS=1
24
+ ENV GUNICORN_WORKER_CONNECTIONS=1000
25
+ ENV GUNICORN_KEEP_ALIVE=65
26
+ ENV GUNICORN_LOG_LEVEL=debug
27
 
28
  # Copy application files
29
  COPY . /app
 
32
  RUN pip install --no-cache-dir --upgrade pip
33
  RUN pip install --no-cache-dir -r requirements.txt
34
 
35
+ # Create gunicorn config file
36
+ RUN echo 'worker_class = "eventlet"' > gunicorn.conf.py && \
37
+ echo 'workers = 1' >> gunicorn.conf.py && \
38
+ echo 'worker_connections = 1000' >> gunicorn.conf.py && \
39
+ echo 'timeout = 300' >> gunicorn.conf.py && \
40
+ echo 'keepalive = 65' >> gunicorn.conf.py && \
41
+ echo 'loglevel = "debug"' >> gunicorn.conf.py
42
+
43
  EXPOSE 7860
44
 
45
+ # Modified command with explicit configuration
46
  CMD ["gunicorn", \
47
+ "--config", "gunicorn.conf.py", \
 
 
 
 
48
  "--bind", "0.0.0.0:7860", \
 
49
  "--access-logfile", "-", \
50
  "--error-logfile", "-", \
51
  "app:app"]
README.md CHANGED
@@ -1,11 +1,36 @@
1
- ---
2
- title: Multi Beam Text Streamer
3
- emoji: πŸ†
4
- colorFrom: green
5
- colorTo: pink
6
- sdk: docker
7
- pinned: false
8
- short_description: A generic text streamer that supports beam search and manages multiple beams
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Beam Search Generator with MultiBeamTextStreamer
2
+
3
+ This Hugging Face Space hosts a **Beam Search Generator** web application, powered by a Flask backend and integrated with Hugging Face Transformers. The application provides real-time visualization of beam search generation, offering insights into how language models explore multiple text completion possibilities simultaneously.
4
+
5
+ ## πŸš€ Features
6
+
7
+ - Real-time visualization of active and completed beams.
8
+ - Adjustable generation parameters:
9
+ - Number of beams.
10
+ - Maximum tokens.
11
+ - Generation speed (using delay sliders).
12
+ - Seamless interaction with Hugging Face Transformers.
13
+ - Powered by the custom `MultiBeamTextStreamer` from the Transformers library.
14
+
15
+ ## πŸ“œ How It Works
16
+
17
+ 1. Input a **prompt** in the text box.
18
+ 2. Configure generation settings:
19
+ - Number of beams.
20
+ - Maximum token count.
21
+ - Delay speed.
22
+ 3. Click **Generate** to visualize the beam search process in real-time.
23
+
24
+ ## πŸ› οΈ Setup Instructions
25
+
26
+ ### Prerequisites
27
+
28
+ - Python 3.9+
29
+ - A Hugging Face account (if running the app locally, ensure access to the required models)
30
+
31
+ ### Running Locally
32
+
33
+ 1. **Clone the repository**:
34
+ ```bash
35
+ git clone https://huggingface.co/spaces/<your-username>/<your-space-name>
36
+ cd <your-space-name>
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import eventlet
2
- eventlet.monkey_patch(socket=True, select=True)
3
 
4
  import eventlet.wsgi
5
 
@@ -7,16 +7,18 @@ from flask import Flask, render_template
7
  from flask_socketio import SocketIO
8
  from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
9
  import torch
10
- import time
11
 
12
  app = Flask(__name__)
13
  socketio = SocketIO(
14
  app,
15
- ping_timeout=60,
16
  async_mode='eventlet',
 
 
 
17
  cors_allowed_origins="*",
18
  logger=True,
19
- engineio_logger=True
 
20
  )
21
  # Initialize model and tokenizer
22
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
@@ -47,11 +49,12 @@ class WebSocketBeamStreamer(MultiBeamTextStreamer):
47
  self.beam_texts[beam_idx] = new_text
48
  if self.sleep_time > 0:
49
  eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
50
- # Force immediate emit and wait for confirmation
51
  socketio.emit('beam_update', {
52
  'beam_idx': beam_idx,
53
  'text': new_text
54
- }, callback=lambda: eventlet.sleep(0))
 
55
 
56
  def on_beam_finished(self, final_text: str):
57
  """Send completion notification through websocket"""
 
1
  import eventlet
2
+ eventlet.monkey_patch(socket=True, select=True, thread=True)
3
 
4
  import eventlet.wsgi
5
 
 
7
  from flask_socketio import SocketIO
8
  from transformers import MultiBeamTextStreamer, AutoTokenizer, AutoModelForCausalLM
9
  import torch
 
10
 
11
  app = Flask(__name__)
12
  socketio = SocketIO(
13
  app,
 
14
  async_mode='eventlet',
15
+ message_queue=None, # Explicitly set to None for single-worker setup
16
+ ping_timeout=60,
17
+ ping_interval=25,
18
  cors_allowed_origins="*",
19
  logger=True,
20
+ engineio_logger=True,
21
+ async_handlers=True # Enable async handlers
22
  )
23
  # Initialize model and tokenizer
24
  MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
 
49
  self.beam_texts[beam_idx] = new_text
50
  if self.sleep_time > 0:
51
  eventlet.sleep(self.sleep_time / 1000) # Convert milliseconds to seconds
52
+ # Force immediate emit and flush
53
  socketio.emit('beam_update', {
54
  'beam_idx': beam_idx,
55
  'text': new_text
56
+ }, namespace='/', callback=lambda: eventlet.sleep(0))
57
+ socketio.sleep(0) # Force context switch
58
 
59
  def on_beam_finished(self, final_text: str):
60
  """Send completion notification through websocket"""