Spaces:
Sleeping
Sleeping
Xudong Xiao
commited on
Commit
•
0a00054
1
Parent(s):
7df592d
Create task from Youtube link & query status of a task using taskId
Browse files- entries/run.py +0 -1
- src/task.py +34 -16
- src/web/api_specs.yaml +79 -0
- src/web/web.py +18 -7
entries/run.py
CHANGED
@@ -66,7 +66,6 @@ if __name__ == "__main__":
|
|
66 |
task_list.append({"id": task_id, "status": "created", "resource_status:": "local"})
|
67 |
stream = open(local_dir.joinpath("task_queue.yaml"), "w")
|
68 |
dump(tasks_queue, stream)
|
69 |
-
|
70 |
task.run_pipeline()
|
71 |
|
72 |
|
|
|
66 |
task_list.append({"id": task_id, "status": "created", "resource_status:": "local"})
|
67 |
stream = open(local_dir.joinpath("task_queue.yaml"), "w")
|
68 |
dump(tasks_queue, stream)
|
|
|
69 |
task.run_pipeline()
|
70 |
|
71 |
|
src/task.py
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
-
|
2 |
-
|
|
|
3 |
import openai
|
4 |
-
import stable_whisper
|
5 |
-
import torch
|
6 |
-
import whisper
|
7 |
from pytube import YouTube
|
8 |
from os import getenv
|
9 |
-
from enum import Enum
|
10 |
from pathlib import Path
|
11 |
from enum import Enum, auto
|
12 |
import logging
|
@@ -44,19 +41,30 @@ SRT_Script : SrtScript
|
|
44 |
- (Optional) mp4
|
45 |
"""
|
46 |
|
47 |
-
class TaskStatus(Enum):
|
48 |
-
CREATED =
|
49 |
-
INITIALIZING_ASR =
|
50 |
-
PRE_PROCESSING =
|
51 |
-
TRANSLATING =
|
52 |
-
POST_PROCESSING =
|
53 |
-
OUTPUT_MODULE =
|
54 |
|
55 |
|
56 |
|
57 |
class Task:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def __init__(self, task_id, task_local_dir, launch_info):
|
59 |
-
self.
|
|
|
60 |
openai.api_key = getenv("OPENAI_API_KEY")
|
61 |
self.launch_info = launch_info
|
62 |
self.task_local_dir = task_local_dir
|
@@ -66,6 +74,7 @@ class Task:
|
|
66 |
self.task_id = task_id
|
67 |
self.progress = NotImplemented
|
68 |
self.SRT_Script = None
|
|
|
69 |
|
70 |
|
71 |
@staticmethod
|
@@ -88,26 +97,36 @@ class Task:
|
|
88 |
# Module 1 ASR: audio --> SRT_script
|
89 |
def get_srt_class(self, whisper_model='tiny', method="stable"):
|
90 |
# Instead of using the script_en variable directly, we'll use script_input
|
|
|
|
|
91 |
pass
|
92 |
|
93 |
# Module 2: SRT preprocess: perform preprocess steps
|
94 |
def preprocess(self):
|
|
|
|
|
95 |
pass
|
96 |
|
97 |
def update_translation_progress(self, new_progress):
|
98 |
if self.progress == TaskStatus.TRANSLATING:
|
99 |
self.progress = TaskStatus.TRANSLATING.value[0], new_progress
|
|
|
100 |
|
101 |
# Module 3: perform srt translation
|
102 |
def translation(self):
|
|
|
103 |
pass
|
104 |
|
105 |
# Module 4: perform srt post process steps
|
106 |
def postprocess(self):
|
|
|
|
|
107 |
pass
|
108 |
|
109 |
# Module 5: output module
|
110 |
def output_render(self):
|
|
|
|
|
111 |
pass
|
112 |
|
113 |
def run_pipeline(self):
|
@@ -115,8 +134,7 @@ class Task:
|
|
115 |
self.preprocess()
|
116 |
self.translation()
|
117 |
self.postprocess()
|
118 |
-
|
119 |
-
return out
|
120 |
|
121 |
class YoutubeTask(Task):
|
122 |
def __init__(self, task_id, task_local_dir, launch_info, youtube_url):
|
|
|
1 |
+
import threading
|
2 |
+
import time
|
3 |
+
|
4 |
import openai
|
|
|
|
|
|
|
5 |
from pytube import YouTube
|
6 |
from os import getenv
|
|
|
7 |
from pathlib import Path
|
8 |
from enum import Enum, auto
|
9 |
import logging
|
|
|
41 |
- (Optional) mp4
|
42 |
"""
|
43 |
|
44 |
+
class TaskStatus(str, Enum):
|
45 |
+
CREATED = 'CREATED'
|
46 |
+
INITIALIZING_ASR = 'INITIALIZING_ASR'
|
47 |
+
PRE_PROCESSING = 'PRE_PROCESSING'
|
48 |
+
TRANSLATING = 'TRANSLATING'
|
49 |
+
POST_PROCESSING = 'POST_PROCESSING'
|
50 |
+
OUTPUT_MODULE = 'OUTPUT_MODULE'
|
51 |
|
52 |
|
53 |
|
54 |
class Task:
|
55 |
+
@property
|
56 |
+
def status(self):
|
57 |
+
with self.__status_lock:
|
58 |
+
return self.__status
|
59 |
+
|
60 |
+
@status.setter
|
61 |
+
def status(self, new_status):
|
62 |
+
with self.__status_lock:
|
63 |
+
self.__status = new_status
|
64 |
+
|
65 |
def __init__(self, task_id, task_local_dir, launch_info):
|
66 |
+
self.__status_lock = threading.Lock()
|
67 |
+
self.__status = TaskStatus.CREATED
|
68 |
openai.api_key = getenv("OPENAI_API_KEY")
|
69 |
self.launch_info = launch_info
|
70 |
self.task_local_dir = task_local_dir
|
|
|
74 |
self.task_id = task_id
|
75 |
self.progress = NotImplemented
|
76 |
self.SRT_Script = None
|
77 |
+
self.result = None
|
78 |
|
79 |
|
80 |
@staticmethod
|
|
|
97 |
# Module 1 ASR: audio --> SRT_script
|
98 |
def get_srt_class(self, whisper_model='tiny', method="stable"):
|
99 |
# Instead of using the script_en variable directly, we'll use script_input
|
100 |
+
self.status = TaskStatus.INITIALIZING_ASR
|
101 |
+
time.sleep(5)
|
102 |
pass
|
103 |
|
104 |
# Module 2: SRT preprocess: perform preprocess steps
|
105 |
def preprocess(self):
|
106 |
+
self.status = TaskStatus.PRE_PROCESSING
|
107 |
+
time.sleep(5)
|
108 |
pass
|
109 |
|
110 |
def update_translation_progress(self, new_progress):
|
111 |
if self.progress == TaskStatus.TRANSLATING:
|
112 |
self.progress = TaskStatus.TRANSLATING.value[0], new_progress
|
113 |
+
time.sleep(5)
|
114 |
|
115 |
# Module 3: perform srt translation
|
116 |
def translation(self):
|
117 |
+
time.sleep(5)
|
118 |
pass
|
119 |
|
120 |
# Module 4: perform srt post process steps
|
121 |
def postprocess(self):
|
122 |
+
self.status = TaskStatus.POST_PROCESSING
|
123 |
+
time.sleep(5)
|
124 |
pass
|
125 |
|
126 |
# Module 5: output module
|
127 |
def output_render(self):
|
128 |
+
self.status = TaskStatus.OUTPUT_MODULE
|
129 |
+
return "TODO"
|
130 |
pass
|
131 |
|
132 |
def run_pipeline(self):
|
|
|
134 |
self.preprocess()
|
135 |
self.translation()
|
136 |
self.postprocess()
|
137 |
+
self.result = self.output_render()
|
|
|
138 |
|
139 |
class YoutubeTask(Task):
|
140 |
def __init__(self, task_id, task_local_dir, launch_info, youtube_url):
|
src/web/api_specs.yaml
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
openapi: 3.0.3
|
2 |
+
info:
|
3 |
+
title: Pigeon AI
|
4 |
+
description: Pigeon AI
|
5 |
+
version: 1.0.0
|
6 |
+
servers:
|
7 |
+
- url: 'https'
|
8 |
+
paths:
|
9 |
+
/api/task:
|
10 |
+
post:
|
11 |
+
summary: Create a task
|
12 |
+
operationId: createTask
|
13 |
+
requestBody:
|
14 |
+
content:
|
15 |
+
application/json:
|
16 |
+
schema:
|
17 |
+
$ref: '#/components/schemas/youtubeLink'
|
18 |
+
responses:
|
19 |
+
'200':
|
20 |
+
description: OK
|
21 |
+
content:
|
22 |
+
application/json:
|
23 |
+
schema:
|
24 |
+
$ref: '#/components/schemas/task'
|
25 |
+
/api/task/{taskId}/status:
|
26 |
+
get:
|
27 |
+
summary: Get task status
|
28 |
+
operationId: getTask
|
29 |
+
parameters:
|
30 |
+
- name: taskId
|
31 |
+
in: path
|
32 |
+
required: true
|
33 |
+
description: task id
|
34 |
+
schema:
|
35 |
+
type: string
|
36 |
+
responses:
|
37 |
+
'200':
|
38 |
+
description: OK
|
39 |
+
content:
|
40 |
+
application/json:
|
41 |
+
schema:
|
42 |
+
$ref: '#/components/schemas/taskStatus'
|
43 |
+
'404':
|
44 |
+
description: Not Found
|
45 |
+
content:
|
46 |
+
application/json:
|
47 |
+
schema:
|
48 |
+
$ref: '#/components/schemas/error'
|
49 |
+
|
50 |
+
components:
|
51 |
+
schemas:
|
52 |
+
youtubeLink:
|
53 |
+
type: object
|
54 |
+
properties:
|
55 |
+
youtubeLink:
|
56 |
+
type: string
|
57 |
+
description: youtube link
|
58 |
+
example: https://www.youtube.com/watch?v=5qap5aO4i9A
|
59 |
+
task:
|
60 |
+
type: object
|
61 |
+
properties:
|
62 |
+
taskId:
|
63 |
+
type: string
|
64 |
+
description: task id generated by uuid
|
65 |
+
example: 7a765280-1a72-47e4-8747-8a38cdbaca91
|
66 |
+
taskStatus:
|
67 |
+
type: object
|
68 |
+
properties:
|
69 |
+
status:
|
70 |
+
type: string
|
71 |
+
description: task status
|
72 |
+
example: PROCESSING
|
73 |
+
error:
|
74 |
+
type: object
|
75 |
+
properties:
|
76 |
+
error:
|
77 |
+
type: string
|
78 |
+
description: error message
|
79 |
+
example: 'Invalid youtube link'
|
src/web/web.py
CHANGED
@@ -2,27 +2,38 @@ import yaml
|
|
2 |
from flask import Flask, request, jsonify
|
3 |
from concurrent.futures import ThreadPoolExecutor
|
4 |
from src.task import Task
|
|
|
5 |
|
6 |
app = Flask(__name__)
|
7 |
|
8 |
# Global thread pool
|
9 |
executor = ThreadPoolExecutor(max_workers=4) # Adjust max_workers as per your requirement
|
10 |
|
|
|
|
|
|
|
11 |
@app.route('/api/task', methods=['POST'])
|
12 |
-
def
|
|
|
13 |
data = request.get_json()
|
14 |
-
|
15 |
-
if not data or 'youtube_link' not in data:
|
16 |
return jsonify({'error': 'YouTube link not provided'}), 400
|
17 |
-
youtube_link = data['
|
18 |
launch_config = yaml.load(open("./configs/local_launch.yaml"), Loader=yaml.Loader)
|
19 |
-
|
20 |
-
|
|
|
21 |
# Submit task to thread pool
|
22 |
executor.submit(task.run)
|
23 |
|
24 |
-
return jsonify({'
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
if __name__ == '__main__':
|
28 |
app.run(debug=True)
|
|
|
2 |
from flask import Flask, request, jsonify
|
3 |
from concurrent.futures import ThreadPoolExecutor
|
4 |
from src.task import Task
|
5 |
+
from uuid import uuid4
|
6 |
|
7 |
app = Flask(__name__)
|
8 |
|
9 |
# Global thread pool
|
10 |
executor = ThreadPoolExecutor(max_workers=4) # Adjust max_workers as per your requirement
|
11 |
|
12 |
+
# thread safe task map to store task status
|
13 |
+
task_map = {}
|
14 |
+
|
15 |
@app.route('/api/task', methods=['POST'])
|
16 |
+
def create_task_youtube():
|
17 |
+
global task_map
|
18 |
data = request.get_json()
|
19 |
+
if not data or 'youtubeLink' not in data:
|
|
|
20 |
return jsonify({'error': 'YouTube link not provided'}), 400
|
21 |
+
youtube_link = data['youtubeLink']
|
22 |
launch_config = yaml.load(open("./configs/local_launch.yaml"), Loader=yaml.Loader)
|
23 |
+
task_id = str(uuid4())
|
24 |
+
task = Task.fromYoutubeLink(youtube_link, task_id, launch_config)
|
25 |
+
task_map[task_id] = task
|
26 |
# Submit task to thread pool
|
27 |
executor.submit(task.run)
|
28 |
|
29 |
+
return jsonify({'taskId': task.task_id})
|
30 |
|
31 |
+
@app.route('/api/task/<taskId>/status', methods=['GET'])
|
32 |
+
def get_task_status(taskId):
|
33 |
+
global task_map
|
34 |
+
if taskId not in task_map:
|
35 |
+
return jsonify({'error': 'Task not found'}), 404
|
36 |
+
return jsonify({'status': task_map[taskId].status})
|
37 |
|
38 |
if __name__ == '__main__':
|
39 |
app.run(debug=True)
|