qingxu99 commited on
Commit
2af5f09
1 Parent(s): 0f0157b
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.pyc
2
+ password
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM fuqingxu/bbdown

# Single apt layer; --no-install-recommends and list cleanup keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3 python3-dev python3-pip && \
    rm -rf /var/lib/apt/lists/*

# One pip layer instead of four: one resolver run, fewer image layers.
# (--break-system-packages: base image ships a PEP 668 managed Python.)
RUN python3 -m pip install --break-system-packages \
    fastapi pydantic loguru requests python-multipart uvicorn tenacity

# Run as non-root; BBDown lives under /root, so hand that tree to `user`.
RUN useradd -m -u 1000 user && chown -R user:user /root
USER user

COPY ./docker_as_a_service /docker_as_a_service
WORKDIR /docker_as_a_service

# bash -c entrypoint so CMD can be overridden with an arbitrary shell command.
ENTRYPOINT ["/bin/bash", "-c"]
CMD ["python3 docker_as_a_service.py"]

# Build/run examples:
# docker build -t testbbdown .
# docker run --rm -it -p 49000:49000 testbbdown
# docker run --rm -it -p 49000:49000 --name funnn testbbdown bash
# /root/.dotnet/tools/BBDown BV1LSSHYXEtv --audio-only --use-app-api --work-dir /tmp/tmp9lrn38wb
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- title: Video Gpt
3
- emoji: 👁
4
- colorFrom: red
5
- colorTo: gray
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: VGPT
3
+ emoji: 🐳
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
+ app_port: 49000
9
  ---
10
 
11
+
docker_as_a_service/docker_as_a_service.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DaaS (Docker as a Service) is a service
3
+ that allows users to run docker commands on the server side.
4
+ """
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.responses import StreamingResponse
8
+ from fastapi import FastAPI, File, UploadFile, HTTPException
9
+ from pydantic import BaseModel, Field
10
+ from typing import Optional, Dict
11
+ import time
12
+ import os
13
+ import asyncio
14
+ import subprocess
15
+ import uuid
16
+ import glob
17
+ import threading
18
+ import queue
19
+ from shared_utils.docker_as_service_api import DockerServiceApiComModel
20
+
21
+ app = FastAPI()
22
+
23
def python_obj_to_pickle_file_bytes(obj):
    """Serialize *obj* with pickle and return the raw bytes.

    Used to frame every chunk of the streaming responses; the client
    unpickles each chunk back into a DockerServiceApiComModel.
    """
    import pickle
    # pickle.dumps is the stdlib one-liner for "pickle to bytes";
    # no need to round-trip through an in-memory BytesIO buffer.
    return pickle.dumps(obj)
29
+
30
def yield_message(message):
    """Wrap a plain status string in the DaaS com-model and pickle it."""
    envelope = DockerServiceApiComModel(server_message=message)
    return python_obj_to_pickle_file_bytes(envelope)
33
+
34
def read_output(stream, output_queue):
    """Pump lines from *stream* into *output_queue* until EOF.

    Runs on a daemon thread so the async generator can poll the queue
    without ever blocking on the subprocess pipe.
    """
    # readline() returns '' at EOF for a text-mode pipe; iter()'s
    # sentinel form stops exactly there, same as the original loop.
    for line in iter(stream.readline, ''):
        output_queue.put(line)
42
+
43
+
44
async def stream_generator(request_obj):
    """Run BBDown for the requested video id and stream progress back.

    Yields pickled DockerServiceApiComModel chunks: first the command
    line, then batches of stdout/stderr, then the return code, and
    finally a "complete" chunk carrying every downloaded file as bytes.
    """
    import tempfile
    # Fresh work dir per request; BBDown writes here and the directory
    # (with its files) is removed when the generator exits.
    print('create temp dir')
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:

        # Construct the docker command
        download_folder = temp_dir

        # Always empty: the work dir is freshly created, so every file
        # found after the run is considered newly downloaded.
        existing_file_before_download = []

        # client_command is expected to be the BV id (optionally with
        # extra BBDown flags appended by the client).
        video_id = request_obj.client_command
        cmd = [
            '/root/.dotnet/tools/BBDown',
            video_id,
            '--use-app-api',
            '--work-dir',
            f'{os.path.abspath(temp_dir)}'
        ]

        # SECURITY NOTE(review): video_id originates from an unpickled
        # client payload and is joined into a shell=True command line --
        # a crafted client_command can inject arbitrary shell commands.
        cmd = ' '.join(cmd)
        yield yield_message(cmd)
        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True,
                                   text=True)

        # Drain stdout/stderr on daemon threads so the polling loop
        # below never blocks on a full pipe.
        stdout_queue = queue.Queue()
        thread = threading.Thread(target=read_output, args=(process.stdout, stdout_queue))
        thread.daemon = True
        thread.start()
        stderr_queue = queue.Queue()
        thread = threading.Thread(target=read_output, args=(process.stderr, stderr_queue))
        thread.daemon = True
        thread.start()

        while True:
            print("looping")
            # Check if there is any output in the queue

            stdout_this_round = ""
            stderr_this_round = ""
            # Drain everything currently queued, then flush one message
            # (possibly empty) per round.
            while True:
                try:
                    output_stdout = stdout_queue.get_nowait()  # Non-blocking get
                    if output_stdout:
                        stdout_this_round += output_stdout
                        print(output_stdout)
                except queue.Empty:
                    yield yield_message(stdout_this_round)
                    break

            while True:
                try:
                    output_stderr = stderr_queue.get_nowait()  # Non-blocking get
                    if output_stderr:
                        stderr_this_round += output_stderr
                        print(output_stderr)
                except queue.Empty:
                    yield yield_message(stderr_this_round)
                    break

            # Break the loop if the process has finished
            if process.poll() is not None:
                break

            await asyncio.sleep(0.5)
        print("(daas return) ")

        # Get the return code
        return_code = process.returncode
        yield yield_message("(daas return code:) " + str(return_code))
        print("(daas return code:) " + str(return_code))
        # existing_file_after_download = glob.glob(os.path.join(download_folder, '**', '*'))
        existing_file_after_download = glob.glob(os.path.join(download_folder, '**'), recursive=True)

        print("downloaded_files")
        print(existing_file_after_download)
        # get the difference (before-list is always empty, see above)
        downloaded_files = [
            f for f in existing_file_after_download if f not in existing_file_before_download
        ]
        # NOTE(review): glob returns absolute paths here, so os.path.join
        # returns f unchanged; the attachment keys below are therefore
        # absolute temp-dir paths, not bare file names -- confirm the
        # client expects that.
        downloaded_files_path = [
            os.path.join(download_folder, f) for f in existing_file_after_download if f not in existing_file_before_download
        ]
        # Read every regular file into memory for the final attachment.
        server_file_attach = {}
        for fp, fn in zip(downloaded_files_path, downloaded_files):
            if os.path.isdir(fp): continue
            with open(fp, "rb") as f:
                file_bytes = f.read()
                server_file_attach[fn] = file_bytes
        print("downloaded_files")
        print(downloaded_files)
        dsacm = DockerServiceApiComModel(
            server_message="complete",
            server_file_attach=server_file_attach,
        )
        print("sending files")
        yield python_obj_to_pickle_file_bytes(dsacm)
151
+
152
+
153
def simple_generator(return_obj):
    """One-shot generator: pickle *return_obj* into a single response chunk."""
    yield python_obj_to_pickle_file_bytes(
        DockerServiceApiComModel(server_message=return_obj)
    )
158
+
159
@app.post("/stream")
async def stream_response(file: UploadFile = File(...)):
    """Accept a pickled request object and stream the BBDown run back."""
    import pickle
    # SECURITY NOTE(review): unpickling an uploaded file executes
    # arbitrary code if the client is untrusted -- confirm this service
    # is only reachable by trusted callers.
    payload = await file.read()
    request_obj = pickle.loads(payload)
    return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
169
+
170
@app.post("/search")
async def search_response(file: UploadFile = File(...)):
    """Accept a pickled request whose client_command is a search keyword
    and return matching Bilibili videos as a single pickled chunk.

    Renamed from ``stream_response``: the /stream handler already used
    that name, so this duplicate def silently shadowed it at module
    level (the HTTP routes themselves are unchanged).
    """
    import pickle
    import io
    content = await file.read()
    # SECURITY NOTE(review): pickle on client-supplied bytes executes
    # arbitrary code if the client is untrusted.
    with io.BytesIO(content) as f:
        request_obj = pickle.load(f)

    # process the request_obj
    keyword = request_obj.client_command

    from experimental_mods.get_search_kw_api_stop import search_videos
    # Default parameters for video search
    csrf_token = '40a227fcf12c380d7d3c81af2cd8c5e8'  # Using default from main()
    search_type = 'default'
    max_pages = 1
    output_path = 'search_results'
    config_path = 'experimental_mods/config.json'

    # Search for videos and return the result list in one chunk
    videos = search_videos(
        keyword=keyword,
        csrf_token=csrf_token,
        search_type=search_type,
        max_pages=max_pages,
        output_path=output_path,
        config_path=config_path,
        early_stop=True
    )

    return StreamingResponse(simple_generator(videos), media_type="application/octet-stream")
202
+
203
@app.get("/")
async def hi():
    """Landing page: usage instructions (English + Chinese)."""
    return (
        "Hello, this is Docker as a Service (DaaS)! If you want to use this service, you must duplicate this space. "
        "您好,这里是Docker作为服务(DaaS)!如果您想使用此服务,您必须复制此空间。复制方法:点击https://huggingface.co/spaces/hamercity/bbdown页面右上角的三个点,然后选择“复制空间”。"
        "此外,在设置中,你还需要修改URL,例如:DAAS_SERVER_URL = \"https://你的用户名-你的空间名.hf.space/stream\""
    )
208
+
209
if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; 49000 matches app_port in the Space README.
    uvicorn.run(app, host="0.0.0.0", port=49000)
docker_as_a_service/docker_as_a_service_host.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DaaS (Docker as a Service) is a service
3
+ that allows users to run docker commands on the server side.
4
+ """
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.responses import StreamingResponse
8
+ from fastapi import FastAPI, File, UploadFile, HTTPException
9
+ from pydantic import BaseModel, Field
10
+ from typing import Optional, Dict
11
+ import time
12
+ import os
13
+ import asyncio
14
+ import subprocess
15
+ import uuid
16
+ import glob
17
+ import threading
18
+ import queue
19
+ from shared_utils.docker_as_service_api import DockerServiceApiComModel
20
+
21
+ app = FastAPI()
22
+
23
def python_obj_to_pickle_file_bytes(obj):
    """Serialize *obj* with pickle and return the raw bytes.

    Every chunk of the streaming responses is framed this way; the
    client unpickles each chunk back into a DockerServiceApiComModel.
    """
    import pickle
    # pickle.dumps replaces the hand-rolled BytesIO round-trip.
    return pickle.dumps(obj)
29
+
30
def yield_message(message):
    """Wrap a plain status string in the DaaS com-model and pickle it."""
    envelope = DockerServiceApiComModel(server_message=message)
    return python_obj_to_pickle_file_bytes(envelope)
33
+
34
def read_output(stream, output_queue):
    """Pump lines from *stream* into *output_queue* until EOF.

    Meant to run on a daemon thread so the polling loop never blocks
    on the subprocess pipe.
    """
    # Text-mode readline() yields '' at EOF; the sentinel form of
    # iter() terminates there, matching the original while/break loop.
    for line in iter(stream.readline, ''):
        output_queue.put(line)
42
+
43
+
44
async def stream_generator(request_obj):
    """Run the bbdown docker image for the requested video and stream
    progress back as pickled DockerServiceApiComModel chunks.

    Sequence: command line, batched stdout/stderr, return code, then a
    final "complete" chunk carrying every downloaded file as bytes.
    """
    import tempfile
    # Fresh work dir per request; removed (with its files) on exit.
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:

        download_folder = temp_dir

        # Always empty: the dir is new, so every file found later is new.
        existing_file_before_download = []

        # SECURITY NOTE(review): client_command comes from an unpickled
        # client payload and is joined into a shell=True command line --
        # a crafted value can inject arbitrary shell commands.
        video_id = request_obj.client_command
        # (A dead BBDown-binary variant of `cmd`/`cmd_chmod` that was
        # immediately overwritten has been removed.)
        cmd = [
            'docker', 'run', '--rm',
            '-v',
            f'{os.path.abspath(temp_dir)}:/downloads',
            'bbdown',
            video_id,
            '--use-app-api',
            '--work-dir',
            '/downloads'
        ]
        # Second container run: chmod the bind-mounted downloads so the
        # host-side process (possibly a different uid) can read them.
        cmd_chmod = [
            'docker', 'run', '--rm',
            '-v',
            f'{os.path.abspath(temp_dir)}:/downloads',
            '--entrypoint=""',  # override the entrypoint
            'bbdown',           # image name
            'chmod',
            '-R',
            '777',
            '/downloads'
        ]

        cmd = ' '.join(cmd)
        yield yield_message(cmd)
        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True,
                                   text=True)

        # Drain pipes on daemon threads so polling below never blocks.
        stdout_queue = queue.Queue()
        thread = threading.Thread(target=read_output, args=(process.stdout, stdout_queue))
        thread.daemon = True
        thread.start()
        stderr_queue = queue.Queue()
        thread = threading.Thread(target=read_output, args=(process.stderr, stderr_queue))
        thread.daemon = True
        thread.start()

        while True:
            print("looping")
            stdout_this_round = ""
            stderr_this_round = ""
            # Drain everything currently queued, then flush one message.
            while True:
                try:
                    output_stdout = stdout_queue.get_nowait()  # Non-blocking get
                    if output_stdout:
                        stdout_this_round += output_stdout
                        print(output_stdout)
                except queue.Empty:
                    yield yield_message(stdout_this_round)
                    break

            while True:
                try:
                    output_stderr = stderr_queue.get_nowait()  # Non-blocking get
                    if output_stderr:
                        stderr_this_round += output_stderr
                        print(output_stderr)
                except queue.Empty:
                    yield yield_message(stderr_this_round)
                    break

            # Stop once the container process has exited.
            if process.poll() is not None:
                break

            await asyncio.sleep(0.5)

        # Get the return code
        return_code = process.returncode
        yield yield_message("(daas return code:) " + str(return_code))

        # change files mod to 777
        if cmd_chmod:
            docker_chmod_res = subprocess.call(' '.join(cmd_chmod), shell=True)

        # FIX: without recursive=True, '**' degrades to '*' and the old
        # pattern '**/*' only matched entries one level down -- files
        # written directly into the work dir were silently dropped.
        # The sibling docker_as_a_service.py already uses this form.
        existing_file_after_download = glob.glob(os.path.join(download_folder, '**'), recursive=True)
        # get the difference (before-list is always empty, see above)
        downloaded_files = [
            f for f in existing_file_after_download if f not in existing_file_before_download
        ]
        # NOTE(review): glob returns absolute paths, so join() returns f
        # unchanged; attachment keys are absolute temp-dir paths.
        downloaded_files_path = [
            os.path.join(download_folder, f) for f in existing_file_after_download if f not in existing_file_before_download
        ]
        # Read every regular file into memory for the final attachment.
        server_file_attach = {}
        for fp, fn in zip(downloaded_files_path, downloaded_files):
            if os.path.isdir(fp): continue
            with open(fp, "rb") as f:
                file_bytes = f.read()
                server_file_attach[fn] = file_bytes

        dsacm = DockerServiceApiComModel(
            server_message="complete",
            server_file_attach=server_file_attach,
        )
        yield python_obj_to_pickle_file_bytes(dsacm)
170
+
171
+
172
def simple_generator(return_obj):
    """One-shot generator: pickle *return_obj* into a single response chunk."""
    yield python_obj_to_pickle_file_bytes(
        DockerServiceApiComModel(server_message=return_obj)
    )
177
+
178
@app.post("/stream")
async def stream_response(file: UploadFile = File(...)):
    """Accept a pickled request object and stream the docker run back."""
    import pickle
    # SECURITY NOTE(review): unpickling an uploaded file executes
    # arbitrary code if the client is untrusted.
    payload = await file.read()
    request_obj = pickle.loads(payload)
    return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
188
+
189
@app.post("/search")
async def search_response(file: UploadFile = File(...)):
    """Accept a pickled request whose client_command is a search keyword
    and return matching Bilibili videos as a single pickled chunk.

    Renamed from ``stream_response``: the /stream handler already used
    that name, so this duplicate def silently shadowed it at module
    level (the HTTP routes themselves are unchanged).
    """
    import pickle
    import io
    content = await file.read()
    # SECURITY NOTE(review): pickle on client-supplied bytes executes
    # arbitrary code if the client is untrusted.
    with io.BytesIO(content) as f:
        request_obj = pickle.load(f)

    # process the request_obj
    keyword = request_obj.client_command

    from experimental_mods.get_search_kw_api_stop import search_videos
    # Default parameters for video search
    csrf_token = '40a227fcf12c380d7d3c81af2cd8c5e8'  # Using default from main()
    search_type = 'default'
    max_pages = 1
    output_path = 'search_results'
    config_path = 'experimental_mods/config.json'

    # Search for videos and return the result list in one chunk
    videos = search_videos(
        keyword=keyword,
        csrf_token=csrf_token,
        search_type=search_type,
        max_pages=max_pages,
        output_path=output_path,
        config_path=config_path,
        early_stop=True
    )

    return StreamingResponse(simple_generator(videos), media_type="application/octet-stream")
221
+
222
@app.get("/")
async def hi():
    """Landing page: usage instructions (English + Chinese)."""
    return (
        "Hello, this is Docker as a Service (DaaS)! If you want to use this service, you must duplicate this space. "
        "您好,这里是Docker作为服务(DaaS)!如果您想使用此服务,您必须复制此空间。复制方法:点击https://huggingface.co/spaces/hamercity/bbdown页面右上角的三个点,然后选择“复制空间”。"
        "此外,在设置中,你还需要修改URL,例如:DAAS_SERVER_URL = \"https://你的用户名-你的空间名.hf.space/stream\""
    )
227
+
228
if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces; 49000 matches app_port in the Space README.
    uvicorn.run(app, host="0.0.0.0", port=49000)
docker_as_a_service/experimental_mods/config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
3
+ "cookie": "buvid3=902A16D7-B19F-790B-FF85-197C38F3227462114infoc; b_nut=1731952262; b_lsid=10DF102ED6_19340664283; _uuid=84EEC61010-CE55-DAC4-68C10-CFA6108F89C8963816infoc; buvid_fp=b2f71cc1058da966a62a2caf13596b1f; buvid4=0C27B28C-406E-B88B-D232-51167891712B62902-024111817-wg%2Bfug1OO8Jl5lXoeCp0dw%3D%3D; enable_web_push=DISABLE; home_feed_column=4; browser_resolution=1313-699; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MzIyMTE0NjMsImlhdCI6MTczMTk1MjIwMywicGx0IjotMX0.xKiEBdcpGFZy7Qv2wCExcBoRK-LGtvv_wvmCbuDoCN8; bili_ticket_expires=1732211403; CURRENT_FNVAL=4048; sid=5h1fpj7o; rpdid=|(k|kmJk)Y|u0J'u~JumlkkY)"
4
+ }
5
+
6
+
docker_as_a_service/experimental_mods/docker_as_a_service.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DaaS (Docker as a Service) is a service
3
+ that allows users to run docker commands on the server side.
4
+ """
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.responses import StreamingResponse
8
+ from fastapi import FastAPI, File, UploadFile, HTTPException
9
+ from pydantic import BaseModel, Field
10
+ from typing import Optional, Dict
11
+ import time
12
+ import os
13
+ import asyncio
14
+ import subprocess
15
+ import uuid
16
+ import threading
17
+ import queue
18
+
19
+ app = FastAPI()
20
+
21
class DockerServiceApiComModel(BaseModel):
    """Wire format shared by the DaaS client and server.

    client_* fields travel client->server; server_* fields travel back.
    Every field defaults to None, so any subset may be populated.
    """
    client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
    client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
    # FIX: the title/description below were copy-pasted from
    # server_std_err; this field carries the free-form server message.
    server_message: Optional[str] = Field(default=None, title="Server message", description="The message from the server side")
    server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
    server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
    server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
28
+
29
+
30
def python_obj_to_pickle_file_bytes(obj):
    """Serialize *obj* with pickle and return the raw bytes."""
    import pickle
    # pickle.dumps replaces the hand-rolled BytesIO round-trip.
    return pickle.dumps(obj)
36
+
37
def yield_message(message):
    """Wrap a plain status string in the DaaS com-model and pickle it."""
    envelope = DockerServiceApiComModel(server_message=message)
    return python_obj_to_pickle_file_bytes(envelope)
40
+
41
def read_output(stream, output_queue):
    """Pump lines from *stream* into *output_queue* until EOF.

    Runs on a daemon thread. The leftover per-line 'recv' debug print,
    which spammed the server log on every line of subprocess output,
    has been removed.
    """
    for line in iter(stream.readline, ''):
        output_queue.put(line)
49
+
50
+
51
async def stream_generator(request_obj):
    """Experimental DaaS runner: executes a placeholder shell loop and
    streams its output back as pickled DockerServiceApiComModel chunks.
    """
    import tempfile
    # Create a temporary directory
    with tempfile.TemporaryDirectory() as temp_dir:

        # Construct the docker command
        download_folder = temp_dir

        # Get list of existing files before download
        existing_file_before_download = []

        video_id = request_obj.client_command
        # The real docker invocation is commented out; this placeholder
        # prints the date forever so streaming can be exercised.
        cmd = [
            # 'docker', 'run', '--rm',
            # '-v', f'{download_folder}:/downloads',
            # 'bbdown',
            # video_id,
            # '--use-app-api',
            # '--work-dir', '/downloads'
            "while true; do date; sleep 1; done"
        ]
        cmd = ' '.join(cmd)
        yield yield_message(cmd)
        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True,
                                   text=True)

        stdout_queue = queue.Queue()
        thread = threading.Thread(target=read_output, args=(process.stdout, stdout_queue))
        thread.daemon = True
        thread.start()
        stderr_queue = queue.Queue()
        thread = threading.Thread(target=read_output, args=(process.stderr, stderr_queue))
        thread.daemon = True
        thread.start()

        while True:
            print("looping")
            # Check if there is any output in the queue
            try:
                output_stdout = stdout_queue.get_nowait()  # Non-blocking get
                if output_stdout:
                    print(output_stdout)
                    yield yield_message(output_stdout)

                # NOTE(review): if stdout_queue was empty above, the
                # Empty exception skips this stderr read entirely --
                # stderr is only drained when stdout had data.
                output_stderr = stderr_queue.get_nowait()  # Non-blocking get
                if output_stderr:
                    # FIX: was print(output_stdout) -- logged the wrong stream.
                    print(output_stderr)
                    yield yield_message(output_stderr)
            except queue.Empty:
                pass  # No output available

            # Break the loop if the process has finished
            if process.poll() is not None:
                break

            await asyncio.sleep(0.25)

        # Get the return code
        return_code = process.returncode
        yield yield_message("(return code:) " + str(return_code))

        # print(f"Successfully downloaded video {video_id}")
        existing_file_after_download = list(os.listdir(download_folder))
        # get the difference
        downloaded_files = [
            f for f in existing_file_after_download if f not in existing_file_before_download
        ]
        downloaded_files_path = [
            os.path.join(download_folder, f) for f in existing_file_after_download if f not in existing_file_before_download
        ]
        # read file
        server_file_attach = {}
        for fp, fn in zip(downloaded_files_path, downloaded_files):
            with open(fp, "rb") as f:
                file_bytes = f.read()
                server_file_attach[fn] = file_bytes

        dsacm = DockerServiceApiComModel(
            server_message="complete",
            server_file_attach=server_file_attach,
        )
        yield python_obj_to_pickle_file_bytes(dsacm)
136
+
137
+
138
@app.post("/stream")
async def stream_response(file: UploadFile = File(...)):
    """Accept a pickled request object and stream the run back."""
    import pickle
    import io
    # SECURITY NOTE(review): unpickling an uploaded file executes
    # arbitrary code if the client is untrusted.
    content = await file.read()
    with io.BytesIO(content) as buf:
        request_obj = pickle.load(buf)
    return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
148
+
149
if __name__ == "__main__":
    import uvicorn
    # Experimental variant binds loopback only, on a different port (48000).
    uvicorn.run(app, host="127.0.0.1", port=48000)
docker_as_a_service/experimental_mods/docker_to_api copy.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ from fastapi import FastAPI, File, UploadFile, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ from io import BytesIO
6
+ import subprocess
7
+
8
+ app = FastAPI()
9
+
10
@app.post("/container_task")
async def container_task(file: UploadFile = File(...)):
    """Prototype endpoint: run `docker run bbdown <param>` for an
    uploaded pickle parameter and return a placeholder output file.

    NOTE(review): sketch code -- the stream_output/response_stream
    generators below are defined but never invoked; the response is
    the static output.pkl written near the end.
    """
    # Save the uploaded file to disk
    input_filepath = "input.pkl"
    with open(input_filepath, "wb") as f:
        f.write(await file.read())

    # Process the unpickle_param from the file
    # SECURITY NOTE(review): pickle.load on an uploaded file executes
    # arbitrary code if the client is untrusted.
    try:
        with open(input_filepath, 'rb') as f:
            unpickle_param = pickle.load(f)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Failed to unpickle the input file: {str(e)}")

    # Execute the Docker command
    command = ["docker", "run", "--rm", "bbdown", str(unpickle_param)]
    # No text=True: the pipes yield bytes (the b"" comparison below relies on it).
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    # Stream the output of the command
    stdout_stream = BytesIO()
    stderr_stream = BytesIO()

    # Create a generator to stream output
    def stream_output():
        while True:
            output = process.stdout.readline()
            if output == b"" and process.poll() is not None:
                break
            if output:
                stdout_stream.write(output)

        stderr_output = process.stderr.read()
        stderr_stream.write(stderr_output)
        yield ""

    # Return the StreamingResponse for the current output
    # NOTE(review): never invoked -- dead code in the current flow.
    async def response_stream():
        for _ in stream_output():
            yield stdout_stream.getvalue()
            stdout_stream.seek(0)  # Rewind for next read
            stdout_stream.truncate()  # Clear for next fill

    # Run the process and wait for completion
    # NOTE(review): the pipes were never drained (stream_output never
    # ran), so a chatty container could fill the pipe and deadlock here.
    process.wait()

    # Check for errors
    # NOTE(review): stderr_stream is only filled inside stream_output(),
    # so this detail will be empty in practice.
    if process.returncode != 0:
        raise HTTPException(status_code=500, detail=f"Docker command failed with error: {stderr_stream.getvalue().decode()}")

    # Create a new pickle file as output
    output_filepath = "output.pkl"
    with open(output_filepath, 'wb') as f:
        f.write(b"Your output data here.")  # Replace this with actual output data

    # Return the output file
    return StreamingResponse(open(output_filepath, "rb"), media_type='application/octet-stream',
                             headers={"Content-Disposition": f"attachment; filename={os.path.basename(output_filepath)}"})
67
+
68
+ # To run the application, use: uvicorn your_file_name:app --reload
69
+ from fastapi import FastAPI
70
+ from fastapi.responses import StreamingResponse
71
+ import time
72
+ import asyncio
73
+
74
+ app = FastAPI()
75
+
76
async def stream_generator():
    """Demo async generator: ten numbered text chunks, one per second."""
    for idx in range(10):
        yield f"Data chunk {idx}\n"
        await asyncio.sleep(1)  # Simulating some delay
80
+
81
@app.get("/stream")
async def stream_response():
    """Serve the demo generator as a plain-text streaming response."""
    return StreamingResponse(stream_generator(), media_type="text/plain")
84
+
85
if __name__ == "__main__":
    import uvicorn
    # Local-only demo binding. NOTE(review): `app` was re-assigned above,
    # so only routes registered after the second FastAPI() are served.
    uvicorn.run(app, host="127.0.0.1", port=8000)
88
+
89
+
90
def client_call(*args, **kwargs):
    """Sketch of the intended client-side API (non-functional).

    NOTE(review): `execute` is not defined anywhere in this file and the
    attribute accesses below discard their results -- this reads as a
    design note in code form, not runnable code.
    """
    result = execute(*args, **kwargs)

    result.text
    result.file_manifest
    result.files
97
+
docker_as_a_service/experimental_mods/get_bilibili_resource copy.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
2
+
3
+
4
def download_bilibili(video_id, only_audio, user_name, chatbot, history):
    """Download a Bilibili video (or audio only) via the local bbdown
    docker image and return the relative paths of the new files.

    Equivalent shell: docker run --rm -v $(pwd)/downloads:/downloads
    bbdown <BV id> --use-app-api --work-dir /downloads
    """
    import os
    import subprocess
    from toolbox import get_log_folder

    download_folder_rel = get_log_folder(user=user_name, plugin_name="shared")
    download_folder = os.path.abspath(download_folder_rel)

    # Snapshot the folder so we can tell which files are new afterwards.
    files_before = set(os.listdir(download_folder))

    docker_cmd = [
        'docker', 'run', '--rm',
        '-v', f'{download_folder}:/downloads',
        'bbdown',
        video_id,
        '--use-app-api',
        '--work-dir', '/downloads',
    ]
    if only_audio:
        docker_cmd.append('--audio-only')

    # Raises CalledProcessError on a non-zero exit (check=True).
    subprocess.run(docker_cmd, check=True, capture_output=True, text=True)

    files_after = os.listdir(download_folder)
    # Anything not present before the run is a freshly downloaded file.
    return [
        os.path.join(download_folder_rel, name)
        for name in files_after
        if name not in files_before
    ]
37
+
docker_as_a_service/experimental_mods/get_bilibili_resource.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
2
+ from shared_utils.docker_as_service_api import stream_daas
3
+ from shared_utils.docker_as_service_api import DockerServiceApiComModel
4
+
5
def download_bilibili(video_id, only_audio, user_name, chatbot, history):
    """Stream a Bilibili download through the DaaS server, reporting
    progress into the chat UI.

    Yields UI updates; returns the final server file-attachment dict
    ({file name: bytes}), or None if the server produced no chunks.
    """
    from toolbox import get_log_folder

    chatbot.append([None, "Processing..."])
    yield from update_ui(chatbot, history)

    client_command = f'{video_id} --audio-only' if only_audio else video_id
    server_url = get_conf('DAAS_SERVER_URL')
    docker_service_api_com_model = DockerServiceApiComModel(client_command=client_command)
    save_file_dir = get_log_folder(user_name, plugin_name='media_downloader')
    output_manifest = None
    for output_manifest in stream_daas(docker_service_api_com_model, server_url, save_file_dir):
        # FIX: the manifest fields default to None on any given chunk,
        # so coerce to '' before .replace() (previously an AttributeError
        # on the first chunk that omitted a field).
        status_buf = "DaaS message: \n\n"
        status_buf += (output_manifest['server_message'] or '').replace('\n', '<br/>')
        status_buf += "\n\n"
        status_buf += "DaaS standard error: \n\n"
        status_buf += (output_manifest['server_std_err'] or '').replace('\n', '<br/>')
        status_buf += "\n\n"
        status_buf += "DaaS standard output: \n\n"
        status_buf += (output_manifest['server_std_out'] or '').replace('\n', '<br/>')
        status_buf += "\n\n"
        status_buf += "DaaS file attach: \n\n"
        status_buf += str(output_manifest['server_file_attach'])
        yield from update_ui_lastest_msg(status_buf, chatbot, history)

    # FIX: previously a NameError if the server yielded nothing at all.
    return output_manifest['server_file_attach'] if output_manifest is not None else None
34
+
docker_as_a_service/experimental_mods/get_search_kw_api_stop.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ import json
5
+ import random
6
+ import requests
7
+ import argparse
8
+ from loguru import logger
9
+ from datetime import datetime, timezone, timedelta
10
+ from typing import List, Dict
11
+ from tenacity import retry, stop_after_attempt, wait_random
12
+
13
def update_and_save_data(new_data: List[Dict], filename: str):
    """Merge *new_data* into the JSON list stored at *filename*.

    Videos are de-duplicated by their 'bvid'; the merged list is written
    back to *filename* and returned.
    """
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as f:
            existing_data = json.load(f)
    else:
        existing_data = []

    existing_bvids = set(video['bvid'] for video in existing_data)

    # Append only videos whose bvid we have not seen yet.
    for video in new_data:
        if video['bvid'] not in existing_bvids:
            existing_data.append(video)
            existing_bvids.add(video['bvid'])

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(existing_data, f, ensure_ascii=False, indent=4)

    # FIX: the message had lost its placeholder; report the actual path.
    print(f"数据已更新并保存到 {filename}")
    return existing_data
32
+
33
def extract_and_combine(text):
    """Strip the <em class="keyword">...</em> highlight that the
    Bilibili search API wraps around the matched keyword.

    Returns *text* unchanged when no highlight marker is present.
    """
    m = re.search(r'(.*?)<em class="keyword">(.*?)</em>(.*)', text)
    if m is None:
        return text
    before, keyword, after = m.groups()
    return before + keyword + after
39
+
40
def convert_timestamp_to_beijing_time(timestamp):
    """Format a Unix *timestamp* as 'YYYY-MM-DD HH:MM:SS' in Beijing time.

    Uses a real UTC+8 tzinfo instead of adding a timedelta to a UTC
    datetime (the old result showed Beijing wall time while still being
    labelled UTC -- harmless for strftime, but a trap for any other use).
    """
    beijing_tz = timezone(timedelta(hours=8))
    return datetime.fromtimestamp(timestamp, beijing_tz).strftime('%Y-%m-%d %H:%M:%S')
44
+
45
def load_headers(config_path):
    """Load the request-header dict (user-agent, cookie) from a JSON file."""
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    # NOTE(review): this logs the full cookie value; consider redacting.
    print(f"已从 {config_path} 加载配置,请求头为:{config}")
    return config
50
+
51
+
52
@retry(stop=stop_after_attempt(3), wait=wait_random(min=1, max=3))
def make_api_request(url, headers):
    """GET *url* and return the parsed JSON body.

    Retried up to 3 times with a 1-3 s random backoff; raises on any
    non-2xx status so tenacity can retry it.
    """
    resp = requests.get(url=url, headers=headers)
    resp.raise_for_status()
    return resp.json()
57
+
58
def search_videos(keyword, csrf_token, search_type, max_pages=5, output_path=None, config_path='config.json', early_stop=False):
    """Query the Bilibili web search API and collect video metadata.

    Args:
        keyword: search term.
        csrf_token: accepted but never used in the request (kept for
            interface compatibility with callers).
        search_type: result ordering ('default', 'pubdate', ...).
        max_pages: number of result pages to fetch.
        output_path: directory holding a previous results file; only
            consulted when early_stop is truthy.
        config_path: JSON file providing the user-agent and cookie headers.
        early_stop: NOTE(review): despite the name, this only loads the
            previous bvids and *logs* the overlap per page -- the loop
            never actually stops early.

    Returns:
        A list of per-video dicts (Chinese keys for the numeric stats).
    """
    url_template = "https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={keyword}&page={page}&order={search_type}&duration=0&tids=0"
    headers = load_headers(config_path)
    videos = []
    existing_bvids = set()

    # Load bvids from a previous run so page-level overlap can be logged.
    if early_stop and output_path:
        output_file = f"search_results_{keyword.replace(' ', '_')}_{search_type}.json"
        file_path = os.path.join(output_path, output_file)
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
            existing_bvids = set(video['bvid'] for video in existing_data)

    for page in range(1, max_pages + 1):
        url = url_template.format(keyword=keyword, page=page, search_type=search_type)
        try:
            data = make_api_request(url, headers)

            # Non-zero API code: stop paging and keep what we have.
            if data['code'] != 0:
                logger.error(f"Error fetching page {page}: {data['message']}")
                break

            if 'result' not in data['data']:
                logger.info(f"No more results found on page {page}")
                break

            result = data['data']['result']

            if not result:
                logger.info(f"No more results found on page {page}")
                break

            new_videos = []
            for video in result:
                video_data = {
                    'title': extract_and_combine(video['title']),
                    'author': video['author'],
                    'author_id': video['mid'],
                    'bvid': video['bvid'],
                    '播放量': video['play'],
                    '弹幕': video['danmaku'],
                    '评论': video['review'],
                    # NOTE(review): key means "likes" but the API field
                    # is 'favorites' -- confirm which metric is intended.
                    '点赞': video['favorites'],
                    '发布时间': convert_timestamp_to_beijing_time(video['pubdate']),
                    '视频时长': video['duration'],
                    'tag': video['tag'],
                    'description': video['description']
                }
                new_videos.append(video_data)

            # Overlap with the previous run is logged only -- see docstring.
            new_bvids = set(video['bvid'] for video in new_videos)
            duplicate_count = len(new_bvids.intersection(existing_bvids))
            logger.info(f"Page {page}: {duplicate_count} out of {len(new_videos)} videos already exist in the dataset.")

            videos.extend(new_videos)
            logger.info(f"Collected {len(videos)} videos from {page} pages")
            time.sleep(random.uniform(1, 3))  # Random delay between 1 and 3 seconds

        except Exception as e:
            # Any per-page failure (after retries) aborts the remaining pages.
            logger.error(f"Error on page {page}: {str(e)}")
            break

    return videos
122
+
123
def main():
    """CLI entry point: run one search with the given arguments and print the results."""
    parser = argparse.ArgumentParser(description="Search for videos on Bilibili")
    parser.add_argument("--keyword", default='天文馆的猫', help="Search keyword")
    parser.add_argument("--csrf_token", default='40a227fcf12c380d7d3c81af2cd8c5e8', help="CSRF token for authentication")
    parser.add_argument("--search_type", default='default', choices=['pubdate', 'default', 'stow', 'dm', 'click'], help="Search order type")
    parser.add_argument("--max_pages", default=1, type=int, help="Maximum number of pages to fetch")
    parser.add_argument("--output_path", default='search_results', help="Output directory for search results")
    # NOTE(review): declared but never used below.
    parser.add_argument("--interval", default=1, type=int, help="Interval in hours between searches")
    # NOTE(review): no type=/action= given, so any command-line value
    # (including the string "False") arrives as a truthy string.
    parser.add_argument("--early_stop", default=True, help="Enable early stopping if all videos on a page already exist in the dataset")
    args = parser.parse_args()

    # Positional call: args.output_path fills the output_path slot;
    # config_path keeps its 'config.json' default.
    videos = search_videos(args.keyword, args.csrf_token, args.search_type, args.max_pages, args.output_path, early_stop=args.early_stop)
    print(videos)
docker_as_a_service/experimental_mods/test_docker_to_api.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pickle
3
+ import io
4
+ import os
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, Dict
7
+
8
class DockerServiceApiComModel(BaseModel):
    """Envelope exchanged between the DaaS client and server (pickled over HTTP)."""
    client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
    client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
    # Fixed copy-pasted metadata: this field carries a human-readable status
    # message, not stderr (stderr has its own field below).
    server_message: Optional[str] = Field(default=None, title="Server message", description="A free-form status message from the server side")
    server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
    server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
    server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
16
def process_received(received: DockerServiceApiComModel, save_file_dir="./daas_output"):
    """Handle one response envelope from the DaaS server.

    Prints message / stderr / stdout to the console and writes every attached
    file under save_file_dir. server_file_attach is presumably a
    {relative_path: bytes} dict — nested relative paths are preserved
    (TODO confirm against the server implementation).
    """
    # Process the received data
    if received.server_message:
        print(f"Recv message: {received.server_message}")
    if received.server_std_err:
        print(f"Recv standard error: {received.server_std_err}")
    if received.server_std_out:
        print(f"Recv standard output: {received.server_std_out}")
    if received.server_file_attach:
        # print(f"Recv file attach: {received.server_file_attach}")
        for file_name, file_content in received.server_file_attach.items():
            new_fp = os.path.join(save_file_dir, file_name)
            new_fp_dir = os.path.dirname(new_fp)
            # create intermediate directories for nested attachment paths
            if not os.path.exists(new_fp_dir):
                os.makedirs(new_fp_dir, exist_ok=True)
            with open(new_fp, 'wb') as f:
                f.write(file_content)
        print(f"Saved file attach to {save_file_dir}")
35
def send_file_and_stream_response(docker_service_api_com_model, server_url):
    """POST a pickled DockerServiceApiComModel to the server and stream replies.

    Each streamed chunk is expected to be one complete pickled
    DockerServiceApiComModel, which is handed to process_received().
    """
    # Prepare the file
    # Pickle the object
    pickled_data = pickle.dumps(docker_service_api_com_model)

    # Create a file-like object from the pickled data
    file_obj = io.BytesIO(pickled_data)

    # Prepare the file for sending
    files = {'file': ('docker_service_api_com_model.pkl', file_obj, 'application/octet-stream')}

    # Send the POST request
    response = requests.post(server_url, files=files, stream=True)

    # NOTE(review): this assumes every iter_content chunk boundary coincides
    # with exactly one complete pickle; a reply split across chunks (or
    # larger than 1 GB) would fail to unpickle — confirm the server frames
    # each response as a single write.
    max_full_package_size = 1024 * 1024 * 1024 * 1  # 1 GB

    # Check if the request was successful
    if response.status_code == 200:
        # Process the streaming response
        for chunk in response.iter_content(max_full_package_size):
            if chunk:
                # SECURITY: pickle.loads on data received from the network is
                # unsafe unless the server is fully trusted — unpickling can
                # execute arbitrary code.
                received = pickle.loads(chunk)
                process_received(received)

    else:
        print(f"Error: Received status code {response.status_code}")
        print(response.text)
63
# Usage: minimal smoke test — request an audio-only download of one BV id
# from a locally running DaaS server.
if __name__ == "__main__":
    server_url = "http://localhost:49000/stream"  # Replace with your server URL
    docker_service_api_com_model = DockerServiceApiComModel(
        client_command='BV1LSSHYXEtv --audio-only',
    )
    send_file_and_stream_response(docker_service_api_com_model, server_url)
docker_as_a_service/shared_utils/advanced_markdown_format.py ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import markdown
2
+ import re
3
+ import os
4
+ import math
5
+
6
+ from loguru import logger
7
+ from textwrap import dedent
8
+ from functools import lru_cache
9
+ from pymdownx.superfences import fence_code_format
10
+ from latex2mathml.converter import convert as tex2mathml
11
+ from shared_utils.config_loader import get_conf as get_conf
12
+ from shared_utils.text_mask import apply_gpt_academic_string_mask
13
+
14
# Options for the python-markdown "mdx_math" extension: allow $...$ inline
# math and keep the standard (non-GitLab) delimiters.
markdown_extension_configs = {
    "mdx_math": {
        "enable_dollar_delimiter": True,
        "use_gitlab_delimiters": False,
    },
}

# pymdownx settings used when ```mermaid fences should be rendered
# client-side as diagrams (via the custom fence formatter).
code_highlight_configs = {
    "pymdownx.superfences": {
        "css_class": "codehilite",
        "custom_fences": [
            {"name": "mermaid", "class": "mermaid", "format": fence_code_format}
        ],
    },
    "pymdownx.highlight": {
        "css_class": "codehilite",
        "guess_lang": True,
        # 'auto_title': True,
        # 'linenums': True
    },
}

# Same settings but with the mermaid custom fence disabled, so mermaid blocks
# are highlighted as plain code instead of rendered as diagrams.
code_highlight_configs_block_mermaid = {
    "pymdownx.superfences": {
        "css_class": "codehilite",
        # "custom_fences": [
        #     {"name": "mermaid", "class": "mermaid", "format": fence_code_format}
        # ],
    },
    "pymdownx.highlight": {
        "css_class": "codehilite",
        "guess_lang": True,
        # 'auto_title': True,
        # 'linenums': True
    },
}


# TeX delimiter patterns recognized by is_equation(); each pattern captures
# (open-delim, body, close-delim) and maps to whether the body may span
# multiple lines.
mathpatterns = {
    r"(?<!\\|\$)(\$)([^\$]+)(\$)": {"allow_multi_lines": False},  #  $...$
    r"(?<!\\)(\$\$)([^\$]+)(\$\$)": {"allow_multi_lines": True},  # $$...$$
    r"(?<!\\)(\\\[)(.+?)(\\\])": {"allow_multi_lines": False},  # \[...\]
    r'(?<!\\)(\\\()(.+?)(\\\))': {'allow_multi_lines': False},  # \(...\)
    # r'(?<!\\)(\\begin{([a-z]+?\*?)})(.+?)(\\end{\2})': {'allow_multi_lines': True},  # \begin...\end
    # r'(?<!\\)(\$`)([^`]+)(`\$)': {'allow_multi_lines': False},  # $`...`$
}
+
61
def tex2mathml_catch_exception(content, *args, **kwargs):
    """Convert TeX to MathML, returning the input unchanged on any failure.

    latex2mathml raises on malformed input; rendering must never crash the
    caller, so failures fall back to the raw TeX string.
    """
    try:
        content = tex2mathml(content, *args, **kwargs)
    except Exception:
        # was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; the redundant `content = content` is dropped
        pass
    return content
+
68
+
69
def replace_math_no_render(match):
    """Wrap a matched math/tex <script> body in colored, copyable $-delimiters."""
    body = match.group(1)
    display_mode = "mode=display" in match.group(0)
    delim = "$$" if display_mode else "$"
    if display_mode:
        # display equations may contain newlines; keep them visible in HTML
        body = body.replace("\n", "</br>")
    return (
        f'<font color="#00FF00">{delim}</font>'
        f'<font color="#FF00FF">{body}</font>'
        f'<font color="#00FF00">{delim}</font>'
    )
+
77
+
78
def replace_math_render(match):
    """Render a matched math/tex <script> block to MathML.

    For display-mode equations, `aligned` environments are rewritten to
    `array` (and alignment tabs removed) because the MathML converter does
    not handle `aligned`.
    """
    content = match.group(1)
    if "mode=display" in match.group(0):
        if "\\begin{aligned}" in content:
            content = content.replace("\\begin{aligned}", "\\begin{array}")
            content = content.replace("\\end{aligned}", "\\end{array}")
            content = content.replace("&", " ")
        content = tex2mathml_catch_exception(content, display="block")
        return content
    else:
        return tex2mathml_catch_exception(content)
+
90
+
91
def markdown_bug_hunt(content):
    """
    Work around an mdx_math quirk: a single-$ wrapped \\begin command emits a
    redundant nested <script> pair; collapse the duplicated open/close tags.
    """
    nested_open = '<script type="math/tex">\n<script type="math/tex; mode=display">'
    flat_open = '<script type="math/tex; mode=display">'
    content = content.replace(nested_open, flat_open)
    return content.replace("</script>\n</script>", "</script>")
+
102
+
103
def is_equation(txt):
    """
    Heuristically decide whether txt contains a renderable TeX equation.
    Test 1: 写出洛伦兹定律,使用tex格式公式 / Test 2: 给出柯西不等式,使用latex格式 /
    Test 3: 写出麦克斯韦方程组
    """
    # code fences disable math rendering (except the ```reference pseudo-fence)
    if "```" in txt and "```reference" not in txt:
        return False
    # fast bail-out: no math delimiters at all
    if "$" not in txt and "\\[" not in txt:
        return False

    found = []
    for pattern, props in mathpatterns.items():
        flags = re.ASCII | re.DOTALL if props["allow_multi_lines"] else re.ASCII
        found.extend(re.findall(pattern, txt, flags))
    if not found:
        return False

    has_equation = False
    # reject equations containing non-ASCII text or shell-looking content
    illegal = re.compile(r"[^\x00-\x7F]|echo")
    for groups in found:
        if len(groups) != 3:
            return False
        candidate = groups[1]
        if illegal.search(candidate):
            return False
        has_equation = True
    return has_equation
+
130
+
131
def fix_markdown_indent(txt):
    """Normalize list-item indents of 3 (mod 4) spaces up to the next multiple of 4."""
    # fast escape: only act when both bullet and ordered-list markers appear
    if (" - " not in txt) or (". " not in txt):
        return txt
    # walk the lines and pad non-standard indentation once a list has started
    out_lines = txt.split("\n")
    indented_bullet = re.compile(r"^\s+-")
    in_list = False
    for idx, raw in enumerate(out_lines):
        if raw.startswith("- ") or raw.startswith("1. "):
            in_list = True
        if in_list and indented_bullet.match(raw):
            body = raw.lstrip()
            indent = len(raw) - len(body)
            if indent % 4 == 3:
                out_lines[idx] = " " * (math.ceil(indent / 4) * 4) + body
    return "\n".join(out_lines)
+
151
+
152
# Matches a complete fenced code block (``` or ~~~), including the optional
# {attrs}, language tag, and hl_lines annotation, capturing the fence marker
# and the code body. Mirrors the grammar of python-markdown's fenced-code
# extension; used by fix_code_segment_indent().
FENCED_BLOCK_RE = re.compile(
    dedent(
        r"""
        (?P<fence>^[ \t]*(?:~{3,}|`{3,}))[ ]*                          # opening fence
        ((\{(?P<attrs>[^\}\n]*)\})|                                    # (optional {attrs} or
        (\.?(?P<lang>[\w#.+-]*)[ ]*)?                                  # optional (.)lang
        (hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot)[ ]*)?)       # optional hl_lines)
        \n                                                             # newline (end of opening fence)
        (?P<code>.*?)(?<=\n)                                           # the code block
        (?P=fence)[ ]*$                                                # closing fence
        """
    ),
    re.MULTILINE | re.DOTALL | re.VERBOSE,
)
+
167
+
168
def get_line_range(re_match_obj, txt):
    """Translate a regex match's character span into (first_line, last_line + 1) line indices of txt."""
    span_begin, span_end = re_match_obj.regs[0]
    first_line = txt[: span_begin + 1].count("\n")
    last_line = first_line + txt[span_begin:span_end].count("\n") + 1
    return first_line, last_line
+
175
+
176
def fix_code_segment_indent(txt):
    """Pad fenced code blocks whose shared indent is 3 (mod 4) to the next multiple of 4.

    Repeatedly locates fenced blocks with FENCED_BLOCK_RE, computes the
    minimum indentation across the block's lines, and left-pads every line
    when that indentation is a non-standard 3-mod-4 value. Returns txt
    unchanged when no block needed fixing.
    """
    lines = []
    change_any = False
    txt_tmp = txt
    while True:
        re_match_obj = FENCED_BLOCK_RE.search(txt_tmp)
        if not re_match_obj:
            break
        if len(lines) == 0:
            lines = txt.split("\n")

        # blank out the matched span in txt_tmp so the next search moves on
        start_pos, end_pos = re_match_obj.regs[0]
        txt_tmp = txt_tmp[:start_pos] + " " * (end_pos - start_pos) + txt_tmp[end_pos:]
        line_start, line_end = get_line_range(re_match_obj, txt)

        # find the shared (minimum) indentation of the block
        shared_indent_cnt = 1e5
        for i in range(line_start, line_end):
            stripped_string = lines[i].lstrip()
            num_spaces = len(lines[i]) - len(stripped_string)
            if num_spaces < shared_indent_cnt:
                shared_indent_cnt = num_spaces

        # fix the indentation
        if (shared_indent_cnt < 1e5) and (shared_indent_cnt % 4) == 3:
            num_spaces_should_be = math.ceil(shared_indent_cnt / 4) * 4
            for i in range(line_start, line_end):
                add_n = num_spaces_should_be - shared_indent_cnt
                lines[i] = " " * add_n + lines[i]
            if not change_any:  # first modified block
                change_any = True

    if change_any:
        return "\n".join(lines)
    else:
        return txt
+
214
+
215
def fix_dollar_sticking_bug(txt):
    """
    Repair non-standard dollar math delimiters ($ / $$) in model output.

    Scans txt left to right, tracking whether the cursor is inside a
    single-$ or a double-$$ region. Regions broken by a newline / table cell
    (for $) or a blank line (for $$) are force-closed so the renderer does
    not swallow the rest of the document.

    NOTE(review): `txt[index+1]` below raises IndexError when '$' is the
    final character of txt — confirm upstream always feeds delimiter-balanced
    text or guard the lookahead.
    """
    txt_result = ""            # accumulated, repaired output
    single_stack_height = 0    # 1 while inside a $...$ region
    double_stack_height = 0    # 1 while inside a $$...$$ region
    while True:
        while True:
            index = txt.find('$')

            if index == -1:
                # no more dollars: flush the remainder and finish
                txt_result += txt
                return txt_result

            if single_stack_height > 0:
                # a single-$ equation must not span a newline or a table cell
                if txt[:(index+1)].find('\n') > 0 or txt[:(index+1)].find('<td>') > 0 or txt[:(index+1)].find('</td>') > 0:
                    logger.error('公式之中出现了异常 (Unexpect element in equation)')
                    single_stack_height = 0
                    txt_result += ' $'
                    continue

            if double_stack_height > 0:
                # a $$ display equation must not contain a blank line
                if txt[:(index+1)].find('\n\n') > 0:
                    logger.error('公式之中出现了异常 (Unexpect element in equation)')
                    double_stack_height = 0
                    txt_result += '$$'
                    continue

            is_double = (txt[index+1] == '$')
            if is_double:
                if single_stack_height != 0:
                    # "$ ... $$": insert a padding space so the single-$
                    # region closes before the $$ is interpreted
                    txt = txt[:(index+1)] + " " + txt[(index+1):]
                    continue
                # toggle the $$ region state and emit up to the delimiter
                if double_stack_height == 0:
                    double_stack_height = 1
                else:
                    double_stack_height = 0
                txt_result += txt[:(index+2)]
                txt = txt[(index+2):]
            else:
                if double_stack_height != 0:
                    # logger.info(txt[:(index)])
                    logger.info('发现异常嵌套公式')
                # toggle the single-$ region state and emit up to the delimiter
                if single_stack_height == 0:
                    single_stack_height = 1
                else:
                    single_stack_height = 0
                # logger.info(txt[:(index)])
                txt_result += txt[:(index+1)]
                txt = txt[(index+1):]
            break
+
269
+
270
def markdown_convertion_for_file(txt):
    """
    Convert Markdown text into a standalone HTML document (with inline CSS),
    rendering any math formulas to MathML first. Returns txt unchanged if it
    is already a converted document.
    """
    from themes.theme import advanced_css
    pre = f"""
    <!DOCTYPE html><head><meta charset="utf-8"><title>GPT-Academic输出文档</title><style>{advanced_css}</style></head>
    <body>
    <div class="test_temp1" style="width:10%; height: 500px; float:left;"></div>
    <div class="test_temp2" style="width:80%;padding: 40px;float:left;padding-left: 20px;padding-right: 20px;box-shadow: rgba(0, 0, 0, 0.2) 0px 0px 8px 8px;border-radius: 10px;">
    <div class="markdown-body">
    """
    suf = """
    </div>
    </div>
    <div class="test_temp3" style="width:10%; height: 500px; float:left;"></div>
    </body>
    """

    if txt.startswith(pre) and txt.endswith(suf):
        # already converted; converting twice could corrupt the output
        return txt

    find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'
    txt = fix_markdown_indent(txt)
    convert_stage_1 = fix_dollar_sticking_bug(txt)
    # convert everything to html format
    convert_stage_2 = markdown.markdown(
        text=convert_stage_1,
        extensions=[
            "sane_lists",
            "tables",
            "mdx_math",
            "pymdownx.superfences",
            "pymdownx.highlight",
        ],
        extension_configs={**markdown_extension_configs, **code_highlight_configs},
    )


    def repl_fn(match):
        # wrap raw single-line TeX (missed by mdx_math) in a math/tex script tag
        content = match.group(2)
        return f'<script type="math/tex">{content}</script>'

    pattern = "|".join([pattern for pattern, property in mathpatterns.items() if not property["allow_multi_lines"]])
    pattern = re.compile(pattern, flags=re.ASCII)
    convert_stage_3 = pattern.sub(repl_fn, convert_stage_2)

    convert_stage_4 = markdown_bug_hunt(convert_stage_3)

    # 2. convert to rendered equation
    convert_stage_5, n = re.subn(
        find_equation_pattern, replace_math_render, convert_stage_4, flags=re.DOTALL
    )
    # cat them together
    return pre + convert_stage_5 + suf
+
327
@lru_cache(maxsize=128)  # lru cache speeds up repeated conversion
def markdown_convertion(txt):
    """
    Convert Markdown text to HTML wrapped in a markdown-body div. When math
    formulas are present, emit two variants — copyable raw TeX and rendered
    MathML — separated by a horizontal rule.
    """
    pre = '<div class="markdown-body">'
    suf = "</div>"
    if txt.startswith(pre) and txt.endswith(suf):
        # already converted; do not convert twice
        return txt

    find_equation_pattern = r'<script type="math/tex(?:.*?)>(.*?)</script>'

    txt = fix_markdown_indent(txt)
    # txt = fix_code_segment_indent(txt)
    if is_equation(txt):  # $-delimited math present and no ``` code fence
        # convert everything to html format
        split = markdown.markdown(text="---")
        convert_stage_1 = markdown.markdown(
            text=txt,
            extensions=[
                "sane_lists",
                "tables",
                "mdx_math",
                "pymdownx.superfences",
                "pymdownx.highlight",
            ],
            extension_configs={**markdown_extension_configs, **code_highlight_configs},
        )
        convert_stage_1 = markdown_bug_hunt(convert_stage_1)
        # 1. convert to easy-to-copy tex (do not render math)
        convert_stage_2_1, n = re.subn(
            find_equation_pattern,
            replace_math_no_render,
            convert_stage_1,
            flags=re.DOTALL,
        )
        # 2. convert to rendered equation
        convert_stage_2_2, n = re.subn(
            find_equation_pattern, replace_math_render, convert_stage_1, flags=re.DOTALL
        )
        # cat them together
        return pre + convert_stage_2_1 + f"{split}" + convert_stage_2_2 + suf
    else:
        return (
            pre
            + markdown.markdown(
                txt,
                extensions=[
                    "sane_lists",
                    "tables",
                    "pymdownx.superfences",
                    "pymdownx.highlight",
                ],
                extension_configs=code_highlight_configs,
            )
            + suf
        )
+
386
+
387
def close_up_code_segment_during_stream(gpt_reply):
    """Close a dangling code fence while the model is mid-stream.

    When the reply contains an odd number of ``` markers (the opening fence
    has been emitted but not the closing one), append "\\n```" so the partial
    reply still renders as a code block.

    Args:
        gpt_reply (str): partial reply text from the model.

    Returns:
        str: the reply, with a closing fence appended when needed.
    """
    if "```" not in gpt_reply or gpt_reply.endswith("```"):
        return gpt_reply

    fence_count = gpt_reply.count("```")
    if fence_count % 2 == 1:
        return gpt_reply + "\n```"  # currently inside a code block
    return gpt_reply
+
411
+
412
def special_render_issues_for_mermaid(text):
    # Inelegant workaround for one rendering special case from
    # core_functional.py: the "总结绘制脑图" (mind-map) prompt suffix contains a
    # mermaid fence that must NOT be rendered as a diagram, so it is
    # downgraded to a plain code fence.
    @lru_cache(maxsize=1)
    def get_special_case():
        # read the special suffix once and memoize it
        from core_functional import get_core_functions
        special_case = get_core_functions()["总结绘制脑图"]["Suffix"]
        return special_case
    if text.endswith(get_special_case()): text = text.replace("```mermaid", "```")
    return text
+
423
+
424
def compat_non_markdown_input(text):
    """
    Improve the display of non-markdown input: spaces become &nbsp; and
    newlines become </br>. Markdown input (contains ```) only gets the
    mermaid rendering workaround; HTML input (contains </div>) passes
    through untouched.
    """
    if "```" in text:
        # careful input: markdown — apply only the mermaid special-case fix
        return special_render_issues_for_mermaid(text)
    if "</div>" in text:
        # careful input: already HTML
        return text
    # whatever input: plain text — escape spaces, join lines with </br>
    escaped_lines = [segment.replace(" ", "&nbsp;") for segment in text.split("\n")]
    return "</br>".join(escaped_lines)
+
443
+
444
@lru_cache(maxsize=128)  # lru cache speeds up repeated conversion
def simple_markdown_convertion(text):
    """Convert user-input text to HTML wrapped in a markdown-body div (no math rendering)."""
    pre = '<div class="markdown-body">'
    suf = "</div>"
    if text.startswith(pre) and text.endswith(suf):
        return text  # already converted; do not convert twice
    text = compat_non_markdown_input(text)  # tolerate non-markdown input
    text = markdown.markdown(
        text,
        extensions=["pymdownx.superfences", "tables", "pymdownx.highlight"],
        extension_configs=code_highlight_configs,
    )
    return pre + text + suf
+
458
+
459
def format_io(self, y):
    """
    Parse the latest (input, output) pair of chat history y into HTML: the
    user input goes through the simple converter, the model reply through the
    full markdown + math converter. Presumably bound as a method on a UI
    component, hence the unused `self` — confirm at the binding site.
    """
    if y is None or y == []:
        return []
    i_ask, gpt_reply = y[-1]
    i_ask = apply_gpt_academic_string_mask(i_ask, mode="show_render")
    gpt_reply = apply_gpt_academic_string_mask(gpt_reply, mode="show_render")
    # when code output is cut off mid-stream, close the dangling ``` fence
    if gpt_reply is not None:
        gpt_reply = close_up_code_segment_during_stream(gpt_reply)
    # render question and answer
    y[-1] = (
        # input part
        None if i_ask is None else simple_markdown_convertion(i_ask),
        # output part
        None if gpt_reply is None else markdown_convertion(gpt_reply),
    )
    return y
docker_as_a_service/shared_utils/char_visual_effect.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def is_full_width_char(ch):
    """Return True when the single character ch renders full-width.

    Covers CJK unified ideographs, full-width ASCII variants, and CJK
    symbols/punctuation.
    """
    full_width_ranges = (
        ('\u4e00', '\u9fff'),  # CJK unified ideographs
        ('\uff01', '\uff5e'),  # full-width ASCII variants
        ('\u3000', '\u303f'),  # CJK symbols and punctuation
    )
    return any(lo <= ch <= hi for lo, hi in full_width_ranges)
+
11
def scolling_visual_effect(text, scroller_max_len):
    """Condense text into a single-line scroller tail of about scroller_max_len display columns (full-width chars count as two)."""
    flattened = (
        text.replace('\n', '')
            .replace('`', '.')
            .replace(' ', '.')
            .replace('<br/>', '.....')
            .replace('$', '.')
    )

    if len(flattened) < scroller_max_len:
        return flattened

    # walk backwards from the end, accumulating display width
    columns_used = 0
    idx = len(flattened) - 1
    while columns_used < scroller_max_len and idx > 0:
        columns_used += 2 if is_full_width_char(flattened[idx]) else 1
        idx -= 1

    return flattened[idx:]
docker_as_a_service/shared_utils/colorful.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+ from sys import stdout
3
+ from loguru import logger
4
+
5
# On Linux, ANSI escape codes work natively; on other platforms (Windows),
# colorama's init() patches stdout/stderr so the codes below render correctly.
if platform.system()=="Linux":
    pass
else:
    from colorama import init
    init()
10
+
11
# Do you like the elegance of Chinese characters?
# Colored print helpers: each wraps print() with an ANSI SGR color prefix
# ("0;3x" = normal-intensity foreground) and resets with "\033[0m".
# Names: 红=red 绿=green 黄=yellow 蓝=blue 紫=magenta 靛=cyan.
def print红(*kw,**kargs):
    print("\033[0;31m",*kw,"\033[0m",**kargs)
def print绿(*kw,**kargs):
    print("\033[0;32m",*kw,"\033[0m",**kargs)
def print黄(*kw,**kargs):
    print("\033[0;33m",*kw,"\033[0m",**kargs)
def print蓝(*kw,**kargs):
    print("\033[0;34m",*kw,"\033[0m",**kargs)
def print紫(*kw,**kargs):
    print("\033[0;35m",*kw,"\033[0m",**kargs)
def print靛(*kw,**kargs):
    print("\033[0;36m",*kw,"\033[0m",**kargs)

# Bright ("1;3x") variants of the helpers above (亮 = bright).
def print亮红(*kw,**kargs):
    print("\033[1;31m",*kw,"\033[0m",**kargs)
def print亮绿(*kw,**kargs):
    print("\033[1;32m",*kw,"\033[0m",**kargs)
def print亮黄(*kw,**kargs):
    print("\033[1;33m",*kw,"\033[0m",**kargs)
def print亮蓝(*kw,**kargs):
    print("\033[1;34m",*kw,"\033[0m",**kargs)
def print亮紫(*kw,**kargs):
    print("\033[1;35m",*kw,"\033[0m",**kargs)
def print亮靛(*kw,**kargs):
    print("\033[1;36m",*kw,"\033[0m",**kargs)
37
+
38
# Do you like the elegance of Chinese characters?
# String-building counterparts of the print helpers: return the ANSI-wrapped
# text instead of printing it ("0;3x" = normal, "1;3x" = bright). Positional
# string args are joined with a single space.
def sprint红(*kw):
    return "\033[0;31m"+' '.join(kw)+"\033[0m"
def sprint绿(*kw):
    return "\033[0;32m"+' '.join(kw)+"\033[0m"
def sprint黄(*kw):
    return "\033[0;33m"+' '.join(kw)+"\033[0m"
def sprint蓝(*kw):
    return "\033[0;34m"+' '.join(kw)+"\033[0m"
def sprint紫(*kw):
    return "\033[0;35m"+' '.join(kw)+"\033[0m"
def sprint靛(*kw):
    return "\033[0;36m"+' '.join(kw)+"\033[0m"
def sprint亮红(*kw):
    return "\033[1;31m"+' '.join(kw)+"\033[0m"
def sprint亮绿(*kw):
    return "\033[1;32m"+' '.join(kw)+"\033[0m"
def sprint亮黄(*kw):
    return "\033[1;33m"+' '.join(kw)+"\033[0m"
def sprint亮蓝(*kw):
    return "\033[1;34m"+' '.join(kw)+"\033[0m"
def sprint亮紫(*kw):
    return "\033[1;35m"+' '.join(kw)+"\033[0m"
def sprint亮靛(*kw):
    return "\033[1;36m"+' '.join(kw)+"\033[0m"
63
+
64
# Loguru-based colored log helpers: log the ANSI-colored message at INFO
# level. depth=1 makes loguru attribute the record to the caller of the
# helper rather than to this module. **kargs is accepted but unused.
def log红(*kw,**kargs):
    logger.opt(depth=1).info(sprint红(*kw))
def log绿(*kw,**kargs):
    logger.opt(depth=1).info(sprint绿(*kw))
def log黄(*kw,**kargs):
    logger.opt(depth=1).info(sprint黄(*kw))
def log蓝(*kw,**kargs):
    logger.opt(depth=1).info(sprint蓝(*kw))
def log紫(*kw,**kargs):
    logger.opt(depth=1).info(sprint紫(*kw))
def log靛(*kw,**kargs):
    logger.opt(depth=1).info(sprint靛(*kw))

def log亮红(*kw,**kargs):
    logger.opt(depth=1).info(sprint亮红(*kw))
def log亮绿(*kw,**kargs):
    logger.opt(depth=1).info(sprint亮绿(*kw))
def log亮黄(*kw,**kargs):
    logger.opt(depth=1).info(sprint亮黄(*kw))
def log亮蓝(*kw,**kargs):
    logger.opt(depth=1).info(sprint亮蓝(*kw))
def log亮紫(*kw,**kargs):
    logger.opt(depth=1).info(sprint亮紫(*kw))
def log亮靛(*kw,**kargs):
    logger.opt(depth=1).info(sprint亮靛(*kw))
docker_as_a_service/shared_utils/config_loader.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import time
3
+ import os
4
+ from functools import lru_cache
5
+ from shared_utils.colorful import log亮红, log亮绿, log亮蓝
6
+
7
# Shorthand for os.path.join used throughout the project.
pj = os.path.join
# Fallback user name used when no authenticated user is available.
default_user_name = 'default_user'
9
+
10
def read_env_variable(arg, default_value):
    """
    Read config entry `arg` from the environment, coerced to the type of
    `default_value`. The variable may be named `GPT_ACADEMIC_<ARG>`
    (takes precedence) or plain `<ARG>`. E.g. in a Windows cmd shell:
        set USE_PROXY=True
        set API_KEY=sk-j7caBpkRoxxxxxxxxxxxxxxxxxxxxxxxxxxxx
        set proxies={"http":"http://127.0.0.1:10085", "https":"http://127.0.0.1:10085",}
        set AVAIL_LLM_MODELS=["gpt-3.5-turbo", "chatglm"]
        set AUTHENTICATION=[("username", "password"), ("username2", "password2")]
    or the prefixed form:
        set GPT_ACADEMIC_USE_PROXY=True
        set GPT_ACADEMIC_API_KEY=sk-j7caBpkRoxxxxxxxxxxxxxxxxxxxxxxxxxxxx
        set GPT_ACADEMIC_proxies={"http":"http://127.0.0.1:10085", "https":"http://127.0.0.1:10085",}
        set GPT_ACADEMIC_AVAIL_LLM_MODELS=["gpt-3.5-turbo", "chatglm"]
        set GPT_ACADEMIC_AUTHENTICATION=[("username", "password"), ("username2", "password2")]

    Raises:
        KeyError: when the variable is absent or cannot be converted.
    """
    arg_with_prefix = "GPT_ACADEMIC_" + arg
    if arg_with_prefix in os.environ:
        env_arg = os.environ[arg_with_prefix]
    elif arg in os.environ:
        env_arg = os.environ[arg]
    else:
        raise KeyError
    log亮绿(f"[ENV_VAR] 尝试加载{arg},默认值:{default_value} --> 修正值:{env_arg}")
    try:
        if isinstance(default_value, bool):
            env_arg = env_arg.strip()
            if env_arg == 'True': r = True
            elif env_arg == 'False': r = False
            else: log亮红('Expect `True` or `False`, but have:', env_arg); r = default_value
        elif isinstance(default_value, int):
            r = int(env_arg)
        elif isinstance(default_value, float):
            r = float(env_arg)
        elif isinstance(default_value, str):
            r = env_arg.strip()
        elif isinstance(default_value, dict):
            # SECURITY NOTE(review): eval() on environment values executes
            # arbitrary code; acceptable only because the operator controls
            # the process environment — confirm this assumption holds.
            r = eval(env_arg)
        elif isinstance(default_value, list):
            r = eval(env_arg)
        elif default_value is None:
            # `proxies` is the only config entry whose default is None
            assert arg == "proxies"
            r = eval(env_arg)
        else:
            log亮红(f"[ENV_VAR] 环境变量{arg}不支持通过环境变量设置! ")
            raise KeyError
    except:
        log亮红(f"[ENV_VAR] 环境变量{arg}加载失败! ")
        raise KeyError(f"[ENV_VAR] 环境变量{arg}加载失败! ")

    log亮绿(f"[ENV_VAR] 成功读取环境变量{arg}")
    return r
62
+
63
+
64
@lru_cache(maxsize=128)
def read_single_conf_with_lru_cache(arg):
    """Read one config entry: env var first, then config_private.py, then config.py.

    Also performs sanity checks and prints guidance for a few special keys
    (API_URL_REDIRECT format, API_KEY validity, proxies consistency).
    """
    from shared_utils.key_pattern_manager import is_any_api_key
    try:
        # priority 1: environment variable
        default_ref = getattr(importlib.import_module('config'), arg)  # default value used as the type-conversion reference
        r = read_env_variable(arg, default_ref)
    except:
        try:
            # priority 2: config_private.py
            r = getattr(importlib.import_module('config_private'), arg)
        except:
            # priority 3: config.py
            r = getattr(importlib.import_module('config'), arg)

    # when reading API_KEY, remind the user in case config was left unmodified
    if arg == 'API_URL_REDIRECT':
        oai_rd = r.get("https://api.openai.com/v1/chat/completions", None)  # wrong API_URL_REDIRECT format — see the project wiki's configuration guide
        if oai_rd and not oai_rd.endswith('/completions'):
            log亮红("\n\n[API_URL_REDIRECT] API_URL_REDIRECT填错了。请阅读`https://github.com/binary-husky/gpt_academic/wiki/项目配置说明`。如果您确信自己没填错,无视此消息即可。")
            time.sleep(5)
    if arg == 'API_KEY':
        log亮蓝(f"[API_KEY] 本项目现已支持OpenAI和Azure的api-key。也支持同时填写多个api-key,如API_KEY=\"openai-key1,openai-key2,azure-key3\"")
        log亮蓝(f"[API_KEY] 您既可以在config.py中修改api-key(s),也可以在问题输入区输入临时的api-key(s),然后回车键提交后即可生效。")
        if is_any_api_key(r):
            log亮绿(f"[API_KEY] 您的 API_KEY 是: {r[:15]}*** API_KEY 导入成功")
        else:
            log亮红(f"[API_KEY] 您的 API_KEY({r[:15]}***)不满足任何一种已知的密钥格式,请在config文件中修改API密钥之后再运行(详见`https://github.com/binary-husky/gpt_academic/wiki/api_key`)。")
    if arg == 'proxies':
        if not read_single_conf_with_lru_cache('USE_PROXY'): r = None  # honor USE_PROXY so proxies cannot take effect on its own
        if r is None:
            log亮红('[PROXY] 网络代理状态:未配置。无代理状态下很可能无法访问OpenAI家族的模型。建议:检查USE_PROXY选项是否修改。')
        else:
            log亮绿('[PROXY] 网络代理状态:已配置。配置信息如下:', str(r))
            assert isinstance(r, dict), 'proxies格式错误,请注意proxies选项的格式,不要遗漏括号。'
    return r
100
+
101
+
102
@lru_cache(maxsize=128)
def get_conf(*args):
    """
    All project configuration is centralized in config.py. There are three
    ways to modify it — pick one:
    - edit config.py directly
    - create and edit config_private.py
    - set environment variables (editing docker-compose.yml is equivalent to
      editing the container's environment variables)

    Note: for docker-compose deployments, edit docker-compose.yml.
    """
    values = [read_single_conf_with_lru_cache(name) for name in args]
    # a single requested key returns the bare value, not a 1-element list
    return values[0] if len(values) == 1 else values
118
+
119
+
120
def set_conf(key, value):
    """Override one config value at runtime via the environment and return the freshly-read value."""
    # NOTE(review): imports read_single_conf_with_lru_cache from `toolbox`
    # although it is defined in this module — confirm toolbox re-exports it
    # and that this indirection is intentional.
    from toolbox import read_single_conf_with_lru_cache
    # both caches must be cleared so the new env var is actually re-read
    read_single_conf_with_lru_cache.cache_clear()
    get_conf.cache_clear()
    os.environ[key] = str(value)
    altered = get_conf(key)
    return altered
127
+
128
+
129
def set_multi_conf(dic):
    """Apply set_conf() to every (key, value) pair in dic."""
    for conf_key, conf_value in dic.items():
        set_conf(conf_key, conf_value)
    return
docker_as_a_service/shared_utils/connect_void_terminal.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ """
4
+ =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
5
+ 接驳void-terminal:
6
+ - set_conf: 在运行过程中动态地修改配置
7
+ - set_multi_conf: 在运行过程中动态地修改多个配置
8
+ - get_plugin_handle: 获取插件的句柄
9
+ - get_plugin_default_kwargs: 获取插件的默认参数
10
+ - get_chat_handle: 获取简单聊天的句柄
11
+ - get_chat_default_kwargs: 获取简单聊天的默认参数
12
+ =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
13
+ """
14
+
15
+
16
def get_plugin_handle(plugin_name):
    """
    Resolve a plugin path of the form "module.path->attribute" to a callable.

    e.g. plugin_name = 'crazy_functions.Markdown_Translate->Markdown翻译指定语言'
    """
    import importlib

    assert (
        "->" in plugin_name
    ), "Example of plugin_name: crazy_functions.Markdown_Translate->Markdown翻译指定语言"
    module_path, fn_name = plugin_name.split("->")
    # BUGFIX: import_module's second parameter is `package` (an anchor for
    # relative imports), not an attribute name — passing fn_name there was
    # incorrect and only worked because module_path is always absolute.
    f_hot_reload = getattr(importlib.import_module(module_path), fn_name)
    return f_hot_reload
28
+
29
+
30
def get_chat_handle():
    """
    Get the chat function (blocking, long-connection variant) from the LLM
    bridge module.
    """
    from request_llms.bridge_all import predict_no_ui_long_connection

    return predict_no_ui_long_connection
37
+
38
+
39
def get_plugin_default_kwargs():
    """
    Get Plugin Default Arguments.

    Builds the keyword-argument dict a plugin expects:
    (main_input, llm_kwargs, plugin_kwargs, chatbot_with_cookie, history,
    system_prompt, user_request). api_key and llm_model come from the
    persisted chat cookies.
    """
    from toolbox import ChatBotWithCookies, load_chat_cookies

    cookies = load_chat_cookies()
    # minimal LLM sampling settings; top_p/temperature fixed at 1.0 defaults
    llm_kwargs = {
        "api_key": cookies["api_key"],
        "llm_model": cookies["llm_model"],
        "top_p": 1.0,
        "max_length": None,
        "temperature": 1.0,
    }
    chatbot = ChatBotWithCookies(llm_kwargs)

    # txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request
    DEFAULT_FN_GROUPS_kwargs = {
        "main_input": "./README.md",
        "llm_kwargs": llm_kwargs,
        "plugin_kwargs": {},
        "chatbot_with_cookie": chatbot,
        "history": [],
        "system_prompt": "You are a good AI.",
        "user_request": None,
    }
    return DEFAULT_FN_GROUPS_kwargs
66
+
67
+
68
def get_chat_default_kwargs():
    """
    Get Chat Default Arguments.

    Builds the keyword-argument dict for the simple chat handle; api_key and
    llm_model come from the persisted chat cookies.
    """
    from toolbox import load_chat_cookies

    cookies = load_chat_cookies()
    llm_kwargs = {
        "api_key": cookies["api_key"],
        "llm_model": cookies["llm_model"],
        "top_p": 1.0,
        "max_length": None,
        "temperature": 1.0,
    }
    default_chat_kwargs = {
        "inputs": "Hello there, are you ready?",
        "llm_kwargs": llm_kwargs,
        "history": [],
        "sys_prompt": "You are AI assistant",
        "observe_window": None,
        # "console_slience" [sic]: misspelled key preserved because consumers
        # look it up by this exact name.
        "console_slience": False,
    }

    return default_chat_kwargs
docker_as_a_service/shared_utils/cookie_manager.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import base64
3
+ from typing import Callable
4
+
5
def load_web_cookie_cache__fn_builder(customize_btns, cookies, predefined_btns) -> Callable:
    """Build the gradio callback that restores customized buttons from the
    browser-side persistent cookie.

    Args:
        customize_btns: dict name -> gr.Button for user-defined buttons.
        cookies: gr.State holding the backend cookie dict.
        predefined_btns: dict name -> gr.Button for built-in buttons.
    Returns:
        Callback (persistent_cookie_str, cookies_dict) -> component updates.
    """
    def load_web_cookie_cache(persistent_cookie_, cookies_):
        import gradio as gr
        from themes.theme import from_cookie_str

        # Hide all customizable buttons by default
        ret = {btn: gr.update(visible=False, value="") for btn in customize_btns.values()}

        try:
            persistent_cookie_ = from_cookie_str(persistent_cookie_)  # persistent cookie string -> dict
        except Exception:
            return ret  # malformed or absent cookie: keep defaults

        customize_fn_overwrite_ = persistent_cookie_.get("custom_bnt", {})
        cookies_['customize_fn_overwrite'] = customize_fn_overwrite_
        ret.update({cookies: cookies_})

        # BUGFIX: iterate the .get() result instead of indexing
        # persistent_cookie_["custom_bnt"] directly, which raised KeyError
        # whenever the cookie had no "custom_bnt" entry.
        for k, v in customize_fn_overwrite_.items():
            if v['Title'] == "":
                continue
            if k in customize_btns:
                ret.update({customize_btns[k]: gr.update(visible=True, value=v['Title'])})
            else:
                ret.update({predefined_btns[k]: gr.update(visible=True, value=v['Title'])})
        return ret
    return load_web_cookie_cache
27
+
28
def assign_btn__fn_builder(customize_btns, predefined_btns, cookies, web_cookie_cache)->Callable:
    """Build the gradio callback that saves a (re)customized function button.

    Args:
        customize_btns: dict name -> gr.Button for user-defined buttons.
        predefined_btns: dict name -> gr.Button for built-in buttons.
        cookies: gr.State holding the backend cookie dict.
        web_cookie_cache: hidden gr.Textbox mirroring cookies on the browser side.
    Returns:
        The `assign_btn` callback for the button-customization dialog.
    """
    def assign_btn(persistent_cookie_, cookies_, basic_btn_dropdown_, basic_fn_title, basic_fn_prefix, basic_fn_suffix, clean_up=False):
        import gradio as gr
        from themes.theme import load_dynamic_theme, to_cookie_str, from_cookie_str, assign_user_uuid
        ret = {}
        # Read previously customized buttons
        customize_fn_overwrite_ = cookies_['customize_fn_overwrite']
        # Merge in the newly customized button
        customize_fn_overwrite_.update({
            basic_btn_dropdown_:
                {
                    "Title":basic_fn_title,
                    "Prefix":basic_fn_prefix,
                    "Suffix":basic_fn_suffix,
                }
            }
        )
        if clean_up:
            customize_fn_overwrite_ = {}
        # NOTE(review): this merges the per-button dicts directly into the cookie
        # dict (keys = button names); presumably intended to be
        # cookies_['customize_fn_overwrite'] = customize_fn_overwrite_ — confirm.
        cookies_.update(customize_fn_overwrite_)  # update cookie
        visible = (not clean_up) and (basic_fn_title != "")
        if basic_btn_dropdown_ in customize_btns:
            # a user-defined button, not a predefined one
            ret.update({customize_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
        else:
            # a predefined button
            ret.update({predefined_btns[basic_btn_dropdown_]: gr.update(visible=visible, value=basic_fn_title)})
        ret.update({cookies: cookies_})
        try: persistent_cookie_ = from_cookie_str(persistent_cookie_)  # persistent cookie string -> dict
        except: persistent_cookie_ = {}
        persistent_cookie_["custom_bnt"] = customize_fn_overwrite_  # overwrite with the new value
        persistent_cookie_ = to_cookie_str(persistent_cookie_)  # dict -> persistent cookie string
        ret.update({web_cookie_cache: persistent_cookie_})  # write persistent cookie
        return ret
    return assign_btn
63
+
64
# cookies, web_cookie_cache = make_cookie_cache()
def make_cookie_cache():
    """Create the backend cookie state plus its hidden browser-side twin."""
    import gradio as gr
    from toolbox import load_chat_cookies
    # Backend-side state holding the chat cookies
    backend_state = gr.State(load_chat_cookies())
    # Hidden textbox mirroring the cookies on the browser side
    frontend_cache = gr.Textbox(visible=False, elem_id="web_cookie_cache")
    return backend_state, frontend_cache
74
+
75
# history, history_cache, history_cache_update = make_history_cache()
def make_history_cache():
    """Create the backend history state, its hidden browser-side twin, and the
    hidden updater that copies frontend -> backend."""
    import gradio as gr
    # Backend-side state holding the chat history
    history = gr.State([])
    # Hidden textbox mirroring the history on the browser side
    history_cache = gr.Textbox(visible=False, elem_id="history_cache")

    # Triggering the hidden button first runs JS that refreshes history_cache,
    # then this python handler deserializes it into `history`.
    def process_history_cache(history_cache):
        return json.loads(history_cache)

    history_cache_update = gr.Button("", elem_id="elem_update_history", visible=False).click(
        process_history_cache, inputs=[history_cache], outputs=[history])
    return history, history_cache, history_cache_update
90
+
91
+
92
+
93
def create_button_with_javascript_callback(btn_value, elem_id, variant, js_callback, input_list, output_list, function, input_name_list, output_name_list):
    """Create a gr.Button whose python handler is followed by a JS callback.

    The python `function` runs first; its named inputs and outputs are packed
    into a dict, base64-encoded, and routed through a hidden textbox so the
    browser-side `js_callback` can consume them.

    Args:
        btn_value / elem_id / variant: forwarded to gr.Button.
        js_callback: JS source of a function receiving the named outputs.
        input_list / output_list: gradio components wired to the handler.
        function: python handler called with *input_list values.
        input_name_list / output_name_list: key names matching the components.
    Returns:
        The created gr.Button.
    """
    import gradio as gr
    # Hidden buffer carrying the base64 summary from python to JS
    middle_ware_component = gr.Textbox(visible=False, elem_id=elem_id+'_buffer')
    def get_fn_wrap():
        def fn_wrap(*args):
            # Collect named inputs...
            summary_dict = {}
            for name, value in zip(input_name_list, args):
                summary_dict.update({name: value})

            res = function(*args)

            # ...and named outputs of the wrapped handler
            for name, value in zip(output_name_list, res):
                summary_dict.update({name: value})

            # base64 shields the JSON from gradio/JS quoting issues
            summary = base64.b64encode(json.dumps(summary_dict).encode('utf8')).decode("utf-8")
            return (*res, summary)
        return fn_wrap

    btn = gr.Button(btn_value, elem_id=elem_id, variant=variant)
    # Build the JS argument list: Data["name1"],Data["name2"],...
    call_args = ""
    for name in output_name_list:
        call_args += f"""Data["{name}"],"""
    call_args = call_args.rstrip(",")
    _js_callback = """
    (base64MiddleString)=>{
        console.log('hello')
        const stringData = atob(base64MiddleString);
        let Data = JSON.parse(stringData);
        call = JS_CALLBACK_GEN;
        call(CALL_ARGS);
    }
    """.replace("JS_CALLBACK_GEN", js_callback).replace("CALL_ARGS", call_args)

    # NOTE(review): `_js=` is the pre-gradio-4 keyword for a JS continuation —
    # confirm against the pinned gradio version.
    btn.click(get_fn_wrap(), input_list, output_list+[middle_ware_component]).then(None, [middle_ware_component], None, _js=_js_callback)
    return btn
docker_as_a_service/shared_utils/docker_as_service_api.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pickle
3
+ import io
4
+ import os
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, Dict, Any
7
+ from loguru import logger
8
+
9
class DockerServiceApiComModel(BaseModel):
    """Wire-format model exchanged (pickled) between DaaS client and server."""
    client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
    client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
    # BUGFIX: title/description were copy-pasted from server_std_err
    server_message: Optional[Any] = Field(default=None, title="Server message", description="A free-form message from the server side")
    server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
    server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
    server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
16
+
17
def process_received(received: "DockerServiceApiComModel", save_file_dir="./daas_output", output_manifest=None):
    """Merge one streamed DaaS response chunk into an accumulating manifest.

    Args:
        received: one deserialized response object from the server.
        save_file_dir: directory where attached files are written.
        output_manifest: accumulator dict; a fresh one is created if omitted.
    Returns:
        The updated manifest dict.
    """
    # BUGFIX: the previous mutable default ({}) was shared across calls and
    # lacked the keys the `+=` below requires; build a fresh, fully-keyed dict.
    if output_manifest is None:
        output_manifest = {
            'server_message': "",
            'server_std_err': "",
            'server_std_out': "",
            'server_file_attach': [],
        }
    if received.server_message:
        output_manifest['server_message'] += received.server_message
    if received.server_std_err:
        output_manifest['server_std_err'] += received.server_std_err
    if received.server_std_out:
        output_manifest['server_std_out'] += received.server_std_out
    if received.server_file_attach:
        for file_name, file_content in received.server_file_attach.items():
            new_fp = os.path.join(save_file_dir, file_name)
            new_fp_dir = os.path.dirname(new_fp)
            if not os.path.exists(new_fp_dir):
                os.makedirs(new_fp_dir, exist_ok=True)
            with open(new_fp, 'wb') as f:
                f.write(file_content)
            output_manifest['server_file_attach'].append(new_fp)
    return output_manifest
36
+
37
def stream_daas(docker_service_api_com_model, server_url):
    """Send a request object to a DaaS server and stream back its responses.

    Yields the accumulating manifest dict after each received chunk and
    returns the final manifest.
    """
    # Serialize the request object and wrap it as an uploadable file
    pickled_data = pickle.dumps(docker_service_api_com_model)
    file_obj = io.BytesIO(pickled_data)
    files = {'file': ('docker_service_api_com_model.pkl', file_obj, 'application/octet-stream')}

    # Send the POST request and stream the response body
    response = requests.post(server_url, files=files, stream=True)

    max_full_package_size = 1024 * 1024 * 1024 * 1  # 1 GB read size per chunk

    received_output_manifest = {
        'server_message': "",
        'server_std_err': "",
        'server_std_out': "",
        'server_file_attach': [],
    }

    if response.status_code == 200:
        for chunk in response.iter_content(max_full_package_size):
            if chunk:
                # NOTE(review): assumes each HTTP chunk holds exactly one
                # complete pickle payload — iter_content gives no such
                # guarantee in general; pickle.loads on a partial payload
                # would raise. Unpickling network data is only acceptable
                # because the server is our own trusted container.
                received = pickle.loads(chunk)
                # BUGFIX: pass the accumulator by keyword; it was previously
                # passed positionally and bound to `save_file_dir`.
                received_output_manifest = process_received(received, output_manifest=received_output_manifest)
                yield received_output_manifest
    else:
        logger.error(f"Error: Received status code {response.status_code}, response.text: {response.text}")

    return received_output_manifest
docker_as_a_service/shared_utils/fastapi_server.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests:
3
+
4
+ - custom_path false / no user auth:
5
+ -- upload file(yes)
6
+ -- download file(yes)
7
+ -- websocket(yes)
8
+ -- block __pycache__ access(yes)
9
+ -- rel (yes)
10
+ -- abs (yes)
11
+ -- block user access(fail) http://localhost:45013/file=gpt_log/admin/chat_secrets.log
12
+ -- fix(commit f6bf05048c08f5cd84593f7fdc01e64dec1f584a)-> block successful
13
+
14
+ - custom_path yes("/cc/gptac") / no user auth:
15
+ -- upload file(yes)
16
+ -- download file(yes)
17
+ -- websocket(yes)
18
+ -- block __pycache__ access(yes)
19
+ -- block user access(yes)
20
+
21
+ - custom_path yes("/cc/gptac/") / no user auth:
22
+ -- upload file(yes)
23
+ -- download file(yes)
24
+ -- websocket(yes)
25
+ -- block user access(yes)
26
+
27
+ - custom_path yes("/cc/gptac/") / + user auth:
28
+ -- upload file(yes)
29
+ -- download file(yes)
30
+ -- websocket(yes)
31
+ -- block user access(yes)
32
+ -- block user-wise access (yes)
33
+
34
+ - custom_path no + user auth:
35
+ -- upload file(yes)
36
+ -- download file(yes)
37
+ -- websocket(yes)
38
+ -- block user access(yes)
39
+ -- block user-wise access (yes)
40
+
41
+ queue cocurrent effectiveness
42
+ -- upload file(yes)
43
+ -- download file(yes)
44
+ -- websocket(yes)
45
+ """
46
+
47
+ import os, requests, threading, time
48
+ import uvicorn
49
+
50
def validate_path_safety(path_or_url, user):
    """Check that a user-supplied path points into an allowed directory.

    Returns True when access is permitted. Raises FriendlyException (with a
    user-facing message) when the path is outside the allowed areas or
    belongs to another user — the function never actually returns False.

    Allowed areas: tests/, build/, and the per-user subtrees of PATH_LOGGING
    and PATH_PRIVATE_UPLOAD owned by `user`, 'autogen', 'arxiv_cache', or the
    default user.
    """
    from toolbox import get_conf, default_user_name
    from toolbox import FriendlyException
    PATH_PRIVATE_UPLOAD, PATH_LOGGING = get_conf('PATH_PRIVATE_UPLOAD', 'PATH_LOGGING')
    sensitive_path = None
    # Normalize to a relative path so the prefix checks below are meaningful
    path_or_url = os.path.relpath(path_or_url)
    if path_or_url.startswith(PATH_LOGGING):  # log files (partitioned per user)
        sensitive_path = PATH_LOGGING
    elif path_or_url.startswith(PATH_PRIVATE_UPLOAD):  # upload dir (partitioned per user)
        sensitive_path = PATH_PRIVATE_UPLOAD
    elif path_or_url.startswith('tests') or path_or_url.startswith('build'):  # common test dirs
        return True
    else:
        raise FriendlyException(f"输入文件的路径 ({path_or_url}) 存在,但位置非法。请将文件上传后再执行该任务。")  # return False
    if sensitive_path:
        allowed_users = [user, 'autogen', 'arxiv_cache', default_user_name]  # user dirs that may be accessed
        for user_allowed in allowed_users:
            # Compare the first two path components against "<sensitive>/<user>"
            if f"{os.sep}".join(path_or_url.split(os.sep)[:2]) == os.path.join(sensitive_path, user_allowed):
                return True
        raise FriendlyException(f"输入文件的路径 ({path_or_url}) 存在,但属于其他用户。请将文件上传后再执行该任务。")  # return False
    return True
71
+
72
def _authorize_user(path_or_url, request, gradio_app):
    """Check, via the gradio auth token cookie, that the requesting user may
    access `path_or_url` when it lies inside a per-user sensitive directory.

    Returns True for non-sensitive paths, and for sensitive paths owned by
    the requesting user or the shared 'autogen'/'arxiv_cache'/default users;
    returns False for cross-user access.
    """
    from toolbox import get_conf, default_user_name
    PATH_PRIVATE_UPLOAD, PATH_LOGGING = get_conf('PATH_PRIVATE_UPLOAD', 'PATH_LOGGING')
    sensitive_path = None
    path_or_url = os.path.relpath(path_or_url)
    if path_or_url.startswith(PATH_LOGGING):
        sensitive_path = PATH_LOGGING
    if path_or_url.startswith(PATH_PRIVATE_UPLOAD):
        sensitive_path = PATH_PRIVATE_UPLOAD
    if sensitive_path:
        # Resolve the logged-in user from the gradio access-token cookie
        token = request.cookies.get("access-token") or request.cookies.get("access-token-unsecure")
        user = gradio_app.tokens.get(token)  # get user
        allowed_users = [user, 'autogen', 'arxiv_cache', default_user_name]  # user dirs that may be accessed
        for user_allowed in allowed_users:
            # exact match on the first two path components
            if f"{os.sep}".join(path_or_url.split(os.sep)[:2]) == os.path.join(sensitive_path, user_allowed):
                return True
        return False  # unauthorized cross-user access (越权访问)
    return True
91
+
92
+
93
class Server(uvicorn.Server):
    """Uvicorn server that runs on a background daemon thread."""

    def install_signal_handlers(self):
        # Signal handlers only work on the main thread; disable them here.
        pass

    def run_in_thread(self):
        """Start serving on a daemon thread; block until startup completes."""
        self.thread = threading.Thread(target=self.run, daemon=True)
        self.thread.start()
        while not self.started:
            time.sleep(0.05)

    def close(self):
        """Ask the server to exit and wait for its thread to terminate."""
        self.should_exit = True
        self.thread.join()
107
+
108
+
109
def start_app(app_block, CONCURRENT_COUNT, AUTHENTICATION, PORT, SSL_KEYFILE, SSL_CERTFILE):
    """Wrap a gr.Blocks app in FastAPI, harden its file routes, and serve it.

    Args:
        app_block: the gradio Blocks application.
        CONCURRENT_COUNT: gradio queue concurrency.
        AUTHENTICATION: list of (user, password) tuples; empty disables auth.
        PORT: TCP port to bind on 0.0.0.0.
        SSL_KEYFILE, SSL_CERTFILE: certificate paths, or "" for plain HTTP.

    Blocks the calling thread until the server exits (app_block.block_thread()).
    """
    import uvicorn
    import fastapi
    import gradio as gr
    # BUGFIX: Request/status/RedirectResponse/FileResponse are imported
    # up-front; Request was previously imported only inside the
    # AUTHENTICATION branch, so the middleware annotation below raised
    # NameError when auth was disabled but CUSTOM_PATH was customized.
    from fastapi import FastAPI, Request, status
    from fastapi.responses import FileResponse, RedirectResponse
    from gradio.routes import App
    from toolbox import get_conf
    CUSTOM_PATH, PATH_LOGGING = get_conf('CUSTOM_PATH', 'PATH_LOGGING')

    # --- --- configurate gradio app block --- ---
    app_block: gr.Blocks
    app_block.ssl_verify = False
    app_block.auth_message = '请登录'
    app_block.favicon_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "docs/logo.png")
    app_block.auth = AUTHENTICATION if len(AUTHENTICATION) != 0 else None
    # Forbid downloading config/secrets and the admin log dir via /file=
    app_block.blocked_paths = ["config.py", "__pycache__", "config_private.py", "docker-compose.yml", "Dockerfile", f"{PATH_LOGGING}/admin"]
    app_block.dev_mode = False
    app_block.config = app_block.get_config_file()
    app_block.enable_queue = True
    app_block.queue(concurrency_count=CONCURRENT_COUNT)
    app_block.validate_queue_settings()
    app_block.show_api = False
    app_block.config = app_block.get_config_file()  # refresh config after queue()
    max_threads = 40
    app_block.max_threads = max(
        app_block._queue.max_thread_count if app_block.enable_queue else 0, max_threads
    )
    app_block.is_colab = False
    app_block.is_kaggle = False
    app_block.is_sagemaker = False

    gradio_app = App.create_app(app_block)
    # Drop gradio's generic proxy route (unused here)
    for route in list(gradio_app.router.routes):
        if route.path == "/proxy={url_path:path}":
            gradio_app.router.routes.remove(route)
    # --- --- replace gradio endpoint to forbid access to sensitive files --- ---
    if len(AUTHENTICATION) > 0:
        dependencies = []
        endpoint = None
        for route in list(gradio_app.router.routes):
            if route.path == "/file/{path:path}":
                gradio_app.router.routes.remove(route)
            if route.path == "/file={path_or_url:path}":
                dependencies = route.dependencies
                endpoint = route.endpoint
                gradio_app.router.routes.remove(route)
        @gradio_app.get("/file/{path:path}", dependencies=dependencies)
        @gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
        @gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
        async def file(path_or_url: str, request: fastapi.Request):
            # With auth enabled, additionally enforce per-user directory access
            if not _authorize_user(path_or_url, request, gradio_app):
                return "越权访问!"
            stripped = path_or_url.lstrip().lower()
            if stripped.startswith("https://") or stripped.startswith("http://"):
                return "账户密码授权模式下, 禁止链接!"
            if '../' in stripped:
                return "非法路径!"
            return await endpoint(path_or_url, request)

        @gradio_app.get("/academic_logout")
        async def logout():
            # Clear both gradio auth cookies and bounce back to the app root
            response = RedirectResponse(url=CUSTOM_PATH, status_code=status.HTTP_302_FOUND)
            response.delete_cookie('access-token')
            response.delete_cookie('access-token-unsecure')
            return response
    else:
        dependencies = []
        endpoint = None
        for route in list(gradio_app.router.routes):
            if route.path == "/file/{path:path}":
                gradio_app.router.routes.remove(route)
            if route.path == "/file={path_or_url:path}":
                dependencies = route.dependencies
                endpoint = route.endpoint
                gradio_app.router.routes.remove(route)
        @gradio_app.get("/file/{path:path}", dependencies=dependencies)
        @gradio_app.head("/file={path_or_url:path}", dependencies=dependencies)
        @gradio_app.get("/file={path_or_url:path}", dependencies=dependencies)
        async def file(path_or_url: str, request: fastapi.Request):
            stripped = path_or_url.lstrip().lower()
            if stripped.startswith("https://") or stripped.startswith("http://"):
                return "账户密码授权模式下, 禁止链接!"
            if '../' in stripped:
                return "非法路径!"
            return await endpoint(path_or_url, request)

    # --- --- enable TTS (text-to-speech) functionality --- ---
    TTS_TYPE = get_conf("TTS_TYPE")
    if TTS_TYPE != "DISABLE":
        # audio generation functionality
        import httpx
        from fastapi import HTTPException
        from starlette.responses import Response
        async def forward_request(request: Request, method: str) -> Response:
            async with httpx.AsyncClient() as client:
                try:
                    if TTS_TYPE == "EDGE_TTS":
                        # Synthesize locally via edge-tts, then convert mp3 -> wav
                        import tempfile
                        import edge_tts
                        import uuid
                        from pydub import AudioSegment
                        req_json = await request.json()  # renamed from `json` to avoid shadowing the module name
                        voice = get_conf("EDGE_TTS_VOICE")
                        tts = edge_tts.Communicate(text=req_json['text'], voice=voice)
                        temp_folder = tempfile.gettempdir()
                        temp_file_name = str(uuid.uuid4().hex)
                        temp_file = os.path.join(temp_folder, f'{temp_file_name}.mp3')
                        await tts.save(temp_file)
                        try:
                            mp3_audio = AudioSegment.from_file(temp_file, format="mp3")
                            mp3_audio.export(temp_file, format="wav")
                            with open(temp_file, 'rb') as wav_file: t = wav_file.read()
                            os.remove(temp_file)
                            return Response(content=t)
                        except:
                            # BUGFIX: repaired mojibake in this error message
                            raise RuntimeError("ffmpeg未安装,无法处理EdgeTTS音频。安装方法见`https://github.com/jiaaro/pydub#getting-ffmpeg-set-up`")
                    if TTS_TYPE == "LOCAL_SOVITS_API":
                        # Forward the request to the target service
                        TARGET_URL = get_conf("GPT_SOVITS_URL")
                        body = await request.body()
                        resp = await client.post(TARGET_URL, content=body, timeout=60)
                        # Return the response from the target service
                        return Response(content=resp.content, status_code=resp.status_code, headers=dict(resp.headers))
                except httpx.RequestError as e:
                    raise HTTPException(status_code=400, detail=f"Request to the target service failed: {str(e)}")
        @gradio_app.post("/vits")
        async def forward_post_request(request: Request):
            return await forward_request(request, "POST")

    # --- --- app_lifespan --- ---
    from contextlib import asynccontextmanager
    @asynccontextmanager
    async def app_lifespan(app):
        async def startup_gradio_app():
            if gradio_app.get_blocks().enable_queue:
                gradio_app.get_blocks().startup_events()
        async def shutdown_gradio_app():
            pass
        await startup_gradio_app()  # startup logic here
        yield  # The application will serve requests after this point
        await shutdown_gradio_app()  # cleanup/shutdown logic here

    # --- --- FastAPI --- ---
    fastapi_app = FastAPI(lifespan=app_lifespan)
    fastapi_app.mount(CUSTOM_PATH, gradio_app)

    # --- --- favicon and block fastapi api reference routes --- ---
    from starlette.responses import JSONResponse
    if CUSTOM_PATH != '/':
        @fastapi_app.get("/favicon.ico")
        async def favicon():
            return FileResponse(app_block.favicon_path)

        @fastapi_app.middleware("http")
        async def middleware(request: Request, call_next):
            # Hide the auto-generated API reference pages
            if request.scope['path'] in ["/docs", "/redoc", "/openapi.json"]:
                return JSONResponse(status_code=404, content={"message": "Not Found"})
            response = await call_next(request)
            return response

    # --- --- uvicorn.Config --- ---
    ssl_keyfile = None if SSL_KEYFILE == "" else SSL_KEYFILE
    ssl_certfile = None if SSL_CERTFILE == "" else SSL_CERTFILE
    server_name = "0.0.0.0"
    config = uvicorn.Config(
        fastapi_app,
        host=server_name,
        port=PORT,
        reload=False,
        log_level="warning",
        ssl_keyfile=ssl_keyfile,
        ssl_certfile=ssl_certfile,
    )
    server = Server(config)
    url_host_name = "localhost" if server_name == "0.0.0.0" else server_name
    if ssl_keyfile is not None:
        if ssl_certfile is None:
            raise ValueError(
                "ssl_certfile must be provided if ssl_keyfile is provided."
            )
        path_to_local_server = f"https://{url_host_name}:{PORT}/"
    else:
        path_to_local_server = f"http://{url_host_name}:{PORT}/"
    if CUSTOM_PATH != '/':
        path_to_local_server += CUSTOM_PATH.lstrip('/').rstrip('/') + '/'
    # --- --- begin --- ---
    server.run_in_thread()

    # --- --- after server launch --- ---
    app_block.server = server
    app_block.server_name = server_name
    app_block.local_url = path_to_local_server
    app_block.protocol = (
        "https"
        if app_block.local_url.startswith("https") or app_block.is_colab
        else "http"
    )

    if app_block.enable_queue:
        app_block._queue.set_url(path_to_local_server)

    # Never route the local startup ping through a system proxy
    forbid_proxies = {
        "http": "",
        "https": "",
    }
    requests.get(f"{app_block.local_url}startup-events", verify=app_block.ssl_verify, proxies=forbid_proxies)
    app_block.is_running = True
    app_block.block_thread()
docker_as_a_service/shared_utils/handle_upload.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib
2
+ import time
3
+ import inspect
4
+ import re
5
+ import os
6
+ import base64
7
+ import gradio
8
+ import shutil
9
+ import glob
10
+ from shared_utils.config_loader import get_conf
11
+ from loguru import logger
12
+
13
def html_local_file(file):
    """Convert an existing local path into a gradio `file=` URL fragment.

    Paths that do not exist on disk are returned unchanged.
    """
    # NOTE(review): base dir is this module's own directory — presumably the
    # project root; confirm if shared_utils is nested deeper.
    base_dir = os.path.dirname(__file__)
    if not os.path.exists(str(file)):
        return file
    return f'file={file.replace(base_dir, ".")}'
18
+
19
+
20
def html_local_img(__file, layout="left", max_width=None, max_height=None, md=True):
    """Render a local image path as markdown (default) or as an HTML <img>."""
    src = html_local_file(__file)
    if md:
        # The markdown form ignores layout and size settings
        return f"![{src}]({src})"
    css = ""
    for prop, val in (("max-width", max_width), ("max-height", max_height)):
        if val is not None:
            css += f"{prop}: {val};"
    return f'<div align="{layout}"><img src="{src}" style="{css}"></div>'
31
+
32
+
33
def file_manifest_filter_type(file_list, filter_: list = None):
    """Replace image entries in a file list with inline <img> HTML.

    Files whose extension is in `filter_` (default: png/jpg/jpeg) are turned
    into HTML image tags; everything else is passed through untouched.
    """
    extensions = filter_ if filter_ else ["png", "jpg", "jpeg"]
    converted = []
    for path in file_list:
        ext = str(os.path.basename(path)).split(".")[-1]
        converted.append(html_local_img(path, md=False) if ext in extensions else path)
    return converted
43
+
44
+
45
def zip_extract_member_new(self, member, targetpath, pwd):
    # Replacement for ZipFile._extract_member that fixes mojibake in Chinese
    # file names (zip stores names as cp437; re-decode them as GBK).
    """Extract the ZipInfo object 'member' to a physical
    file on the path targetpath.
    """
    import zipfile
    if not isinstance(member, zipfile.ZipInfo):
        member = self.getinfo(member)

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)
    # Undo zipfile's cp437 decoding, then re-decode as GBK; errors are
    # replaced rather than raised so extraction never fails on odd names.
    arcname = arcname.encode('cp437', errors='replace').decode('gbk', errors='replace')

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    invalid_path_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in invalid_path_parts)
    if os.path.sep == '\\':
        # filter illegal characters on Windows
        arcname = self._sanitize_windows_name(arcname, os.path.sep)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if member.is_dir():
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Stream the member's bytes straight to disk
    with self.open(member, pwd=pwd) as source, \
            open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
89
+
90
+
91
def extract_archive(file_path, dest_dir):
    """Extract `file_path` into `dest_dir`, dispatching on file extension.

    Supports .zip, .tar/.gz/.bz2, .rar (needs `pip install rarfile`, plus
    WinRAR on Windows) and .7z (needs `pip install py7zr`). Returns "" on
    success or for unknown extensions, or a user-facing error string when an
    optional dependency is missing.
    """
    import zipfile
    import tarfile
    import os

    # Get the file extension of the input file
    file_extension = os.path.splitext(file_path)[1]

    # Extract the archive based on its extension
    if file_extension == ".zip":
        with zipfile.ZipFile(file_path, "r") as zipobj:
            # Monkey-patch the extractor to fix mojibake in Chinese file names
            zipobj._extract_member = lambda a,b,c: zip_extract_member_new(zipobj, a,b,c)
            zipobj.extractall(path=dest_dir)
            logger.info("Successfully extracted zip archive to {}".format(dest_dir))

    elif file_extension in [".tar", ".gz", ".bz2"]:
        try:
            with tarfile.open(file_path, "r:*") as tarobj:
                # Sanitize member paths to block path-traversal entries
                for member in tarobj.getmembers():
                    member_path = os.path.normpath(member.name)
                    full_path = os.path.join(dest_dir, member_path)
                    full_path = os.path.abspath(full_path)
                    if not full_path.startswith(os.path.abspath(dest_dir) + os.sep):
                        raise Exception(f"Attempted Path Traversal in {member.name}")

                tarobj.extractall(path=dest_dir)
                logger.info("Successfully extracted tar archive to {}".format(dest_dir))
        except tarfile.ReadError as e:
            if file_extension == ".gz":
                # Some unusual projects ship a bare .gz (no tar inside) holding
                # a single .tex file; fall back to a plain gunzip.
                import gzip
                with gzip.open(file_path, 'rb') as f_in:
                    with open(os.path.join(dest_dir, 'main.tex'), 'wb') as f_out:
                        f_out.write(f_in.read())
            else:
                raise e

    # Third-party dependency: pip install rarfile
    # On Windows, WinRAR must also be installed and on PATH (e.g. "C:\Program Files\WinRAR")
    elif file_extension == ".rar":
        try:
            import rarfile

            with rarfile.RarFile(file_path) as rf:
                rf.extractall(path=dest_dir)
                logger.info("Successfully extracted rar archive to {}".format(dest_dir))
        except:
            # NOTE(review): this bare except also hides genuine extraction
            # errors, not only a missing dependency — consider narrowing.
            logger.info("Rar format requires additional dependencies to install")
            return "\n\n解压失败! 需要安装pip install rarfile来解压rar文件。建议:使用zip压缩格式。"

    # Third-party dependency: pip install py7zr
    elif file_extension == ".7z":
        try:
            import py7zr

            with py7zr.SevenZipFile(file_path, mode="r") as f:
                f.extractall(path=dest_dir)
                logger.info("Successfully extracted 7z archive to {}".format(dest_dir))
        except:
            # NOTE(review): same bare-except caveat as the .rar branch above.
            logger.info("7z format requires additional dependencies to install")
            return "\n\n解压失败! 需要安装pip install py7zr来解压7z文件"
    else:
        return ""
    return ""
156
+
docker_as_a_service/shared_utils/key_pattern_manager.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ from functools import wraps, lru_cache
4
+ from shared_utils.advanced_markdown_format import format_io
5
+ from shared_utils.config_loader import get_conf as get_conf
6
+
7
+
8
+ pj = os.path.join
9
+ default_user_name = 'default_user'
10
+
11
# Patterns recognizing OpenAI-style API keys (classic, project, session)
openai_regex = re.compile("|".join((
    r"sk-[a-zA-Z0-9_-]{48}$",
    r"sk-[a-zA-Z0-9_-]{92}$",
    r"sk-proj-[a-zA-Z0-9_-]{48}$",
    r"sk-proj-[a-zA-Z0-9_-]{124}$",
    r"sk-proj-[a-zA-Z0-9_-]{156}$",  # newer project keys use a longer suffix
    r"sess-[a-zA-Z0-9]{40}$",
)))


def is_openai_api_key(key):
    """Return True if `key` looks like an OpenAI API key.

    A non-empty user-configured CUSTOM_API_KEY_PATTERN takes precedence over
    the built-in patterns.
    """
    custom_pattern = get_conf('CUSTOM_API_KEY_PATTERN')
    if len(custom_pattern) != 0:
        return bool(re.match(custom_pattern, key))
    return bool(openai_regex.match(key))
27
+
28
+
29
def is_azure_api_key(key):
    """True for a 32-char alphanumeric key (Azure OpenAI format)."""
    return re.match(r"[a-zA-Z0-9]{32}$", key) is not None


def is_api2d_key(key):
    """True for an API2D key: fk<6 alnum>-<32 alnum>."""
    return re.match(r"fk[a-zA-Z0-9]{6}-[a-zA-Z0-9]{32}$", key) is not None

def is_openroute_api_key(key):
    """True for an OpenRouter key: sk-or-v1-<64 alnum>."""
    return re.match(r"sk-or-v1-[a-zA-Z0-9]{64}$", key) is not None

def is_cohere_api_key(key):
    """True for a 40-char alphanumeric key (Cohere format)."""
    return re.match(r"[a-zA-Z0-9]{40}$", key) is not None
45
+
46
+
47
def is_any_api_key(key):
    """True if `key` (or any comma-separated member of it) matches a known format."""
    if ',' in key:
        # Accept the bundle when at least one member is valid
        return any(is_any_api_key(part) for part in key.split(','))
    return (
        is_openai_api_key(key)
        or is_api2d_key(key)
        or is_azure_api_key(key)
        or is_cohere_api_key(key)
    )
55
+
56
+
57
def what_keys(keys):
    """Summarize how many of the comma-separated `keys` match each provider."""
    key_list = keys.split(',')
    counts = {
        'OpenAI Key': sum(1 for k in key_list if is_openai_api_key(k)),
        'API2D Key': sum(1 for k in key_list if is_api2d_key(k)),
        'Azure Key': sum(1 for k in key_list if is_azure_api_key(k)),
    }
    return f"检测到: OpenAI Key {counts['OpenAI Key']} 个, Azure Key {counts['Azure Key']} 个, API2D Key {counts['API2D Key']} 个"
74
+
75
+
76
def select_api_key(keys, llm_model):
    """Pick one key (uniformly at random) from `keys` usable with `llm_model`.

    Raises:
        RuntimeError: when no comma-separated key matches the format required
        by the selected model family.
    """
    import random
    key_list = keys.split(',')
    # Model-name prefixes mapped to the matching key validator
    prefix_validators = (
        (('gpt-', 'one-api-', 'o1-'), is_openai_api_key),
        (('api2d-',), is_api2d_key),
        (('azure-',), is_azure_api_key),
        (('cohere-',), is_cohere_api_key),
        (('openrouter-',), is_openroute_api_key),
    )
    avail_key_list = []
    for prefixes, validator in prefix_validators:
        if llm_model.startswith(prefixes):
            avail_key_list.extend(k for k in key_list if validator(k))

    if len(avail_key_list) == 0:
        raise RuntimeError(f"您提供的api-key不满足要求,不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源(左上角更换模型菜单中可切换openai,azure,claude,cohere等请求源)。")

    return random.choice(avail_key_list)  # random load balancing
106
+
107
+
108
def select_api_key_for_embed_models(keys, llm_model):
    """Pick one key (uniformly at random) usable with the embedding model.

    Raises:
        RuntimeError: when no comma-separated key matches the required format.
    """
    import random
    candidates = []
    if llm_model.startswith('text-embedding-'):
        candidates = [k for k in keys.split(',') if is_openai_api_key(k)]

    if len(candidates) == 0:
        raise RuntimeError(f"您提供的api-key不满足要求,不包含任何可用于{llm_model}的api-key。您可能选择了错误的模型或请求源。")

    return random.choice(candidates)  # random load balancing
docker_as_a_service/shared_utils/logging.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from loguru import logger
2
+ import logging
3
+ import sys
4
+ import os
5
+
6
def chat_log_filter(record):
    """Loguru filter: keep only records emitted via logger.bind(chat_msg=...)."""
    extra = record["extra"]
    return "chat_msg" in extra
8
+
9
def not_chat_log_filter(record):
    """Loguru filter: keep every record EXCEPT those bound with chat_msg."""
    extra = record["extra"]
    return "chat_msg" not in extra
11
+
12
def formatter_with_clip(record):
    """Loguru format callable: returns the template string, not the final message.

    Side effect: stores padded/clipped copies of the function name and line
    number on the record ('function_x', 'line_x') so the template columns
    stay aligned.
    """
    width = 12
    # Center short names; names longer than `width` are clipped to their tail
    # with a ".." prefix so the column width stays constant.
    shown = record['function'].center(width)
    if len(shown) > width:
        shown = ".." + shown[-(width - 2):]
    record['function_x'] = shown
    record['line_x'] = str(record['line']).ljust(3)
    return '<green>{time:HH:mm}</green> | <cyan>{function_x}</cyan>:<cyan>{line_x}</cyan> | <level>{message}</level>\n'
21
+
22
def setup_logging(PATH_LOGGING):
    """Configure loguru sinks: a colored console sink plus two rotating files.

    Records bound with `chat_msg` (see `chat_log_filter`) are routed ONLY to
    admin/chat_secrets.log; all other records go to stderr and
    admin/console_log.log.

    Args:
        PATH_LOGGING: base log directory; an "admin" subdirectory is created
            inside it if missing.
    """
    admin_log_path = os.path.join(PATH_LOGGING, "admin")
    os.makedirs(admin_log_path, exist_ok=True)
    sensitive_log_path = os.path.join(admin_log_path, "chat_secrets.log")
    regular_log_path = os.path.join(admin_log_path, "console_log.log")
    # Drop loguru's default stderr sink before installing our own sinks.
    logger.remove()
    logger.configure(
        levels=[dict(name="WARNING", color="<g>")],
    )

    # Console sink: everything except chat messages, compact colored columns.
    # Fix: use the named `not_chat_log_filter` helper instead of an anonymous
    # lambda, matching the regular file sink below.
    logger.add(
        sys.stderr,
        format=formatter_with_clip,
        filter=not_chat_log_filter,
        colorize=True,
        enqueue=True
    )

    # Chat-only file sink (may contain user conversations — sensitive data).
    logger.add(
        sensitive_log_path,
        format='<green>{time:MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>',
        rotation="10 MB",
        filter=chat_log_filter,
        enqueue=True,
    )

    # Regular file sink: mirrors the console output into a rotating file.
    logger.add(
        regular_log_path,
        format='<green>{time:MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>',
        rotation="10 MB",
        filter=not_chat_log_filter,
        enqueue=True,
    )

    # Silence httpx per-request INFO noise.
    logging.getLogger("httpx").setLevel(logging.WARNING)

    logger.warning(f"所有对话记录将自动保存在本地目录{sensitive_log_path}, 请注意自我隐私保护哦!")
61
+
62
+
63
+ # logger.bind(chat_msg=True).info("This message is logged to the file!")
64
+ # logger.debug(f"debug message")
65
+ # logger.info(f"info message")
66
+ # logger.success(f"success message")
67
+ # logger.error(f"error message")
68
+ # logger.add("special.log", filter=lambda record: "special" in record["extra"])
69
+ # logger.debug("This message is not logged to the file")
docker_as_a_service/shared_utils/map_names.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
# Forward table: internal model id -> human-friendly display name.
# Entries are currently disabled; kept as examples of the expected format.
mapping_dic = {
    # "qianfan": "qianfan(文心一言大模型)",
    # "zhipuai": "zhipuai(智谱GLM4超级模型🔥)",
    # "gpt-4-1106-preview": "gpt-4-1106-preview(新调优版本GPT-4🔥)",
    # "gpt-4-vision-preview": "gpt-4-vision-preview(识图模型GPT-4V)",
}

# Reverse table: display name -> internal model id.
rev_mapping_dic = {friendly: internal for internal, friendly in mapping_dic.items()}


def map_model_to_friendly_names(m):
    """Return the display name for model id `m`, or `m` itself if unmapped."""
    return mapping_dic.get(m, m)


def map_friendly_names_to_model(m):
    """Return the internal model id for display name `m`, or `m` if unmapped."""
    return rev_mapping_dic.get(m, m)
22
+
23
def read_one_api_model_name(model: str, default_max_token: int = 4096):
    """Return the real model name and its max_token limit.

    A one-api style model string may carry an inline annotation, e.g.
    "gpt-4(max_token=32000)" -> ("gpt-4", 32000). When the annotation is
    absent, `default_max_token` is returned (generalized from the previously
    hard-coded 4096; the default preserves the old behavior).

    Args:
        model: model name, optionally containing "(max_token=N)".
        default_max_token: fallback limit when no annotation is present.

    Returns:
        (model_name_without_annotation, max_token) tuple.
    """
    max_token_pattern = r"\(max_token=(\d+)\)"
    match = re.search(max_token_pattern, model)
    if match:
        max_token_tmp = int(match.group(1))
        # Strip the "(max_token=...)" annotation from the model name.
        model = re.sub(max_token_pattern, "", model)
    else:
        max_token_tmp = default_max_token
    return model, max_token_tmp
docker_as_a_service/shared_utils/text_mask.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from functools import lru_cache
3
+
4
# Pre-compiled patterns for the "string mask" mechanism: a single string can
# carry two alternative payloads wrapped in <gpt_academic_string_mask> tags.
# The non-greedy (.*?) capture groups grab the shortest text before each
# closing tag; group 1 is the LLM-facing payload, group 2 the render-facing
# (or English vs. Chinese for the language-based variant below).

const_extract_re = re.compile(
    r"<gpt_academic_string_mask><show_llm>(.*?)</show_llm><show_render>(.*?)</show_render></gpt_academic_string_mask>"
)

const_extract_langbased_re = re.compile(
    r"<gpt_academic_string_mask><lang_english>(.*?)</lang_english><lang_chinese>(.*?)</lang_chinese></gpt_academic_string_mask>",
    flags=re.DOTALL,
)


@lru_cache(maxsize=128)
def apply_gpt_academic_string_mask(string, mode="show_all"):
    """Resolve mask tags in `string` for a given audience.

    Depending on whether the string is destined for the LLM or for web
    rendering, each masked span is replaced by the corresponding payload.
    Strings without a mask tag (and empty strings) pass through unchanged.

    Args:
        string: text possibly containing <gpt_academic_string_mask> spans.
        mode: "show_all" (leave tags intact), "show_llm" (keep LLM payload),
            or "show_render" (keep render payload).

    Raises:
        ValueError: for an unknown mode, but only when a mask tag is present.
    """
    if not string or "<gpt_academic_string_mask>" not in string:
        return string  # nothing to resolve

    if mode == "show_all":
        return string
    if mode == "show_llm":
        return const_extract_re.sub(r"\1", string)
    if mode == "show_render":
        return const_extract_re.sub(r"\2", string)
    raise ValueError("Invalid mode")
43
+
44
+
45
@lru_cache(maxsize=128)
def build_gpt_academic_masked_string(text_show_llm="", text_show_render=""):
    """Wrap two payloads into one masked string: one for the LLM, one for web rendering."""
    return (
        "<gpt_academic_string_mask>"
        f"<show_llm>{text_show_llm}</show_llm>"
        f"<show_render>{text_show_render}</show_render>"
        "</gpt_academic_string_mask>"
    )
51
+
52
+
53
@lru_cache(maxsize=128)
def apply_gpt_academic_string_mask_langbased(string, lang_reference):
    """Resolve language-based mask tags in `string` using `lang_reference`.

    When `string` contains a <gpt_academic_string_mask> span with
    <lang_english>/<lang_chinese> alternatives, keep the alternative matching
    the language of `lang_reference`: Chinese if it contains any CJK
    character, English otherwise. Strings without a mask tag pass through
    unchanged.

    Example:
        string = "note, lang_reference is: <gpt_academic_string_mask>"
                 "<lang_english>English</lang_english>"
                 "<lang_chinese>中文</lang_chinese></gpt_academic_string_mask>"
        lang_reference = "hello world"   ->   "note, lang_reference is: English"
    """
    if "<gpt_academic_string_mask>" not in string:  # no tag, nothing to do
        return string

    # Any character in the CJK Unified Ideographs range marks the reference
    # text as Chinese.
    reference_is_chinese = re.search(u'[\u4e00-\u9fff]+', lang_reference) is not None
    replacement = r"\2" if reference_is_chinese else r"\1"
    return const_extract_langbased_re.sub(replacement, string)
87
+
88
+
89
@lru_cache(maxsize=128)
def build_gpt_academic_masked_string_langbased(text_show_english="", text_show_chinese=""):
    """Wrap an English and a Chinese payload into one language-masked string."""
    return (
        "<gpt_academic_string_mask>"
        f"<lang_english>{text_show_english}</lang_english>"
        f"<lang_chinese>{text_show_chinese}</lang_chinese>"
        "</gpt_academic_string_mask>"
    )
95
+
96
+
97
+ if __name__ == "__main__":
98
+ # Test
99
+ input_string = (
100
+ "你好\n"
101
+ + build_gpt_academic_masked_string(text_show_llm="mermaid", text_show_render="")
102
+ + "你好\n"
103
+ )
104
+ print(
105
+ apply_gpt_academic_string_mask(input_string, "show_llm")
106
+ ) # Should print the strings with 'abc' in place of the academic mask tags
107
+ print(
108
+ apply_gpt_academic_string_mask(input_string, "show_render")
109
+ ) # Should print the strings with 'xyz' in place of the academic mask tags