chenlin committed on
Commit
abbf8c3
1 Parent(s): bdcb3d3
Files changed (5) hide show
  1. .gitattributes +1 -0
  2. .gitignore +1 -0
  3. app.py +110 -0
  4. pyproject.toml +37 -0
  5. requirements.txt +29 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import tempfile
4
+
5
+ import spaces
6
+ import gradio as gr
7
+ import torch
8
+
9
# HTML banner shown at the top of the demo page.
# Fix: the second <h5> was never closed — added the missing </h5> so the
# markup is well-formed.
title_markdown = ("""
<div style="display: flex; justify-content: flex-start; align-items: center; text-align: center;">
  <div style="margin-right: 20px; display: flex; align-items: center;">
    <a href="https://github.com/ShareGPT4Omni/ShareGPT4Video" style="text-decoration: none; display: flex; align-items: center;">
      <img src="https://raw.githubusercontent.com/ShareGPT4V/ShareGPT4V-Resources/master/images/share4video_tight.png" alt="ShareGPT4Video🚀" style="max-width: 120px; height: auto;">
    </a>
  </div>
  <div>
    <h1>ShareGPT4Video: Improving Video Understanding and Generation with Better Captions</h1>
    <h5 style="margin: 0;">If you like our project, please give us a star ✨ on Github for the latest update.</h5>
    <h5 style="margin: 0;"> <a href="https://sharegpt4video.github.io/">[Project Page]</a> <a href="https://github.com/ShareGPT4Omni/ShareGPT4Video">[Code]</a> <a href="https://arxiv.org/abs/2406.04325v1">[Paper]</a></h5>
  </div>
</div>
""")

# Extra CSS injected into the Blocks app (keeps the three action buttons
# from collapsing below a usable width).
block_css = """
#buttons button {
    min-width: min(120px,100%);
}
"""

# License / terms notice rendered at the bottom of the page.
learn_more_markdown = ("""
### License
The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
""")


# Model-loading configuration. NOTE(review): model_path is empty and the
# handlers below are stubs — presumably the real checkpoint path is filled
# in before actual inference is wired up; confirm with the maintainers.
model_path = ''
device = 'cuda'
load_8bit = False      # 8-bit quantized loading disabled
load_4bit = False      # 4-bit quantized loading disabled
dtype = torch.float16  # half precision for GPU inference
43
@spaces.GPU(duration=60)
def generate_slidingcaptioning(video):
    """Sliding-window captioning handler (stub: returns a fixed string)."""
    caption = 'text'
    return caption
46
+
47
@spaces.GPU(duration=60)
def generate_fastcaptioning(video):
    """Fast captioning handler (stub: returns a fixed string)."""
    caption = 'text'
    return caption
50
+
51
@spaces.GPU(duration=60)
def generate_promptrecaptioning(text):
    """Prompt re-captioning handler (stub: echoes the input prompt)."""
    recaption = text
    return recaption
54
+
55
+ def save_video_to_local(video_path):
56
+ filename = os.path.join('temp', next(
57
+ tempfile._get_candidate_names()) + '.mp4')
58
+ shutil.copyfile(video_path, filename)
59
+ return filename
60
+
61
# Assemble the Gradio UI: inputs (video + prompt + action buttons) on the
# left, caption output on the right.
with gr.Blocks(title='ShareCaptioner-Video', theme=gr.themes.Default(), css=block_css) as demo:
    gr.Markdown(title_markdown)

    # Session-state placeholders (not consumed by the stub handlers yet).
    state = gr.State()
    state_ = gr.State()
    first_run = gr.State()

    with gr.Row():
        gr.Markdown("### The ShareCaptioner-Video is a Four-in-One exceptional video captioning model with the following capabilities:\n1. Fast captioning, 2. Sliding Captioning, 3. Clip Summarizing, 4. Prompt Re-Captioning")
    with gr.Row():
        gr.Markdown("(THE DEMO OF \"Clip Summarizing\" IS COMING SOON...)")

    with gr.Row():
        # Left column: video upload, prompt textbox, and the three actions.
        with gr.Column(scale=6):
            with gr.Row():
                video_input = gr.Video(label="Input Video")
                cur_dir = os.path.dirname(os.path.abspath(__file__))
            with gr.Row():
                prompt_box = gr.Textbox(show_label=False, placeholder="Input Text", container=False)
            with gr.Row():
                with gr.Column(scale=2, min_width=50):
                    btn_sliding = gr.Button(value="Sliding Captioning", variant="primary", interactive=True)
                with gr.Column(scale=2, min_width=50):
                    btn_fast = gr.Button(value="Fast Captioning", variant="primary", interactive=True)
                with gr.Column(scale=2, min_width=50):
                    btn_recaption = gr.Button(value="Prompt Re-captioning", variant="primary", interactive=True)
        # Right column: generated caption output.
        with gr.Column(scale=4, min_width=200):
            with gr.Row():
                output_box = gr.Textbox(show_label=False, placeholder="Output", container=False)
    gr.Markdown(learn_more_markdown)

    # Wire each action button to its captioning handler.
    btn_sliding.click(generate_slidingcaptioning, [video_input], [output_box])
    btn_fast.click(generate_fastcaptioning, [video_input], [output_box])
    btn_recaption.click(generate_promptrecaptioning, [prompt_box], [output_box])

### for local launch
demo.launch(server_name="0.0.0.0",
            server_port=28358,
            share=True)

### for huggingface launch
# demo.launch()
pyproject.toml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Packaging metadata for the llava distribution (PEP 517/621).

[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "llava"
version = "1.2.2.post1"
description = "Towards GPT-4 like large language and visual assistant."
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: Apache Software License",
]
# Runtime dependencies; pins mirror requirements.txt.
dependencies = [
    "torch==2.1.2", "torchvision==0.16.2",
    "transformers==4.37.2", "tokenizers==0.15.1", "sentencepiece==0.1.99", "shortuuid",
    "accelerate==0.21.0", "peft", "bitsandbytes",
    "pydantic", "markdown2[all]", "numpy", "scikit-learn==1.2.2",
    "gradio==4.16.0", "gradio_client==0.8.1", "openai", "spaces",
    "requests", "httpx==0.24.0", "uvicorn", "fastapi", "decord",
    "einops==0.6.1", "einops-exts==0.0.4", "timm==0.6.13",
]

# Extras: `pip install llava[train]` for training, `[build]` for releasing.
[project.optional-dependencies]
train = ["deepspeed==0.12.6", "ninja", "wandb"]
build = ["build", "twine"]

[project.urls]
"Homepage" = "https://llava-vl.github.io"
"Bug Tracker" = "https://github.com/haotian-liu/LLaVA/issues"

# Keep non-package directories out of the wheel/sdist.
[tool.setuptools.packages.find]
exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]

[tool.wheel]
exclude = ["assets*", "benchmark*", "docs", "dist*", "playground*", "scripts*", "tests*"]
requirements.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Core deep-learning stack
torch==2.1.2
torchvision==0.16.2
transformers==4.37.2
tokenizers==0.15.1
sentencepiece==0.1.99
accelerate==0.21.0
peft
bitsandbytes
# General utilities
pydantic
markdown2[all]
numpy
scikit-learn==1.2.2
# Demo / serving
gradio==4.16.0
gradio_client==0.8.1
openai
spaces
requests
httpx==0.24.0
uvicorn
fastapi
# Video + vision helpers
decord
einops==0.6.1
einops-exts==0.0.4
timm==0.6.13
# Training extras
deepspeed==0.12.6
ninja
wandb
# Release tooling
build
twine