gxy committed
Commit ee55870
1 parent: b7e2438

FEAT: update requirements

Files changed (3):
  1. app.py +4 -196
  2. launch.py +198 -0
  3. requirements.txt +1 -2
app.py CHANGED
@@ -1,198 +1,6 @@
- #!/usr/bin/env python
- # this code is modified from https://huggingface.co/spaces/lykeven/visualglm-6b
- import gradio as gr
- import re
- from PIL import Image
- import torch
- from io import BytesIO
- import hashlib
  import os
- from transformers import LlamaForCausalLM, LlamaTokenizer, BlipImageProcessor, BitsAndBytesConfig, AutoModelForCausalLM
 
- DESCRIPTION = '''# <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">Ziya-Blip2-14B</a>'''
-
- MAINTENANCE_NOTICE1 = 'Hint 1: If the app reports "Something went wrong, connection error out", please turn off your proxy and retry.\nHint 2: If you upload a large image, e.g. 10MB, it may take some time to upload and process. Please be patient and wait.'
- MAINTENANCE_NOTICE2 = '提示1: 如果应用报了“Something went wrong, connection error out”的错误,请关闭代理并重试。\n提示2: 如果你上传了很大的图片,比如10MB大小,那将需要一些时间来上传和处理,请耐心等待。'
-
- NOTES = 'This app is adapted from <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1</a>. It is recommended to check out the repo if you want to see the details of our model. Most of the code in this demo is modified from <a href="https://huggingface.co/spaces/lykeven/visualglm-6b">lykeven/visualglm-6b</a>.'
-
- import json
-
- default_chatbox = []
-
-
- def is_chinese(text):
-     zh_pattern = re.compile(u'[\u4e00-\u9fa5]+')
-     return zh_pattern.search(text)
-
- AUTH_TOKEN = os.getenv("AUTH_TOKEN")
-
- LM_MODEL_PATH = "gxy/Ziya-LLaMA-13B-v1"
- lm_model = LlamaForCausalLM.from_pretrained(
-     LM_MODEL_PATH,
-     device_map="auto",
-     torch_dtype=torch.float16,
-     use_auth_token=AUTH_TOKEN,
-     quantization_config=BitsAndBytesConfig(load_in_8bit=True))
- tokenizer = LlamaTokenizer.from_pretrained(LM_MODEL_PATH)
-
- # visual model
- OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
- OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]
- # demo.py is in the project path, so we can use the local path ".". Otherwise you should use "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1"
- model = AutoModelForCausalLM.from_pretrained(
-     "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1",
-     trust_remote_code=True,
-     torch_dtype=torch.float16)
- model.cuda()  # if you run on cpu, comment out this line
- model.language_model = lm_model
- image_size = model.config.vision_config.image_size
- image_processor = BlipImageProcessor(
-     size={"height": image_size, "width": image_size},
-     image_mean=OPENAI_CLIP_MEAN,
-     image_std=OPENAI_CLIP_STD,
- )
-
- def post(
-         input_text,
-         temperature,
-         top_p,
-         image_prompt,
-         result_previous,
-         hidden_image
- ):
-     result_text = [(ele[0], ele[1]) for ele in result_previous]
-     previous_querys = []
-     previous_outputs = []
-     for i in range(len(result_text)-1, -1, -1):
-         if result_text[i][0] == "":
-             del result_text[i]
-         else:
-             previous_querys.append(result_text[i][0])
-             previous_outputs.append(result_text[i][1])
-
-     is_zh = is_chinese(input_text)
-
-     if image_prompt is None:
-         print("Image empty")
-         if is_zh:
-             result_text.append((input_text, '图片为空!请上传图片并重试。'))
-         else:
-             result_text.append((input_text, 'Image empty! Please upload an image and retry.'))
-         return input_text, result_text, hidden_image
-     elif input_text == "":
-         print("Text empty")
-         result_text.append((input_text, 'Text empty! Please enter text and retry.'))
-         return "", result_text, hidden_image
-
-     generate_config = {
-         "max_new_tokens": 128,
-         "top_p": top_p,
-         "temperature": temperature,
-         "repetition_penalty": 1.18,
-     }
-     img = Image.open(image_prompt)
-     pixel_values = image_processor(
-         img,
-         return_tensors="pt").pixel_values.to(
-         model.device).to(model.dtype)
-     output_buffer = BytesIO()
-     img.save(output_buffer, "PNG")
-     byte_data = output_buffer.getvalue()
-     md = hashlib.md5()
-     md.update(byte_data)
-     img_hash = md.hexdigest()
-     if img_hash != hidden_image:
-         previous_querys = []
-         previous_outputs = []
-         result_text = []
-
-     answer = model.chat(
-         tokenizer=tokenizer,
-         pixel_values=pixel_values,
-         query=input_text,
-         previous_querys=previous_querys,
-         previous_outputs=previous_outputs,
-         **generate_config,
-     )
-
-     result_text.append((input_text, answer))
-     print(result_text)
-     return "", result_text, img_hash
-
-
- def clear_fn(value):
-     return "", default_chatbox, None
-
- def clear_fn2(value):
-     return default_chatbox
-
- def io_fn(a, b, c):
-     print(f"call io_fn")
-     return a, b
-
-
- def change_language(value):
-     if value == "Change hint to English":
-         return "提示变为中文", MAINTENANCE_NOTICE1
-     else:
-         return "Change hint to English", MAINTENANCE_NOTICE2
-
-
- def main():
-     gr.close_all()
-     examples = []
-     with open("./examples/example_inputs.jsonl") as f:
-         for line in f:
-             data = json.loads(line)
-             examples.append(data)
-
-     with gr.Blocks(css='style.css') as demo:
-
-         with gr.Row():
-             with gr.Column(scale=4.5):
-                 with gr.Group():
-                     input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
-                     with gr.Row():
-                         run_button = gr.Button('Generate')
-                         clear_button = gr.Button('Clear')
-
-                     image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
-                 with gr.Row():
-                     temperature = gr.Slider(maximum=1, value=0.7, minimum=0, label='Temperature')
-                     top_p = gr.Slider(maximum=1, value=0.1, minimum=0, label='Top P')
-                 with gr.Group():
-                     with gr.Row():
-                         with gr.Column(scale=7):
-                             maintenance_notice = gr.Markdown(MAINTENANCE_NOTICE1)
-                         with gr.Column(scale=2):
-                             change_button = gr.Button('Change hint to English', visible=False)
-             with gr.Column(scale=5.5):
-                 result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[]).style(height=550)
-                 hidden_image_hash = gr.Textbox(visible=False)
-
-         gr_examples = gr.Examples(examples=[[example["text"], example["image"]] for example in examples],
-                                   inputs=[input_text, image_prompt],
-                                   label="Example Inputs (Click to insert an example into the input box)",
-                                   examples_per_page=3)
-
-         gr.Markdown(NOTES)
-
-         print(gr.__version__)
-         run_button.click(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
-                          outputs=[input_text, result_text, hidden_image_hash])
-         input_text.submit(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
-                           outputs=[input_text, result_text, hidden_image_hash])
-         clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
-         image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
-         image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
-
-     print(gr.__version__)
-
-     demo.queue(concurrency_count=10)
-     demo.launch(server_name="0.0.0.0")
-
-
- if __name__ == '__main__':
-     main()
+ os.system(
+     f"git clone https://github.com/TimDettmers/bitsandbytes.git /home/user/app/bitsandbytes")
+ os.system(f"cd /home/user/app/bitsandbytes && CUDA_VERSION=113 make cuda11x && python setup.py install")
+ os.system(f"python /home/user/app/launch.py")
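
Note: after this commit, app.py is only a bootstrap. It compiles bitsandbytes from source against CUDA 11.3 (CUDA_VERSION=113 / make cuda11x) and then hands off to launch.py, which now holds the code that previously lived here. The os.system chain above silently ignores non-zero exit codes; a minimal sketch of the same bootstrap with subprocess (not part of the commit, assuming the same /home/user/app paths as in the Space) that stops on the first failed step:

import subprocess

BNB_DIR = "/home/user/app/bitsandbytes"

# Clone and build bitsandbytes from source for the CUDA 11.3 runtime.
# check=True raises CalledProcessError instead of silently continuing.
subprocess.run(
    ["git", "clone", "https://github.com/TimDettmers/bitsandbytes.git", BNB_DIR],
    check=True)
subprocess.run(
    "CUDA_VERSION=113 make cuda11x && python setup.py install",
    shell=True, cwd=BNB_DIR, check=True)

# Only start the app once the CUDA build succeeded.
subprocess.run(["python", "/home/user/app/launch.py"], check=True)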
launch.py ADDED
@@ -0,0 +1,198 @@
+ #!/usr/bin/env python
+ # this code is modified from https://huggingface.co/spaces/lykeven/visualglm-6b
+ import gradio as gr
+ import re
+ from PIL import Image
+ import torch
+ from io import BytesIO
+ import hashlib
+ import os
+ from transformers import LlamaForCausalLM, LlamaTokenizer, BlipImageProcessor, BitsAndBytesConfig, AutoModelForCausalLM
+
+ DESCRIPTION = '''# <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">Ziya-Blip2-14B</a>'''
+
+ MAINTENANCE_NOTICE1 = 'Hint 1: If the app reports "Something went wrong, connection error out", please turn off your proxy and retry.\nHint 2: If you upload a large image, e.g. 10MB, it may take some time to upload and process. Please be patient and wait.'
+ MAINTENANCE_NOTICE2 = '提示1: 如果应用报了“Something went wrong, connection error out”的错误,请关闭代理并重试。\n提示2: 如果你上传了很大的图片,比如10MB大小,那将需要一些时间来上传和处理,请耐心等待。'
+
+ NOTES = 'This app is adapted from <a href="https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1">https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1</a>. It is recommended to check out the repo if you want to see the details of our model. Most of the code in this demo is modified from <a href="https://huggingface.co/spaces/lykeven/visualglm-6b">lykeven/visualglm-6b</a>.'
+
+ import json
+
+ default_chatbox = []
+
+
+ def is_chinese(text):
+     zh_pattern = re.compile(u'[\u4e00-\u9fa5]+')
+     return zh_pattern.search(text)
+
+ AUTH_TOKEN = os.getenv("AUTH_TOKEN")
+
+ LM_MODEL_PATH = "gxy/Ziya-LLaMA-13B-v1"
+ lm_model = LlamaForCausalLM.from_pretrained(
+     LM_MODEL_PATH,
+     device_map="auto",
+     torch_dtype=torch.float16,
+     use_auth_token=AUTH_TOKEN,
+     quantization_config=BitsAndBytesConfig(load_in_8bit=True))
+ tokenizer = LlamaTokenizer.from_pretrained(LM_MODEL_PATH)
+
+ # visual model
+ OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
+ OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]
+ # demo.py is in the project path, so we can use the local path ".". Otherwise you should use "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1"
+ model = AutoModelForCausalLM.from_pretrained(
+     "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1",
+     trust_remote_code=True,
+     torch_dtype=torch.float16)
+ model.cuda()  # if you run on cpu, comment out this line
+ model.language_model = lm_model
+ image_size = model.config.vision_config.image_size
+ image_processor = BlipImageProcessor(
+     size={"height": image_size, "width": image_size},
+     image_mean=OPENAI_CLIP_MEAN,
+     image_std=OPENAI_CLIP_STD,
+ )
+
+ def post(
+         input_text,
+         temperature,
+         top_p,
+         image_prompt,
+         result_previous,
+         hidden_image
+ ):
+     result_text = [(ele[0], ele[1]) for ele in result_previous]
+     previous_querys = []
+     previous_outputs = []
+     for i in range(len(result_text)-1, -1, -1):
+         if result_text[i][0] == "":
+             del result_text[i]
+         else:
+             previous_querys.append(result_text[i][0])
+             previous_outputs.append(result_text[i][1])
+
+     is_zh = is_chinese(input_text)
+
+     if image_prompt is None:
+         print("Image empty")
+         if is_zh:
+             result_text.append((input_text, '图片为空!请上传图片并重试。'))
+         else:
+             result_text.append((input_text, 'Image empty! Please upload an image and retry.'))
+         return input_text, result_text, hidden_image
+     elif input_text == "":
+         print("Text empty")
+         result_text.append((input_text, 'Text empty! Please enter text and retry.'))
+         return "", result_text, hidden_image
+
+     generate_config = {
+         "max_new_tokens": 128,
+         "top_p": top_p,
+         "temperature": temperature,
+         "repetition_penalty": 1.18,
+     }
+     img = Image.open(image_prompt)
+     pixel_values = image_processor(
+         img,
+         return_tensors="pt").pixel_values.to(
+         model.device).to(model.dtype)
+     output_buffer = BytesIO()
+     img.save(output_buffer, "PNG")
+     byte_data = output_buffer.getvalue()
+     md = hashlib.md5()
+     md.update(byte_data)
+     img_hash = md.hexdigest()
+     if img_hash != hidden_image:
+         previous_querys = []
+         previous_outputs = []
+         result_text = []
+
+     answer = model.chat(
+         tokenizer=tokenizer,
+         pixel_values=pixel_values,
+         query=input_text,
+         previous_querys=previous_querys,
+         previous_outputs=previous_outputs,
+         **generate_config,
+     )
+
+     result_text.append((input_text, answer))
+     print(result_text)
+     return "", result_text, img_hash
+
+
+ def clear_fn(value):
+     return "", default_chatbox, None
+
+ def clear_fn2(value):
+     return default_chatbox
+
+ def io_fn(a, b, c):
+     print(f"call io_fn")
+     return a, b
+
+
+ def change_language(value):
+     if value == "Change hint to English":
+         return "提示变为中文", MAINTENANCE_NOTICE1
+     else:
+         return "Change hint to English", MAINTENANCE_NOTICE2
+
+
+ def main():
+     gr.close_all()
+     examples = []
+     with open("./examples/example_inputs.jsonl") as f:
+         for line in f:
+             data = json.loads(line)
+             examples.append(data)
+
+     with gr.Blocks(css='style.css') as demo:
+
+         with gr.Row():
+             with gr.Column(scale=4.5):
+                 with gr.Group():
+                     input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
+                     with gr.Row():
+                         run_button = gr.Button('Generate')
+                         clear_button = gr.Button('Clear')
+
+                     image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
+                 with gr.Row():
+                     temperature = gr.Slider(maximum=1, value=0.7, minimum=0, label='Temperature')
+                     top_p = gr.Slider(maximum=1, value=0.1, minimum=0, label='Top P')
+                 with gr.Group():
+                     with gr.Row():
+                         with gr.Column(scale=7):
+                             maintenance_notice = gr.Markdown(MAINTENANCE_NOTICE1)
+                         with gr.Column(scale=2):
+                             change_button = gr.Button('Change hint to English', visible=False)
+             with gr.Column(scale=5.5):
+                 result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[]).style(height=550)
+                 hidden_image_hash = gr.Textbox(visible=False)
+
+         gr_examples = gr.Examples(examples=[[example["text"], example["image"]] for example in examples],
+                                   inputs=[input_text, image_prompt],
+                                   label="Example Inputs (Click to insert an example into the input box)",
+                                   examples_per_page=3)
+
+         gr.Markdown(NOTES)
+
+         print(gr.__version__)
+         run_button.click(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
+                          outputs=[input_text, result_text, hidden_image_hash])
+         input_text.submit(fn=post, inputs=[input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash],
+                           outputs=[input_text, result_text, hidden_image_hash])
+         clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
+         image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
+         image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
+
+     print(gr.__version__)
+
+     demo.queue(concurrency_count=10)
+     demo.launch(server_name="0.0.0.0")
+
+
+ if __name__ == '__main__':
+     main()
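
For reference, launch.py drives each round of inference through model.chat after preprocessing the uploaded image with image_processor. A minimal sketch of a single standalone round (not part of the commit), assuming model, tokenizer, and image_processor are already loaded exactly as in the file above, and using a hypothetical local test image example.jpg:

from PIL import Image

img = Image.open("example.jpg")  # hypothetical test image
pixel_values = image_processor(img, return_tensors="pt").pixel_values.to(
    model.device).to(model.dtype)

# First round of conversation, so the history lists are empty; the
# generation parameters mirror generate_config in post() above.
answer = model.chat(
    tokenizer=tokenizer,
    pixel_values=pixel_values,
    query="What is in this picture?",
    previous_querys=[],
    previous_outputs=[],
    max_new_tokens=128,
    top_p=0.1,
    temperature=0.7,
    repetition_penalty=1.18,
)
print(answer)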
requirements.txt CHANGED
@@ -1,4 +1,3 @@
  git+https://github.com/huggingface/transformers.git
  torch
- git+https://github.com/huggingface/accelerate.git
- https://github.com/GGGGGGXY/bitsandbytes/releases/download/0.39/bitsandbytes-0.39.0-py3-none-any.whl
+ git+https://github.com/huggingface/accelerate.git
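
The prebuilt bitsandbytes wheel is dropped from requirements.txt because app.py now compiles the library from source at startup; accelerate stays, since device_map="auto" and 8-bit loading in launch.py require it. A quick sanity check (a sketch, not part of the commit) that the source build is importable and CUDA is visible before launch.py loads the 8-bit model:

import importlib.metadata

import torch
import bitsandbytes  # noqa: F401 -- fails here if the source build did not install

# 8-bit loading via BitsAndBytesConfig(load_in_8bit=True) needs a CUDA device.
assert torch.cuda.is_available(), "8-bit loading requires a CUDA device"
print("bitsandbytes", importlib.metadata.version("bitsandbytes"),
      "| torch CUDA:", torch.version.cuda)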