J-LAB committed on
Commit
bebcf7e
Parent: 58d100f

Upload 4 files

Files changed (4)
  1. README.md +6 -6
  2. app.py +113 -0
  3. pre-requirements.txt +1 -0
  4. requirements.txt +5 -0
README.md CHANGED
@@ -1,12 +1,12 @@
  ---
- title: FluxiIA LLama-Florence
- emoji: 🌖
- colorFrom: pink
- colorTo: pink
+ title: FluxiAI ChatbotVision
+ emoji: 💬
+ colorFrom: yellow
+ colorTo: purple
  sdk: gradio
- sdk_version: 5.5.0
+ sdk_version: 4.36.1
  app_file: app.py
  pinned: false
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
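
For orientation, the chat side of this Space reduces to the `huggingface_hub` streaming pattern below. This is a minimal sketch, not part of the commit; it assumes a valid `HF_TOKEN` in the environment and access to `meta-llama/Meta-Llama-3-8B-Instruct`:

```python
from huggingface_hub import InferenceClient

# Minimal sketch of the streaming chat pattern app.py relies on
# (assumes HF_TOKEN is set and the model endpoint is reachable).
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Say hello in one sentence."},
]

response = ""
for chunk in client.chat_completion(messages, max_tokens=64, stream=True):
    token = chunk.choices[0].delta.content
    if token:  # the final chunk may carry a None delta
        response += token
print(response)
```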
app.py ADDED
@@ -0,0 +1,113 @@
+ import spaces
+ import gradio as gr
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ from huggingface_hub import InferenceClient
+ from PIL import Image
+ import torch
+ import numpy as np
+ import subprocess
+
+ # Install flash-attn at startup; skipping the CUDA build avoids a long compile on Spaces.
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model_id = 'J-LAB/Florence-vl3'
+ model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).to(device).eval()
+ processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+
+ @spaces.GPU
+ def run_example(task_prompt, image):
+     inputs = processor(text=task_prompt, images=image, return_tensors="pt", padding=True).to(device)
+     generated_ids = model.generate(
+         input_ids=inputs["input_ids"],
+         pixel_values=inputs["pixel_values"],
+         max_new_tokens=1024,
+         early_stopping=False,
+         do_sample=False,
+         num_beams=3,
+     )
+     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+     parsed_answer = processor.post_process_generation(
+         generated_text,
+         task=task_prompt,
+         image_size=(image.width, image.height)
+     )
+     return parsed_answer
+
+ def process_image(image, task_prompt):
+     if isinstance(image, str):  # A file path was provided
+         image = Image.open(image)
+     elif isinstance(image, np.ndarray):
+         image = Image.fromarray(image)  # Convert a NumPy array to a PIL Image
+
+     # Map the UI task names to Florence task tokens
+     if task_prompt == 'Product Caption':
+         task_prompt = '<MORE_DETAILED_CAPTION>'
+     elif task_prompt == 'OCR':
+         task_prompt = '<OCR>'
+
+     results = run_example(task_prompt, image)
+
+     # Drop the task key and return only the text value
+     if results and task_prompt in results:
+         output_text = results[task_prompt]
+     else:
+         output_text = ""
+
+     return output_text
+
+ # Initialize the Inference API client
+ client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
+
+ # Response function for the chatbot
+ def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p, image):
+     image_result = ""
+     if image is not None:
+         try:
+             image_result_caption = process_image(image, 'Product Caption')
+             image_result_ocr = process_image(image, 'OCR')
+             image_result = image_result_caption + " " + image_result_ocr  # Concatenate caption and OCR results
+         except Exception as e:
+             image_result = f"An error occurred with image processing: {str(e)}"
+
+     # Build the full message, embedding the image description
+     full_message = message
+     if image_result:
+         full_message = f"\n<image>{image_result}</image>\n\n{message}"
+
+     # Add the system prompt (the Portuguese suffix tells the model that image
+     # descriptions arrive inside <image></image> tags) and the prior turns
+     messages = [{"role": "system", "content": f'{system_message} a descrição das imagens enviadas pelo usuário ficam dentro da tag <image> </image>'}]
+     for user, assistant in history:
+         if user:
+             messages.append({"role": "user", "content": user})
+         if assistant:
+             messages.append({"role": "assistant", "content": assistant})
+
+     messages.append({"role": "user", "content": full_message})
+
+     # Generate the response via the streaming chat API
+     response = ""
+     try:
+         for msg in client.chat_completion(messages, max_tokens=max_tokens, stream=True, temperature=temperature, top_p=top_p):
+             token = msg.choices[0].delta.content
+             if token:  # The final stream chunk can carry a None delta
+                 response += token
+     except Exception as e:
+         response = f"An error occurred: {str(e)}"  # Surface only the error text
+
+     # Update the history; image_result itself is never shown in the chat
+     history.append((message, response))
+     return history, gr.update(value=None), gr.update(value="")
+
+ # Set up the Gradio interface
+ with gr.Blocks() as demo:
+     chatbot = gr.Chatbot()
+     chat_input = gr.Textbox(placeholder="Enter message...", show_label=False)
+     image_input = gr.Image(type="filepath", label="Upload an image")
+     submit_btn = gr.Button("Send Message")
+     system_message = gr.Textbox(value="Você é um chatbot útil que sempre responde em português", label="System message")
+     max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
+     temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature")
+     top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+
+     submit_btn.click(respond, inputs=[chat_input, chatbot, system_message, max_tokens, temperature, top_p, image_input], outputs=[chatbot, image_input, chat_input])
+
+ if __name__ == "__main__":
+     demo.launch(debug=True, quiet=True)
pre-requirements.txt ADDED
@@ -0,0 +1 @@
+ pip>=23.0.0
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ huggingface_hub
+ spaces
+ transformers
+ timm
+ openai
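
On Hugging Face Spaces, `pre-requirements.txt` is installed before `requirements.txt`, so the `pip>=23.0.0` pin above upgrades pip itself before the main dependencies are resolved.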