visheratin committed on
Commit
3aabbc8
1 Parent(s): 9d8bdb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -3
app.py CHANGED
@@ -61,7 +61,7 @@ def answer_question(image, question, max_crops, num_tokens, sample, temperature,
61
  "eos_token_id": processor.tokenizer.eos_token_id,
62
  "pad_token_id": processor.tokenizer.eos_token_id,
63
  "temperature": temperature,
64
- "do_sample": sample,
65
  "top_k": top_k,
66
  }
67
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
@@ -82,7 +82,10 @@ def answer_question(image, question, max_crops, num_tokens, sample, temperature,
82
  with gr.Blocks() as demo:
83
  gr.HTML("<h1 class='gradio-heading'><center>MC-LLaVA 3B</center></h1>")
84
  gr.HTML(
85
- "<center><p class='gradio-sub-heading'>MC-LLaVA 3B is a model that can answer questions about small details in high-resolution images. Check out the <a href='https://huggingface.co/visheratin/MC-LLaVA-3b'>model card</a> for more details. If you have any questions or ideas hot to make the model better, <a href='https://x.com/visheratin'>let me know</a></p></center>"
 
 
 
86
  )
87
  with gr.Group():
88
  with gr.Row():
@@ -107,4 +110,4 @@ with gr.Blocks() as demo:
107
  submit.click(answer_question, [img, prompt, max_crops, num_tokens, sample, temperature, top_k], output)
108
  prompt.submit(answer_question, [img, prompt, max_crops, num_tokens, sample, temperature, top_k], output)
109
 
110
- demo.queue().launch(debug=True)
 
61
  "eos_token_id": processor.tokenizer.eos_token_id,
62
  "pad_token_id": processor.tokenizer.eos_token_id,
63
  "temperature": temperature,
64
+ "sample": sample,
65
  "top_k": top_k,
66
  }
67
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
 
82
  with gr.Blocks() as demo:
83
  gr.HTML("<h1 class='gradio-heading'><center>MC-LLaVA 3B</center></h1>")
84
  gr.HTML(
85
+ "<center><p class='gradio-sub-heading'>MC-LLaVA 3B is a model that can answer questions about small details in high-resolution images. Check out the <a href='https://huggingface.co/visheratin/MC-LLaVA-3b'>model card</a> for more details. If you have any questions or ideas on how to make the model better, <a href='https://x.com/visheratin'>let me know</a>.</p></center>"
86
+ )
87
+ gr.HTML(
88
+ "<center><p class='gradio-sub-heading'>There are two main parameters - max number of crops and number of image tokens. The first one controls into how many parts the image will be cut. This is especially useful when you are working with high-resolution images. The second parameter controls how many image features will be extracted for the LLM to process. You can increase it if you are trying to extract info from a small part of the image, e.g., text.</p></center>"
89
  )
90
  with gr.Group():
91
  with gr.Row():
 
110
  submit.click(answer_question, [img, prompt, max_crops, num_tokens, sample, temperature, top_k], output)
111
  prompt.submit(answer_question, [img, prompt, max_crops, num_tokens, sample, temperature, top_k], output)
112
 
113
+ demo.queue().launch()