ManishThota committed on
Commit
9916357
1 Parent(s): 865f315

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -6,6 +6,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
6
  #Setting device to cuda
7
  torch.set_default_device("cuda")
8
 
 
 
9
  # # Ensure GPU usage if available
10
  # device = "cuda" if torch.cuda.is_available() else "cpu"
11
  # torch.set_default_tensor_type('torch.cuda.FloatTensor' if device=='cuda' else 'torch.FloatTensor')
@@ -42,6 +44,7 @@ def gradio_predict(image, question, max_tokens):
42
  answer = predict_answer(image, question, max_tokens)
43
  return answer
44
 
 
45
  # Define the Gradio interface
46
  iface = gr.Interface(
47
  fn=gradio_predict,
@@ -49,6 +52,7 @@ iface = gr.Interface(
49
  gr.Textbox(label="Question", placeholder="e.g. Can you explain the slide?", scale=4),
50
  gr.Slider(2, 500, value=25, label="Token Count", info="Choose between 2 and 500")],
51
  outputs=gr.TextArea(label="Answer"),
 
52
  title="Sparrow - Tiny 3B | Visual Question Answering",
53
  description="An interactive chat model that can answer questions about images in an Academic context. \n We can input images, and the system will analyze them to provide information about their contents. I've utilized this capability by feeding slides from PowerPoint presentations used in classes and the lecture content passed as text. Consequently, the model now mimics the behavior and responses of my professors. So, if I present any PowerPoint slide, it explains it just like my professor would, further it can be personalized.",
54
  )
 
6
  #Setting device to cuda
7
  torch.set_default_device("cuda")
8
 
9
+ torch.hub.download_url_to_file('https://github.com/manishkumart/SparrowVQE/blob/main/data/Images/week_01/week_01_page_024.png', 'week_01_page_024.png')
10
+
11
  # # Ensure GPU usage if available
12
  # device = "cuda" if torch.cuda.is_available() else "cpu"
13
  # torch.set_default_tensor_type('torch.cuda.FloatTensor' if device=='cuda' else 'torch.FloatTensor')
 
44
  answer = predict_answer(image, question, max_tokens)
45
  return answer
46
 
47
+ examples = [["week_01_page_024.png", "Can you explain the slide?"]]
48
  # Define the Gradio interface
49
  iface = gr.Interface(
50
  fn=gradio_predict,
 
52
  gr.Textbox(label="Question", placeholder="e.g. Can you explain the slide?", scale=4),
53
  gr.Slider(2, 500, value=25, label="Token Count", info="Choose between 2 and 500")],
54
  outputs=gr.TextArea(label="Answer"),
55
+ examples=examples
56
  title="Sparrow - Tiny 3B | Visual Question Answering",
57
  description="An interactive chat model that can answer questions about images in an Academic context. \n We can input images, and the system will analyze them to provide information about their contents. I've utilized this capability by feeding slides from PowerPoint presentations used in classes and the lecture content passed as text. Consequently, the model now mimics the behavior and responses of my professors. So, if I present any PowerPoint slide, it explains it just like my professor would, further it can be personalized.",
58
  )