Update app.py
Browse files
app.py
CHANGED
@@ -8,10 +8,10 @@ from PIL import Image
|
|
8 |
|
9 |
|
10 |
class _MLPVectorProjector(nn.Module):
|
11 |
-
def __init__(
|
12 |
self, input_hidden_size: int, lm_hidden_size: int, num_layers: int, width: int
|
13 |
):
|
14 |
-
super(_MLPVectorProjector, self).__init__()
|
15 |
self.mlps = nn.ModuleList()
|
16 |
for _ in range(width):
|
17 |
mlp = [nn.Linear(input_hidden_size, lm_hidden_size, bias=False)]
|
@@ -59,8 +59,13 @@ def encode_image(image_path):
|
|
59 |
return img_embedding
|
60 |
|
61 |
#Get the projection model
|
|
|
|
|
62 |
|
63 |
#Get the fine-tuned phi-2 model
|
|
|
|
|
|
|
64 |
|
65 |
|
66 |
def example_inference(input_text, count): #, image, img_qn, audio):
|
@@ -87,6 +92,7 @@ def textMode(text, count):
|
|
87 |
|
88 |
def imageMode(image, question):
|
89 |
image_embedding = encode_image(image)
|
|
|
90 |
return "In progress"
|
91 |
|
92 |
def audioMode(audio):
|
@@ -120,7 +126,7 @@ with gr.Blocks() as demo:
|
|
120 |
text_output = gr.Textbox(label="Chat GPT like text")
|
121 |
with gr.Tab("Image mode"):
|
122 |
with gr.Row():
|
123 |
-
image_input = gr.Image()
|
124 |
image_text_input = gr.Textbox(placeholder="Enter a question/prompt around the image", label="Question/Prompt")
|
125 |
image_button = gr.Button("Submit")
|
126 |
image_text_output = gr.Textbox(label="Answer")
|
|
|
8 |
|
9 |
|
10 |
class _MLPVectorProjector(nn.Module):
|
11 |
+
def __init__(
|
12 |
self, input_hidden_size: int, lm_hidden_size: int, num_layers: int, width: int
|
13 |
):
|
14 |
+
super(_MLPVectorProjector, self).__init__()
|
15 |
self.mlps = nn.ModuleList()
|
16 |
for _ in range(width):
|
17 |
mlp = [nn.Linear(input_hidden_size, lm_hidden_size, bias=False)]
|
|
|
59 |
return img_embedding
|
60 |
|
61 |
# Get the projection head.
# Maps image-encoder embeddings (512-d) into the LM hidden size (2560-d);
# constructor args (1 layer, width 4) must match what the checkpoint was
# trained with.
img_proj_head = _MLPVectorProjector(512, 2560, 1, 4).to("cuda")
# NOTE(review): torch.load unpickles arbitrary objects — only load trusted
# checkpoint files.
img_proj_head.load_state_dict(torch.load('projection_finetuned.pth'))

# Get the fine-tuned phi-2 language model.
phi2_finetuned = AutoModelForCausalLM.from_pretrained(
    "phi2_adaptor_fineTuned", trust_remote_code=True,
    torch_dtype=torch.float32,
).to("cuda")
|
69 |
|
70 |
|
71 |
def example_inference(input_text, count): #, image, img_qn, audio):
|
|
|
92 |
|
93 |
def imageMode(image, question):
    """Image-mode handler: embed the image and project it into the LM space.

    Full image+question inference is not wired up yet, so the returned
    answer is a placeholder; `question` is currently unused.
    """
    # Embed the uploaded image (see encode_image).
    img_emb = encode_image(image)
    # Project into the language model's hidden space. The result is computed
    # but not consumed yet — WIP scaffolding for the full pipeline.
    imgToTextEmb = img_proj_head(img_emb)
    return "In progress"
|
97 |
|
98 |
def audioMode(audio):
|
|
|
126 |
text_output = gr.Textbox(label="Chat GPT like text")
|
127 |
with gr.Tab("Image mode"):
|
128 |
with gr.Row():
|
129 |
+
image_input = gr.Image(type="filepath")
|
130 |
image_text_input = gr.Textbox(placeholder="Enter a question/prompt around the image", label="Question/Prompt")
|
131 |
image_button = gr.Button("Submit")
|
132 |
image_text_output = gr.Textbox(label="Answer")
|