Matanew1 committed on
Commit
201bd55
1 Parent(s): 2d4d17f
Files changed (2) hide show
  1. app.py +21 -28
  2. temp_image.jpg +0 -0
app.py CHANGED
@@ -1,53 +1,46 @@
1
  from fastapi import FastAPI, File, UploadFile, Form
2
  from fastapi.responses import JSONResponse
3
- from transformers import pipeline
4
  from fastapi.middleware.cors import CORSMiddleware
 
5
 
6
  app = FastAPI()
7
 
8
  app.add_middleware(
9
  CORSMiddleware,
10
- allow_origins=["*"], # Allows all origins
11
  allow_credentials=True,
12
- allow_methods=["*"], # Allows all methods
13
- allow_headers=["*"], # Allows all headers
14
  )
15
 
16
- # Initialize the VQA pipeline
17
- vqa_pipe = pipeline("visual-question-answering", model="Salesforce/blip-vqa-capfilt-large", max_new_tokens=20)
 
18
 
19
 
20
  @app.post('/answer_question')
21
  async def answer_question(image: UploadFile = File(...), question: str = Form(...)):
22
- """
23
- This is the VQA API
24
- Call this api passing an image and a question about the image
25
- ---
26
- parameters:
27
- - name: image
28
- in: formData
29
- type: file
30
- required: true
31
- - name: question
32
- in: formData
33
- type: string
34
- required: true
35
- responses:
36
- 200:
37
- description: Returns the answer to the question about the image
38
- """
39
- # Save the image locally
40
  image_path = 'temp_image.jpg'
41
  with open(image_path, 'wb') as f:
42
  f.write(await image.read())
43
 
44
- # Use the VQA pipeline to get the answer
45
- result = vqa_pipe(image=image_path, question=question)
46
 
47
- # Return the answer as JSON
48
- return JSONResponse(content={'answer': result[0]['answer']})
 
 
 
 
 
 
 
 
49
 
50
 
51
  if __name__ == '__main__':
52
  import uvicorn
 
53
  uvicorn.run(app, host="0.0.0.0", port=8080)
 
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from transformers import BlipProcessor, TFBlipForQuestionAnswering
from PIL import Image

app = FastAPI()

# Allow cross-origin requests from any host.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# rejected by browsers for credentialed requests and is overly permissive for
# production — restrict allow_origins to known hosts when deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the BLIP VQA processor and TensorFlow model once at import time so
# every request reuses the same weights instead of reloading them.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
model = TFBlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
22
@app.post('/answer_question')
async def answer_question(image: UploadFile = File(...), question: str = Form(...)):
    """Answer a natural-language question about an uploaded image.

    Parameters
    ----------
    image : UploadFile
        Image file sent as multipart form data.
    question : str
        Question about the image, sent as a form field.

    Returns
    -------
    JSONResponse
        ``{"answer": <str>}`` — the model's decoded answer.
    """
    from io import BytesIO  # stdlib; local import keeps module imports unchanged

    # Decode the upload entirely in memory.  The previous version wrote every
    # request to the single shared path 'temp_image.jpg', so concurrent
    # requests clobbered each other's images and the file was never removed.
    raw = await image.read()
    # Force 3-channel RGB so palette/RGBA/grayscale uploads are handled too.
    pil_image = Image.open(BytesIO(raw)).convert("RGB")

    # Tokenize the question and preprocess the image into TF tensors.
    inputs = processor(images=pil_image, text=question, return_tensors="tf")

    # NOTE(review): model.generate is a blocking call inside an async handler;
    # under concurrent load consider offloading via run_in_executor — confirm
    # whether that matters for this deployment.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        pixel_values=inputs["pixel_values"],
    )
    answer = processor.decode(outputs[0], skip_special_tokens=True)

    return JSONResponse(content={'answer': answer})
43
if __name__ == '__main__':
    # Imported inside the guard so serving via an external ASGI runner
    # (e.g. `uvicorn app:app`) does not require this import at module load.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8080)
temp_image.jpg ADDED