captain-awesome committed on
Commit
90d7fd0
1 Parent(s): 5c73650

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -32
app.py CHANGED
@@ -1,34 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
- models = {
4
- "object-detection": "facebook/detr-resnet-50",
5
- # "image-classification": "microsoft/resnet-50",
6
- # "text-to-image": "runwayml/stable-diffusion-v1-5",
7
- # "image-to-text": "nlpconnect/vit-gpt2-image-captioning",
8
- # "audio-classification": "mtg-upf/discogs-maest-30s-pw-73e-ts",
9
- # "audio-to-audio": "speechbrain/mtl-mimic-voicebank",
10
- # "automatic-speech-recognition": "jonatasgrosman/wav2vec2-large-xlsr-53-english",
11
- # "conversational": "microsoft/DialoGPT-medium",
12
- # "feature-extraction": "cambridgeltl/SapBERT-from-PubMedBERT-fulltext",
13
- # "fill-mask": "bert-base-uncased",
14
- # "question-answering": "deepset/roberta-base-squad2",
15
- # "summarization": "facebook/bart-large-cnn",
16
- # "text-classification": "cardiffnlp/twitter-roberta-base-sentiment-latest",
17
- # "text-generation": "gpt2",
18
- # "text2text-generation": "vennify/t5-base-grammar-correction",
19
- # "translation": "Helsinki-NLP/opus-mt-fr-en",
20
- # "zero-shot-classification": "facebook/bart-large-mnli",
21
- # "sentence-similarity": "sentence-transformers/all-mpnet-base-v2",
22
- # "text-to-speech": "facebook/mms-tts-eng",
23
- # "token-classification": "benjamin/wtp-canine-s-1l",
24
- # "document-question-answering": "fxmarty/tiny-doc-qa-vision-encoder-decoder",
25
- # "visual-question-answering": "Salesforce/blip-vqa-capfilt-large",
26
- }
27
-
28
- with gr.Blocks() as demo:
29
- trust_remote_code=True
30
- gr.Markdown("## Gradio Pipelines Tasks")
31
- for k, v in models.items():
32
- with gr.Tab(k):
33
- gr.load(v, src="models")
34
- demo.launch()
 
 
1
+ Hugging Face's logo
2
+ Hugging Face
3
+ Search models, datasets, users...
4
+ Models
5
+ Datasets
6
+ Spaces
7
+ Posts
8
+ Docs
9
+ Solutions
10
+ Pricing
11
+
12
+
13
+
14
+ Spaces:
15
+
16
+ Satyacoder
17
+ /
18
+ vision_test
19
+
20
+
21
+ like
22
+ 0
23
+ App
24
+ Files
25
+ Community
26
+ vision_test
27
+ /
28
+ app.py
29
+
30
+ Satyacoder's picture
31
+ Satyacoder
32
+ Update app.py
33
+ 8602d39
34
+ 5 months ago
35
+ raw
36
+ history
37
+ blame
38
+ contribute
39
+ delete
40
+ No virus
41
+ 1.72 kB
42
+ from transformers import DetrImageProcessor, DetrForObjectDetection
43
+ from transformers import BlipProcessor, BlipForConditionalGeneration
44
+ import torch
45
+ from PIL import Image
46
+ import requests
47
  import gradio as gr
48
 
49
# Checkpoint identifiers, named once so each processor/model pair is guaranteed
# to be loaded from the same checkpoint.
_DETECTION_CHECKPOINT = "facebook/detr-resnet-50"
_CAPTION_CHECKPOINT = "Salesforce/blip-image-captioning-large"

# Object-detection pair (DETR). Created once at module import;
# predict_bounding_boxes() reuses these objects on every call.
box_processor = DetrImageProcessor.from_pretrained(_DETECTION_CHECKPOINT)
box_model = DetrForObjectDetection.from_pretrained(_DETECTION_CHECKPOINT)

# Image-captioning pair (BLIP), likewise created once and shared by all calls.
caption_processor = BlipProcessor.from_pretrained(_CAPTION_CHECKPOINT)
caption_model = BlipForConditionalGeneration.from_pretrained(_CAPTION_CHECKPOINT)
54
+
55
def predict_bounding_boxes(imageurl: str) -> dict:
    """Download an image, detect objects in it and generate a caption.

    Args:
        imageurl: HTTP(S) URL of the image to analyse. Surrounding whitespace
            is ignored.

    Returns:
        On success, a dict with two keys:
            "image label": the caption produced by ``caption_model``.
            "detections": a list of dicts, one per detected object, each with
                "score" (float), "label" (class name looked up in
                ``box_model.config.id2label``) and "box" (list of floats as
                returned by the detection post-processing).
        On any failure, a dict with the single key "error" holding the
        exception message. The function never raises, so the UI always
        receives JSON-serialisable output.
    """
    # Function-scope import keeps this block self-contained without touching
    # the file's import section.
    from io import BytesIO

    try:
        # NOTE(security): the URL is user-supplied and fetched server-side, so
        # it can target internal hosts (SSRF). Restrict schemes/hosts before
        # exposing this publicly.
        #
        # A timeout prevents a stalled remote host from hanging the worker
        # forever. Reading `response.content` (instead of the undecoded
        # `response.raw` stream) handles gzip/deflate transfer encodings and
        # lets requests release the connection back to its pool.
        response = requests.get(imageurl.strip(), timeout=30)
        response.raise_for_status()

        # Normalise to RGB up front so palette, RGBA and grayscale images are
        # handled the same way by both the detector and the captioner.
        image_data = Image.open(BytesIO(response.content)).convert("RGB")

        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            detection_inputs = box_processor(images=image_data, return_tensors="pt")
            detection_outputs = box_model(**detection_inputs)

        # PIL reports (width, height); the post-processor is given the
        # reversed pair, i.e. (height, width), to rescale boxes to the
        # original image size.
        target_sizes = torch.tensor([image_data.size[::-1]])
        results = box_processor.post_process_object_detection(
            detection_outputs, target_sizes=target_sizes, threshold=0.70
        )[0]

        detections = [
            {
                "score": score.item(),
                "label": box_model.config.id2label[class_id.item()],
                "box": box.tolist(),
            }
            for score, class_id, box in zip(
                results["scores"], results["labels"], results["boxes"]
            )
        ]

        with torch.no_grad():
            caption_inputs = caption_processor(image_data, return_tensors="pt")
            caption_ids = caption_model.generate(**caption_inputs)
        caption = caption_processor.decode(caption_ids[0], skip_special_tokens=True)

        return {"image label": caption, "detections": detections}

    except Exception as e:
        # Deliberate catch-all at the UI boundary: download, decoding and model
        # failures are all reported to the caller as JSON instead of crashing
        # the request.
        return {"error": str(e)}
78
+
79
# Wrap the predictor in a Gradio interface: one free-text input (the image URL)
# and one JSON output (the dict returned by predict_bounding_boxes).
app = gr.Interface(
    fn=predict_bounding_boxes,
    inputs="text",
    outputs="json",
)
# NOTE(review): `api` is assigned as a plain attribute on the Interface object;
# nothing visible here reads it — confirm it has an effect in the installed
# Gradio version.
app.api = True
# Start serving; this runs at module level, i.e. whenever the file is executed.
app.launch()