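"""Gradio demo: summarize English text with facebook/bart-large-cnn via the
Hugging Face Inference API, then translate the summary to Chinese with
Baidu's machine-translation API."""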
import gradio as gr
import requests
import json
import re
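# Sample input for quick testing: the abstract and conclusion of the TarViS paper.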
# sentence="""
# The general domain of video segmentation is currently fragmented into different tasks spanning multiple benchmarks. Despite rapid progress in the state-of-the-art, current methods are overwhelmingly task-specific and cannot conceptually generalize to other tasks. Inspired by recent approaches with multi-task capability, we propose TarViS: a novel, unified network architecture that can be applied to any task that requires segmenting a set of arbitrarily defined 'targets' in video. Our approach is flexible with respect to how tasks define these targets, since it models the latter as abstract 'queries' which are then used to predict pixel-precise target masks. A single TarViS model can be trained jointly on a collection of datasets spanning different tasks, and can hot-swap between tasks during inference without any task-specific retraining. To demonstrate its effectiveness, we apply TarViS to four different tasks, namely Video Instance Segmentation (VIS), Video Panoptic Segmentation (VPS), Video Object Segmentation (VOS) and Point Exemplar-guided Tracking (PET). Our unified, jointly trained model achieves state-of-the-art performance on 5/7 benchmarks spanning these four tasks, and competitive performance on the remaining two. We presented TarViS: a novel, unified approach for tackling any task requiring pixel-precise segmentation of a set of targets in video. We adopt a generalized paradigm where the task-specific targets are encoded into a set of queries which are then input to our network together with the video features. The network is trained to produce segmentation masks for each target entity, but is inherently agnostic to the task-specific definition of these targets. To demonstrate the effectiveness of our approach, we applied it to four different video segmentation tasks (VIS, VPS, VOS, PET). We showed that a single TarViS model can be jointly trained for all tasks, and during inference can hot-swap between tasks without any task-specific fine-tuning. Our model achieved state-of-the-art performance on five benchmarks (YouTube VIS, OVIS, KITTI-STEP, VIPSeg and BURST) and has multiple, promising directions for future work.
# """
def get_access_token():
    """
    Generate an auth signature (Access Token) from the AK/SK pair.
    :return: access_token, or None on failure
    """
    url = "https://aip.baidubce.com/oauth/2.0/token"
    params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY}
    token = requests.post(url, params=params).json().get("access_token")
    return str(token) if token else None
# Baidu translation API credentials, at module level so get_access_token()
# can read them at call time (the original set them via `global` inside baidu).
API_KEY = "Z5vrSPB7bXSlfa6q9vH13lUa"
SECRET_KEY = "Hr5I3MBuSnRNzIu59gKaMYDHQRKelzCu"

def baidu(sentence):
    """Translate English text to Chinese via Baidu's texttrans v1 endpoint."""
    url = "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1?access_token=" + get_access_token()
    payload = json.dumps({
        "from": "en",
        "to": "zh",
        "q": sentence,
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    response = requests.post(url, headers=headers, data=payload)
    # Pull every translated segment ("dst") out of the raw JSON response text.
    regex = 'dst":"(.*?)","src'
    return re.findall(regex, response.text, re.S)
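# A sketch of a stricter alternative to the regex above: decode the JSON body
# and read the translations directly. The result.trans_result nesting is an
# assumption based on Baidu's documented response shape, which also carries
# the "dst"/"src" fields the regex relies on.
def baidu_json(sentence):
    url = "https://aip.baidubce.com/rpc/2.0/mt/texttrans/v1?access_token=" + get_access_token()
    payload = json.dumps({"from": "en", "to": "zh", "q": sentence})
    headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
    body = requests.post(url, headers=headers, data=payload).json()
    # Assumed shape: {"result": {"trans_result": [{"dst": ..., "src": ...}, ...]}}
    return [seg["dst"] for seg in body.get("result", {}).get("trans_result", [])]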
def query(payload):
    """POST a payload to the hosted bart-large-cnn summarization endpoint."""
    API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    headers = {"Authorization": "Bearer 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"}
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()
def bart(sentence):
    """Summarize the input text with facebook/bart-large-cnn."""
    summary = query({
        "inputs": sentence,
        # Sampling and length settings tuned for abstract-length summaries.
        "parameters": {"top_p": 0.6, "repetition_penalty": 1.0, "max_length": 350, "min_length": 250},
        # Wait for the hosted model to load instead of failing immediately.
        "options": {"wait_for_model": True},
    })
    return summary[0]['summary_text']
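# A minimal defensive variant (a sketch, not part of the original app): the
# Inference API usually reports failures as a JSON object with an "error"
# field, a shape bart() above would turn into a confusing KeyError/TypeError.
def bart_checked(sentence):
    result = query({
        "inputs": sentence,
        "parameters": {"top_p": 0.6, "repetition_penalty": 1.0, "max_length": 350, "min_length": 250},
        "options": {"wait_for_model": True},
    })
    if isinstance(result, dict) and "error" in result:
        raise RuntimeError(f"Inference API error: {result['error']}")
    return result[0]['summary_text']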
def main2(sentence):
    """Summarize the English input, then translate the summary to Chinese."""
    summary = bart(sentence)
    trans = baidu(summary)[0]
    return trans
if __name__ == '__main__':
    iface = gr.Interface(fn=main2, inputs="text", outputs="text")
    iface.launch()
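    # Tip: launch(share=True) exposes a temporary public URL via Gradio's tunnel.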