Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,758 Bytes
2d9a728 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# InternVideo2 demo\n",
"\n",
"This notebook can be used to test the capabilities of InternVideo2 and to verify that the models are loaded correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"import sys\n",
"\n",
"# Make the parent of the current working directory importable\n",
"# (presumably the repository root that contains the `tools` package).\n",
"parent_dir = pathlib.Path(os.path.abspath('')).parent\n",
"sys.path.append(str(parent_dir))\n",
"\n",
"from tools.genrl_utils import viclip_global_instance\n",
"\n",
"# Presumably loads the model weights into the shared instance used by the cells below.\n",
"viclip_global_instance.instantiate()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"import torch\n",
"from tools.genrl_utils import INTERNVIDEO_PATH\n",
"\n",
"# Number of leading frames passed to the model; also the clip length used in the reshape below.\n",
"N_FRAMES = 8\n",
"\n",
"def _frame_from_video(video):\n",
"    \"\"\"Yield frames from an opened cv2.VideoCapture until a read fails.\"\"\"\n",
"    while video.isOpened():\n",
"        success, frame = video.read()\n",
"        if not success:\n",
"            break\n",
"        yield frame\n",
"\n",
"ASSET_PATH = pathlib.Path(os.path.abspath('')).parent / 'assets'\n",
"\n",
"# 83 % - A man in a gray sweater plays fetch with his dog in the snowy yard, throwing a toy and watching it run.\n",
"video_path = INTERNVIDEO_PATH / 'InternVideo2/multi_modality/demo/example1.mp4'\n",
"# # 99 % - A karate kick\n",
"# video_path = ASSET_PATH / 'video_samples/karate_kick.mp4'\n",
"# # 99 % - A headstand\n",
"# video_path = ASSET_PATH / 'video_samples/headstand.mp4'\n",
"\n",
"video = cv2.VideoCapture(str(video_path))\n",
"try:\n",
"    frames = list(_frame_from_video(video))\n",
"finally:\n",
"    # Always free the capture handle, even if decoding fails part-way.\n",
"    video.release()\n",
"\n",
"# Fail early with a readable message instead of an opaque np.stack / reshape error\n",
"# when the file is missing, unreadable or shorter than the clip length.\n",
"if len(frames) < N_FRAMES:\n",
"    raise ValueError(f'Expected at least {N_FRAMES} frames from {video_path}, got {len(frames)}')\n",
"\n",
"# NOTE(review): cv2 decodes frames in BGR channel order; confirm whether preprocess_transf expects RGB.\n",
"# Stack the first N_FRAMES frames, move channels ahead of height/width, and divide by 255.\n",
"clip = torch.from_numpy(np.stack(frames[:N_FRAMES], axis=0)).permute(0, 3, 1, 2) / 255\n",
"processed_frames = viclip_global_instance.viclip.preprocess_transf(clip)\n",
"# Add a leading batch dimension of 1; assumes preprocess_transf outputs 224x224 frames - TODO confirm.\n",
"frames_tensor = processed_frames.reshape(1, N_FRAMES, 3, 224, 224)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Candidate captions to be ranked against the video clip.\n",
"text_candidates = [\n",
"    \"A playful dog and its owner wrestle in the snowy yard, chasing each other with joyous abandon.\",\n",
"    \"A man in a gray coat walks through the snowy landscape, pulling a sleigh loaded with toys.\",\n",
"    \"A person dressed in a blue jacket shovels the snow-covered pavement outside their house.\",\n",
"    \"A pet dog excitedly runs through the snowy yard, chasing a toy thrown by its owner.\",\n",
"    \"A person stands on the snowy floor, pushing a sled loaded with blankets, preparing for a fun-filled ride.\",\n",
"    \"A man in a gray hat and coat walks through the snowy yard, carefully navigating around the trees.\",\n",
"    \"A playful dog slides down a snowy hill, wagging its tail with delight.\",\n",
"    \"A person in a blue jacket walks their pet on a leash, enjoying a peaceful winter walk among the trees.\",\n",
"    \"A man in a gray sweater plays fetch with his dog in the snowy yard, throwing a toy and watching it run.\",\n",
"    \"A person bundled up in a blanket walks through the snowy landscape, enjoying the serene winter scenery.\",\n",
"    \"A person playing with a kid in the street\",\n",
"    \"A group of friends playing bowling.\",\n",
"    \"A japanese girl eating noodles\",\n",
"    \"A painting by Monet\",\n",
"    \"A karate kick\",\n",
"    \"A headstand\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"viclip_model = viclip_global_instance.viclip\n",
"text_feat = viclip_model.get_txt_feat(text_candidates)\n",
"video_feat = viclip_model.get_vid_features(frames_tensor.to(viclip_model.device))\n",
"\n",
"# Scale the video-text similarities by 100, then softmax over the candidates for the single clip.\n",
"similarity_logits = 100.0 * video_feat @ text_feat.T\n",
"candidate_probs = similarity_logits.softmax(dim=-1)[0]\n",
"# topk over all candidates yields the full ranking, highest probability first.\n",
"sorted_probs, sorted_idxs = candidate_probs.topk(len(text_feat))\n",
"\n",
"# Report only the candidates whose probability exceeds 1 %.\n",
"for prob, idx in zip(sorted_probs, sorted_idxs):\n",
"    if prob > 0.01:\n",
"        print(int(prob * 100), '% - ', text_candidates[idx])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
|