Soutrik committed on
Commit
16d3463
·
1 Parent(s): 4f7a99d

Right set of test code for LitServe: server side, client side, and benchmark code

Browse files
client.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Copyright The Lightning AI team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import requests
16
+
17
# Query the local prediction endpoint once and print the raw reply.
# The timeout makes the script fail fast instead of hanging forever when the
# server is unreachable or stops responding.
response = requests.post(
    "http://127.0.0.1:8080/predict", json={"input": 4.0}, timeout=30
)
print(f"Status: {response.status_code}\nResponse:\n {response.text}")
notebooks/training_lightning_tests.ipynb CHANGED
@@ -829,6 +829,128 @@
829
  " return loggers"
830
  ]
831
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
832
  {
833
  "cell_type": "code",
834
  "execution_count": null,
 
829
  " return loggers"
830
  ]
831
  },
832
+ {
833
+ "cell_type": "code",
834
+ "execution_count": 1,
835
+ "metadata": {},
836
+ "outputs": [
837
+ {
838
+ "name": "stderr",
839
+ "output_type": "stream",
840
+ "text": [
841
+ "/anaconda/envs/emlo_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
842
+ " from .autonotebook import tqdm as notebook_tqdm\n"
843
+ ]
844
+ },
845
+ {
846
+ "name": "stdout",
847
+ "output_type": "stream",
848
+ "text": [
849
+ "['bat_resnext26ts', 'beit_base_patch16_224', 'beit_base_patch16_384', 'beit_large_patch16_224', 'beit_large_patch16_384', 'beit_large_patch16_512', 'beitv2_base_patch16_224', 'beitv2_large_patch16_224', 'botnet26t_256', 'botnet50ts_256', 'caformer_b36', 'caformer_m36', 'caformer_s18', 'caformer_s36', 'cait_m36_384', 'cait_m48_448', 'cait_s24_224', 'cait_s24_384', 'cait_s36_384', 'cait_xs24_384', 'cait_xxs24_224', 'cait_xxs24_384', 'cait_xxs36_224', 'cait_xxs36_384', 'coat_lite_medium', 'coat_lite_medium_384', 'coat_lite_mini', 'coat_lite_small', 'coat_lite_tiny', 'coat_mini', 'coat_small', 'coat_tiny', 'coatnet_0_224', 'coatnet_0_rw_224', 'coatnet_1_224', 'coatnet_1_rw_224', 'coatnet_2_224', 'coatnet_2_rw_224', 'coatnet_3_224', 'coatnet_3_rw_224', 'coatnet_4_224', 'coatnet_5_224', 'coatnet_bn_0_rw_224', 'coatnet_nano_cc_224', 'coatnet_nano_rw_224', 'coatnet_pico_rw_224', 'coatnet_rmlp_0_rw_224', 'coatnet_rmlp_1_rw2_224', 'coatnet_rmlp_1_rw_224', 'coatnet_rmlp_2_rw_224', 'coatnet_rmlp_2_rw_384', 'coatnet_rmlp_3_rw_224', 'coatnet_rmlp_nano_rw_224', 'coatnext_nano_rw_224', 'convformer_b36', 'convformer_m36', 'convformer_s18', 'convformer_s36', 'convit_base', 'convit_small', 'convit_tiny', 'convmixer_768_32', 'convmixer_1024_20_ks9_p14', 'convmixer_1536_20', 'convnext_atto', 'convnext_atto_ols', 'convnext_base', 'convnext_femto', 'convnext_femto_ols', 'convnext_large', 'convnext_large_mlp', 'convnext_nano', 'convnext_nano_ols', 'convnext_pico', 'convnext_pico_ols', 'convnext_small', 'convnext_tiny', 'convnext_tiny_hnf', 'convnext_xlarge', 'convnext_xxlarge', 'convnextv2_atto', 'convnextv2_base', 'convnextv2_femto', 'convnextv2_huge', 'convnextv2_large', 'convnextv2_nano', 'convnextv2_pico', 'convnextv2_small', 'convnextv2_tiny', 'crossvit_9_240', 'crossvit_9_dagger_240', 'crossvit_15_240', 'crossvit_15_dagger_240', 'crossvit_15_dagger_408', 'crossvit_18_240', 'crossvit_18_dagger_240', 'crossvit_18_dagger_408', 'crossvit_base_240', 'crossvit_small_240', 
'crossvit_tiny_240', 'cs3darknet_focus_l', 'cs3darknet_focus_m', 'cs3darknet_focus_s', 'cs3darknet_focus_x', 'cs3darknet_l', 'cs3darknet_m', 'cs3darknet_s', 'cs3darknet_x', 'cs3edgenet_x', 'cs3se_edgenet_x', 'cs3sedarknet_l', 'cs3sedarknet_x', 'cs3sedarknet_xdw', 'cspdarknet53', 'cspresnet50', 'cspresnet50d', 'cspresnet50w', 'cspresnext50', 'darknet17', 'darknet21', 'darknet53', 'darknetaa53', 'davit_base', 'davit_base_fl', 'davit_giant', 'davit_huge', 'davit_huge_fl', 'davit_large', 'davit_small', 'davit_tiny', 'deit3_base_patch16_224', 'deit3_base_patch16_384', 'deit3_huge_patch14_224', 'deit3_large_patch16_224', 'deit3_large_patch16_384', 'deit3_medium_patch16_224', 'deit3_small_patch16_224', 'deit3_small_patch16_384', 'deit_base_distilled_patch16_224', 'deit_base_distilled_patch16_384', 'deit_base_patch16_224', 'deit_base_patch16_384', 'deit_small_distilled_patch16_224', 'deit_small_patch16_224', 'deit_tiny_distilled_patch16_224', 'deit_tiny_patch16_224', 'densenet121', 'densenet161', 'densenet169', 'densenet201', 'densenet264d', 'densenetblur121d', 'dla34', 'dla46_c', 'dla46x_c', 'dla60', 'dla60_res2net', 'dla60_res2next', 'dla60x', 'dla60x_c', 'dla102', 'dla102x', 'dla102x2', 'dla169', 'dm_nfnet_f0', 'dm_nfnet_f1', 'dm_nfnet_f2', 'dm_nfnet_f3', 'dm_nfnet_f4', 'dm_nfnet_f5', 'dm_nfnet_f6', 'dpn48b', 'dpn68', 'dpn68b', 'dpn92', 'dpn98', 'dpn107', 'dpn131', 'eca_botnext26ts_256', 'eca_halonext26ts', 'eca_nfnet_l0', 'eca_nfnet_l1', 'eca_nfnet_l2', 'eca_nfnet_l3', 'eca_resnet33ts', 'eca_resnext26ts', 'eca_vovnet39b', 'ecaresnet26t', 'ecaresnet50d', 'ecaresnet50d_pruned', 'ecaresnet50t', 'ecaresnet101d', 'ecaresnet101d_pruned', 'ecaresnet200d', 'ecaresnet269d', 'ecaresnetlight', 'ecaresnext26t_32x4d', 'ecaresnext50t_32x4d', 'edgenext_base', 'edgenext_small', 'edgenext_small_rw', 'edgenext_x_small', 'edgenext_xx_small', 'efficientformer_l1', 'efficientformer_l3', 'efficientformer_l7', 'efficientformerv2_l', 'efficientformerv2_s0', 'efficientformerv2_s1', 
'efficientformerv2_s2', 'efficientnet_b0', 'efficientnet_b0_g8_gn', 'efficientnet_b0_g16_evos', 'efficientnet_b0_gn', 'efficientnet_b1', 'efficientnet_b1_pruned', 'efficientnet_b2', 'efficientnet_b2_pruned', 'efficientnet_b3', 'efficientnet_b3_g8_gn', 'efficientnet_b3_gn', 'efficientnet_b3_pruned', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_b8', 'efficientnet_blur_b0', 'efficientnet_cc_b0_4e', 'efficientnet_cc_b0_8e', 'efficientnet_cc_b1_8e', 'efficientnet_el', 'efficientnet_el_pruned', 'efficientnet_em', 'efficientnet_es', 'efficientnet_es_pruned', 'efficientnet_h_b5', 'efficientnet_l2', 'efficientnet_lite0', 'efficientnet_lite1', 'efficientnet_lite2', 'efficientnet_lite3', 'efficientnet_lite4', 'efficientnet_x_b3', 'efficientnet_x_b5', 'efficientnetv2_l', 'efficientnetv2_m', 'efficientnetv2_rw_m', 'efficientnetv2_rw_s', 'efficientnetv2_rw_t', 'efficientnetv2_s', 'efficientnetv2_xl', 'efficientvit_b0', 'efficientvit_b1', 'efficientvit_b2', 'efficientvit_b3', 'efficientvit_l1', 'efficientvit_l2', 'efficientvit_l3', 'efficientvit_m0', 'efficientvit_m1', 'efficientvit_m2', 'efficientvit_m3', 'efficientvit_m4', 'efficientvit_m5', 'ese_vovnet19b_dw', 'ese_vovnet19b_slim', 'ese_vovnet19b_slim_dw', 'ese_vovnet39b', 'ese_vovnet39b_evos', 'ese_vovnet57b', 'ese_vovnet99b', 'eva02_base_patch14_224', 'eva02_base_patch14_448', 'eva02_base_patch16_clip_224', 'eva02_enormous_patch14_clip_224', 'eva02_large_patch14_224', 'eva02_large_patch14_448', 'eva02_large_patch14_clip_224', 'eva02_large_patch14_clip_336', 'eva02_small_patch14_224', 'eva02_small_patch14_336', 'eva02_tiny_patch14_224', 'eva02_tiny_patch14_336', 'eva_giant_patch14_224', 'eva_giant_patch14_336', 'eva_giant_patch14_560', 'eva_giant_patch14_clip_224', 'eva_large_patch14_196', 'eva_large_patch14_336', 'fastvit_ma36', 'fastvit_mci0', 'fastvit_mci1', 'fastvit_mci2', 'fastvit_s12', 'fastvit_sa12', 'fastvit_sa24', 'fastvit_sa36', 'fastvit_t8', 'fastvit_t12', 'fbnetc_100', 
'fbnetv3_b', 'fbnetv3_d', 'fbnetv3_g', 'flexivit_base', 'flexivit_large', 'flexivit_small', 'focalnet_base_lrf', 'focalnet_base_srf', 'focalnet_huge_fl3', 'focalnet_huge_fl4', 'focalnet_large_fl3', 'focalnet_large_fl4', 'focalnet_small_lrf', 'focalnet_small_srf', 'focalnet_tiny_lrf', 'focalnet_tiny_srf', 'focalnet_xlarge_fl3', 'focalnet_xlarge_fl4', 'gc_efficientnetv2_rw_t', 'gcresnet33ts', 'gcresnet50t', 'gcresnext26ts', 'gcresnext50ts', 'gcvit_base', 'gcvit_small', 'gcvit_tiny', 'gcvit_xtiny', 'gcvit_xxtiny', 'gernet_l', 'gernet_m', 'gernet_s', 'ghostnet_050', 'ghostnet_100', 'ghostnet_130', 'ghostnetv2_100', 'ghostnetv2_130', 'ghostnetv2_160', 'gmixer_12_224', 'gmixer_24_224', 'gmlp_b16_224', 'gmlp_s16_224', 'gmlp_ti16_224', 'halo2botnet50ts_256', 'halonet26t', 'halonet50ts', 'halonet_h1', 'haloregnetz_b', 'hardcorenas_a', 'hardcorenas_b', 'hardcorenas_c', 'hardcorenas_d', 'hardcorenas_e', 'hardcorenas_f', 'hgnet_base', 'hgnet_small', 'hgnet_tiny', 'hgnetv2_b0', 'hgnetv2_b1', 'hgnetv2_b2', 'hgnetv2_b3', 'hgnetv2_b4', 'hgnetv2_b5', 'hgnetv2_b6', 'hiera_base_224', 'hiera_base_abswin_256', 'hiera_base_plus_224', 'hiera_huge_224', 'hiera_large_224', 'hiera_small_224', 'hiera_small_abswin_256', 'hiera_tiny_224', 'hieradet_small', 'hrnet_w18', 'hrnet_w18_small', 'hrnet_w18_small_v2', 'hrnet_w18_ssld', 'hrnet_w30', 'hrnet_w32', 'hrnet_w40', 'hrnet_w44', 'hrnet_w48', 'hrnet_w48_ssld', 'hrnet_w64', 'inception_next_base', 'inception_next_small', 'inception_next_tiny', 'inception_resnet_v2', 'inception_v3', 'inception_v4', 'lambda_resnet26rpt_256', 'lambda_resnet26t', 'lambda_resnet50ts', 'lamhalobotnet50ts_256', 'lcnet_035', 'lcnet_050', 'lcnet_075', 'lcnet_100', 'lcnet_150', 'legacy_senet154', 'legacy_seresnet18', 'legacy_seresnet34', 'legacy_seresnet50', 'legacy_seresnet101', 'legacy_seresnet152', 'legacy_seresnext26_32x4d', 'legacy_seresnext50_32x4d', 'legacy_seresnext101_32x4d', 'legacy_xception', 'levit_128', 'levit_128s', 'levit_192', 'levit_256', 'levit_256d', 
'levit_384', 'levit_384_s8', 'levit_512', 'levit_512_s8', 'levit_512d', 'levit_conv_128', 'levit_conv_128s', 'levit_conv_192', 'levit_conv_256', 'levit_conv_256d', 'levit_conv_384', 'levit_conv_384_s8', 'levit_conv_512', 'levit_conv_512_s8', 'levit_conv_512d', 'maxvit_base_tf_224', 'maxvit_base_tf_384', 'maxvit_base_tf_512', 'maxvit_large_tf_224', 'maxvit_large_tf_384', 'maxvit_large_tf_512', 'maxvit_nano_rw_256', 'maxvit_pico_rw_256', 'maxvit_rmlp_base_rw_224', 'maxvit_rmlp_base_rw_384', 'maxvit_rmlp_nano_rw_256', 'maxvit_rmlp_pico_rw_256', 'maxvit_rmlp_small_rw_224', 'maxvit_rmlp_small_rw_256', 'maxvit_rmlp_tiny_rw_256', 'maxvit_small_tf_224', 'maxvit_small_tf_384', 'maxvit_small_tf_512', 'maxvit_tiny_pm_256', 'maxvit_tiny_rw_224', 'maxvit_tiny_rw_256', 'maxvit_tiny_tf_224', 'maxvit_tiny_tf_384', 'maxvit_tiny_tf_512', 'maxvit_xlarge_tf_224', 'maxvit_xlarge_tf_384', 'maxvit_xlarge_tf_512', 'maxxvit_rmlp_nano_rw_256', 'maxxvit_rmlp_small_rw_256', 'maxxvit_rmlp_tiny_rw_256', 'maxxvitv2_nano_rw_256', 'maxxvitv2_rmlp_base_rw_224', 'maxxvitv2_rmlp_base_rw_384', 'maxxvitv2_rmlp_large_rw_224', 'mixer_b16_224', 'mixer_b32_224', 'mixer_l16_224', 'mixer_l32_224', 'mixer_s16_224', 'mixer_s32_224', 'mixnet_l', 'mixnet_m', 'mixnet_s', 'mixnet_xl', 'mixnet_xxl', 'mnasnet_050', 'mnasnet_075', 'mnasnet_100', 'mnasnet_140', 'mnasnet_small', 'mobilenet_edgetpu_100', 'mobilenet_edgetpu_v2_l', 'mobilenet_edgetpu_v2_m', 'mobilenet_edgetpu_v2_s', 'mobilenet_edgetpu_v2_xs', 'mobilenetv1_100', 'mobilenetv1_100h', 'mobilenetv1_125', 'mobilenetv2_035', 'mobilenetv2_050', 'mobilenetv2_075', 'mobilenetv2_100', 'mobilenetv2_110d', 'mobilenetv2_120d', 'mobilenetv2_140', 'mobilenetv3_large_075', 'mobilenetv3_large_100', 'mobilenetv3_large_150d', 'mobilenetv3_rw', 'mobilenetv3_small_050', 'mobilenetv3_small_075', 'mobilenetv3_small_100', 'mobilenetv4_conv_aa_large', 'mobilenetv4_conv_aa_medium', 'mobilenetv4_conv_blur_medium', 'mobilenetv4_conv_large', 'mobilenetv4_conv_medium', 
'mobilenetv4_conv_small', 'mobilenetv4_hybrid_large', 'mobilenetv4_hybrid_large_075', 'mobilenetv4_hybrid_medium', 'mobilenetv4_hybrid_medium_075', 'mobileone_s0', 'mobileone_s1', 'mobileone_s2', 'mobileone_s3', 'mobileone_s4', 'mobilevit_s', 'mobilevit_xs', 'mobilevit_xxs', 'mobilevitv2_050', 'mobilevitv2_075', 'mobilevitv2_100', 'mobilevitv2_125', 'mobilevitv2_150', 'mobilevitv2_175', 'mobilevitv2_200', 'mvitv2_base', 'mvitv2_base_cls', 'mvitv2_huge_cls', 'mvitv2_large', 'mvitv2_large_cls', 'mvitv2_small', 'mvitv2_small_cls', 'mvitv2_tiny', 'nasnetalarge', 'nest_base', 'nest_base_jx', 'nest_small', 'nest_small_jx', 'nest_tiny', 'nest_tiny_jx', 'nextvit_base', 'nextvit_large', 'nextvit_small', 'nf_ecaresnet26', 'nf_ecaresnet50', 'nf_ecaresnet101', 'nf_regnet_b0', 'nf_regnet_b1', 'nf_regnet_b2', 'nf_regnet_b3', 'nf_regnet_b4', 'nf_regnet_b5', 'nf_resnet26', 'nf_resnet50', 'nf_resnet101', 'nf_seresnet26', 'nf_seresnet50', 'nf_seresnet101', 'nfnet_f0', 'nfnet_f1', 'nfnet_f2', 'nfnet_f3', 'nfnet_f4', 'nfnet_f5', 'nfnet_f6', 'nfnet_f7', 'nfnet_l0', 'pit_b_224', 'pit_b_distilled_224', 'pit_s_224', 'pit_s_distilled_224', 'pit_ti_224', 'pit_ti_distilled_224', 'pit_xs_224', 'pit_xs_distilled_224', 'pnasnet5large', 'poolformer_m36', 'poolformer_m48', 'poolformer_s12', 'poolformer_s24', 'poolformer_s36', 'poolformerv2_m36', 'poolformerv2_m48', 'poolformerv2_s12', 'poolformerv2_s24', 'poolformerv2_s36', 'pvt_v2_b0', 'pvt_v2_b1', 'pvt_v2_b2', 'pvt_v2_b2_li', 'pvt_v2_b3', 'pvt_v2_b4', 'pvt_v2_b5', 'rdnet_base', 'rdnet_large', 'rdnet_small', 'rdnet_tiny', 'regnetv_040', 'regnetv_064', 'regnetx_002', 'regnetx_004', 'regnetx_004_tv', 'regnetx_006', 'regnetx_008', 'regnetx_016', 'regnetx_032', 'regnetx_040', 'regnetx_064', 'regnetx_080', 'regnetx_120', 'regnetx_160', 'regnetx_320', 'regnety_002', 'regnety_004', 'regnety_006', 'regnety_008', 'regnety_008_tv', 'regnety_016', 'regnety_032', 'regnety_040', 'regnety_040_sgn', 'regnety_064', 'regnety_080', 'regnety_080_tv', 
'regnety_120', 'regnety_160', 'regnety_320', 'regnety_640', 'regnety_1280', 'regnety_2560', 'regnetz_005', 'regnetz_040', 'regnetz_040_h', 'regnetz_b16', 'regnetz_b16_evos', 'regnetz_c16', 'regnetz_c16_evos', 'regnetz_d8', 'regnetz_d8_evos', 'regnetz_d32', 'regnetz_e8', 'repghostnet_050', 'repghostnet_058', 'repghostnet_080', 'repghostnet_100', 'repghostnet_111', 'repghostnet_130', 'repghostnet_150', 'repghostnet_200', 'repvgg_a0', 'repvgg_a1', 'repvgg_a2', 'repvgg_b0', 'repvgg_b1', 'repvgg_b1g4', 'repvgg_b2', 'repvgg_b2g4', 'repvgg_b3', 'repvgg_b3g4', 'repvgg_d2se', 'repvit_m0_9', 'repvit_m1', 'repvit_m1_0', 'repvit_m1_1', 'repvit_m1_5', 'repvit_m2', 'repvit_m2_3', 'repvit_m3', 'res2net50_14w_8s', 'res2net50_26w_4s', 'res2net50_26w_6s', 'res2net50_26w_8s', 'res2net50_48w_2s', 'res2net50d', 'res2net101_26w_4s', 'res2net101d', 'res2next50', 'resmlp_12_224', 'resmlp_24_224', 'resmlp_36_224', 'resmlp_big_24_224', 'resnest14d', 'resnest26d', 'resnest50d', 'resnest50d_1s4x24d', 'resnest50d_4s2x40d', 'resnest101e', 'resnest200e', 'resnest269e', 'resnet10t', 'resnet14t', 'resnet18', 'resnet18d', 'resnet26', 'resnet26d', 'resnet26t', 'resnet32ts', 'resnet33ts', 'resnet34', 'resnet34d', 'resnet50', 'resnet50_clip', 'resnet50_clip_gap', 'resnet50_gn', 'resnet50_mlp', 'resnet50c', 'resnet50d', 'resnet50s', 'resnet50t', 'resnet50x4_clip', 'resnet50x4_clip_gap', 'resnet50x16_clip', 'resnet50x16_clip_gap', 'resnet50x64_clip', 'resnet50x64_clip_gap', 'resnet51q', 'resnet61q', 'resnet101', 'resnet101_clip', 'resnet101_clip_gap', 'resnet101c', 'resnet101d', 'resnet101s', 'resnet152', 'resnet152c', 'resnet152d', 'resnet152s', 'resnet200', 'resnet200d', 'resnetaa34d', 'resnetaa50', 'resnetaa50d', 'resnetaa101d', 'resnetblur18', 'resnetblur50', 'resnetblur50d', 'resnetblur101d', 'resnetrs50', 'resnetrs101', 'resnetrs152', 'resnetrs200', 'resnetrs270', 'resnetrs350', 'resnetrs420', 'resnetv2_50', 'resnetv2_50d', 'resnetv2_50d_evos', 'resnetv2_50d_frn', 'resnetv2_50d_gn', 
'resnetv2_50t', 'resnetv2_50x1_bit', 'resnetv2_50x3_bit', 'resnetv2_101', 'resnetv2_101d', 'resnetv2_101x1_bit', 'resnetv2_101x3_bit', 'resnetv2_152', 'resnetv2_152d', 'resnetv2_152x2_bit', 'resnetv2_152x4_bit', 'resnext26ts', 'resnext50_32x4d', 'resnext50d_32x4d', 'resnext101_32x4d', 'resnext101_32x8d', 'resnext101_32x16d', 'resnext101_32x32d', 'resnext101_64x4d', 'rexnet_100', 'rexnet_130', 'rexnet_150', 'rexnet_200', 'rexnet_300', 'rexnetr_100', 'rexnetr_130', 'rexnetr_150', 'rexnetr_200', 'rexnetr_300', 'sam2_hiera_base_plus', 'sam2_hiera_large', 'sam2_hiera_small', 'sam2_hiera_tiny', 'samvit_base_patch16', 'samvit_base_patch16_224', 'samvit_huge_patch16', 'samvit_large_patch16', 'sebotnet33ts_256', 'sedarknet21', 'sehalonet33ts', 'selecsls42', 'selecsls42b', 'selecsls60', 'selecsls60b', 'selecsls84', 'semnasnet_050', 'semnasnet_075', 'semnasnet_100', 'semnasnet_140', 'senet154', 'sequencer2d_l', 'sequencer2d_m', 'sequencer2d_s', 'seresnet18', 'seresnet33ts', 'seresnet34', 'seresnet50', 'seresnet50t', 'seresnet101', 'seresnet152', 'seresnet152d', 'seresnet200d', 'seresnet269d', 'seresnetaa50d', 'seresnext26d_32x4d', 'seresnext26t_32x4d', 'seresnext26ts', 'seresnext50_32x4d', 'seresnext101_32x4d', 'seresnext101_32x8d', 'seresnext101_64x4d', 'seresnext101d_32x8d', 'seresnextaa101d_32x8d', 'seresnextaa201d_32x8d', 'skresnet18', 'skresnet34', 'skresnet50', 'skresnet50d', 'skresnext50_32x4d', 'spnasnet_100', 'swin_base_patch4_window7_224', 'swin_base_patch4_window12_384', 'swin_large_patch4_window7_224', 'swin_large_patch4_window12_384', 'swin_s3_base_224', 'swin_s3_small_224', 'swin_s3_tiny_224', 'swin_small_patch4_window7_224', 'swin_tiny_patch4_window7_224', 'swinv2_base_window8_256', 'swinv2_base_window12_192', 'swinv2_base_window12to16_192to256', 'swinv2_base_window12to24_192to384', 'swinv2_base_window16_256', 'swinv2_cr_base_224', 'swinv2_cr_base_384', 'swinv2_cr_base_ns_224', 'swinv2_cr_giant_224', 'swinv2_cr_giant_384', 'swinv2_cr_huge_224', 
'swinv2_cr_huge_384', 'swinv2_cr_large_224', 'swinv2_cr_large_384', 'swinv2_cr_small_224', 'swinv2_cr_small_384', 'swinv2_cr_small_ns_224', 'swinv2_cr_small_ns_256', 'swinv2_cr_tiny_224', 'swinv2_cr_tiny_384', 'swinv2_cr_tiny_ns_224', 'swinv2_large_window12_192', 'swinv2_large_window12to16_192to256', 'swinv2_large_window12to24_192to384', 'swinv2_small_window8_256', 'swinv2_small_window16_256', 'swinv2_tiny_window8_256', 'swinv2_tiny_window16_256', 'test_byobnet', 'test_efficientnet', 'test_vit', 'tf_efficientnet_b0', 'tf_efficientnet_b1', 'tf_efficientnet_b2', 'tf_efficientnet_b3', 'tf_efficientnet_b4', 'tf_efficientnet_b5', 'tf_efficientnet_b6', 'tf_efficientnet_b7', 'tf_efficientnet_b8', 'tf_efficientnet_cc_b0_4e', 'tf_efficientnet_cc_b0_8e', 'tf_efficientnet_cc_b1_8e', 'tf_efficientnet_el', 'tf_efficientnet_em', 'tf_efficientnet_es', 'tf_efficientnet_l2', 'tf_efficientnet_lite0', 'tf_efficientnet_lite1', 'tf_efficientnet_lite2', 'tf_efficientnet_lite3', 'tf_efficientnet_lite4', 'tf_efficientnetv2_b0', 'tf_efficientnetv2_b1', 'tf_efficientnetv2_b2', 'tf_efficientnetv2_b3', 'tf_efficientnetv2_l', 'tf_efficientnetv2_m', 'tf_efficientnetv2_s', 'tf_efficientnetv2_xl', 'tf_mixnet_l', 'tf_mixnet_m', 'tf_mixnet_s', 'tf_mobilenetv3_large_075', 'tf_mobilenetv3_large_100', 'tf_mobilenetv3_large_minimal_100', 'tf_mobilenetv3_small_075', 'tf_mobilenetv3_small_100', 'tf_mobilenetv3_small_minimal_100', 'tiny_vit_5m_224', 'tiny_vit_11m_224', 'tiny_vit_21m_224', 'tiny_vit_21m_384', 'tiny_vit_21m_512', 'tinynet_a', 'tinynet_b', 'tinynet_c', 'tinynet_d', 'tinynet_e', 'tnt_b_patch16_224', 'tnt_s_patch16_224', 'tresnet_l', 'tresnet_m', 'tresnet_v2_l', 'tresnet_xl', 'twins_pcpvt_base', 'twins_pcpvt_large', 'twins_pcpvt_small', 'twins_svt_base', 'twins_svt_large', 'twins_svt_small', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 'visformer_small', 'visformer_tiny', 'vit_base_mci_224', 'vit_base_patch8_224', 'vit_base_patch14_dinov2', 
'vit_base_patch14_reg4_dinov2', 'vit_base_patch16_18x2_224', 'vit_base_patch16_224', 'vit_base_patch16_224_miil', 'vit_base_patch16_384', 'vit_base_patch16_clip_224', 'vit_base_patch16_clip_384', 'vit_base_patch16_clip_quickgelu_224', 'vit_base_patch16_gap_224', 'vit_base_patch16_plus_240', 'vit_base_patch16_reg4_gap_256', 'vit_base_patch16_rope_reg1_gap_256', 'vit_base_patch16_rpn_224', 'vit_base_patch16_siglip_224', 'vit_base_patch16_siglip_256', 'vit_base_patch16_siglip_384', 'vit_base_patch16_siglip_512', 'vit_base_patch16_siglip_gap_224', 'vit_base_patch16_siglip_gap_256', 'vit_base_patch16_siglip_gap_384', 'vit_base_patch16_siglip_gap_512', 'vit_base_patch16_xp_224', 'vit_base_patch32_224', 'vit_base_patch32_384', 'vit_base_patch32_clip_224', 'vit_base_patch32_clip_256', 'vit_base_patch32_clip_384', 'vit_base_patch32_clip_448', 'vit_base_patch32_clip_quickgelu_224', 'vit_base_patch32_plus_256', 'vit_base_r26_s32_224', 'vit_base_r50_s16_224', 'vit_base_r50_s16_384', 'vit_base_resnet26d_224', 'vit_base_resnet50d_224', 'vit_betwixt_patch16_gap_256', 'vit_betwixt_patch16_reg1_gap_256', 'vit_betwixt_patch16_reg4_gap_256', 'vit_betwixt_patch16_reg4_gap_384', 'vit_betwixt_patch16_rope_reg4_gap_256', 'vit_betwixt_patch32_clip_224', 'vit_giant_patch14_224', 'vit_giant_patch14_clip_224', 'vit_giant_patch14_dinov2', 'vit_giant_patch14_reg4_dinov2', 'vit_giant_patch16_gap_224', 'vit_gigantic_patch14_224', 'vit_gigantic_patch14_clip_224', 'vit_huge_patch14_224', 'vit_huge_patch14_clip_224', 'vit_huge_patch14_clip_336', 'vit_huge_patch14_clip_378', 'vit_huge_patch14_clip_quickgelu_224', 'vit_huge_patch14_clip_quickgelu_378', 'vit_huge_patch14_gap_224', 'vit_huge_patch14_xp_224', 'vit_huge_patch16_gap_448', 'vit_large_patch14_224', 'vit_large_patch14_clip_224', 'vit_large_patch14_clip_336', 'vit_large_patch14_clip_quickgelu_224', 'vit_large_patch14_clip_quickgelu_336', 'vit_large_patch14_dinov2', 'vit_large_patch14_reg4_dinov2', 'vit_large_patch14_xp_224', 
'vit_large_patch16_224', 'vit_large_patch16_384', 'vit_large_patch16_siglip_256', 'vit_large_patch16_siglip_384', 'vit_large_patch16_siglip_gap_256', 'vit_large_patch16_siglip_gap_384', 'vit_large_patch32_224', 'vit_large_patch32_384', 'vit_large_r50_s32_224', 'vit_large_r50_s32_384', 'vit_little_patch16_reg1_gap_256', 'vit_little_patch16_reg4_gap_256', 'vit_medium_patch16_clip_224', 'vit_medium_patch16_gap_240', 'vit_medium_patch16_gap_256', 'vit_medium_patch16_gap_384', 'vit_medium_patch16_reg1_gap_256', 'vit_medium_patch16_reg4_gap_256', 'vit_medium_patch16_rope_reg1_gap_256', 'vit_medium_patch32_clip_224', 'vit_mediumd_patch16_reg4_gap_256', 'vit_mediumd_patch16_reg4_gap_384', 'vit_mediumd_patch16_rope_reg1_gap_256', 'vit_pwee_patch16_reg1_gap_256', 'vit_relpos_base_patch16_224', 'vit_relpos_base_patch16_cls_224', 'vit_relpos_base_patch16_clsgap_224', 'vit_relpos_base_patch16_plus_240', 'vit_relpos_base_patch16_rpn_224', 'vit_relpos_base_patch32_plus_rpn_256', 'vit_relpos_medium_patch16_224', 'vit_relpos_medium_patch16_cls_224', 'vit_relpos_medium_patch16_rpn_224', 'vit_relpos_small_patch16_224', 'vit_relpos_small_patch16_rpn_224', 'vit_small_patch8_224', 'vit_small_patch14_dinov2', 'vit_small_patch14_reg4_dinov2', 'vit_small_patch16_18x2_224', 'vit_small_patch16_36x1_224', 'vit_small_patch16_224', 'vit_small_patch16_384', 'vit_small_patch32_224', 'vit_small_patch32_384', 'vit_small_r26_s32_224', 'vit_small_r26_s32_384', 'vit_small_resnet26d_224', 'vit_small_resnet50d_s16_224', 'vit_so150m_patch16_reg4_gap_256', 'vit_so150m_patch16_reg4_map_256', 'vit_so400m_patch14_siglip_224', 'vit_so400m_patch14_siglip_384', 'vit_so400m_patch14_siglip_gap_224', 'vit_so400m_patch14_siglip_gap_384', 'vit_so400m_patch14_siglip_gap_448', 'vit_so400m_patch14_siglip_gap_896', 'vit_srelpos_medium_patch16_224', 'vit_srelpos_small_patch16_224', 'vit_tiny_patch16_224', 'vit_tiny_patch16_384', 'vit_tiny_r_s16_p8_224', 'vit_tiny_r_s16_p8_384', 'vit_wee_patch16_reg1_gap_256', 
'vit_xsmall_patch16_clip_224', 'vitamin_base_224', 'vitamin_large2_224', 'vitamin_large2_256', 'vitamin_large2_336', 'vitamin_large2_384', 'vitamin_large_224', 'vitamin_large_256', 'vitamin_large_336', 'vitamin_large_384', 'vitamin_small_224', 'vitamin_xlarge_256', 'vitamin_xlarge_336', 'vitamin_xlarge_384', 'volo_d1_224', 'volo_d1_384', 'volo_d2_224', 'volo_d2_384', 'volo_d3_224', 'volo_d3_448', 'volo_d4_224', 'volo_d4_448', 'volo_d5_224', 'volo_d5_448', 'volo_d5_512', 'vovnet39a', 'vovnet57a', 'wide_resnet50_2', 'wide_resnet101_2', 'xception41', 'xception41p', 'xception65', 'xception65p', 'xception71', 'xcit_large_24_p8_224', 'xcit_large_24_p8_384', 'xcit_large_24_p16_224', 'xcit_large_24_p16_384', 'xcit_medium_24_p8_224', 'xcit_medium_24_p8_384', 'xcit_medium_24_p16_224', 'xcit_medium_24_p16_384', 'xcit_nano_12_p8_224', 'xcit_nano_12_p8_384', 'xcit_nano_12_p16_224', 'xcit_nano_12_p16_384', 'xcit_small_12_p8_224', 'xcit_small_12_p8_384', 'xcit_small_12_p16_224', 'xcit_small_12_p16_384', 'xcit_small_24_p8_224', 'xcit_small_24_p8_384', 'xcit_small_24_p16_224', 'xcit_small_24_p16_384', 'xcit_tiny_12_p8_224', 'xcit_tiny_12_p8_384', 'xcit_tiny_12_p16_224', 'xcit_tiny_12_p16_384', 'xcit_tiny_24_p8_224', 'xcit_tiny_24_p8_384', 'xcit_tiny_24_p16_224', 'xcit_tiny_24_p16_384']\n"
850
+ ]
851
+ }
852
+ ],
853
+ "source": [
854
+ "import timm\n",
855
+ "print(timm.list_models())"
856
+ ]
857
+ },
858
+ {
859
+ "cell_type": "markdown",
860
+ "metadata": {},
861
+ "source": [
862
+ "##### testing the litserve model"
863
+ ]
864
+ },
865
+ {
866
+ "cell_type": "code",
867
+ "execution_count": 2,
868
+ "metadata": {},
869
+ "outputs": [],
870
+ "source": [
871
+ "import requests\n",
872
+ "from urllib.request import urlopen\n",
873
+ "import base64"
874
+ ]
875
+ },
876
+ {
877
+ "cell_type": "code",
878
+ "execution_count": 33,
879
+ "metadata": {},
880
+ "outputs": [
881
+ {
882
+ "name": "stdout",
883
+ "output_type": "stream",
884
+ "text": [
885
+ "<class 'bytes'>\n"
886
+ ]
887
+ }
888
+ ],
889
+ "source": [
890
+ "url = \"https://media.istockphoto.com/id/541844008/photo/portland-grand-floral-parade-2016.jpg?s=2048x2048&w=is&k=20&c=ZuvR6oDv5WxwL5dhXKAbevysEXhXV47shJdpzkqen5Y=\"\n",
891
+ "img_data = urlopen(url).read()\n",
892
+ "print(type(img_data))"
893
+ ]
894
+ },
895
+ {
896
+ "cell_type": "code",
897
+ "execution_count": 34,
898
+ "metadata": {},
899
+ "outputs": [
900
+ {
901
+ "name": "stdout",
902
+ "output_type": "stream",
903
+ "text": [
904
+ "<class 'str'>\n"
905
+ ]
906
+ }
907
+ ],
908
+ "source": [
909
+ "# Convert to base64 string\n",
910
+ "img_bytes = base64.b64encode(img_data).decode('utf-8')\n",
911
+ "print(type(img_bytes))"
912
+ ]
913
+ },
914
+ {
915
+ "cell_type": "code",
916
+ "execution_count": 35,
917
+ "metadata": {},
918
+ "outputs": [],
919
+ "source": [
920
+ "response = requests.post(\n",
921
+ " \"http://localhost:8080/predict\", json={\"image\": img_bytes} # image is the key\n",
922
+ ")"
923
+ ]
924
+ },
925
+ {
926
+ "cell_type": "code",
927
+ "execution_count": 36,
928
+ "metadata": {},
929
+ "outputs": [
930
+ {
931
+ "name": "stdout",
932
+ "output_type": "stream",
933
+ "text": [
934
+ "\\nTop 5 Predictions:\n",
935
+ "mountain_bike, all-terrain_bike, off-roader: 82.13%\n",
936
+ "maillot: 5.09%\n",
937
+ "crash_helmet: 1.84%\n",
938
+ "bicycle-built-for-two, tandem_bicycle, tandem: 1.83%\n",
939
+ "alp: 0.69%\n"
940
+ ]
941
+ }
942
+ ],
943
+ "source": [
944
+ "if response.status_code == 200:\n",
945
+ " predictions = response.json()[\"predictions\"]\n",
946
+ " print(\"\\\\nTop 5 Predictions:\")\n",
947
+ " for pred in predictions:\n",
948
+ " print(f\"{pred['label']}: {pred['probability']:.2%}\")\n",
949
+ "else:\n",
950
+ " print(f\"Error: {response.status_code}\")\n",
951
+ " print(response.text)"
952
+ ]
953
+ },
954
  {
955
  "cell_type": "code",
956
  "execution_count": null,
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -70,6 +70,9 @@ celery = "^5.4.0"
70
  fastapi-cache2 = "^0.2.2"
71
  aiocache = "^0.12.3"
72
  dvc-s3 = "^3.2.0"
 
 
 
73
 
74
  [tool.poetry.dev-dependencies]
75
  pytest-asyncio = "^0.20.3"
 
70
  fastapi-cache2 = "^0.2.2"
71
  aiocache = "^0.12.3"
72
  dvc-s3 = "^3.2.0"
73
+ litserve = "^0.2.4"
74
+ gpustat = "^1.1.1"
75
+ nvitop = "^1.3.2"
76
 
77
  [tool.poetry.dev-dependencies]
78
  pytest-asyncio = "^0.20.3"
src/litserve_api_test.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import concurrent.futures
3
+ import time
4
+ import numpy as np
5
+ import requests
6
+ import psutil
7
+ from urllib.request import urlopen
8
+ import matplotlib.pyplot as plt
9
+
10
+ # Try importing `gpustat` for GPU monitoring
11
+ try:
12
+ import gpustat
13
+
14
+ GPU_AVAILABLE = True
15
+ except ImportError:
16
+ GPU_AVAILABLE = False
17
+
18
+ # Constants
19
+ SERVER_URL = "http://localhost:8080" # Base server URL
20
+ TEST_IMAGE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png"
21
+
22
+
23
def fetch_and_prepare_payload(timeout=30):
    """
    Fetch the test image and prepare a base64 payload.

    Args:
        timeout: Seconds to wait on the download socket before giving up.

    Returns:
        The bytes downloaded from ``TEST_IMAGE_URL`` encoded as a base64
        UTF-8 string, or ``None`` if the download fails (the error is
        printed rather than raised, so callers must check for ``None``).
    """
    try:
        # The context manager closes the HTTP response even if read() raises;
        # the timeout keeps an unreachable host from stalling the benchmark.
        with urlopen(TEST_IMAGE_URL, timeout=timeout) as response:
            img_data = response.read()
    except Exception as e:
        # Deliberately broad: this is a best-effort fetch and the caller
        # aborts the benchmark on a None payload.
        print(f"Error fetching the image: {e}")
        return None
    return base64.b64encode(img_data).decode("utf-8")
33
+
34
+
35
def send_request(payload, batch=False, timeout=60):
    """
    Send a single or batch request and measure response time.

    Args:
        payload: A base64 image string, or an iterable of such strings when
            ``batch`` is true.
        batch: When true, POST a JSON list holding one ``{"image": ...}``
            object per item of ``payload``; otherwise POST a single
            ``{"image": payload}`` object.
        timeout: Seconds passed to ``requests.post`` so a stalled server
            cannot block a worker thread indefinitely.

    Returns:
        ``(response_time, status_code, predictions)`` where ``response_time``
        is in seconds and ``predictions`` is the decoded JSON body for a 200
        response (``None`` for any other status). On any error the tuple
        ``(None, None, None)`` is returned after printing the error. Note
        that this failure tuple is truthy, so callers must inspect its
        fields rather than the tuple itself.
    """
    endpoint = f"{SERVER_URL}/predict"
    try:
        if batch:
            body = [{"image": img} for img in payload]
        else:
            body = {"image": payload}
        # perf_counter is monotonic, so the measured latency cannot be skewed
        # by wall-clock adjustments during the request.
        start_time = time.perf_counter()
        response = requests.post(endpoint, json=body, timeout=timeout)
        response_time = time.perf_counter() - start_time
        predictions = response.json() if response.status_code == 200 else None
        return response_time, response.status_code, predictions
    except Exception as e:
        # Broad on purpose: this runs in a worker thread and must never
        # propagate, otherwise one bad request would abort the benchmark.
        print(f"Error sending request: {e}")
        return None, None, None
52
+
53
+
54
def get_system_metrics():
    """
    Take one sample of CPU and GPU utilisation.

    Returns:
        A dict with two keys:
        ``"cpu_usage"`` - the value of ``psutil.cpu_percent(0.1)``;
        ``"gpu_usage"`` - the sum of ``utilization`` over every GPU reported
        by ``gpustat``, or ``-1`` when ``gpustat`` could not be imported or
        the query raised.
    """
    cpu_usage = psutil.cpu_percent(0.1)

    # -1 is the "no reading" sentinel used whenever GPU stats are unavailable.
    gpu_usage = -1
    if GPU_AVAILABLE:
        try:
            collection = gpustat.GPUStatCollection.new_query()
            # NOTE(review): utilisation is summed, not averaged, across
            # devices, so multi-GPU hosts can report more than 100 - confirm
            # this is intended.
            gpu_usage = sum(device.utilization for device in collection.gpus)
        except Exception:
            gpu_usage = -1

    return {"cpu_usage": cpu_usage, "gpu_usage": gpu_usage}
68
+
69
+
70
def benchmark_api(num_requests=100, concurrency_level=10, batch=False):
    """
    Benchmark the API server.

    Submits ``num_requests`` calls to ``send_request`` on a thread pool of
    ``concurrency_level`` workers, sampling system metrics while they run.

    Args:
        num_requests: Number of requests to submit.
        concurrency_level: Maximum number of worker threads.
        batch: When true, every request carries ``num_requests`` copies of
            the test image as one batch; otherwise each request carries a
            single image.

    Returns:
        A dict with ``total_requests``, ``concurrency_level``, ``total_time``
        (seconds), ``avg_response_time`` (milliseconds), ``success_rate``
        (percent of requests answered with HTTP 200), ``requests_per_second``,
        ``avg_cpu_usage`` and ``avg_gpu_usage`` (``-1`` when no GPU reading
        was obtained). Returns ``None`` if the test payload could not be
        prepared.
    """
    payload = fetch_and_prepare_payload()
    if not payload:
        print("Error preparing payload. Benchmark aborted.")
        return

    request_payload = [payload] * num_requests if batch else payload
    system_metrics = []
    response_times = []
    status_codes = []
    predictions = []

    # Monotonic clock: immune to wall-clock adjustments during the run.
    start_benchmark_time = time.perf_counter()

    with concurrent.futures.ThreadPoolExecutor(
        max_workers=concurrency_level
    ) as executor:
        futures = [
            executor.submit(send_request, request_payload, batch)
            for _ in range(num_requests)
        ]
        pending = set(futures)
        while pending:
            system_metrics.append(get_system_metrics())
            # wait() returns as soon as the last request finishes instead of
            # always sleeping a full interval, so the measured total time is
            # not padded by the polling loop.
            _, pending = concurrent.futures.wait(pending, timeout=0.1)

    total_benchmark_time = time.perf_counter() - start_benchmark_time

    for future in futures:
        result = future.result()
        # send_request reports failure as (None, None, None); that tuple is
        # truthy, so check its first field. Letting None through would make
        # np.mean() below raise TypeError.
        if not result or result[0] is None:
            continue
        response_time, status_code, prediction = result
        response_times.append(response_time)
        status_codes.append(status_code)
        predictions.append(prediction)

    cpu_samples = [m["cpu_usage"] for m in system_metrics]
    # Negative values are the "no reading" sentinel and must not be averaged
    # together with real utilisation figures.
    gpu_samples = [m["gpu_usage"] for m in system_metrics if m["gpu_usage"] >= 0]
    avg_cpu = np.mean(cpu_samples) if cpu_samples else 0.0
    avg_gpu = np.mean(gpu_samples) if gpu_samples else -1

    success_rate = (status_codes.count(200) / num_requests) * 100 if status_codes else 0
    avg_response_time = np.mean(response_times) * 1000 if response_times else 0  # ms
    requests_per_second = (
        num_requests / total_benchmark_time if total_benchmark_time > 0 else 0.0
    )

    print("\n--- Sample Predictions ---")
    # Show predictions for the first 5 completed requests.
    for i, prediction in enumerate(predictions[:5]):
        print(f"Request {i + 1}: {prediction}")

    return {
        "total_requests": num_requests,
        "concurrency_level": concurrency_level,
        "total_time": total_benchmark_time,
        "avg_response_time": avg_response_time,
        "success_rate": success_rate,
        "requests_per_second": requests_per_second,
        "avg_cpu_usage": avg_cpu,
        "avg_gpu_usage": avg_gpu,
    }
133
+
134
+
135
def run_benchmarks():
    """
    Run comprehensive benchmarks and create plots.

    Runs ``benchmark_api`` with 50 non-batched requests at each concurrency
    level in ``[1, 8, 16, 32]``, prints a one-line summary per level, and
    saves a two-panel figure (throughput, resource usage) to
    ``benchmark_results.png``. Levels whose benchmark returned nothing are
    left out of the plots; if every level failed, no figure is written.
    """
    concurrency_levels = [1, 8, 16, 32]
    metrics = []

    print("Running API benchmarks...")
    for concurrency in concurrency_levels:
        print(f"\nTesting concurrency level: {concurrency}")
        result = benchmark_api(
            num_requests=50, concurrency_level=concurrency, batch=False
        )
        if not result:
            continue
        metrics.append(result)
        print(
            f"Concurrency {concurrency}: "
            f"{result['requests_per_second']:.2f} reqs/sec, "
            f"CPU: {result['avg_cpu_usage']:.1f}%, "
            f"GPU: {result['avg_gpu_usage']:.1f}%"
        )

    if not metrics:
        print("No benchmark produced results; skipping plots.")
        return

    # Plot against the levels that actually produced a result. Using the full
    # concurrency_levels list would make the x and y lengths differ as soon
    # as one run is aborted, and plt.plot() would raise.
    tested_levels = [m["concurrency_level"] for m in metrics]

    # Generate plots
    figure = plt.figure(figsize=(12, 6))

    # Throughput
    plt.subplot(1, 2, 1)
    plt.plot(
        tested_levels,
        [m["requests_per_second"] for m in metrics],
        "r-o",
        label="Throughput",
    )
    plt.xlabel("Concurrency Level")
    plt.ylabel("Requests per Second")
    plt.title("API Throughput")
    plt.grid(True)

    # Resource Usage
    plt.subplot(1, 2, 2)
    plt.plot(
        tested_levels,
        [m["avg_cpu_usage"] for m in metrics],
        "b-o",
        label="CPU Usage",
    )
    if GPU_AVAILABLE:
        plt.plot(
            tested_levels,
            [m["avg_gpu_usage"] for m in metrics],
            "g-o",
            label="GPU Usage",
        )
    plt.xlabel("Concurrency Level")
    plt.ylabel("Resource Usage (%)")
    plt.title("Resource Usage")
    plt.legend()
    plt.grid(True)

    plt.tight_layout()
    plt.savefig("benchmark_results.png")
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(figure)
    print("Benchmark results saved as 'benchmark_results.png'.")
197
+
198
+
199
+ if __name__ == "__main__":
200
+ run_benchmarks()
src/litserve_test_client.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from urllib.request import urlopen
3
+ import base64
4
+ import os
5
+
6
+
7
def fetch_image(url, timeout=30.0):
    """
    Fetch image data from a URL.

    Args:
        url: Location of the image; any URL ``urllib.request.urlopen`` accepts.
        timeout: Seconds to wait on blocking network operations before
            giving up.

    Returns:
        The raw response body as ``bytes``.

    Raises:
        OSError: If the resource cannot be retrieved (this includes
            ``urllib.error.URLError`` and timeouts).
    """
    # Close the response deterministically and bound the wait so a stalled
    # server cannot hang the client indefinitely.
    with urlopen(url, timeout=timeout) as response:
        return response.read()
12
+
13
+
14
def encode_image_to_base64(img_data):
    """
    Convert raw image bytes into their base64 text representation.

    Args:
        img_data: Bytes-like object holding the image content.

    Returns:
        The base64 encoding of ``img_data`` as a ``str``.
    """
    encoded_bytes = base64.b64encode(img_data)
    return str(encoded_bytes, "utf-8")
19
+
20
+
21
def send_prediction_request(base64_image, server_url, timeout=30.0):
    """
    Send a single base64 image to the prediction API and retrieve predictions.

    Args:
        base64_image: Base64-encoded image string, sent as the ``"image"``
            field of the JSON body.
        server_url: Base URL of the server; a trailing slash is tolerated.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` from ``POST {server_url}/predict``, or
        ``None`` if the request failed (the error is printed).
    """
    # Avoid "//predict" when the configured URL ends with a slash.
    endpoint = f"{server_url.rstrip('/')}/predict"
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server; timeouts surface as RequestException subclasses.
        return requests.post(
            endpoint, json={"image": base64_image}, timeout=timeout
        )
    except requests.exceptions.RequestException as e:
        print(f"Error connecting to the server: {e}")
        return None
31
+
32
+
33
def send_batch_prediction_request(base64_images, server_url, timeout=30.0):
    """
    Send a batch of base64 images to the prediction API and retrieve predictions.

    Args:
        base64_images: Iterable of base64-encoded image strings; each becomes
            one ``{"image": ...}`` object in the JSON list body.
        server_url: Base URL of the server; a trailing slash is tolerated.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``requests.Response`` from ``POST {server_url}/predict``, or
        ``None`` if the request failed (the error is printed).
    """
    # NOTE(review): the companion server's decode_request only handles dict
    # payloads, so a top-level JSON list may not be accepted as-is; confirm
    # against the server before relying on this helper.
    payload = [{"image": img} for img in base64_images]
    # Avoid "//predict" when the configured URL ends with a slash.
    endpoint = f"{server_url.rstrip('/')}/predict"
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server; timeouts surface as RequestException subclasses.
        return requests.post(endpoint, json=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error connecting to the server: {e}")
        return None
45
+
46
+
47
def main():
    """
    Fetch the sample images, send the first one to the server, and print
    the returned predictions.

    The server URL is read from the ``SERVER_URL`` environment variable and
    defaults to ``http://localhost:8080``. All failures are reported on
    stdout; nothing is raised to the caller.
    """
    # Server URL (default or from environment)
    server_url = os.getenv("SERVER_URL", "http://localhost:8080")

    # Example URLs for testing
    image_urls = [
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png",
        "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png",
    ]

    # Fetch and encode images
    try:
        print("Fetching and encoding images...")
        base64_images = [encode_image_to_base64(fetch_image(url)) for url in image_urls]
        print("Images fetched and encoded successfully.")
    except Exception as e:
        print(f"Error fetching or encoding images: {e}")
        return

    # Test single image prediction
    try:
        print("\n--- Single Image Prediction ---")
        single_response = send_prediction_request(base64_images[0], server_url)

        # None means the connection failed and the helper already reported it.
        if single_response is None:
            return

        # Branch on the status code rather than the response's truthiness:
        # a requests.Response evaluates to False for 4xx/5xx statuses, which
        # would otherwise hide exactly the errors we want to print.
        if single_response.status_code == 200:
            predictions = single_response.json().get("predictions", [])
            if predictions:
                print("Top 5 Predictions:")
                for pred in predictions:
                    print(f"{pred['label']}: {pred['probability']:.2%}")
            else:
                print("No predictions returned.")
        else:
            print(f"Error: {single_response.status_code}")
            print(single_response.text)
    except Exception as e:
        print(f"Error sending single prediction request: {e}")
83
+
84
+
85
# Script entry point: run the client demo only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
src/litserve_test_server.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import timm
3
+ from PIL import Image
4
+ import io
5
+ import litserve as lit
6
+ import base64
7
+ import requests
8
+ import logging
9
+
10
+
11
class ImageClassifierAPI(lit.LitAPI):
    """Image classification API built on a pretrained timm ResNet-50.

    Requests carry a base64-encoded image under the ``"image"`` key. Images
    are decoded, transformed and stacked in ``batch``, scored in ``predict``,
    split per request in ``unbatch`` and turned into a top-5 list of
    ``{"label", "probability"}`` entries in ``encode_response``.
    """

    def setup(self, device):
        """Initialize the model and necessary components.

        Args:
            device: Device the model and input batches are moved to.
        """
        self.device = device
        logging.info("Setting up the model and components.")

        # Create and load the model
        self.model = timm.create_model("resnet50.a1_in1k", pretrained=True)
        self.model = self.model.to(device).eval()

        # Preprocessing pipeline derived from the model's data configuration
        data_config = timm.data.resolve_model_data_config(self.model)
        self.transforms = timm.data.create_transform(**data_config, is_training=False)

        # Load labels
        url = "https://storage.googleapis.com/bit_models/ilsvrc2012_wordnet_lemmas.txt"
        try:
            # Bound the wait and reject HTTP error pages so an error body is
            # never mistaken for the label list.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            self.labels = response.text.strip().split("\n")
            logging.info("Labels loaded successfully.")
        except requests.exceptions.RequestException as e:
            # Best effort: keep serving; _label_for falls back to class ids.
            logging.error("Failed to load labels: %s", e)
            self.labels = []

    def decode_request(self, request):
        """Extract the base64 image string from a request payload.

        Args:
            request: Parsed request body; must be a dict with an ``"image"`` key.

        Returns:
            The value stored under ``"image"``.

        Raises:
            ValueError: If the payload is not a dict or lacks ``"image"``.
        """
        if not isinstance(request, dict) or "image" not in request:
            raise ValueError(
                f'Request must be a JSON object with an "image" field, got: {type(request)}'
            )
        # Log only the key names; the payload itself is a full base64 image.
        logging.info("decode_request received keys: %s", list(request))
        return request["image"]

    def batch(self, inputs):
        """Batch process images.

        Args:
            inputs: List of base64-encoded image strings.

        Returns:
            The transformed images stacked into one tensor on ``self.device``.

        Raises:
            ValueError: If ``inputs`` is not a list or any image cannot be
                decoded and transformed.
        """
        if not isinstance(inputs, list):
            raise ValueError("Input to batch must be a list.")
        # Log the batch size rather than the base64 payloads themselves.
        logging.info("batch received %d input(s).", len(inputs))

        try:
            batch_tensors = [self._image_to_tensor(item) for item in inputs]
            return torch.stack(batch_tensors).to(self.device)
        except Exception as e:
            logging.error("Error decoding image: %s", e)
            # Chain the cause so the original failure stays in the traceback.
            raise ValueError("Failed to decode and process the images.") from e

    def _image_to_tensor(self, encoded_image):
        """Decode one base64 image string and apply the model transforms."""
        if not isinstance(encoded_image, str):  # Ensure input is a base64 string
            raise ValueError(
                f"Input must be a base64-encoded string, got: {type(encoded_image)}"
            )

        # Decode base64 string to bytes
        img_bytes = base64.b64decode(encoded_image)

        # Convert bytes to PIL Image
        image = Image.open(io.BytesIO(img_bytes)).convert("RGB")

        return self.transforms(image)

    @torch.no_grad()
    def predict(self, x):
        """Make predictions on the input batch.

        Args:
            x: Batched input tensor as produced by ``batch``.

        Returns:
            Softmax of the model outputs taken over dimension 1.
        """
        outputs = self.model(x)
        probabilities = torch.nn.functional.softmax(outputs, dim=1)
        logging.info("Prediction completed.")
        return probabilities

    def unbatch(self, output):
        """Unbatch the output.

        Returns:
            A list with one tensor per entry along the first dimension.
        """
        return list(output)

    def _label_for(self, index):
        """Return the label for a class index, or a generic name if unknown."""
        if 0 <= index < len(self.labels):
            return self.labels[index]
        # Labels failed to load (or are too short): still return a usable id.
        return f"class_{index}"

    def encode_response(self, output):
        """Convert one item's model output into the API response.

        Args:
            output: Probability tensor for a single item, as yielded by
                ``unbatch``.

        Returns:
            ``{"predictions": [...]}`` with up to five entries, each holding
            a ``"label"`` and a ``"probability"``, highest probability first.

        Raises:
            ValueError: If the response cannot be built.
        """
        try:
            # Never ask for more entries than there are classes.
            top_k = min(5, output.shape[-1])
            probs, indices = torch.topk(output, k=top_k)
            responses = {
                "predictions": [
                    {
                        "label": self._label_for(idx),
                        "probability": prob,
                    }
                    for prob, idx in zip(probs.tolist(), indices.tolist())
                ]
            }
            logging.info("Batch response successfully encoded.")
            return responses
        except Exception as e:
            logging.error("Error encoding batch response: %s", e)
            raise ValueError("Failed to encode the batch response.") from e
102
+
103
+
104
if __name__ == "__main__":
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )
    logging.info("Starting the Image Classifier API server.")

    classifier_api = ImageClassifierAPI()

    # Server configuration, forwarded to LitServer as keyword arguments
    server_options = {
        "accelerator": "auto",
        "max_batch_size": 16,
        "batch_timeout": 0.01,
    }
    server = lit.LitServer(classifier_api, **server_options)
    server.run(port=8080)