{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-title",
   "metadata": {},
   "source": [
    "# LLaVA with a Phi language model: configuration scratchpad\n",
    "\n",
    "Builds a `LlavaForConditionalGeneration` model from default `CLIPVisionConfig` and `PhiConfig` objects, displays the resulting configuration, and clones the `zhuyiche/llava-phi` repository."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "99576983-f881-47c8-8b5e-c6f561a93e71",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import transformers\n",
    "from transformers import (\n",
    "    CLIPVisionConfig,\n",
    "    LlavaConfig,\n",
    "    LlavaForConditionalGeneration,\n",
    "    PhiConfig,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-build-model",
   "metadata": {},
   "source": [
    "## Build the model from configuration objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "58ba19f2-4b91-4f90-a33d-4c1ed17e202a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Default CLIP vision-encoder configuration\n",
    "vision_config = CLIPVisionConfig()\n",
    "\n",
    "# Default Phi language-model configuration\n",
    "text_config = PhiConfig()\n",
    "\n",
    "# LLaVA configuration wrapping the CLIP vision config and the Phi text config\n",
    "configuration = LlavaConfig(vision_config, text_config)\n",
    "\n",
    "# Build the model from the configuration objects (no checkpoint is referenced here)\n",
    "model = LlavaForConditionalGeneration(configuration)\n",
    "\n",
    "# Keep `configuration` bound to the config held by the model\n",
    "configuration = model.config"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-inspect-config",
   "metadata": {},
   "source": [
    "## Inspect the resulting configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a806a07a-fe72-45a3-8ceb-8e942c6c845d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LlavaConfig {\n",
       " \"ignore_index\": -100,\n",
       " \"image_token_index\": 32000,\n",
       " \"model_type\": \"llava\",\n",
       " \"projector_hidden_act\": \"gelu\",\n",
       " \"text_config\": {\n",
       " \"embd_pdrop\": 0.0,\n",
       " \"hidden_act\": \"gelu_new\",\n",
       " \"hidden_size\": 2048,\n",
       " \"intermediate_size\": 8192,\n",
       " \"layer_norm_eps\": 1e-05,\n",
       " \"model_type\": \"phi\",\n",
       " \"num_hidden_layers\": 24,\n",
       " \"partial_rotary_factor\": 0.5,\n",
       " \"qk_layernorm\": false,\n",
       " \"resid_pdrop\": 0.0,\n",
       " \"vocab_size\": 51200\n",
       " },\n",
       " \"transformers_version\": \"4.36.2\",\n",
       " \"vision_config\": {\n",
       " \"hidden_size\": 768,\n",
       " \"image_size\": 224,\n",
       " \"intermediate_size\": 3072,\n",
       " \"model_type\": \"clip_vision_model\",\n",
       " \"num_attention_heads\": 12,\n",
       " \"num_hidden_layers\": 12,\n",
       " \"patch_size\": 32,\n",
       " \"projection_dim\": 512\n",
       " },\n",
       " \"vision_feature_layer\": -2,\n",
       " \"vision_feature_select_strategy\": \"default\",\n",
       " \"vocab_size\": 32000\n",
       "}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.config"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-config-note",
   "metadata": {},
   "source": [
    "**TODO (review):** `LlavaConfig` was created without overriding `image_token_index` or `vocab_size`, so both show their default of 32000, while the Phi `text_config` reports a `vocab_size` of 51200. Confirm which image-token id and vocabulary size the Phi tokenizer setup needs before training or loading weights."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-clone-repo",
   "metadata": {},
   "source": [
    "## Clone the `llava-phi` repository"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "79efbc6b-f005-4a5c-82a1-112fa37f1904",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cloning into 'llava-phi'...\n",
      "remote: Enumerating objects: 151, done.\u001b[K\n",
      "remote: Counting objects: 100% (151/151), done.\u001b[K\n",
      "remote: Compressing objects: 100% (116/116), done.\u001b[K\n",
      "remote: Total 151 (delta 36), reused 133 (delta 25), pack-reused 0\u001b[K\n",
      "Receiving objects: 100% (151/151), 333.89 KiB | 112.00 KiB/s, done.\n",
      "Resolving deltas: 100% (36/36), done.\n"
     ]
    }
   ],
   "source": [
    "REPO_URL = \"https://github.com/zhuyiche/llava-phi.git\"\n",
    "REPO_DIR = Path(\"llava-phi\")\n",
    "\n",
    "# `git clone` refuses to write into an existing non-empty directory, so only\n",
    "# clone when the checkout is missing; this keeps Restart & Run All clean.\n",
    "if REPO_DIR.exists():\n",
    "    print(f\"{REPO_DIR} already exists; skipping clone\")\n",
    "else:\n",
    "    !git clone {REPO_URL} {REPO_DIR}"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}