Tinyants21 committed on
Commit
63a78d0
1 Parent(s): f5f2fbb

add all files without weights

Browse files
VIN_AI_gpt_2.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"cells":[{"cell_type":"markdown","metadata":{"id":"eHpjThR21g30"},"source":["# Load the library"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":26184,"status":"ok","timestamp":1682639099681,"user":{"displayName":"Nagarajan S","userId":"06327743267508547970"},"user_tz":-120},"id":"rCd-oTGxXyc1","outputId":"314ff6b9-1b83-403d-a334-5f7b29009b42"},"outputs":[{"name":"stdout","output_type":"stream","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.28.1)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.14.1)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.0)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n","Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.5.0)\n","Requirement already 
satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (2023.4.0)\n","Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n","Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.15)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2022.12.7)\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.0.0+cu118)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.12.0)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.1)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.11.1)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch) (2.0.0)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch) (4.5.0)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (16.0.2)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch) (3.25.2)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.2)\n","Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from 
# Functions to read different file types
def read_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path``, concatenated.

    Args:
        file_path: Path of the PDF; it is opened in binary mode and handed to
            ``PdfReader``.

    Returns:
        One string holding the extracted text of all pages, in page order.
    """
    with open(file_path, "rb") as file:
        pdf_reader = PdfReader(file)
        # Extraction happens while the file is still open.  `or ""` keeps the
        # join safe if a page yields no text object (presumably image-only
        # pages -- TODO confirm for the PyPDF2 version in use).
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def read_word(file_path):
    """Return the paragraphs of the .docx at ``file_path``, one per line.

    Each paragraph's text is followed by a newline, so the result ends with
    a newline whenever the document has at least one paragraph.
    """
    doc = docx.Document(file_path)
    return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)


def read_txt(file_path, encoding="utf-8"):
    """Return the full contents of a text file.

    Args:
        file_path: Path of the text file.
        encoding: Codec used to decode the file.  Given explicitly so the
            result does not depend on the platform's default encoding.
    """
    with open(file_path, "r", encoding=encoding) as file:
        return file.read()


def read_documents_from_directory(directory):
    """Concatenate the text of every .pdf, .docx and .txt file in ``directory``.

    Files are visited in sorted name order so the combined text is the same
    on every run (``os.listdir`` makes no ordering promise).  Extensions are
    matched case-insensitively; sub-directories and files with any other
    extension are skipped.

    Args:
        directory: Folder to scan (not recursive).

    Returns:
        The texts joined without any separator, or ``""`` if nothing matched.
    """
    # Looked up at call time so a re-defined reader in a later cell is honoured.
    readers = (
        (".pdf", read_pdf),
        (".docx", read_word),
        (".txt", read_txt),
    )

    parts = []
    for filename in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, filename)
        if not os.path.isfile(file_path):
            continue  # e.g. a checkpoint folder; nothing to read
        lowered = filename.lower()
        for suffix, reader in readers:
            if lowered.endswith(suffix):
                parts.append(reader(file_path))
                break
    return "".join(parts)
def _split_corpus(text, train_fraction):
    """Cut ``text`` at ``int(train_fraction * len(text))`` into (train, validation)."""
    split_index = int(train_fraction * len(text))
    return text[:split_index], text[split_index:]


def train_chatbot(directory, model_output_path, train_fraction=0.8,
                  model_name="gpt2-large", num_train_epochs=60, block_size=128):
    """Fine-tune a GPT-2 checkpoint on the documents found in ``directory``.

    The documents are merged into one text, runs of newlines are collapsed,
    and the text is split by character position into ``train.txt`` and
    ``val.txt`` in the current working directory.  The model and tokenizer
    are then trained and saved to ``model_output_path``.

    Args:
        directory: Folder holding the .pdf/.docx/.txt training documents.
        model_output_path: Where the Trainer output, final model and
            tokenizer files are written.
        train_fraction: Share of the characters used for training; must be
            in ``(0, 1]``.
        model_name: Pretrained checkpoint to start from
            (also try gpt2, gpt2-medium, gpt2-xl).
        num_train_epochs: Number of passes over the training set.
        block_size: Token length of each training example.

    Raises:
        ValueError: If ``train_fraction`` is outside ``(0, 1]`` or the
            directory yields no text at all.
    """
    # Checked first so a bad argument fails before any file or model work.
    if not 0 < train_fraction <= 1:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction!r}")

    # Read documents from the directory
    combined_text = read_documents_from_directory(directory)
    combined_text = re.sub(r'\n+', '\n', combined_text).strip()  # Remove excess newline characters
    if not combined_text:
        raise ValueError(f"No text could be read from {directory!r}")

    # Split the text into training and validation sets
    train_text, val_text = _split_corpus(combined_text, train_fraction)

    # Save the training and validation data as text files.  UTF-8 is given
    # explicitly so the files do not depend on the platform default encoding
    # (TextDataset is expected to read them back as UTF-8 -- see its source).
    with open("train.txt", "w", encoding="utf-8") as f:
        f.write(train_text)
    with open("val.txt", "w", encoding="utf-8") as f:
        f.write(val_text)

    # Set up the tokenizer and model
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
    model = GPT2LMHeadModel.from_pretrained(model_name)

    # Prepare the dataset.  overwrite_cache=True forces re-tokenisation of the
    # files just written; otherwise a feature cache left by an earlier run
    # would be reused even though the documents may have changed.
    train_dataset = TextDataset(tokenizer=tokenizer, file_path="train.txt",
                                block_size=block_size, overwrite_cache=True)
    val_dataset = TextDataset(tokenizer=tokenizer, file_path="val.txt",
                              block_size=block_size, overwrite_cache=True)
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Set up the training arguments
    # NOTE(review): no evaluation strategy is set, so val_dataset looks unused
    # during training -- confirm against the TrainingArguments defaults.
    training_args = TrainingArguments(
        output_dir=model_output_path,
        overwrite_output_dir=True,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=num_train_epochs,
        save_steps=10_000,
        save_total_limit=2,
        logging_dir='./logs',
    )

    # Train the model
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
    )

    trainer.train()
    trainer.save_model(model_output_path)

    # Save the tokenizer
    tokenizer.save_pretrained(model_output_path)


def generate_response(model, tokenizer, prompt, max_length=100, **generate_kwargs):
    """Generate a continuation of ``prompt`` and return it as decoded text.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode``, ``decode`` and ``eos_token_id``.
        prompt: Text to continue.
        max_length: Passed to ``generate`` as ``max_length``.
        **generate_kwargs: Extra keyword arguments forwarded unchanged to
            ``model.generate`` (for example ``no_repeat_ngram_size``).

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Put the inputs on the model's device when the model reports one.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    # Create the attention mask and pad token id
    attention_mask = torch.ones_like(input_ids)
    pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=1,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
        **generate_kwargs,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)


#/content/drive/MyDrive/Colab Notebooks/canine_distemper/canine.docx


def main(directory="/content/drive/MyDrive/Colab Notebooks/canine_distemper/",
         model_output_path="/content/drive/MyDrive/Colab Notebooks/canine_distemper/"):
    """Train on ``directory``, reload the saved model and print one sample answer.

    Args:
        directory: Data directory with the training documents.
        model_output_path: Folder the fine-tuned model is saved to and
            reloaded from.
    """
    # NOTE(review): by default the data directory and the output directory are
    # the same folder, so tokenizer files saved there (this commit lists
    # merges.txt next to canine.docx) would be read as .txt training documents
    # on a re-run.  Kept as-is because later cells load from this path;
    # consider a separate output folder.

    # Train the chatbot
    train_chatbot(directory, model_output_path)

    # Load the fine-tuned model and tokenizer
    model = GPT2LMHeadModel.from_pretrained(model_output_path)
    tokenizer = GPT2Tokenizer.from_pretrained(model_output_path)

    # Test the chatbot
    prompt = "What is canine distemper?"
    response = generate_response(model, tokenizer, prompt)
    print("VIN AI response:", response)
Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n"," warnings.warn(\n"]},{"data":{"text/html":["\n"," <div>\n"," \n"," <progress value='540' max='540' style='width:300px; height:20px; vertical-align: middle;'></progress>\n"," [540/540 08:50, Epoch 60/60]\n"," </div>\n"," <table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: left;\">\n"," <th>Step</th>\n"," <th>Training Loss</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <td>500</td>\n"," <td>0.142000</td>\n"," </tr>\n"," </tbody>\n","</table><p>"],"text/plain":["<IPython.core.display.HTML object>"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["VIN AI response: What is canine distemper?\n","Canine distemper virus (CDV) is a highly contagious, multi-systemic disease that can affect many terrestrial carnivores.2,19,23 Dogs are the principal reservoir host. The disease occurs world-wide.2\n","Etiology\n","Canine distemper virus is an RNA virus of the Paramyxoviridae family and Morbillivirus genus. 
if __name__ == "__main__":
    main()


# Test the model
#
# NOTE(review): this repeats the generate_response helper from the training
# section (there the default max_length is 100, here 250) so the section can
# run on its own; the later definition shadows the earlier one.
def generate_response(model, tokenizer, prompt, max_length=250, **generate_kwargs):
    """Generate a continuation of ``prompt`` and return it as decoded text.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode``, ``decode`` and ``eos_token_id``.
        prompt: Text to continue.
        max_length: Passed to ``generate`` as ``max_length``.
        **generate_kwargs: Extra keyword arguments forwarded unchanged to
            ``model.generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Put the inputs on the model's device when the model reports one.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    # Create the attention mask and pad token id
    attention_mask = torch.ones_like(input_ids)
    pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=1,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
        **generate_kwargs,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)


# (prompt, max_length) pairs for the demo run below, in display order.
DEMO_PROMPTS = [
    ("What is canine distemper?", 250),
    ("What is clinical signs and symptoms of canine distemper virus?", 250),
    ("What are all the treatment options of canine distemper virus", 200),
    ("What are all the diagnostic options of canine distemper virus", 250),
]


if __name__ == "__main__":
    model_path = "/content/drive/MyDrive/Colab Notebooks/canine_distemper/"
    # Load the fine-tuned model and tokenizer
    my_chat_model = GPT2LMHeadModel.from_pretrained(model_path)
    my_chat_tokenizer = GPT2Tokenizer.from_pretrained(model_path)

    # One loop instead of one copy-pasted cell per question.
    for prompt, max_length in DEMO_PROMPTS:
        response = generate_response(my_chat_model, my_chat_tokenizer, prompt, max_length=max_length)
        print("VIN AI response:", response)
def generate_response(model, tokenizer, prompt, max_length=250, **generate_kwargs):
    """Generate a continuation of ``prompt`` and return it as decoded text.

    Args:
        model: Object exposing ``generate(input_ids, **kwargs)``.
        tokenizer: Object exposing ``encode``, ``decode`` and ``eos_token_id``.
        prompt: Text to continue.
        max_length: Passed to ``generate`` as ``max_length``.
        **generate_kwargs: Extra keyword arguments forwarded unchanged to
            ``model.generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    input_ids = tokenizer.encode(prompt, return_tensors="pt")

    # Put the inputs on the model's device when the model reports one.
    device = getattr(model, "device", None)
    if device is not None:
        input_ids = input_ids.to(device)

    # Create the attention mask and pad token id
    attention_mask = torch.ones_like(input_ids)
    pad_token_id = tokenizer.eos_token_id

    output = model.generate(
        input_ids,
        max_length=max_length,
        num_return_sequences=1,
        attention_mask=attention_mask,
        pad_token_id=pad_token_id,
        **generate_kwargs,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)


def generate_response_gradio(prompt):
    """Gradio callback: answer ``prompt`` with the loaded fine-tuned model.

    Uses the module-level ``my_chat_model`` / ``my_chat_tokenizer`` that the
    launch section below creates.  A blank prompt returns ``""`` without
    calling the model, because it would give ``generate`` no tokens to
    continue.
    """
    if not prompt or not prompt.strip():
        return ""
    response = generate_response(my_chat_model, my_chat_tokenizer, prompt, max_length=250)
    return response


if __name__ == "__main__":
    import os

    #c:\Users\arvin\Documents\can\
    # The folder can be overridden with VIN_AI_MODEL_DIR so the notebook is
    # not tied to one machine; the original location remains the default.
    model_path = os.environ.get("VIN_AI_MODEL_DIR", "c:\\Users\\arvin\\Documents\\can\\")

    # Load the fine-tuned model and tokenizer
    my_chat_model = GPT2LMHeadModel.from_pretrained(model_path)
    my_chat_tokenizer = GPT2Tokenizer.from_pretrained(model_path)

    gr.Interface(fn=generate_response_gradio, inputs="text", outputs="text", title="Canine Distemper FAQ").launch()
canine.docx ADDED
Binary file (43 kB). View file
 
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2-large",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1280,
16
+ "n_head": 20,
17
+ "n_inner": null,
18
+ "n_layer": 36,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.28.1",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.28.1"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": true,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "<|endoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "errors": "replace",
22
+ "model_max_length": 1024,
23
+ "pad_token": null,
24
+ "tokenizer_class": "GPT2Tokenizer",
25
+ "unk_token": {
26
+ "__type": "AddedToken",
27
+ "content": "<|endoftext|>",
28
+ "lstrip": false,
29
+ "normalized": true,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fa29eb7f313073482a2746f24f0b4cd2dff0041ad0a7a0af5dbd44c6885b98e
3
+ size 3579
vocab.json ADDED
The diff for this file is too large to render. See raw diff