{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "81a2413e-8629-4016-aace-17d2f757f726", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://hf-mirror.com\n" ] } ], "source": [ "import os\n", "\n", "# 设置环境变量\n", "os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'\n", "\n", "# 打印环境变量以确认设置成功\n", "print(os.environ.get('HF_ENDPOINT'))" ] }, { "cell_type": "code", "execution_count": 2, "id": "89e2d33a-6d84-4ef3-b44e-daa57ac81e58", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-11-24 11:21:51.020375: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2024-11-24 11:21:51.036615: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-11-24 11:21:51.053557: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-11-24 11:21:51.058466: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-11-24 11:21:51.071840: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-11-24 11:21:51.923693: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], "source": [ "from transformers import AutoTokenizer, AutoConfig,AutoModel\n", "from transformers import DataCollatorForLanguageModeling\n", "from transformers import Trainer, TrainingArguments\n", "from transformers import AutoConfig, AutoModelForCausalLM,LlamaForCausalLM,LlamaTokenizer\n", "from tokenizers import Tokenizer\n", "from datasets import load_dataset" ] }, { "cell_type": "code", "execution_count": 3, "id": "68fc5c44-b444-402e-aaf2-0ba4e2000e42", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['instruction', 'input', 'output'],\n", " num_rows: 19839\n", " })\n", "})" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from datasets import load_dataset\n", "dna_ft_dataset = load_dataset('json', data_files='val_data.json')\n", "dna_ft_dataset" ] }, { "cell_type": "code", "execution_count": 4, "id": "4ab4fd3e-5b59-470e-9b46-f0ffd7b9d1aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['instruction', 'input', 'output'],\n", " num_rows: 1983\n", " })\n", " test: Dataset({\n", " features: ['instruction', 'input', 'output'],\n", " num_rows: 17856\n", " })\n", "})" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = dna_ft_dataset[\"train\"].train_test_split(train_size=0.1, seed=42)\n", "data" ] }, { "cell_type": "code", "execution_count": 5, "id": "85ca97f5-6864-4d6f-944a-182ed1fa2f00", "metadata": {}, "outputs": [], "source": [ "tokenizer = LlamaTokenizer.from_pretrained(\"dnahlm-llama-7b-sft-v0\") #dnagpt/dnahlm-llama-7b-sft-v0\n", "tokenizer.pad_token = tokenizer.eos_token" ] }, { "cell_type": "code", "execution_count": 6, "id": "e904c0b2-bf21-4036-b510-8e57177c1767", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "99ce92d0373a498d929bed42f770ed16", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/3 [00:00',\n", " '<0x0A>',\n", " '##',\n", " '#',\n", " '▁Inst',\n", " 'ruction',\n", " ':',\n", " '<0x0A>',\n", " 'Det',\n", " 'erm',\n", " 'ine',\n", " '▁core',\n", " '▁prom',\n", " 'oter',\n", " '▁detection',\n", " '▁of',\n", " '▁following',\n", " '▁d',\n", " 'na',\n", " '▁sequence',\n", " ',',\n", " '▁The',\n", " '▁result',\n", " '▁will',\n", " '▁be',\n", " '▁one',\n", " '▁of',\n", " '▁the',\n", " '▁following',\n", " ':',\n", " '▁Non',\n", " '-',\n", " 'prom',\n", " 'oter',\n", " ',',\n", " '▁prom',\n", " 'oter',\n", " '.',\n", " '<0x0A>',\n", " '<0x0A>',\n", " '##',\n", " '#',\n", " '▁Input',\n", " ':',\n", " '<0x0A>',\n", " 'CCGTG',\n", " 'C',\n", " 'GAC',\n", " 'CGGAA',\n", " 'GTG',\n", " 'GGGC',\n", " 'GGC',\n", " 'GAC',\n", " 'CCCGGAA',\n", " 'GTCC',\n", " 'CCGCC',\n", " 'GGGTG',\n", " 'CA',\n", " 'GCT',\n", " 'TG',\n", " 'GTC',\n", " 'GGT',\n", " 'TC',\n", " 'GATCGCC',\n", " '<0x0A>',\n", " '<0x0A>',\n", " '##',\n", " '#',\n", " '▁Response',\n", " ':',\n", " '<0x0A>',\n", " 'prom',\n", " 'oter']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.tokenize(prompt)" ] }, { "cell_type": "code", "execution_count": 11, "id": "f0449aee-1ac6-4db5-873f-afdfb0fc9691", "metadata": {}, "outputs": [], "source": [ "def inference(text, model, tokenizer, max_input_tokens=1000, max_output_tokens=1000):\n", " # Tokenize\n", " input_ids = tokenizer.encode(\n", " text,\n", " return_tensors=\"pt\",\n", " truncation=True,\n", " max_length=max_input_tokens\n", " # return_attention_mask=True,\n", " )\n", "\n", " # Generate\n", " device = model.device\n", " generated_tokens_with_prompt = model.generate(\n", " input_ids=input_ids.to(device),\n", " #max_length=max_output_tokens,\n", " max_new_tokens=8,\n", " temperature=0.01 # 控制生成的多样性\n", " )\n", "\n", " # Decode\n", " generated_text_with_prompt = tokenizer.decode(generated_tokens_with_prompt[0], skip_special_tokens=True)\n", " generated_text_answer = generated_text_with_prompt[len(text):]\n", "\n", "\n", " return generated_text_answer\n", "\n", "# 如果需要进一步清理\n", "def clean_generated_text(text):\n", " # 去除 'Ġ' 符号并替换为空格\n", " text = text.replace('Ġ', ' ')\n", " # 去除多余的空格\n", " text = ' '.join(text.split())\n", " return text" ] }, { "cell_type": "code", "execution_count": 12, "id": "e9041426-eb59-4314-82dd-7b6d6d477783", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "input (test): Below is an instruction that describes a task. Write a response that appropriately completes the request.\n", "\n", "### Instruction:\n", "Determine core promoter detection of following dna sequence, The result will be one of the following: Non-promoter, promoter.\n", "\n", "### Input:\n", "CCGTGCGACCGGAAGTGGGGCGGCGACCCCGGAAGTCCCCGCCGGGTGCAGCTTGGTCGGTTCGATCGCC\n", "\n", "### Response:\n", "\n", "real answer: promoter\n", "--------------------------\n", "\n", "model's answer: \n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/root/miniconda3/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:601: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.01` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.\n", " warnings.warn(\n", "Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " Non-promoter\n" ] } ], "source": [ "input_text = format_input(data[\"test\"][0])\n", "\n", "print(\"input (test):\", input_text)\n", "\n", "print(\"real answer:\", data[\"test\"][0][\"output\"])\n", "\n", "print(\"--------------------------\\n\")\n", "\n", "print(\"model's answer: \\n\")\n", "print(inference(input_text, model, tokenizer))" ] }, { "cell_type": "code", "execution_count": 13, "id": "d1489173-84af-4c8e-b66b-0cdbe42c7ea7", "metadata": {}, "outputs": [], "source": [ "test_data = data[\"test\"].shuffle(seed=199).select(range(1000))\n", "\n", "data_list = []\n", "\n", "for entry in test_data:\n", " input_text = format_input(entry)\n", " #print(input_text)\n", " response_text = inference(input_text, model, tokenizer)\n", " #print(response_text)\n", " data = {\n", " \"instruction\":entry[\"instruction\"],\n", " \"input\":entry[\"input\"],\n", " \"output\":entry[\"output\"],\n", " \"model_response\":response_text\n", " }\n", "\n", " data_list.append(data)" ] }, { "cell_type": "code", "execution_count": 14, "id": "39275fe6-ac3b-4558-9f4c-2853a41d48c4", "metadata": {}, "outputs": [], "source": [ "import json\n", "\n", "# 定义输出文件路径\n", "output_file = 'llama-sft-2.json'\n", "\n", "# 将 Dataset 对象导出为 JSON 文件\n", "# test_data.to_json(output_file)\n", "with open(output_file, \"w\") as file:\n", " json.dump(data_list, file, indent=4) # \"indent\" for pretty-printing\n", "\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "7ffaba65-a270-4433-b234-932f5e288f7c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'▁prom oter'" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\" \".join(tokenizer.tokenize(\"promoter\"))" ] }, { "cell_type": "code", "execution_count": 16, "id": "a7e373a4-6857-4874-b2da-58da2928925d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Acceptor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Courses\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Binding Sites |||||||||||| Court\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Acceptor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Binding Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Background Sequences\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "Background Sequences |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Splice Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Acceptor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Coursing\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Acceptor Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Binding Sites |||||||||||| Binding Sites\n", "Binding Sites |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Acceptor Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Binding Sites |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| D Donor Sites\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Acceptor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Binding Sites |||||||||||| Binding Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Binding Sites |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "Donor Sites |||||||||||| Acceptor Sites\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| C promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Binding Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Splice Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Background Sequences |||||||||||| Binding Sites\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Acceptor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Background Sequences |||||||||||| Background Sequences\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-Splice Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Acceptor Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Donor Sites |||||||||||| Acceptor Sites\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Non-Splice Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-Splice Sites |||||||||||| Non-Splice Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Donor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| promoter\n", "Donor Sites |||||||||||| Acceptor Sites\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Acceptor Sites |||||||||||| Donor Sites\n", "promoter |||||||||||| promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "Binding Sites |||||||||||| Binding Sites\n", "Donor Sites |||||||||||| Donor Sites\n", "Binding Sites |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "promoter |||||||||||| promoter\n", "Background Sequences |||||||||||| Binding Sites\n", "Non-promoter |||||||||||| Non-promoter\n", "Background Sequences |||||||||||| Background Sequences\n", "promoter |||||||||||| promoter\n", "Non-promoter |||||||||||| Non-promoter\n", "presicion 0.739 same 0.253\n" ] } ], "source": [ "import json\n", "from tqdm import tqdm\n", "\n", "\n", "\n", "with open(output_file, \"r\") as file:\n", " test_data = json.load(file)\n", "\n", "all_num = len(test_data)\n", "right_sum = 0\n", "same_sum = 0\n", "for item in test_data:\n", " output = item[\"output\"]\n", " #output = \" \".join(tokenizer.tokenize(output))\n", " model_response = item[\"model_response\"]\n", "\n", " print(output,\"||||||||||||\", model_response)\n", "\n", " if model_response == output: #same it\n", " same_sum = same_sum + 1\n", " \n", " if output.find(\"Non\")==-1: # no Non\n", " if model_response.find(output)!=-1 and model_response.find(\"Non\")==-1: #find it, but no Non\n", " right_sum = right_sum + 1\n", " else:\n", " if model_response.find(output)!=-1: #find it\n", " right_sum = right_sum + 1\n", "\n", "\n", "print(\"presicion\", right_sum/all_num, \"same\", same_sum/all_num)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "294d46f3-2f5b-4e55-ae41-081d5195f5e2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }