GenerRNA / tokenizer /merges.txt
MasaakiKotera's picture
Upload folder using huggingface_hub
580a9eb verified
raw
history blame
9.67 kB
#version: 0.2
G G
A A
U U
C C
A G
U G
A C
U C
U GG
G C
A GG
U AA
AG C
A UU
A CC
A UG
U CC
G AA
U AC
UG C
A UC
U AG
UU C
AA C
GG C
UU G
AA G
GG G
CC C
A U
UG AA
AC G
A UGG
UAA C
GC G
UC G
AGG C
UG AG
UGG C
AUG C
AG CC
UG CC
A UCC
GG AA
A UAA
U ACC
UU UU
AC GG
AGG G
U AUU
AG AA
UU GG
G CC
U AGC
UC AA
UU AA
GAA C
AC AA
U AGG
UGG G
ACC C
UGG GG
UG AC
A UAC
GC GG
AGG AA
UU CC
AGC G
UCC C
GC AA
AG AC
GG CC
AUU C
UU UC
UU GC
AA AA
AG AG
CC G
UAC G
AC AC
A UAG
GG GG
UG UC
UAA G
U AUC
UU UG
AG UC
AUU G
GGC G
AA AC
AAG UC
AUG AA
UG UG
UC UC
UGG AA
ACC G
AG UG
UGG GC
UU AC
UC GC
UUC GG
UGC G
U AUG
UUC G
UU UAA
UC GG
UU AG
UC UG
UCC G
AAC G
GAA G
AG UGG
UC AC
A UAAC
UG AGG
AUGG C
AG AGG
AG AU
AUG CC
ACC AA
U AGGC
GG GC
AUU AA
GG UGAA
UAC GGG
AUC G
AAG CC
UU GAA
U AU
UAC AC
GC GAA
CC UU
CCC G
AUGC G
AC UC
UG UU
GGC UAAC
AA GG
UC AG
AC GC
UG UAGC
UGAC GC
UGAG AC
UU CCC
GCG UAA
AGC AGCC
AUGG G
AGGC AGC
AG UGGC
AGG GC
UAC AA
UC AGC
AUU GG
GAA GGC
GCGG UAA
AC GAA
UU AAG
UGCC AGCAGCC
CC CC
UG UCG
UU GGG
UGC AA
AGC AA
AUCC C
UACGGG AGGCAGC
AC GGG
UU AAC
UUC GGG
AC UG
UUG CC
ACC GG
UAG AG
UUG UAA
UAG UCC
AGG AUU
UAA GG
UGCCAGCAGCC GCGGUAA
UCC UACGGGAGGCAGC
AGG GG
GC ACAA
AU AUU
AU AUC
AG UU
UAA UACC
UAA AA
UAC GG
AGG AAC
UG CCC
UUG AC
GAA GC
UGG CC
UC UU
U AGGG
AG CCC
UGAGAC ACGG
AUU AGC
AUAA G
GC UGGC
AGG CC
AC UU
AGCG UGGGG
AGC AAAC
ACC CC
AGGAA GG
UAA CC
UUG ACGG
AGGAUU AGAU
AC AG
AGAC UCCUACGGGAGGCAGC
UGG UC
GGC GG
GC GC
UACC C
UGG AGC
AUG UGG
AGUGG GGAA
UGUAGC GGUGAA
GGG CC
UAG UAA
UCC GG
N N
Ġ G
UG AAG
UAG CC
AC AUGC
UC AAC
AG UAC
AGG AUG
AGGAUUAGAU ACCC
AG UGC
U AUGG
ACG CC
UAG UUGG
UG UAA
AA CC
AC UGC
UAUU GG
UGG GCGUAA
UU UUC
UGAG UAAC
UGC UGC
UAG AA
AUU CC
UAA GC
U AUAA
ACG UC
AA GC
AG AAG
AGC GCAA
UG ACGG
AGC GC
UU UUG
UGC AAC
GG UG
UCAGC UCG
UGG UAGUCC
UU UCC
AGC GG
AG AAC
UG AAC
UGG AAC
UUC AA
AG AUU
AC UCC
AG AUG
UU AUU
UGG GAA
AG AGC
AUC AA
GGCUAAC UACG
AGC AAC
AGCGUGGGG AGCAAAC
AGGAUUAGAUACCC UGGUAGUCC
AC AUU
UAA ACG
UG AGGC
UG UGAA
GAA GG
AUGCG UAG
UU GGC
UAGG GG
AG UGAA
AGG GAA
UG AUC
UGGG AC
UUAAG UCCC
ACCG CCCG
GAAC GCUGGC
AAGUC GAAC
AGC GAA
AGC UC
UCAA AGGAA
UCAA CC
GCC UGGGG
CCC UU
UCC CC
UGCC G
UG AUCC
UGAG AUG
UU ACC
AGACUCCUACGGGAGGCAGC AGUGGGGAA
UCG CC
GGG AC
AGGC G
AGGAAGG UGGGG
GGG CCC
UAUU C
UACG UUCCC
UCAAAGGAA UUGACGG
AU ACC
UGAGACACGG CCC
AU AGC
UC AUU
UACAC ACCGCCCG
UC GAA
AU AGG
AUC GG
UU UGG
UG AUGC
UGC GAA
UAG AGG
GCACAA GCGG
AAG UCG
UGGC G
UC GGAA
GGCG ACG
UG AUG
UAGC G
AUAC G
AACG AGCGCAA
UCAGCUCG UGUCG
CCUU ACC
UUGGG UUAAGUCCC
ACG UGC
AC UGG
AG UAA
GCAA GG
UG UGG
UCAGCUCGUGUCG UGAGAUG
U AUCC
AG UUC
AG ACC
ACC AGUGGC
AU AUG
UC AGG
UC UUC
GC AACGAGCGCAA
UUAC UGGGCGUAA
UG AUU
UU AUCC
UG ACC
UC AUC
U AUGC
UAG UC
GCC G
AUC AGC
AUUC G
UAAUACC GC
UGG GGG
AAC GCGAA
AC AGG
UGC GG
GGCUAAC UCCG
AUGUGG UUUAA
AGGAAGGUGGGG AUG
GCC GCG
UCGC UAGUAA
UGAA AC
AC GGC
UUG UACACACCGCCCG
UGGAA UUCC
UCAGCUCGUGUCGUGAGAUG UUGGGUUAAGUCCC
GAA AGCC
AUU GC
UGGUC UGAG
UC GGC
UCC AA
AUUAGC UAGUUGG
UAGGC GG
AG UUG
GAAC GGG
UC ACC
UAA AAC
UC UGG
UAUU G
UUCGGG UUGUAA
AC UUC
ACGUGC UACAA
GAA AA
Ġ C
UC UGC
UU UGC
UACC G
UGCAAC UCG
AUGGC UGUCG
UUG UUGG
UG UGC
AACGCGAA GAA
UG UUC
AG AGGG
AUCC G
GCG UUG
AAC AA
UU GGAA
AU AU
UC UCC
AG AUC
ACG CCG
UUAA CC
AGGC GG
ACAUGC AAGUCGAAC
UGG UGC
UAA UAC
UACC AA
GG CCC
AGC AC
UGAA CC
ACAC UGGGAC
AUC AGG
UGAGUAAC ACG
GCG UG
U AUAC
UUG UC
GAA UC
UU AGC
UCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCC GCAACGAGCGCAA
AGCAAC GCCGCG
AG UAGG
UAAC G
GCACAAGCGG UGGAGC
UU UAG
AGG GGG
UACAC ACGUGCUACAA
GGC GC
UGAG AGGG
UGC GC
AGCC G
UGAA UACGUUCCC
UU UAC
NN NN
GGCUAACUCCG UGCCAGCAGCCGCGGUAA
AUGG AA
UGGGC GC
UGGGC GAAAGCC
U AUAG
GAA GGCC
GGC AGC
AUC GC
UG UUG
ACCC G
GGCUAACUACG UGCCAGCAGCCGCGGUAA
AG UCC
AUC AGCC
UG UCC
UCAAAGGAAUUGACGG GGGCCC
UGACGC UGAGGC
UG AGC
UUCC G
AAG AC
GAAC G
AACGCGAAGAA CCUUACC
UGGC GAAGGC
AAC GC
GG AUU
ACACUGGGAC UGAGACACGGCCC
AGCC AUGCC
AUG AC
GCACAAGCGGUGGAGC AUGUGGUUUAA
GCG CC
UG AUGG
UGCUGC AUGGCUGUCG
AGG UC
AUG UC
GCC CC
UG ACG
GGGCC UUGUACACACCGCCCG
ACC UC
GG GGC
UCAC ACC
AUUGG GCGUAA
UAAC AAGG
UUAA AA
ACC GC
UGUAGCGGUGAA AUGCGUAG
UUGG GC
UAAC GGC
ACGCCG UAAACG
GAA AC
AGC UAAC
AUAA AA
AGCGUGGGGAGCAAAC AGGAUUAGAUACCCUGGUAGUCC
AGC GAAC
UCC UC
AGAA AA
UGAA GG
UAUU GCACAA
UGGGCGC AAGCC
UUUU GG
UGGUCUGAG AGGAUG
AUC AG
ACCAA GGCGACG
GAACGCUGGC GGC
GGCUAACUACGUGCCAGCAGCCGCGGUAA UACG
UGAGACACGG UCC
AGAA GC
UCCC G
UUC GGAA
ACGG CC
U AUGAA
UGACGC UGAGG
AUC AUU
ACCAGUGGC GAAGGC
ACCAA GGC
U A
UU UGAC
AC AAC
AUG UGAA
UGUAGCGGUGAA AUGC
UG UAG
UGG GAAC
AUC AUC
GGCC G
UAA UC
GGCUAACUCCGUGCCAGCAGCCGCGGUAA UACGG
UAA AGC
UGGC UC
AGAC UGCC
UG UAC
UUG AG
GG UC
ACAC UGGAAC
UAAUACC GG
UCC GGAA
UUAA UCGGAA
GCCUGGGG AGUAC
AUAA CC
AG UCG
AUGG GC
GGG UC
AG UUCGG
AGGAAGGUGGGGAUG ACGUC
UGAG AA
AUCC CC
GCGG UGAAUACGUUCCC
UGG AGG
AGAG UUUG
UGCAA GCG
UU UAUU
UGG AC
UGAG UAA
GAAGC ACC
AGCCAUGCC GCG
GGC AA
UUG UCG
UAUUGG ACAA
UUGG GG
AUC UC
GAA AGC
UGAA AA
AC AGC
UG AUGAA
GCCUGGGGAGUAC GGCC
GAA CC
UGG GCC
AG AGAA
AUGG GG
AGG CCC
AUAAC G
AUGCG UAGAG
GAA GGCG
UC GGG
UGGC AA
AGCC CC
UC GCGG
GG ACGGG
UCG UAACAAGG
AGC UUGC
UUUU AA
AGCG ACGCC
UUAAUCGGAA UUACUGGGCGUAA
GGAA GG
AC AUC
UGAGUAAC GCG
UU AUG
GCCUGGGG AG
UAG CCG
UGGGG UAA
UUCG AUGC
CCC UUG
CCC GG
UU UGAA
GCAA GGC
AUGG CCCUU
AUU UC
AGG AC
GAACGCUGGC GGCG
GGCGG ACGGG
UUAA GC
U AUGGG
UU AUC
AU AUUC
UGG AG
AAGUC AUC
UGG GGC
AGUC UGCAACUCG
ACCC GC
AG AUGG
GGAA UCGCUAGUAA
UUC UC
AUU AAG
ACG UCAA
CC AA
UUUU GC
UCAA AA
AUG AAG
AUG AAC
AG UAG
AC ACC
UGC UUAAC
ACACUGGAAC UGAGACACGGUCC
ACACUGGGACUGAGACACGGCCC AGACUCCUACGGGAGGCAGCAGUGGGGAA
AC UGGG
UAUC AGC
UGGUGC AUGG
ACAUGC AAGUCG
UAC GAA
UU AUUGGGCGUAA
CCC AA
AUG AUU
AAC GG
UU UCG
AC UUG
AGG AGG
AAAA AA
UUGAA AC
GGGCCUUGUACACACCGCCCG UCACACC
AGGG UGCG
UAAGC ACC
UGC GGC
UGGC GC
UGC AUU
UUUU CC
UCCC AA
UU AGG
AGCG UUG
UGAA GAAGGCC
UCC GC
UU UGGG
UUC GGGG
Ġ AGAGUUUG
UUAUCC GGAUU
UG UUGG
UAGG AA
UGAA GC
Ġ CC
UGC AC
AGUGGC GAACGGG
UAA AC
AC GAACGCUGGCGGCG
UU AAGUC
UGGGG GAA
AUU GAA
AGG UGCUGCAUGGCUGUCG
UAUC G
UUG ACC
UAUU CC
AGUG AGGAA
ACC ACC
UU GCG
ACAA AA
AGG AAG
UAU AUU
UAAUAC AUGC
ACAUU GGGAC
AUGCC CCUU
UGCC CC
AC UAC
AC AAG
GG GAA
AGAA GAAGCACC
UGC GGG
UUC AG
UAGCC GGCC
UUC GAAGC
AGCC AC
AGCAACGCCGCG UGAG
AUU GAACGCUGGCGGC
UAGC UGGUCUGAGAGGAUG
UU GCAA
AUG AAGUC
UAGAGG UGAA
UUGCC AGC
UUAA AAC
AC UAA
UGAG AGG
UC UUG
ACCGG UGGCGAAGGC
AGCC AA
Ġ UACG
AUG CCC
UAGG GC
ACGCCGUAAACG AUG
AG UUGG
UG AGGG
AUU UAA
UGACGG UACC
UC UAA
UGG GGAA
GG AC
UGG UGUAGCGGUGAAAUGC
UC AAG
AG UGGG
AUAC AA
GCUGGC GGCG
GG AG
UAAC ACAUGCAAGUCGAAC
GCG AGG
ACC UGAGAGGG
ACCAGUGGCGAAGGC GGC
UUUU UUUU
UCGC AAG
ACGG UACC
AUG UG
UAC GC
UGCC GC
AU AUAA
GCG UGAG
UAGC GAA
GGUG ACAA
AUU AUU
AG ACG
AA UAA
UCAAAGGAAUUGACGGGGGCCC GCACAAGCGGUGGAGCAUGUGGUUUAA
UUCGG AUUG
AGC UCC
UG UGGC
GGAA UUCC
AUUCG UAG
GG AAC
UGAC AC
UU AGGG
UAGGC GGC
AG UUUU
UGGGC UACACACGUGCUACAA
UGG UUC
ACGUCAA AUCAUC
UGAAG UCGUAACAAGG
AUCC AA
NNNN NNNN
AUC AC
GGGC UCAACC
UAAC GCG
AUG UCC
UUCGG AUCG
UGGGGGAA ACCC
GCC GG
AGGC AA
GGC UAA
UGAUC GGCC
UAGAGGUGAA AUUCGUAG
UG AUAC
AUG ACC
UGAG CC
UGAGG UAA
UC UAC
UGACGCUGAGGC GCGAA
AUU UU
UC GCG
AGC AGG
AUU AG
UCC CCC
UC UGCC
UACCAA GGCGACG
GAA GAA
ĠAGAGUUUG AUCC
GGUGACAA ACCGG
GCAA UGGGCGAAAGCC
UGUAGCGGUGAA AUGCGUAGAG
AGG ACG
AC UGCC
AUGGC GAAGGC
UGG UG
AUGG CC
UC AUG
AGC AUC
GG GGG
AG ACGG
U AUAAC
AGCUAAC GC
U AUAAG
UUG UU
AUGC AA
UUGAC AUCC
AUUC AA
ĠG ACGAACGCUGGCGGCG
GGCUAACUACG UG
AGUGGC GGACGGG
UGGG UAGC
UGAUCC AGCCAUGCCGCG
UUC AC
AGG GGC
UAG AGC
ACC AGCC
GCCUGGGGAG UACGG
ACG CCC
UGCUUAAC ACAUGCAAGUCGAAC
UGGUGCAUGG UUGUCG
UAAC GG
UAAUACCGC AUACG
UGUAGCGGUGAAAUGCGUAG AUAUC
AGCC GCC
GCCUGGGGAGUACGGCC GCAAGG
GGGG GC
UG UGAC
AUU UAC
AGAA GAA
UAUU GC
UUCGAUGC AACGCGAAGAACCUUACC
GGUGAA UACGUUCCC
UAGC AA
ACG UU
UAUUGG UCAA
AU AUGC
AUAG CC
UG UUUU
GGG UUUAA
AGCAA UGCC
UACC CC
UAGG UGG
AGC AUU
AUGAA GC
AGGC GC
UGCC GG
AAGUCAUC AUGGCCCUU
UGCC UAAUACAUGC
CCC GCG
AGCGUGGGGAGCAAAC AGG
ACCAGUGGC GAAGGCG
UGCAAGCG UUAAUCGGAAUUACUGGGCGUAA
UGG UAA
UU UGGC
UAGUCC GG
UACC GG
UG AAGUC
UCAA GC
AGCGUGGGG AGCGAAC
UCG AUG
UAA GAA
U AUGGC
AGCC GG
UGG UGG
ACCC AA
ACC AC
CCC GC
AGAA GG
UUC UUC
GG ACCC
AGG UAG
AG AGGAA
UUC GGC
UCUC AGUUCGG
AUUAA GC
UAC GGGG
UGC UCC
GC AGAU
AGACUCCUACGGGAGGCAGC AGUAGG
AGC UAA
AUCAGG AGGAAC
AUU AC
UC UAG
UAUU AA
AGG UGGUGCAUGGUUGUCG
AGG AGGAAC
ACGUCAAAUCAUC AUGCCCCUU
GCGG CC
UGG AUG
AUU AGAU
UGC AGC
UU UGAG
UGAA GAAGG
UAA UAA
UU UGUC
UGAGG UAACGGC
AUAG AA
AAC UC
UGG GCG
AG UUUC
AUG UUC
UGG ACC
UUCC CC
AG UUGC
UGAA AGCCC
UGC AGG
AAG UCCUC
UG ACCG
UGG AUC
AAG AGG
AUU GCGAA
ACG AUG
UGG AAAC
AUAA GC
GG UGGAA
UGACGCUGAGG AGCGAA
GAAC GC
GAAUC UUCC
UUC GC
ACCUGAGAGGG UGAUCGGCC
ACACUGGAACUGAGACACGGUCC AGACUCCUACGGGAGGCAGCAGUGGGGAA
GGCUAAC UUCG
AG AGAC
AG AGGC
UAC UC
UAAC GGCCC
UUG AGUGC
AAC AC
AG UGCC
AGCGC GCG
UG UGCC
AGC AGC
ACCC GAA
ACAA CC
GGG AGGAAC
ACC UGGGG
ACGAA AGUC
UC AUGC
ACGG GG
AU AUGG
UC UUUU
GGCUAACUCCGUGCCAGCAGCCGCGGUAAUACGG AGGG
UU AGAG
UGAUGC AGCGACGCC
UG UAGG
GC AUGG
AGAA AGCCAC
UGG AUU
UUGC AGAA
UU UCCC
UG UUAA
UU GAAG
UCAA CCG
UGG UAG
GGCUAACUACGUGCCAGCAGCCGCGGUAAUACG UAGG
GAA AGG
UG AGGAA
UU ACCAAGGC
UGUAGC AGUGAA
UAGCCGGCC UGAGAGGG
UGG UAC
UCGCGG AUCAGC
GGAA ACG
UGUG UGAAGAAGGCC
GGCC ACAUUGGGAC
AUC UCC
AGGGC GCG
UG UGGG
UAAC GC
ACC UCC
GGGCCUUGUACACACCGCCCGUCACACC AUGGG
UU UCAA
UGG UCGG
AGGG AC
UAGGC G
UG UAAG
UGG AGGAA
UUGACGG UACC
AGUGAGGAA UAUUGGUCAA
UG ACAA
AUU UUC
AGGUGGUGCAUGGUUGUCG UCAGCUCGUGUCGUGAGAUGUUGGGUUAAGUCCCGCAACGAGCGCAA
AGC ACC
AGAA CC
AUGAAGUC GGAAUCGCUAGUAA
GG UCC
AG UGAC
AG AGCC
UGC UC
AC AUG
UAUG CC
AUUAGC UAG
UCAA CCC
AU AUAC
UAA ACC
AU AUCC
UU AUUC
UAC AGAGGG
UUC GAA
UU GAAC
AG UAAC
AGC GGC
UCC UCC
AGC UCG
UUCGAAGC AACGCGAAGAACCUUACC
U AUCCC
GCGUUG UCCGGAA
UGG CCC
AG UACG
ACGGCC UUCGGGUUGUAA
AAG ACC
UAUUGG GCGUAA
AA GAA
UU UAAG
UUCGGAA UUACUGGGCGUAA
UGAG AUAC
UC UUGC
AG UUAA