diff --git "a/awesome-japanese-nlp-resources-search.json" "b/awesome-japanese-nlp-resources-search.json" new file mode 100644--- /dev/null +++ "b/awesome-japanese-nlp-resources-search.json" @@ -0,0 +1,9906 @@ +[ + { + "description": "SudachiPy 0.6* and above are developed as Sudachi.rs.", + "url": "https://github.com/WorksApplications/sudachi.rs", + "project_name": "sudachi.rs", + "stargazers_count": 172, + "source": "GitHub", + "score": 0.004939547973923782 + }, + { + "description": "Japanese morphological analysis engine written in pure Python", + "url": "https://github.com/mocobeta/janome", + "project_name": "janome", + "stargazers_count": 758, + "source": "GitHub", + "score": 0.02176847304787341 + }, + { + "description": "mecab-python. mecab-python. you can find original version here:http://taku910.github.io/mecab/", + "url": "https://github.com/SamuraiT/mecab-python3", + "project_name": "mecab-python3", + "stargazers_count": 403, + "source": "GitHub", + "score": 0.011573475776112117 + }, + { + "description": "This repository is for building Windows 64-bit MeCab binary and improving MeCab Python binding.", + "url": "https://github.com/ikegami-yukino/mecab", + "project_name": "mecab", + "stargazers_count": 200, + "source": "GitHub", + "score": 0.005743660434795095 + }, + { + "description": "A Cython MeCab wrapper for fast, pythonic Japanese tokenization and morphological analysis.", + "url": "https://github.com/polm/fugashi", + "project_name": "fugashi", + "stargazers_count": 272, + "source": "GitHub", + "score": 0.007811378191321329 + }, + { + "description": "A Japanese tokenizer based on recurrent neural networks", + "url": "https://github.com/taishi-i/nagisa", + "project_name": "nagisa", + "stargazers_count": 324, + "source": "GitHub", + "score": 0.009304729904368054 + }, + { + "description": "A Python Module for JUMAN++/KNP", + "url": "https://github.com/ku-nlp/pyknp", + "project_name": "pyknp", + "stargazers_count": 82, + "source": "GitHub", + "score": 0.002354900778265989 + }, + { + "description": "Python wrapper for KyTea", + "url": "https://github.com/chezou/Mykytea-python", + "project_name": "Mykytea-python", + "stargazers_count": 35, + "source": "GitHub", + "score": 0.0010051405760891415 + }, + { + "description": "Konoha: Simple wrapper of Japanese Tokenizers", + "url": "https://github.com/himkt/konoha", + "project_name": "konoha", + "stargazers_count": 190, + "source": "GitHub", + "score": 0.00545647741305534 + }, + { + "description": "natto-py combines the Python programming language with MeCab, the part-of-speech and morphological analyzer for the Japanese language.", + "url": "https://github.com/buruzaemon/natto-py", + "project_name": "natto-py", + "stargazers_count": 84, + "source": "GitHub", + "score": 0.0024123373826139397 + }, + { + "description": "Rakuten MA (Python version)", + "url": "https://github.com/ikegami-yukino/rakutenma-python", + "project_name": "rakutenma-python", + "stargazers_count": 17, + "source": "GitHub", + "score": 0.0004882111369575831 + }, + { + "description": "Vaporetto is a fast and lightweight pointwise prediction based tokenizer. 
This is a Python wrapper for Vaporetto.", + "url": "https://github.com/daac-tools/python-vaporetto", + "project_name": "python-vaporetto", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": "An easy to use tokenizer for Japanese text, aimed at language learners and non-linguists", + "url": "https://github.com/mkartawijaya/dango", + "project_name": "dango", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Yet another Python binding for Juman++/KNP", + "url": "https://github.com/ku-nlp/rhoknp", + "project_name": "rhoknp", + "stargazers_count": 12, + "source": "GitHub", + "score": 0.0003446196260877057 + }, + { + "description": "Viterbi-based accelerated tokenizer (Python wrapper)", + "url": "https://github.com/daac-tools/python-vibrato", + "project_name": "python-vibrato", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "Python binding for Jagger(C++ implementation of Pattern-based Japanese Morphological Analyzer)", + "url": "https://github.com/lighttransport/jagger-python", + "project_name": "jagger-python", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A Japanese NLP Library using spaCy as framework based on Universal Dependencies", + "url": "https://github.com/megagonlabs/ginza", + "project_name": "ginza", + "stargazers_count": 642, + "source": "GitHub", + "score": 0.018437149995692254 + }, + { + "description": "Yet Another Japanese Dependency Structure Analyzer", + "url": "https://github.com/ikegami-yukino/cabocha", + "project_name": "cabocha", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese", + "url": "https://github.com/KoichiYasuoka/UniDic2UD", + "project_name": "UniDic2UD", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "NLP library for creating pipeline components", + "url": "https://github.com/PKSHATechnology-Research/camphr", + "project_name": "camphr", + "stargazers_count": 342, + "source": "GitHub", + "score": 0.009821659343499612 + }, + { + "description": "Tokenizer POS-tagger Lemmatizer and Dependency-parser for modern and contemporary Japanese with BERT models", + "url": "https://github.com/KoichiYasuoka/SuPar-UniDic", + "project_name": "SuPar-UniDic", + "stargazers_count": 11, + "source": "GitHub", + "score": 0.0003159013239137302 + }, + { + "description": "A* CCG Parser with a Supertag and Dependency Factored Model", + "url": "https://github.com/masashi-y/depccg", + "project_name": "depccg", + "stargazers_count": 80, + "source": "GitHub", + "score": 0.002297464173918038 + }, + { + "description": "A Japanese dependency parser based on BERT", + "url": "https://github.com/ku-nlp/bertknp", + "project_name": "bertknp", + "stargazers_count": 19, + "source": "GitHub", + "score": 0.000545647741305534 + }, + { + "description": "Tokenizer POS-Tagger and Dependency-parser with BERT/RoBERTa/DeBERTa models for Japanese and other languages", + "url": "https://github.com/KoichiYasuoka/esupar", + "project_name": "esupar", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": "Heteronym disambiguation library using a fine-tuned BERT model.", + "url": "https://github.com/passaglia/yomikata", + "project_name": "yomikata",
+ "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Python binding for J.DepP(C++ implementation of Japanese Dependency Parsers)", + "url": "https://github.com/lighttransport/jdepp-python", + "project_name": "jdepp-python", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Lightweight converter from Japanese Kana-kanji sentences into Kana-Roman.", + "url": "https://github.com/miurahr/pykakasi", + "project_name": "pykakasi", + "stargazers_count": 363, + "source": "GitHub", + "score": 0.010424743689153098 + }, + { + "description": "Japanese to romaji converter in Python", + "url": "https://github.com/polm/cutlet", + "project_name": "cutlet", + "stargazers_count": 179, + "source": "GitHub", + "score": 0.00514057608914161 + }, + { + "description": "Convert English alphabet to Katakana", + "url": "https://github.com/shihono/alphabet2kana", + "project_name": "alphabet2kana", + "stargazers_count": 8, + "source": "GitHub", + "score": 0.00022974641739180378 + }, + { + "description": "Converts Arabic numerals, or 'western' style numbers, to a Japanese context.", + "url": "https://github.com/Greatdane/Convert-Numbers-to-Japanese", + "project_name": "Convert-Numbers-to-Japanese", + "stargazers_count": 29, + "source": "GitHub", + "score": 0.0008328307630452887 + }, + { + "description": "Mozc for Python: Kana-Kanji converter", + "url": "https://github.com/ikegami-yukino/mozcpy", + "project_name": "mozcpy", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "Japanese text parser to separate Hiragana/Katakana string into morae (syllables).", + "url": "https://github.com/tachi-hi/jamorasep", + "project_name": "jamorasep", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語文を音素列へ変換するスクリプト", + "url": "https://github.com/korguchi/text2phoneme", + "project_name": "text2phoneme", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A fast character conversion and transliteration library based on the scheme defined for Japan National Tax Agency (国税庁) 's", + "url": "https://github.com/opencollector/jntajis-python", + "project_name": "jntajis-python", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Convert japanese kana from ba-bi-bu-be-bo into va-vi-vu-ve-vo", + "url": "https://github.com/eggplants/wiredify", + "project_name": "wiredify", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Simple Python package (CLI/Python API) for getting japanese readings (yomigana) and accents using MeCab.", + "url": "https://github.com/34j/mecab-text-cleaner", + "project_name": "mecab-text-cleaner", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese text normalizer for mecab-neologd", + "url": "https://github.com/ikegami-yukino/neologdn", + "project_name": "neologdn", + "stargazers_count": 248, + "source": "GitHub", + "score": 0.007122138939145917 + }, + { + "description": "Pure-Python Japanese character interconverter for Hiragana, Katakana, Hankaku, and Zenkaku", + "url": "https://github.com/ikegami-yukino/jaconv", + "project_name": "jaconv", + "stargazers_count": 254, + "source": "GitHub", + "score": 0.007294448752189771 + }, + { + "description": "A fast converter between Japanese hankaku and zenkaku characters", + "url": "https://github.com/studio-ousia/mojimoji", + "project_name": "mojimoji", 
+ "stargazers_count": 122, + "source": "GitHub", + "score": 0.003503632865225008 + }, + { + "description": "A powerful text cleaner for Japanese web texts", + "url": "https://github.com/ku-nlp/text-cleaning", + "project_name": "text-cleaning", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "複数の前処理を構成して管理するテキスト前処理ツール", + "url": "https://github.com/HojiChar/HojiChar", + "project_name": "HojiChar", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Utsuho is a Python module that facilitates bidirectional conversion between half-width katakana and full-width katakana in Japanese.", + "url": "https://github.com/juno-rmks/utsuho", + "project_name": "utsuho", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Yet Another Fast Japanese String Converter", + "url": "https://github.com/Hizuru3/python-habachen", + "project_name": "python-habachen", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Sentence boundary disambiguation tool for Japanese texts (日本語文境界判定器)", + "url": "https://github.com/megagonlabs/bunkai", + "project_name": "bunkai", + "stargazers_count": 164, + "source": "GitHub", + "score": 0.004709801556531978 + }, + { + "description": "Japanese Sentence Breaker", + "url": "https://github.com/hppRC/japanese-sentence-breaker", + "project_name": "japanese-sentence-breaker", + "stargazers_count": 13, + "source": "GitHub", + "score": 0.0003733379282616812 + }, + { + "description": "Yet another sentence-level tokenizer for the Japanese text", + "url": "https://github.com/ikegami-yukino/sengiri", + "project_name": "sengiri", + "stargazers_count": 19, + "source": "GitHub", + "score": 0.000545647741305534 + }, + { + "description": "Standalone. Small. Language-neutral. 
BudouX is the successor to Budou, the machine learning powered line break organizer tool.", + "url": "https://github.com/google/budoux", + "project_name": "budoux", + "stargazers_count": 909, + "source": "GitHub", + "score": 0.026104936676143706 + }, + { + "description": "japanese sentence segmentation library for python", + "url": "https://github.com/wwwcojp/ja_sentence_segmenter", + "project_name": "ja_sentence_segmenter", + "stargazers_count": 45, + "source": "GitHub", + "score": 0.0012923235978288964 + }, + { + "description": "A tool to perform sentence segmentation on Japanese text", + "url": "https://github.com/mkartawijaya/hasami", + "project_name": "hasami", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Japanese Text Segmenter for Python written in Rust", + "url": "https://github.com/alinear-corp/kuzukiri", + "project_name": "kuzukiri", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Comparison of Japanese Sentence Segmentation Tools", + "url": "https://github.com/hkiyomaru/ja-senter-benchmark", + "project_name": "ja-senter-benchmark", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "Dictionary based Sentiment Analysis for Japanese", + "url": "https://github.com/ikegami-yukino/oseti", + "project_name": "oseti", + "stargazers_count": 79, + "source": "GitHub", + "score": 0.0022687458717440623 + }, + { + "description": "Japanese negative positive classification.日本語文書のネガポジを判定。", + "url": "https://github.com/liaoziyang/negapoji", + "project_name": "negapoji", + "stargazers_count": 148, + "source": "GitHub", + "score": 0.004250308721748371 + }, + { + "description": "Emotion analyzer for Japanese text", + "url": "https://github.com/ikegami-yukino/pymlask", + "project_name": "pymlask", + "stargazers_count": 101, + "source": "GitHub", + "score": 0.002900548519571523 + }, + { + "description": "Japanese sentiment analyzer implemented in Python.", + "url": "https://github.com/Hironsan/asari", + "project_name": "asari", + "stargazers_count": 119, + "source": "GitHub", + "score": 0.0034174779587030813 + }, + { + "description": "An example usage of JParaCrawl pre-trained Neural Machine Translation (NMT) models.", + "url": "https://github.com/MorinoseiMorizo/jparacrawl-finetune", + "project_name": "jparacrawl-finetune", + "stargazers_count": 88, + "source": "GitHub", + "score": 0.0025272105913098416 + }, + { + "description": "JASS: Japanese-specific Sequence to Sequence Pre-training for Neural Machine Translation (LREC2020) & Linguistically Driven Multi-Task Pre-Training for Low-Resource Neural Machine Translation (ACM TALLIP)", + "url": "https://github.com/Mao-KU/JASS", + "project_name": "JASS", + "stargazers_count": 14, + "source": "GitHub", + "score": 0.00040205623043565664 + }, + { + "description": "A phenomenon-wise evaluation dataset for Japanese-English machine translation robustness. The dataset is based on the MTNT dataset, with additional annotations of four linguistic phenomena; Proper Noun, Abbreviated Noun, Colloquial Expression, and Variant.
COLING 2020.", + "url": "https://github.com/cl-tohoku/PheMT", + "project_name": "PheMT", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "An ambiguous subtitles dataset for visual scene-aware machine translation", + "url": "https://github.com/ku-nlp/VISA", + "project_name": "VISA", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "Character Based Named Entity Recognition.", + "url": "https://github.com/chakki-works/namaco", + "project_name": "namaco", + "stargazers_count": 41, + "source": "GitHub", + "score": 0.0011774503891329945 + }, + { + "description": "Entitypedia is an Extended Named Entity Dictionary from Wikipedia.", + "url": "https://github.com/chakki-works/entitypedia", + "project_name": "entitypedia", + "stargazers_count": 11, + "source": "GitHub", + "score": 0.0003159013239137302 + }, + { + "description": "Converts character span label information to tokenized text-based label information.", + "url": "https://github.com/ken11/noyaki", + "project_name": "noyaki", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Code to perform finetuning of the BERT model. BERTモデルのファインチューニングで固有表現抽出用タスクのモデルを作成・使用するサンプルです", + "url": "https://github.com/ken11/bert-japanese-ner-finetuning", + "project_name": "bert-japanese-ner-finetuning", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "詳細なアノテーション基準に基づく症例報告コーパスからの固有表現及び関係の抽出精度の推論を行うコード", + "url": "https://github.com/aih-uth/joint-information-extraction-hs", + "project_name": "joint-information-extraction-hs", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "pygeonlp, A python module for geotagging Japanese texts.", + "url": "https://github.com/geonlp-platform/pygeonlp", + "project_name": "pygeonlp", + "stargazers_count": 12, + "source": "GitHub", + "score": 0.0003446196260877057 + }, + { + "description": "BERTによる日本語固有表現抽出のファインチューニング用プログラム", + "url": "https://github.com/jurabiinc/bert-ner-japanese", + "project_name": "bert-ner-japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "About Optical character recognition for Japanese text, with the main focus being Japanese manga", + "url": "https://github.com/kha-white/manga-ocr", + "project_name": "manga-ocr", + "stargazers_count": 312, + "source": "GitHub", + "score": 0.008960110278280349 + }, + { + "description": "Read Japanese manga inside browser with selectable text.", + "url": "https://github.com/kha-white/mokuro", + "project_name": "mokuro", + "stargazers_count": 162, + "source": "GitHub", + "score": 0.004652364952184027 + }, + { + "description": "Handwritten Japanese OCR demo using touch panel to draw the input text using Intel OpenVINO toolkit", + "url": "https://github.com/yas-sim/handwritten-japanese-ocr", + "project_name": "handwritten-japanese-ocr", + "stargazers_count": 26, + "source": "GitHub", + "score": 0.0007466758565233624 + }, + { + "description": "日本語OCR", + "url": "https://github.com/tanreinama/OCR_Japanease", + "project_name": "OCR_Japanease", + "stargazers_count": 133, + "source": "GitHub", + "score": 0.003819534189138738 + }, + { + "description": "NDLOCRのアプリケーション", + "url": "https://github.com/ndl-lab/ndlocr_cli", + "project_name": "ndlocr_cli", + "stargazers_count": 279, + "source": "GitHub", + "score": 0.008012406306539157 + }, + { + 
"description": "Official Implementation of OCR-free Document Understanding Transformer (Donut) and Synthetic Document Generator (SynthDoG), ECCV 2022", + "url": "https://github.com/clovaai/donut", + "project_name": "donut", + "stargazers_count": 933, + "source": "GitHub", + "score": 0.02679417592831912 + }, + { + "description": "get japanese manga from url to translate manga image", + "url": "https://github.com/ttop32/JMTrans", + "project_name": "JMTrans", + "stargazers_count": 51, + "source": "GitHub", + "score": 0.0014646334108727491 + }, + { + "description": "OCR system for recognizing modern Japanese magazines", + "url": "https://github.com/ducanh841988/Kindai-OCR", + "project_name": "Kindai-OCR", + "stargazers_count": 80, + "source": "GitHub", + "score": 0.002297464173918038 + }, + { + "description": "NDLOCR用テキスト認識モジュール", + "url": "https://github.com/ndl-lab/text_recognition", + "project_name": "text_recognition", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Optical character recognition in manga images. Manga OCR desktop application", + "url": "https://github.com/blueaxis/Poricom", + "project_name": "Poricom", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JGLUE: Japanese General Language Understanding Evaluation", + "url": "https://github.com/yahoojapan/JGLUE", + "project_name": "JGLUE", + "stargazers_count": 135, + "source": "GitHub", + "score": 0.003876970793486689 + }, + { + "description": "Use custom tokenizers in spacy-transformers", + "url": "https://github.com/megagonlabs/ginza-transformers", + "project_name": "ginza-transformers", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "T5による会話生成", + "url": "https://github.com/Jinyamyzk/t5_japanese_dialogue_generation", + "project_name": "t5_japanese_dialogue_generation", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "To investigate various DNN text classifiers including MLP, CNN, RNN, BERT approaches.", + "url": "https://github.com/Masao-Taketani/japanese_text_classification", + "project_name": "japanese_text_classification", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "Deploying sentiment analysis server with FastAPI and BERT", + "url": "https://github.com/izuna385/Japanese-BERT-Sentiment-Analyzer", + "project_name": "Japanese-BERT-Sentiment-Analyzer", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Masked Language Model-based Scoring for Japanese and Vietnamese", + "url": "https://github.com/minhpqn/jmlm_scoring", + "project_name": "jmlm_scoring", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "AllenNLP integration for Shiba: Japanese CANINE model", + "url": "https://github.com/shunk031/allennlp-shiba-model", + "project_name": "allennlp-shiba-model", + "stargazers_count": 12, + "source": "GitHub", + "score": 0.0003446196260877057 + }, + { + "description": "script to evaluate pre-trained Japanese word2vec model on Japanese similarity dataset", + "url": "https://github.com/shihono/evaluate_japanese_w2v", + "project_name": "evaluate_japanese_w2v", + "stargazers_count": 8, + "source": "GitHub", + "score": 0.00022974641739180378 + }, + { + "description": "BERT-based GEC tagging for Japanese", + "url": 
"https://github.com/jonnyli1125/gector-ja", + "project_name": "gector-ja", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "Japanese-BPEEncoder", + "url": "https://github.com/tanreinama/Japanese-BPEEncoder", + "project_name": "Japanese-BPEEncoder", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "Japanese-BPEEncoder Version 2", + "url": "https://github.com/tanreinama/Japanese-BPEEncoder_V2", + "project_name": "Japanese-BPEEncoder_V2", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "日本語文法誤り訂正ツール", + "url": "https://github.com/youichiro/transformer-copy", + "project_name": "transformer-copy", + "stargazers_count": 17, + "source": "GitHub", + "score": 0.0004882111369575831 + }, + { + "description": "Japanese Stable Diffusion is a Japanese specific latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", + "url": "https://github.com/rinnakk/japanese-stable-diffusion", + "project_name": "japanese-stable-diffusion", + "stargazers_count": 251, + "source": "GitHub", + "score": 0.007208293845667844 + }, + { + "description": "A BERT model for nagisa", + "url": "https://github.com/taishi-i/nagisa_bert", + "project_name": "nagisa_bert", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "Example code for prefix-tuning GPT/GPT-NeoX models and for inference with trained prefixes", + "url": "https://github.com/rinnakk/prefix-tuning-gpt", + "project_name": "prefix-tuning-gpt", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "Training and evaluation scripts for JGLUE, a Japanese language understanding benchmark", + "url": "https://github.com/nobu-g/JGLUE-benchmark", + "project_name": "JGLUE-benchmark", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Tokenizer for transformers library", + "url": "https://github.com/retarfi/jptranstokenizer", + "project_name": "jptranstokenizer", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "JP Language Model Evaluation Harness", + "url": "https://github.com/Stability-AI/lm-evaluation-harness/tree/jp-stable", + "project_name": "jp-stable", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "How do different tokenizers perform on downstream tasks in scriptio continua languages?: A case study in Japanese-ACL SRW 2023", + "url": "https://github.com/hitachi-nlp/compare-ja-tokenizer", + "project_name": "compare-ja-tokenizer", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A framework for few-shot evaluation of autoregressive language models.", + "url": "https://github.com/tdc-yamada-ya/lm-evaluation-harness-jp-stable", + "project_name": "lm-evaluation-harness-jp-stable", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "llm-lora-classification", + "url": "https://github.com/hppRC/llm-lora-classification", + "project_name": "llm-lora-classification", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "The repository contains scripts and merge scripts that have been modified to adapt an Alpaca-Lora adapter for LoRA tuning when assuming the use of the \"rinna/japanese-gpt-neox...\" 
[gpt-neox] model converted to ggml.", + "url": "https://github.com/yukaryavka/rinna_gpt-neox_ggml-lora", + "project_name": "rinna_gpt-neox_ggml-lora", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "このリポジトリは日本語LLMのキャラクターロールプレイに関する性能を評価するために作成しました。", + "url": "https://github.com/oshizo/japanese-llm-roleplay-benchmark", + "project_name": "japanese-llm-roleplay-benchmark", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "This repository supports YuzuAI's Rakuda leaderboard of Japanese LLMs, which is a Japanese-focused analogue of LMSYS' Vicuna eval.", + "url": "https://github.com/yuzu-ai/japanese-llm-ranking", + "project_name": "japanese-llm-ranking", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "このツールは、複数のデータセットを横断して日本語の大規模言語モデルを自動評価するものです.", + "url": "https://github.com/llm-jp/llm-jp-eval", + "project_name": "llm-jp-eval", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "This repository contains the code for supervised fine-tuning of LLM-jp models.", + "url": "https://github.com/llm-jp/llm-jp-sft", + "project_name": "llm-jp-sft", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "LLM勉強会(LLM-jp)で開発しているLLM用のトークナイザー関連をまとめたリポジトリです.", + "url": "https://github.com/llm-jp/llm-jp-tokenizer", + "project_name": "llm-jp-tokenizer", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Language Model Financial Evaluation Harness", + "url": "https://github.com/pfnet-research/japanese-lm-fin-harness", + "project_name": "japanese-lm-fin-harness", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Vicuna QA Benchmark", + "url": "https://github.com/ku-nlp/ja-vicuna-qa-benchmark", + "project_name": "ja-vicuna-qa-benchmark", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A tool for dividing the Japanese full name into a family name and a given name.", + "url": "https://github.com/rskmoi/namedivider-python", + "project_name": "namedivider-python", + "stargazers_count": 220, + "source": "GitHub", + "score": 0.006318026478274604 + }, + { + "description": "A curated list of resources dedicated to Python libraries of NLP for Japanese", + "url": "https://github.com/ikegami-yukino/asa-python", + "project_name": "asa-python", + "stargazers_count": 11, + "source": "GitHub", + "score": 0.0003159013239137302 + }, + { + "description": "python版日本語意味役割付与システム(ASA)", + "url": "https://github.com/Takeuchi-Lab-LM/python_asa", + "project_name": "python_asa", + "stargazers_count": 19, + "source": "GitHub", + "score": 0.000545647741305534 + }, + { + "description": "A comparison tool of Japanese tokenizers", + "url": "https://github.com/taishi-i/toiro", + "project_name": "toiro", + "stargazers_count": 105, + "source": "GitHub", + "score": 0.003015421728267425 + }, + { + "description": "自然言語で書かれた時間情報表現を抽出/規格化するルールベースの解析器", + "url": "https://github.com/yagays/ja-timex", + "project_name": "ja-timex", + "stargazers_count": 116, + "source": "GitHub", + "score": 0.003331323052181155 + }, + { + "description": "A set of metrics for feature selection from text data", + "url": "https://github.com/Kensuke-Mitsuzawa/JapaneseTokenizers", + "project_name": "JapaneseTokenizers", + "stargazers_count": 131, + "source": "GitHub", + "score": 0.0037620975847907872 + }, + { + "description": "This repository has 
implementations of data augmentation for NLP for Japanese.", + "url": "https://github.com/kajyuuen/daaja", + "project_name": "daaja", + "stargazers_count": 60, + "source": "GitHub", + "score": 0.0017230981304385284 + }, + { + "description": "The purpose of this repository is to make prototypes as case study in the context of proof of concept(PoC) and research and development(R&D) that I have written in my website. The main research topics are Auto-Encoders in relation to the representation learning, the statistical machine learning for energy-based models, adversarial generation net…", + "url": "https://github.com/accel-brain/accel-brain-code", + "project_name": "accel-brain-code", + "stargazers_count": 248, + "source": "GitHub", + "score": 0.007122138939145917 + }, + { + "description": "A processor for KyotoCorpus, KWDLC, and AnnotatedFKCCorpus", + "url": "https://github.com/ku-nlp/kyoto-reader", + "project_name": "kyoto-reader", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "Visualization Module for Natural Language Processing", + "url": "https://github.com/takapy0210/nlplot", + "project_name": "nlplot", + "stargazers_count": 197, + "source": "GitHub", + "score": 0.0056575055282731684 + }, + { + "description": "Rapid Automatic Keyword Extraction algorithm for Japanese", + "url": "https://github.com/kanjirz50/rake-ja", + "project_name": "rake-ja", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": "Japanese Entity Linker.", + "url": "https://github.com/izuna385/jel", + "project_name": "jel", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "Latest version of MedEX/J (Japanese disease name extractor)", + "url": "https://github.com/sociocom/MedNER-J", + "project_name": "MedNER-J", + "stargazers_count": 14, + "source": "GitHub", + "score": 0.00040205623043565664 + }, + { + "description": "Zunda: Japanese Enhanced Modality Analyzer client for Python.", + "url": "https://github.com/ikegami-yukino/zunda-python", + "project_name": "zunda-python", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "https://www.nlp.ecei.tohoku.ac.jp/projects/aio/", + "url": "https://github.com/cl-tohoku/AIO2_DPR_baseline", + "project_name": "AIO2_DPR_baseline", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "A PyTorch implementation of the Japanese Predicate-Argument Structure (PAS) analyser presented in the paper of Matsubayashi & Inui (2018) with some improvements.", + "url": "https://github.com/cl-tohoku/showcase", + "project_name": "showcase", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "Darts-clone python binding", + "url": "https://github.com/rixwew/darts-clone-python", + "project_name": "darts-clone-python", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": "Example codes for Japanese Realistic Textual Entailment Corpus", + "url": "https://github.com/megagonlabs/jrte-corpus_example", + "project_name": "jrte-corpus_example", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "Feature annotator to morphemes and phrases based on KNP rule files (pure-Python)", + "url": "https://github.com/megagonlabs/desuwa", + "project_name": "desuwa", + 
"stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "Restaurant Search System through Dialogue in Japanese.", + "url": "https://github.com/Hironsan/HotPepperGourmetDialogue", + "project_name": "HotPepperGourmetDialogue", + "stargazers_count": 277, + "source": "GitHub", + "score": 0.007954969702191207 + }, + { + "description": "Samples codes for natural language processing in Japanese", + "url": "https://github.com/upura/nlp-recipes-ja", + "project_name": "nlp-recipes-ja", + "stargazers_count": 56, + "source": "GitHub", + "score": 0.0016082249217426266 + }, + { + "description": "Small example scripts for working with Japanese texts in Python", + "url": "https://github.com/olsgaard/Japanese_nlp_scripts", + "project_name": "Japanese_nlp_scripts", + "stargazers_count": 22, + "source": "GitHub", + "score": 0.0006318026478274604 + }, + { + "description": "Japanese version of DNorm", + "url": "https://github.com/sociocom/DNorm-J", + "project_name": "DNorm-J", + "stargazers_count": 7, + "source": "GitHub", + "score": 0.00020102811521782832 + }, + { + "description": "EventGraph is a development platform for high-level NLP applications in Japanese.", + "url": "https://github.com/ku-nlp/pyknp-eventgraph", + "project_name": "pyknp-eventgraph", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "Ishi: A volition classifier for Japanese", + "url": "https://github.com/ku-nlp/ishi", + "project_name": "ishi", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "ベイズ階層言語モデルによる教師なし形態素解析", + "url": "https://github.com/musyoku/python-npylm", + "project_name": "python-npylm", + "stargazers_count": 31, + "source": "GitHub", + "score": 0.0008902673673932397 + }, + { + "description": "条件付確率場とベイズ階層言語モデルの統合による半教師あり形態素解析", + "url": "https://github.com/musyoku/python-npycrf", + "project_name": "python-npycrf", + "stargazers_count": 11, + "source": "GitHub", + "score": 0.0003159013239137302 + }, + { + "description": "教師なし品詞タグ推定", + "url": "https://github.com/musyoku/unsupervised-pos-tagging", + "project_name": "unsupervised-pos-tagging", + "stargazers_count": 16, + "source": "GitHub", + "score": 0.00045949283478360757 + }, + { + "description": "Negima is a Python package to extract phrases in Japanese text by using the part-of-speeches based rules you defined.", + "url": "https://github.com/cocodrips/negima", + "project_name": "negima", + "stargazers_count": 12, + "source": "GitHub", + "score": 0.0003446196260877057 + }, + { + "description": "Extractive summarizer using BertSum as summarization model", + "url": "https://github.com/neilctwu/YouyakuMan", + "project_name": "YouyakuMan", + "stargazers_count": 50, + "source": "GitHub", + "score": 0.0014359151086987738 + }, + { + "description": "A parser for Japanese number (Kanji, arabic) in the natural language.", + "url": "https://github.com/takumakanari/japanese-numbers-python", + "project_name": "japanese-numbers-python", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Lookup japanese words by radical patterns", + "url": "https://github.com/itayperl/kantan", + "project_name": "kantan", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "Get Japanese dialogue corpus", + "url": "https://github.com/knok/make-meidai-dialogue", + "project_name": "make-meidai-dialogue", + "stargazers_count": 40, + 
"source": "GitHub", + "score": 0.001148732086959019 + }, + { + "description": "A summarizer for Japanese articles.", + "url": "https://github.com/ryuryukke/japanese_summarizer", + "project_name": "japanese_summarizer", + "stargazers_count": 8, + "source": "GitHub", + "score": 0.00022974641739180378 + }, + { + "description": "ChirpText is a collection of text processing tools for Python.", + "url": "https://github.com/letuananh/chirptext", + "project_name": "chirptext", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "Japanese Address Munger", + "url": "https://github.com/alvations/yubin", + "project_name": "yubin", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Japanese Wikipedia Cleaner", + "url": "https://github.com/hppRC/jawiki-cleaner", + "project_name": "jawiki-cleaner", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "A python library to convert Japanese to phoneme.", + "url": "https://github.com/iory/japanese2phoneme", + "project_name": "japanese2phoneme", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "This repository contains codes related to the experiments in \"An Experimental Evaluation of Japanese Tokenizers for Sentiment-Based Text Classification\"", + "url": "https://github.com/arusl/anlp_nlp2021_d3-1", + "project_name": "anlp_nlp2021_d3-1", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "About", + "url": "https://github.com/shibuiwilliam/aozora_classification", + "project_name": "aozora_classification", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "Generates plain or tokenized text files from the Aozora Bunko", + "url": "https://github.com/borh/aozora-corpus-generator", + "project_name": "aozora-corpus-generator", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "A fast LSTM Language Model for large vocabulary language like Japanese and Chinese", + "url": "https://github.com/jiali-ms/JLM", + "project_name": "JLM", + "stargazers_count": 108, + "source": "GitHub", + "score": 0.0031015766347893513 + }, + { + "description": "Testing of Neural Topic Modeling for Japanese articles", + "url": "https://github.com/m3yrin/NTM", + "project_name": "NTM", + "stargazers_count": 8, + "source": "GitHub", + "score": 0.00022974641739180378 + }, + { + "description": "This is a English-Japanese lexicon for Machine Learning and Deep Learning terminology.", + "url": "https://github.com/Machine-Learning-Tokyo/EN-JP-ML-Lexicon", + "project_name": "EN-JP-ML-Lexicon", + "stargazers_count": 31, + "source": "GitHub", + "score": 0.0008902673673932397 + }, + { + "description": "Easy-to-use scripts to fine-tune GPT-2-JA with your own texts, to generate sentences, and to tweet them automatically.", + "url": "https://github.com/discus0434/text-generation", + "project_name": "text-generation", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Neural Image Caption (NIC) on chainer, its pretrained models on English and Japanese image caption datasets.", + "url": "https://github.com/yuyay/chainer_nic", + "project_name": "chainer_nic", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": 
"The official repository for \"UnihanLM: Coarse-to-Fine Chinese-Japanese Language Model Pretraining with the Unihan Database\", AACL-IJCNLP 2020", + "url": "https://github.com/JetRunner/unihan-lm", + "project_name": "unihan-lm", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "Code to perform finetuning of the mBART model.", + "url": "https://github.com/ken11/mbart-finetuning", + "project_name": "mbart-finetuning", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "xvector model on jtubespeech", + "url": "https://github.com/sarulab-speech/xvector_jtubespeech", + "project_name": "xvector_jtubespeech", + "stargazers_count": 20, + "source": "GitHub", + "score": 0.0005743660434795095 + }, + { + "description": "TinySegmenter用の学習モデルを自作するためのツール.", + "url": "https://github.com/shogo82148/TinySegmenterMaker", + "project_name": "TinySegmenterMaker", + "stargazers_count": 63, + "source": "GitHub", + "score": 0.001809253036960455 + }, + { + "description": "日本語とグロンギ語の相互変換スクリプト", + "url": "https://github.com/shogo82148/Grongish", + "project_name": "Grongish", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "WordCloudでの日本語文章をMecab(形態素解析エンジン)を使用せずに形態素解析チックな表示を実現するスクリプト", + "url": "https://github.com/aocattleya/WordCloud-Japanese", + "project_name": "WordCloud-Japanese", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "日本語ワードネットを利用したDBアクセスライブラリ", + "url": "https://github.com/hiraokusky/snark", + "project_name": "snark", + "stargazers_count": 7, + "source": "GitHub", + "score": 0.00020102811521782832 + }, + { + "description": "日本語文を絵文字だけの文に変換するなにか", + "url": "https://github.com/mkan0141/toEmoji", + "project_name": "toEmoji", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "専門用語抽出アルゴリズムの実装の練習", + "url": "https://github.com/kanjirz50/termextract", + "project_name": "termextract", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Japanese-Dialog-Transformerの応答候補に対して、KenLMによるN-gram言語モデルでスコアリングし、フィルタリング若しくはリランキングを行う。", + "url": "https://github.com/TUT-SLP-lab/JDT-with-KenLM-scoring", + "project_name": "JDT-with-KenLM-scoring", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Mixture of Unigram Model and Infinite Mixture of Unigram Model in Python. (混合ユニグラムモデルと無限混合ユニグラムモデル)", + "url": "https://github.com/KentoW/mixture-of-unigram-model", + "project_name": "mixture-of-unigram-model", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "Hidden Markov Model (HMM) and Infinite Hidden Markov Model (iHMM) in Python. (隠れマルコフモデルと無限隠れマルコフモデル)", + "url": "https://github.com/KentoW/hidden-markov-model", + "project_name": "hidden-markov-model", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "Ngram language model in Python. 
(Nグラム言語モデル)", + "url": "https://github.com/KentoW/Ngram-language-model", + "project_name": "Ngram-language-model", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "Automatic Speech Recognition with deepspeech2 model in pytorch with support from Zakuro AI.", + "url": "https://github.com/JeanMaximilienCadic/ASRDeepSpeech", + "project_name": "ASRDeepSpeech", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Neural IME: Neural Input Method Engine", + "url": "https://github.com/yohokuno/neural_ime", + "project_name": "neural_ime", + "stargazers_count": 62, + "source": "GitHub", + "score": 0.0017805347347864793 + }, + { + "description": "Can neural networks transliterate Romaji into Japanese correctly?", + "url": "https://github.com/Kyubyong/neural_japanese_transliterator", + "project_name": "neural_japanese_transliterator", + "stargazers_count": 170, + "source": "GitHub", + "score": 0.004882111369575831 + }, + { + "description": "tokenizer specified for Japanese", + "url": "https://github.com/SamuraiT/tinysegmenter", + "project_name": "tinysegmenter", + "stargazers_count": 44, + "source": "GitHub", + "score": 0.0012636052956549208 + }, + { + "description": "Data Augmentation for Japanese Text on AugLy", + "url": "https://github.com/chck/AugLy-jp", + "project_name": "AugLy-jp", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "A Python script for adding furigana to Japanese epub books using Mecab and Unidic.", + "url": "https://github.com/Mumumu4/furigana4epub", + "project_name": "furigana4epub", + "stargazers_count": 7, + "source": "GitHub", + "score": 0.00020102811521782832 + }, + { + "description": "Japanese verb/adjective inflections tool", + "url": "https://github.com/SmashinFries/PyKatsuyou", + "project_name": "PyKatsuyou", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Pure Python Japanese address geocoder", + "url": "https://github.com/t-sagara/jageocoder", + "project_name": "jageocoder", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": "New kana-kanji conversion engine", + "url": "https://github.com/yoriyuki/nksnd", + "project_name": "nksnd", + "stargazers_count": 25, + "source": "GitHub", + "score": 0.0007179575543493869 + }, + { + "description": "A Japanese Medical Information Extraction Toolkit", + "url": "https://github.com/racerandom/JaMIE", + "project_name": "JaMIE", + "stargazers_count": 7, + "source": "GitHub", + "score": 0.00020102811521782832 + }, + { + "description": "fasttextとword2vecの比較と、実行スクリプト、学習スクリプトです", + "url": "https://github.com/GINK03/fasttext-vs-word2vec-on-twitter-data", + "project_name": "fasttext-vs-word2vec-on-twitter-data", + "stargazers_count": 49, + "source": "GitHub", + "score": 0.0014071968065247982 + }, + { + "description": "最小のサーチエンジン/PageRank/tf-idf", + "url": "https://github.com/GINK03/minimal-search-engine", + "project_name": "minimal-search-engine", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": "5chの過去ログをスクレイピングして、過去流行った単語(ex, 香具師, orz)などを追跡調査", + "url": "https://github.com/GINK03/5ch-analysis", + "project_name": "5ch-analysis", + "stargazers_count": 58, + "source": "GitHub", + "score": 0.0016656615260905775 + }, + { + "description": "Twitter日本語評判分析データセットのためのツイートダウンローダ", + "url": 
"https://github.com/tatHi/tweet_extructor", + "project_name": "tweet_extructor", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Aggregating Japanese words based on Juman++ and ConceptNet5.5", + "url": "https://github.com/hkiyomaru/japanese-word-aggregation", + "project_name": "japanese-word-aggregation", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "A Japanese inflection converter", + "url": "https://github.com/hkiyomaru/jinf", + "project_name": "jinf", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "A unified language analyzer for Japanese", + "url": "https://github.com/ku-nlp/kwja", + "project_name": "kwja", + "stargazers_count": 77, + "source": "GitHub", + "score": 0.0022113092673961116 + }, + { + "description": "Reproduced package based on Masked Language Model Scoring (ACL2020).", + "url": "https://github.com/Ryutaro-A/mlm-scoring-transformers", + "project_name": "mlm-scoring-transformers", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "[PyTorch] ClipCap for Japanese", + "url": "https://github.com/Japanese-Image-Captioning/ClipCap-for-Japanese", + "project_name": "ClipCap-for-Japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "[PyTorch] Show, Attend and Tell for Japanese", + "url": "https://github.com/Japanese-Image-Captioning/SAT-for-Japanese", + "project_name": "SAT-for-Japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Python library for CJK (Chinese, Japanese, and Korean) language dictionary", + "url": "https://github.com/cihai/cihai", + "project_name": "cihai", + "stargazers_count": 61, + "source": "GitHub", + "score": 0.001751816432612504 + }, + { + "description": "MARINE : Multi-task leaRnIng-based JapaNese accent Estimation", + "url": "https://github.com/6gsn/marine", + "project_name": "marine", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Finetuning Whisper ASR model", + "url": "https://github.com/sarulab-speech/whisper-asr-finetune", + "project_name": "whisper-asr-finetune", + "stargazers_count": 18, + "source": "GitHub", + "score": 0.0005169294391315585 + }, + { + "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder", + "url": "https://github.com/CjangCjengh/japanese_chatbot", + "project_name": "japanese_chatbot", + "stargazers_count": 28, + "source": "GitHub", + "score": 0.0008041124608713133 + }, + { + "description": "部首文字正規化ライブラリ", + "url": "https://github.com/yamamaya/radicalchar", + "project_name": "radicalchar", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Yet another Japanese IME for IBus/Linux", + "url": "https://github.com/tokuhirom/akaza", + "project_name": "akaza", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese postal code data.", + "url": "https://github.com/polm/posuto", + "project_name": "posuto", + "stargazers_count": 168, + "source": "GitHub", + "score": 0.0048246747652278795 + }, + { + "description": "Tacotron2 implementation of Japanese", + "url": "https://github.com/CjangCjengh/tacotron2-japanese", + "project_name": "tacotron2-japanese", + "stargazers_count": 248, + "source": "GitHub", + "score": 0.007122138939145917 + }, + 
{ + "description": "ひらがなIME for IBus", + "url": "https://github.com/esrille/ibus-hiragana", + "project_name": "ibus-hiragana", + "stargazers_count": 54, + "source": "GitHub", + "score": 0.0015507883173946756 + }, + { + "description": "ふりがなパッド", + "url": "https://github.com/esrille/furiganapad", + "project_name": "furiganapad", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "Japanese synonym library", + "url": "https://github.com/WorksApplications/chikkarpy", + "project_name": "chikkarpy", + "stargazers_count": 48, + "source": "GitHub", + "score": 0.0013784785043508229 + }, + { + "description": "Mecab + NEologd + Docker + Python3", + "url": "https://github.com/p-geon/ja-tokenizer-docker-py", + "project_name": "ja-tokenizer-docker-py", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JapaneseEmbeddingEval", + "url": "https://github.com/oshizo/JapaneseEmbeddingEval", + "project_name": "JapaneseEmbeddingEval", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "GPTがYouTuberをやります", + "url": "https://github.com/karakuri-ai/gptuber-by-langchain", + "project_name": "gptuber-by-langchain", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Extend GNOME On-Screen Keyboard for Input Methods", + "url": "https://github.com/google/shuwa", + "project_name": "shuwa", + "stargazers_count": 99, + "source": "GitHub", + "score": 0.002843111915223572 + }, + { + "description": "This repository provides the code for Japanese NLI model, a fine-tuned masked language model.", + "url": "https://github.com/CyberAgentAILab/japanese-nli-model", + "project_name": "japanese-nli-model", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "A tool for Japanese-English translation and English-Japanese translation by using FuguMT", + "url": "https://github.com/tos-kamiya/tra-fugu", + "project_name": "tra-fugu", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ぷるーふおぶこんせぷと で公開した機械翻訳エンジンを利用する翻訳環境です。 フォームに入力された文字列の翻訳、PDFの翻訳が可能です。", + "url": "https://github.com/s-taka/fugumt", + "project_name": "fugumt", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JaSPICE: Automatic Evaluation Metric Using Predicate-Argument Structures for Image Captioning Models", + "url": "https://github.com/keio-smilab23/JaSPICE", + "project_name": "JaSPICE", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "jp-localization", + "url": "https://github.com/yantaisa11/Retrieval-based-Voice-Conversion-WebUI-JP-localization", + "project_name": "Retrieval-based-Voice-Conversion-WebUI-JP-localization", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Python wrapper for OpenJTalk", + "url": "https://github.com/r9y9/pyopenjtalk", + "project_name": "pyopenjtalk", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Make learning Japanese easier by adding readings for every kanji in the eBook", + "url": "https://github.com/rabbit19981023/yomigana-ebook", + "project_name": "yomigana-ebook", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Whisper based Japanese subtitle generator", + "url": "https://github.com/Ayanaminn/N46Whisper", + "project_name": "N46Whisper", + "stargazers_count": 0, + "source": "GitHub", + 
"score": 0.0 + }, + { + "description": "Rinna-3.6B、OpenCALM等の日本語対応LLM(大規模言語モデル)用の簡易Webインタフェースです", + "url": "https://github.com/noir55/japanese_llm_simple_webui", + "project_name": "japanese_llm_simple_webui", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "pdf-translator translates English PDF files into Japanese, preserving the original layout.", + "url": "https://github.com/discus0434/pdf-translator", + "project_name": "pdf-translator", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Haystack + Elasticsearch + wikipedia(ja) を用いた、日本語の質問応答システムのサンプル", + "url": "https://github.com/Shingo-Kamata/japanese_qa_demo_with_haystack_and_es", + "project_name": "japanese_qa_demo_with_haystack_and_es", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Automatically exported from code.google.com/p/mozc-morse", + "url": "https://github.com/google/mozc-devices", + "project_name": "mozc-devices", + "stargazers_count": 1530, + "source": "GitHub", + "score": 0.043939002326182475 + }, + { + "description": "A Japanese text frontend processing toolkit", + "url": "https://github.com/faruzan0820/natsume", + "project_name": "natsume", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語TTS(VITS)の学習と音声合成のGradio WebUI", + "url": "https://github.com/litagin02/vits-japros-webui", + "project_name": "vits-japros-webui", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A Japanese law parser", + "url": "https://github.com/takuyaa/ja-law-parser", + "project_name": "ja-law-parser", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese dictation kit using Julius", + "url": "https://github.com/julius-speech/dictation-kit", + "project_name": "dictation-kit", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Juliusを使ったセグメンテーション支援ツール", + "url": "https://github.com/Hiroshiba/julius4seg", + "project_name": "julius4seg", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXの音声合成エンジン", + "url": "https://github.com/VOICEVOX/voicevox_engine", + "project_name": "voicevox_engine", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "LLaVA-JP is a Japanese VLM trained by LLaVA method", + "url": "https://github.com/tosiyuki/LLaVA-JP", + "project_name": "LLaVA-JP", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Open source RAG with Llama Index for Japanese LLM in low resource settting", + "url": "https://github.com/AkimParis/RAG-Japanese", + "project_name": "RAG-Japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Spelling Error Corrector using BERT(Masked-Language Model). 
BERTに基づいて日本語校正", + "url": "https://github.com/er-ri/bertjsc", + "project_name": "bertjsc", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Project of llm evaluation to Japanese tasks", + "url": "https://github.com/wandb/llm-leaderboard", + "project_name": "llm-leaderboard", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "About", + "url": "https://github.com/nobu-g/jglue-evaluation-scripts", + "project_name": "jglue-evaluation-scripts", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Modifying LAVIS' BLIP2 Q-former with models pretrained on Japanese datasets.", + "url": "https://github.com/ZhaoPeiduo/BLIP2-Japanese", + "project_name": "BLIP2-Japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "wikipedia 日本語の文を、各種日本語の embeddings や faiss index へと変換するスクリプト等。", + "url": "https://github.com/hotchpotch/wikipedia-passages-jawiki-embeddings-utils", + "project_name": "wikipedia-passages-jawiki-embeddings-utils", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Exploring Japanese SimCSE", + "url": "https://github.com/hpprc/simple-simcse-ja", + "project_name": "simple-simcse-ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Wikipediaの日本語記事を元に、ユーザの質問に回答するGradioベースのRAGのサンプル", + "url": "https://github.com/lawofcycles/wikipedia-japanese-open-rag", + "project_name": "wikipedia-japanese-open-rag", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "GPT-4 を用いて、言語モデルの応答を自動評価するスクリプト", + "url": "https://github.com/northern-system-service/gpt4-autoeval", + "project_name": "gpt4-autoeval", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語T5モデル", + "url": "https://github.com/sonoisa/t5-japanese", + "project_name": "t5-japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A repo for evaluating Japanese LLMs ・ 日本語LLMを評価するレポ", + "url": "https://github.com/lightblue-tech/japanese_llm_eval", + "project_name": "japanese_llm_eval", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "The evaluation scripts of JMTEB (Japanese Massive Text Embedding Benchmark)", + "url": "https://github.com/sbintuitions/jmteb", + "project_name": "jmteb", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語音声に対して音素ラベルをアラインメントするためのツールです", + "url": "https://github.com/dwangomediavillage/pydomino", + "project_name": "pydomino", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Yet another Japanese morphological analyzer", + "url": "https://github.com/taku910/mecab", + "project_name": "mecab", + "stargazers_count": 793, + "source": "GitHub", + "score": 0.02277361362396255 + }, + { + "description": "Juman++ (a Morphological Analyzer Toolkit)", + "url": "https://github.com/ku-nlp/jumanpp", + "project_name": "jumanpp", + "stargazers_count": 322, + "source": "GitHub", + "score": 0.009247293300020103 + }, + { + "description": "The Kyoto Text Analysis Toolkit for word segmentation and pronunciation estimation, etc.", + "url": "https://github.com/neubig/kytea", + "project_name": "kytea", + "stargazers_count": 193, + "source": "GitHub", + "score": 0.005542632319577267 + }, + { + "description": "Yet Another Japanese Dependency Structure Analyzer", + "url": 
"https://github.com/taku910/cabocha", + "project_name": "cabocha", + "stargazers_count": 97, + "source": "GitHub", + "score": 0.002785675310875621 + }, + { + "description": "A Japanese Parser", + "url": "https://github.com/ku-nlp/knp", + "project_name": "knp", + "stargazers_count": 23, + "source": "GitHub", + "score": 0.000660520950001436 + }, + { + "description": "Joint source channel model for Japanese Kana Kanji conversion, Chinese pinyin input and CJE mixed input.", + "url": "https://github.com/yohokuno/jsc", + "project_name": "jsc", + "stargazers_count": 14, + "source": "GitHub", + "score": 0.00040205623043565664 + }, + { + "description": "An input method without morphological analysis.", + "url": "https://github.com/codefirst/aquaskk", + "project_name": "aquaskk", + "stargazers_count": 297, + "source": "GitHub", + "score": 0.008529335745670715 + }, + { + "description": "a Japanese Input Method Editor designed for multi-platform", + "url": "https://github.com/google/mozc", + "project_name": "mozc", + "stargazers_count": 1763, + "source": "GitHub", + "score": 0.050630366732718764 + }, + { + "description": "Trimatch: An (Exact|Prefix|Approximate) String Matching Library", + "url": "https://github.com/tuem/trimatch", + "project_name": "trimatch", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "Resembla: Word-based Japanese similar sentence search library", + "url": "https://github.com/tuem/resembla", + "project_name": "resembla", + "stargazers_count": 70, + "source": "GitHub", + "score": 0.002010281152178283 + }, + { + "description": "▽▼ SKK-like Japanese Input Method Editor for Windows", + "url": "https://github.com/nathancorvussolis/corvusskk", + "project_name": "corvusskk", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A morphological analysis library.", + "url": "https://github.com/lindera-morphology/lindera", + "project_name": "lindera", + "stargazers_count": 238, + "source": "GitHub", + "score": 0.0068349559174061625 + }, + { + "description": "Vaporetto: Very Accelerated POintwise pREdicTion based TOkenizer", + "url": "https://github.com/daac-tools/vaporetto", + "project_name": "vaporetto", + "stargazers_count": 190, + "source": "GitHub", + "score": 0.00545647741305534 + }, + { + "description": "Japanese Morphological Analysis written in Rust", + "url": "https://github.com/Leko/goya", + "project_name": "goya", + "stargazers_count": 66, + "source": "GitHub", + "score": 0.0018954079434823814 + }, + { + "description": "vibrato: Viterbi-based accelerated tokenizer", + "url": "https://github.com/daac-tools/vibrato", + "project_name": "vibrato", + "stargazers_count": 234, + "source": "GitHub", + "score": 0.006720082708710261 + }, + { + "description": "A Japanese Morphological Analyzer written in pure Rust", + "url": "https://github.com/agatan/yoin", + "project_name": "yoin", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "Safe Rust bindings for mecab a part-of-speech and morphological analyzer library", + "url": "https://github.com/tsurai/mecab-rs", + "project_name": "mecab-rs", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A morphological analyzer using mecab dictionary", + "url": "https://github.com/nakagami/awabi", + "project_name": "awabi", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "and Romaji", + "url": 
"https://github.com/PSeitz/wana_kana_rust", + "project_name": "wana_kana_rust", + "stargazers_count": 49, + "source": "GitHub", + "score": 0.0014071968065247982 + }, + { + "description": "A Rust library to convert Japanese Half-width-kana[半角カナ] and Wide-alphanumeric[全角英数] into normal ones", + "url": "https://github.com/gemmarx/unicode-jp-rs", + "project_name": "unicode-jp-rs", + "stargazers_count": 12, + "source": "GitHub", + "score": 0.0003446196260877057 + }, + { + "description": "[Mirror] CLI program for transliterating romaji text to either hiragana or katakana", + "url": "https://github.com/gbrlsnchs/kana", + "project_name": "kana", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Lindera tokenizer for Tantivy.", + "url": "https://github.com/lindera-morphology/lindera-tantivy", + "project_name": "lindera-tantivy", + "stargazers_count": 35, + "source": "GitHub", + "score": 0.0010051405760891415 + }, + { + "description": "A Tantivy tokenizer using Vibrato.", + "url": "https://github.com/akr4/tantivy-vibrato", + "project_name": "tantivy-vibrato", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "A fast implementation of the Aho-Corasick algorithm using the compact double-array data structure in Rust.", + "url": "https://github.com/daac-tools/daachorse", + "project_name": "daachorse", + "stargazers_count": 145, + "source": "GitHub", + "score": 0.004164153815226444 + }, + { + "description": "Finding all pairs of similar documents time- and memory-efficiently", + "url": "https://github.com/legalforce-research/find-simdoc", + "project_name": "find-simdoc", + "stargazers_count": 52, + "source": "GitHub", + "score": 0.0014933517130467247 + }, + { + "description": "Rust library of natural language dictionaries using character-wise double-array tries.", + "url": "https://github.com/daac-tools/crawdad", + "project_name": "crawdad", + "stargazers_count": 19, + "source": "GitHub", + "score": 0.000545647741305534 + }, + { + "description": "Comparison code of various tokenizers", + "url": "https://github.com/legalforce-research/tokenizer-speed-bench", + "project_name": "tokenizer-speed-bench", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "Here provides benchmark tools to compare the performance of data structures for string matching.", + "url": "https://github.com/legalforce-research/stringmatch-bench", + "project_name": "stringmatch-bench", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Using Vim as an input method for X11 apps", + "url": "https://github.com/algon-320/vime", + "project_name": "vime", + "stargazers_count": 195, + "source": "GitHub", + "score": 0.005600068923925217 + }, + { + "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのコア", + "url": "https://github.com/VOICEVOX/voicevox_core", + "project_name": "voicevox_core", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Yet another Japanese IME for IBus/Linux", + "url": "https://github.com/akaza-im/akaza", + "project_name": "akaza", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A free online, self-hostable, multilang Japanese dictionary.", + "url": "https://github.com/WeDontPanic/Jotoba", + "project_name": "Jotoba", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Google 日本語入力用DvorakJPローマ字テーブル / 
DvorakJP Roman Table for Google Japanese Input", + "url": "https://github.com/shinespark/dvorakjp-romantable", + "project_name": "dvorakjp-romantable", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese glossator for assisted reading of text using Ichiran", + "url": "https://github.com/Netdex/niinii", + "project_name": "niinii", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "SKK (Simple Kana Kanji henkan) library", + "url": "https://github.com/naokiri/cskk", + "project_name": "cskk", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Learn Japanese vocabs 🇯🇵 by doing quizzes on CLI!", + "url": "https://github.com/tysonwu/japanki", + "project_name": "japanki", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese text preprocessor for Text-to-Speech applications (OpenJTalk rewrite in rust language)", + "url": "https://github.com/jpreprocess/jpreprocess", + "project_name": "jpreprocess", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "裁判例のデータ一覧を裁判所のホームページ(https://www.courts.go.jp/index.html) をスクレイピングして生成するソフトウェア", + "url": "https://github.com/japanese-law-analysis/listup_precedent", + "project_name": "listup_precedent", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JavaScript implementation of Japanese morphological analyzer", + "url": "https://github.com/takuyaa/kuromoji.js", + "project_name": "kuromoji.js", + "stargazers_count": 721, + "source": "GitHub", + "score": 0.020705895867436316 + }, + { + "description": "morphological analyzer (word segmentor + PoS Tagger) for Chinese and Japanese written purely in JavaScript.", + "url": "https://github.com/rakuten-nlp/rakutenma", + "project_name": "rakutenma", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Yet another mecab wrapper for nodejs", + "url": "https://github.com/golbin/node-mecab-ya", + "project_name": "node-mecab-ya", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "a User-Extensible Morphological Analyzer for Japanese. 
日本語形態素解析システム", + "url": "https://github.com/thammin/juman-bin", + "project_name": "juman-bin", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Asynchronous japanese morphological analyser using MeCab.", + "url": "https://github.com/hecomi/node-mecab-async", + "project_name": "node-mecab-async", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese language library for converting Japanese sentence to Hiragana, Katakana or Romaji with furigana and okurigana modes supported.", + "url": "https://github.com/hexenq/kuroshiro", + "project_name": "kuroshiro", + "stargazers_count": 613, + "source": "GitHub", + "score": 0.017604319232646964 + }, + { + "description": "Kuromoji morphological analyzer for kuroshiro.", + "url": "https://github.com/hexenq/kuroshiro-analyzer-kuromoji", + "project_name": "kuroshiro-analyzer-kuromoji", + "stargazers_count": 36, + "source": "GitHub", + "score": 0.001033858878263117 + }, + { + "description": "Node.js module for converting Japanese Hiragana and Katakana script to, and from, Romaji using Hepburn romanisation", + "url": "https://github.com/lovell/hepburn", + "project_name": "hepburn", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Converts Japanese Numerals into number", + "url": "https://github.com/twada/japanese-numerals-to-number", + "project_name": "japanese-numerals-to-number", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Javascript libraries to process text: Arabic, Japanese, etc.", + "url": "https://github.com/kariminf/jslingua", + "project_name": "jslingua", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Javascript library for detecting and transliterating Hiragana <--> Katakana <--> Romaji", + "url": "https://github.com/WaniKani/WanaKana", + "project_name": "WanaKana", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Normalize and fix common issues with Romaji-based Japanese names.", + "url": "https://github.com/jeresig/node-romaji-name", + "project_name": "node-romaji-name", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Utility collections for making Japanese text old-fashioned", + "url": "https://github.com/hakatashi/kyujitai.js", + "project_name": "kyujitai.js", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "オープンソースの住所正規化ライブラリ。", + "url": "https://github.com/geolonia/normalize-japanese-addresses", + "project_name": "normalize-japanese-addresses", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Raw data for Japanese Anime", + "url": "https://github.com/bangumi-data/bangumi-data", + "project_name": "bangumi-data", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese pop-up dictionary extension for Chrome and Firefox.", + "url": "https://github.com/FooSoft/yomichan", + "project_name": "yomichan", + "stargazers_count": 837, + "source": "GitHub", + "score": 0.02403721891961747 + }, + { + "description": "GUIで動作する文書校正ツール GUI tool for textlinting.", + "url": "https://github.com/gecko655/proofreading-tool", + "project_name": "proofreading-tool", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A web-app displaying the 2200 kanji characters taught in James Heisig's \"Remembering the Kanji\", 6th 
edition.", + "url": "https://github.com/minosvasilias/kanjigrid", + "project_name": "kanjigrid", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Monorepo for Kanji, Furigana, Japanese DB, and others", + "url": "https://github.com/echamudi/japanese-toolkit", + "project_name": "japanese-toolkit", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "文の敬体(ですます調)、常体(である調)を解析するJavaScriptライブラリ", + "url": "https://github.com/textlint-ja/analyze-desumasu-dearu", + "project_name": "analyze-desumasu-dearu", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Japanese pitch accent utils", + "url": "https://github.com/DJTB/hatsuon", + "project_name": "hatsuon", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Sentiment Analysis in Japanese. sentiment_ja with JavaScript", + "url": "https://github.com/otodn/sentiment_ja_js", + "project_name": "sentiment_ja_js", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "mecab-ipadic seed dictionary reader", + "url": "https://github.com/takuyaa/mecab-ipadic-seed", + "project_name": "mecab-ipadic-seed", + "stargazers_count": 7, + "source": "GitHub", + "score": 0.00020102811521782832 + }, + { + "description": "Well, a different Japanese word everyday.", + "url": "https://github.com/LuanRT/Japanese-Word-Of-The-Day", + "project_name": "Japanese-Word-Of-The-Day", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Extend GNOME On-Screen Keyboard for Input Methods", + "url": "https://github.com/esrille/oskim", + "project_name": "oskim", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "東日本大震災発生から24時間以内につぶやかれたジオタグ付きツイートのデジタルアーカイブです。", + "url": "https://github.com/wtnv-lab/tweetMapping", + "project_name": "tweetMapping", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Predict pitch accent in Japanese", + "url": "https://github.com/shirakaba/pitch-accent", + "project_name": "pitch-accent", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "「ひらがな」または「カタカナ」を日本語で発音する際の音声記号(IPA)に変換するコマンド", + "url": "https://github.com/amanoese/kana2ipa", + "project_name": "kana2ipa", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "無料で使える中品質なテキスト読み上げソフトウェア、VOICEVOXのエディター", + "url": "https://github.com/VOICEVOX/voicevox", + "project_name": "voicevox", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Self-contained Japanese Morphological Analyzer written in pure Go", + "url": "https://github.com/ikawaha/kagome", + "project_name": "kagome", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "テキストを壱百満天原サロメお嬢様風の口調に変換します", + "url": "https://github.com/jiro4989/ojosama", + "project_name": "ojosama", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Dictionary", + "url": "https://github.com/gojp/nihongo", + "project_name": "nihongo", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "External dictionary importer for Yomichan.", + "url": "https://github.com/FooSoft/yomichan-import", + "project_name": "yomichan-import", + "stargazers_count": 69, + "source": "GitHub", + "score": 0.001981562850004308 + }, + { + "description": "THE IDOLM@STER words 
dictionary for Japanese IME (by imas-db.jp)", + "url": "https://github.com/maruamyu/imas-ime-dic", + "project_name": "imas-ime-dic", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Kanji transliteration to hiragana/katakana/romaji, in Go", + "url": "https://github.com/sarumaj/go-kakasi", + "project_name": "go-kakasi", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A Go library for Zenkaku/Hankaku conversion", + "url": "https://github.com/ktnyt/go-moji", + "project_name": "go-moji", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "おじさんがLINEやメールで送ってきそうな文を生成する", + "url": "https://github.com/greymd/ojichat", + "project_name": "ojichat", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Kuromoji is a self-contained and very easy to use Japanese morphological analyzer designed for search", + "url": "https://github.com/atilika/kuromoji", + "project_name": "kuromoji", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A lexicon for Sudachi", + "url": "https://github.com/WorksApplications/SudachiDict", + "project_name": "SudachiDict", + "stargazers_count": 183, + "source": "GitHub", + "score": 0.005255449297837512 + }, + { + "description": "Java library for identifying Japanese characters from images", + "url": "https://github.com/sakarika/kanjitomo-ocr", + "project_name": "kanjitomo-ocr", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Java library and command-line tool to transliterate Japanese kanji to romaji (Latin alphabet)", + "url": "https://github.com/nicolas-raoul/jakaroma", + "project_name": "jakaroma", + "stargazers_count": 52, + "source": "GitHub", + "score": 0.0014933517130467247 + }, + { + "description": "Kanji transliteration to hiragana/katakana/romaji, in Java", + "url": "https://github.com/nicolas-raoul/kakasi-java", + "project_name": "kakasi-java", + "stargazers_count": 46, + "source": "GitHub", + "score": 0.001321041900002872 + }, + { + "description": "A desktop language immersion companion for learners of Japanese", + "url": "https://github.com/fauu/Kamite", + "project_name": "Kamite", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Async Japanese Tokenizer Native Plugin for React Native for iOS and Android", + "url": "https://github.com/craftzdog/react-native-japanese-tokenizer", + "project_name": "react-native-japanese-tokenizer", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese analyzer uses kuromoji japanese tokenizer for ElasticSearch", + "url": "https://github.com/suguru/elasticsearch-analysis-japanese", + "project_name": "elasticsearch-analysis-japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A Java library to convert between Japanese Hiragana, Katakana, and Romaji scripts.", + "url": "https://github.com/andree-surya/moji4j", + "project_name": "moji4j", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese text normalizer for mecab-neologd", + "url": "https://github.com/ikegami-yukino/neologdn-java", + "project_name": "neologdn-java", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "The Japanese analysis plugin for elasticsearch", + "url": 
"https://github.com/worksapplications/elasticsearch-sudachi", + "project_name": "elasticsearch-sudachi", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Word2vec (word to vectors) approach for Japanese language using Gensim and Mecab.", + "url": "https://github.com/philipperemy/japanese-words-to-vectors", + "project_name": "japanese-words-to-vectors", + "stargazers_count": 76, + "source": "GitHub", + "score": 0.002182590965222136 + }, + { + "description": "Japanese word embedding with Sudachi and NWJC", + "url": "https://github.com/WorksApplications/chiVe", + "project_name": "chiVe", + "stargazers_count": 113, + "source": "GitHub", + "score": 0.0032451681456592287 + }, + { + "description": "elmo-japanese", + "url": "https://github.com/cl-tohoku/elmo-japanese", + "project_name": "elmo-japanese", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "Python Implementation of EmbedRank", + "url": "https://github.com/yagays/embedrank", + "project_name": "embedrank", + "stargazers_count": 46, + "source": "GitHub", + "score": 0.001321041900002872 + }, + { + "description": "青空文庫全書籍のWord2Vecビルダー+構築済みモデル", + "url": "https://github.com/eggplants/aovec", + "project_name": "aovec", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "This is a repository for the AI LAB article \"係り受けに基づく日本語単語埋込 (Dependency-based Japanese Word Embeddings)\" ( Article URL https://ai-lab.lapras.com/nlp/japanese-word-embedding/)", + "url": "https://github.com/lapras-inc/dependency-based-japanese-word-embeddings", + "project_name": "dependency-based-japanese-word-embeddings", + "stargazers_count": 8, + "source": "GitHub", + "score": 0.00022974641739180378 + }, + { + "description": "Yet Another Japanese-Wikipedia Entity Vectors", + "url": "https://github.com/wikiwikification/jawikivec", + "project_name": "jawikivec", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "最新の日本語Wikipediaのダンプデータから,MeCabを用いてIPA辞書と最新のNeologd辞書の両方で形態素解析を実施し,その結果に基づいた word2vec,fastText,GloVeの単語分散表現を学習するためのスクリプト", + "url": "https://github.com/kamigaito/jawiki_word_vector_updater", + "project_name": "jawiki_word_vector_updater", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "BERT models for Japanese text.", + "url": "https://github.com/cl-tohoku/bert-japanese", + "project_name": "bert-japanese", + "stargazers_count": 400, + "source": "GitHub", + "score": 0.01148732086959019 + }, + { + "description": "Code for producing Japanese pretrained models provided by rinna Co., Ltd.", + "url": "https://github.com/rinnakk/japanese-pretrained-models", + "project_name": "japanese-pretrained-models", + "stargazers_count": 488, + "source": "GitHub", + "score": 0.014014531460900032 + }, + { + "description": "BERT with SentencePiece for Japanese text.", + "url": "https://github.com/yoheikikuta/bert-japanese", + "project_name": "bert-japanese", + "stargazers_count": 480, + "source": "GitHub", + "score": 0.013784785043508227 + }, + { + "description": "Japanese tokenizer for Transformers", + "url": "https://github.com/WorksApplications/SudachiTra", + "project_name": "SudachiTra", + "stargazers_count": 59, + "source": "GitHub", + "score": 0.001694379828264553 + }, + { + "description": "Code for evaluating Japanese pretrained models provided by NTT Ltd.", + "url": 
"https://github.com/nttcslab/japanese-dialog-transformers", + "project_name": "japanese-dialog-transformers", + "stargazers_count": 214, + "source": "GitHub", + "score": 0.006145716665230751 + }, + { + "description": "Pytorch implementation and pre-trained Japanese model for CANINE, the efficient character-level transformer.", + "url": "https://github.com/octanove/shiba", + "project_name": "shiba", + "stargazers_count": 86, + "source": "GitHub", + "score": 0.002469773986961891 + }, + { + "description": "A PyTorch Implementation of japanese chatbot using BERT and Transformer's decoder", + "url": "https://github.com/reppy4620/Dialog", + "project_name": "Dialog", + "stargazers_count": 65, + "source": "GitHub", + "score": 0.0018666896413084058 + }, + { + "description": "BERT and ELECTRA models of PyTorch implementations for Japanese text.", + "url": "https://github.com/retarfi/language-pretraining", + "project_name": "language-pretraining", + "stargazers_count": 23, + "source": "GitHub", + "score": 0.000660520950001436 + }, + { + "description": "Trials of pre-trained BERT models for the medical domain in Japanese.", + "url": "https://github.com/ou-medinfo/medbertjp", + "project_name": "medbertjp", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "ILYS-aoba-chatbot", + "url": "https://github.com/cl-tohoku/ILYS-aoba-chatbot", + "project_name": "ILYS-aoba-chatbot", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "Codes to pre-train Japanese T5 models", + "url": "https://github.com/megagonlabs/t5-japanese", + "project_name": "t5-japanese", + "stargazers_count": 39, + "source": "GitHub", + "score": 0.0011200137847850436 + }, + { + "description": "PytorchでBERTの日本語学習済みモデルを利用する", + "url": "https://github.com/yagays/pytorch_bert_japanese", + "project_name": "pytorch_bert_japanese", + "stargazers_count": 32, + "source": "GitHub", + "score": 0.0009189856695672151 + }, + { + "description": "Laboro BERT Japanese: Japanese BERT Pre-Trained With Web-Corpus", + "url": "https://github.com/laboroai/Laboro-BERT-Japanese", + "project_name": "Laboro-BERT-Japanese", + "stargazers_count": 72, + "source": "GitHub", + "score": 0.002067717756526234 + }, + { + "description": "Japanese BERT Pretrained Model", + "url": "https://github.com/tanreinama/RoBERTa-japanese", + "project_name": "RoBERTa-japanese", + "stargazers_count": 22, + "source": "GitHub", + "score": 0.0006318026478274604 + }, + { + "description": "aMLP Transformer Model for Japanese", + "url": "https://github.com/tanreinama/aMLP-japanese", + "project_name": "aMLP-japanese", + "stargazers_count": 13, + "source": "GitHub", + "score": 0.0003733379282616812 + }, + { + "description": "Japanese BERT trained on Aozora Bunko and Wikipedia, pre-tokenized by MeCab with UniDic & SudachiPy", + "url": "https://github.com/akirakubo/bert-japanese-aozora", + "project_name": "bert-japanese-aozora", + "stargazers_count": 38, + "source": "GitHub", + "score": 0.001091295482611068 + }, + { + "description": "Code to train Sentence BERT Japanese model for Hugging Face Model Hub", + "url": "https://github.com/colorfulscoop/sbert-ja", + "project_name": "sbert-ja", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "Official fine-tuning code for \"Emotion Analysis of Japanese Tweets and Comparison to Vaccinations in Japan\"", + "url": "https://github.com/PatrickJohnRamos/BERT-Japan-vaccination", + 
"project_name": "BERT-Japan-vaccination", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese GPT2 Generation Model", + "url": "https://github.com/tanreinama/gpt2-japanese", + "project_name": "gpt2-japanese", + "stargazers_count": 276, + "source": "GitHub", + "score": 0.007926251400017232 + }, + { + "description": "gpt-2 based text2text conversion model", + "url": "https://github.com/tanreinama/text2text-japanese", + "project_name": "text2text-japanese", + "stargazers_count": 31, + "source": "GitHub", + "score": 0.0008902673673932397 + }, + { + "description": "GPT-2 Japanese model for HuggingFace's transformers", + "url": "https://github.com/colorfulscoop/gpt-ja", + "project_name": "gpt-ja", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon", + "url": "https://github.com/astremo/friendly_JA-Model", + "project_name": "friendly_JA-Model", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "BERT with SentencePiece for Japanese text.", + "url": "https://github.com/alinear-corp/albert-japanese", + "project_name": "albert-japanese", + "stargazers_count": 31, + "source": "GitHub", + "score": 0.0008902673673932397 + }, + { + "description": "日本語WikipediaコーパスでBERTのPre-Trainedモデルを生成するためのリポジトリ", + "url": "https://github.com/Kosuke-Szk/ja_text_bert", + "project_name": "ja_text_bert", + "stargazers_count": 112, + "source": "GitHub", + "score": 0.003216449843485253 + }, + { + "description": "A Japanese DistilBERT pretrained model, which was trained on Wikipedia.", + "url": "https://github.com/BandaiNamcoResearchInc/DistilBERT-base-jp", + "project_name": "DistilBERT-base-jp", + "stargazers_count": 149, + "source": "GitHub", + "score": 0.004279027023922345 + }, + { + "description": "This repository provides snippets to use RoBERTa pre-trained on Japanese corpus. Our dataset consists of Japanese Wikipedia and web-scrolled articles, 25GB in total. 
The released model is built based on that from HuggingFace.", + "url": "https://github.com/informatix-inc/bert", + "project_name": "bert", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "Laboro DistilBERT Japanese", + "url": "https://github.com/laboroai/Laboro-DistilBERT-Japanese", + "project_name": "Laboro-DistilBERT-Japanese", + "stargazers_count": 14, + "source": "GitHub", + "score": 0.00040205623043565664 + }, + { + "description": "- Language Understanding with Knowledge-based Embeddings", + "url": "https://github.com/studio-ousia/luke", + "project_name": "luke", + "stargazers_count": 583, + "source": "GitHub", + "score": 0.0167427701674277 + }, + { + "description": "General-purpose Switch transformer based Japanese language model", + "url": "https://github.com/tanreinama/GPTSAN", + "project_name": "GPTSAN", + "stargazers_count": 63, + "source": "GitHub", + "score": 0.001809253036960455 + }, + { + "description": "Japanese CLIP by rinna Co., Ltd.", + "url": "https://github.com/rinnakk/japanese-clip", + "project_name": "japanese-clip", + "stargazers_count": 39, + "source": "GitHub", + "score": 0.0011200137847850436 + }, + { + "description": "We pretrained a BART-based Japanese masked language model on paper abstracts from the academic database CiNii Articles", + "url": "https://github.com/EhimeNLP/AcademicBART", + "project_name": "AcademicBART", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "We pretrained a RoBERTa-based Japanese masked language model on paper abstracts from the academic database CiNii Articles.", + "url": "https://github.com/EhimeNLP/AcademicRoBERTa", + "project_name": "AcademicRoBERTa", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "DistilBERT model pre-trained on 131 GB of Japanese web text. 
The teacher model is BERT-base that was built in-house at LINE.", + "url": "https://github.com/line/LINE-DistilBERT-Japanese", + "project_name": "LINE-DistilBERT-Japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語に翻訳したStanford Alpacaのデータセットを用いてLLaMAをファインチューニングし作成したLow-Rank AdapterのリンクとGenerateサンプルコード", + "url": "https://github.com/kunishou/Japanese-Alpaca-LoRA", + "project_name": "Japanese-Alpaca-LoRA", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Pretrained models, codes and guidances to pretrain official ALBERT(https://github.com/google-research/albert) on Japanese Wikipedia Resources", + "url": "https://github.com/nknytk/albert-japanese-tinysegmenter", + "project_name": "albert-japanese-tinysegmenter", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese LLaMa experiment", + "url": "https://github.com/lighttransport/japanese-llama-experiment", + "project_name": "japanese-llama-experiment", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "EasyLightChatAssistant は軽量で検閲や規制のないローカル日本語モデルのLightChatAssistant を、KoboldCpp で簡単にお試しする環境です。", + "url": "https://github.com/zuntan03/easylightchatassistant", + "project_name": "easylightchatassistant", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ChatGPTを使ってVRChat上でお喋り出来るようにするプログラム。", + "url": "https://github.com/Yuchi-Games/VRChatGPT", + "project_name": "VRChatGPT", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "AITuberの基礎となる部分を開発しています", + "url": "https://github.com/M-gen/AITuberDegikkoMirii", + "project_name": "AITuberDegikkoMirii", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Shell command launcher with natural language", + "url": "https://github.com/hirokidaichi/wanna", + "project_name": "wanna", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ChatdollKit enables you to make your 3D model into a chatbot", + "url": "https://github.com/uezo/ChatdollKit", + "project_name": "ChatdollKit", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "GUI for ChatGPT API For Japanese", + "url": "https://github.com/gyokuro33/ChuanhuChatGPTJapanese", + "project_name": "ChuanhuChatGPTJapanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ChatGPT3.5を搭載した伺かゴースト「AI妹アイちゃん」です。利用には別途ChatGPTのAPIキーが必要です。", + "url": "https://github.com/manju-summoner/AISisterAIChan", + "project_name": "AISisterAIChan", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "VRChatにAI Botを作るためのリポジトリ", + "url": "https://github.com/Geson-anko/vrchatbot", + "project_name": "vrchatbot", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "GPTがYouTuberをやります", + "url": "https://github.com/karakuri-ai/gptuber-by-langchain", + "project_name": "gptuber-by-langchain", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A chatbox application built using Nuxt 3 powered by Open AI Text completion endpoint. You can select different personality of your AI friend. The default will respond in Japanese. 
You can use this app to practice your Nihongo skills!", + "url": "https://github.com/supershaneski/openai-chatfriend", + "project_name": "openai-chatfriend", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "This Chrome extension can translate selected Japanese text to Hiragana by using ChatGPT.", + "url": "https://github.com/franzwong/chrome-ext-translate-to-hiragana-with-chatgpt", + "project_name": "chrome-ext-translate-to-hiragana-with-chatgpt", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "このサンプルでは、Retrieval Augmented Generation パターンを使用して、独自のデータに対してChatGPT のような体験を作成するためのいくつかのアプローチを示しています。", + "url": "https://github.com/nohanaga/azure-search-openai-demo", + "project_name": "azure-search-openai-demo", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ChatVRMはブラウザで簡単に3Dキャラクターと会話ができるデモアプリケーションです。", + "url": "https://github.com/pixiv/chatvrm", + "project_name": "chatvrm", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A Chrome extension to replace the selected text softly", + "url": "https://github.com/kmizu/sftly-replace", + "project_name": "sftly-replace", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Summarize arXiv paper with figures", + "url": "https://github.com/rkmt/summarize_arxv", + "project_name": "summarize_arxv", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Building AI-based conversational avatars lightning fast", + "url": "https://github.com/uezo/aiavatarkit", + "project_name": "aiavatarkit", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "このリポジトリは、神戸市役所でのChatGPTの試行利用に向けて作成したフロー等をソリューション化し公開するものです。", + "url": "https://github.com/City-of-Kobe/pva-aoai-integration-solution", + "project_name": "pva-aoai-integration-solution", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Azure OpenAIを活用したアプリケーション実装のリファレンスを目的として、アプリのサンプル(リファレンスアーキテクチャ、サンプルコードとデプロイ手順)を無償提供しています。", + "url": "https://github.com/azure-samples/jp-azureopenai-samples", + "project_name": "jp-azureopenai-samples", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "OpenAIのAPIを利用して、設定したキャラクターと日本語で会話するチャットスクリプトです。", + "url": "https://github.com/mutaguchi/character_chat", + "project_name": "character_chat", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "OpenAIのChatGPT APIをSlack上で利用するためのSlackbotスクリプト (日本語での利用が前提)", + "url": "https://github.com/sifue/chatgpt-slackbot", + "project_name": "chatgpt-slackbot", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ChatGPT の Prompt のサンプルです。", + "url": "https://github.com/dahatake/chatgpt-prompt-sample-japanese", + "project_name": "chatgpt-prompt-sample-japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A Japanese Kanji Flashcard App built using Python and Langchain, enhanced with the intelligence of GPT-4.", + "url": "https://github.com/adilmoujahid/kanji-flashcard-app-gpt4", + "project_name": "kanji-flashcard-app-gpt4", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Evaluating GPT-4 and ChatGPT on Japanese Medical Licensing Examinations", + "url": "https://github.com/jungokasai/IgakuQA", + "project_name": "IgakuQA", + "stargazers_count": 0, + "source": 
"GitHub", + "score": 0.0 + }, + { + "description": "日本語タスクにおけるLLMを用いた疑似学習データ生成の検討", + "url": "https://github.com/retrieva/japagen", + "project_name": "japagen", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Neologism dictionary based on the language resources on the Web for mecab-ipadic", + "url": "https://github.com/neologd/mecab-ipadic-neologd", + "project_name": "mecab-ipadic-neologd", + "stargazers_count": 2571, + "source": "GitHub", + "score": 0.07383475488929095 + }, + { + "description": "A Japanese accent dictionary generator", + "url": "https://github.com/PKSHATechnology-Research/tdmelodic", + "project_name": "tdmelodic", + "stargazers_count": 79, + "source": "GitHub", + "score": 0.0022687458717440623 + }, + { + "description": "Python 3 library for manipulating Jim Breen's JMdict, KanjiDic2, JMnedict and kanji-radical mappings", + "url": "https://github.com/neocl/jamdict", + "project_name": "jamdict", + "stargazers_count": 92, + "source": "GitHub", + "score": 0.002642083800005744 + }, + { + "description": "Unidic packaged for installation via pip.", + "url": "https://github.com/polm/unidic-py", + "project_name": "unidic-py", + "stargazers_count": 33, + "source": "GitHub", + "score": 0.0009477039717411907 + }, + { + "description": "Japanese Company Lexicon (JCLdic)", + "url": "https://github.com/chakki-works/Japanese-Company-Lexicon", + "project_name": "Japanese-Company-Lexicon", + "stargazers_count": 76, + "source": "GitHub", + "score": 0.002182590965222136 + }, + { + "description": "Sudachi向け万病辞書", + "url": "https://github.com/yagays/manbyo-sudachi", + "project_name": "manbyo-sudachi", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "Generate SKK/MeCab dictionary from Wikipedia(Japanese edition)", + "url": "https://github.com/tokuhirom/jawiki-kana-kanji-dict", + "project_name": "jawiki-kana-kanji-dict", + "stargazers_count": 30, + "source": "GitHub", + "score": 0.0008615490652192642 + }, + { + "description": "dictionary to find emotion related to text", + "url": "https://github.com/sociocom/JIWC-Dictionary", + "project_name": "JIWC-Dictionary", + "stargazers_count": 27, + "source": "GitHub", + "score": 0.0007753941586973378 + }, + { + "description": "This repository contains source dictionary files to build dictionaries for JUMAN and Juman++.", + "url": "https://github.com/ku-nlp/JumanDIC", + "project_name": "JumanDIC", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "IPAdic packaged for easy use from Python.", + "url": "https://github.com/polm/ipadic-py", + "project_name": "ipadic-py", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "A small version of UniDic for easy pip installs.", + "url": "https://github.com/polm/unidic-lite", + "project_name": "unidic-lite", + "stargazers_count": 22, + "source": "GitHub", + "score": 0.0006318026478274604 + }, + { + "description": "日本語で絵文字入力をするための IME 追加辞書 orange_book Google 日本語入力などで日本語から絵文字への変換を可能にする IME 拡張辞書", + "url": "https://github.com/peaceiris/emoji-ime-dictionary", + "project_name": "emoji-ime-dictionary", + "stargazers_count": 281, + "source": "GitHub", + "score": 0.008069842910887108 + }, + { + "description": "日英変換・英語略語展開のための IME 追加辞書 orange_book 日本語から英語への和英変換や英語略語の展開を Google 日本語入力や ATOK などで可能にする IME 拡張辞書", + "url": "https://github.com/peaceiris/google-ime-dictionary", + "project_name": "google-ime-dictionary", + 
"stargazers_count": 53, + "source": "GitHub", + "score": 0.0015220700152207 + }, + { + "description": "ニコニコ大百科とピクシブ百科事典の共通部分のIME辞書", + "url": "https://github.com/ncaq/dic-nico-intersection-pixiv", + "project_name": "dic-nico-intersection-pixiv", + "stargazers_count": 47, + "source": "GitHub", + "score": 0.0013497602021768473 + }, + { + "description": "GoogleIME用カタカナ語辞書プロジェクトのアーカイブです。Project archive of Google IME user dictionary from Katakana word ( Japanese loanword ) to English.", + "url": "https://github.com/KEINOS/google-ime-user-dictionary-ja-en", + "project_name": "google-ime-user-dictionary-ja-en", + "stargazers_count": 36, + "source": "GitHub", + "score": 0.001033858878263117 + }, + { + "description": "Google日本語入力の顔文字辞書∩(,,Ò‿Ó,,)∩", + "url": "https://github.com/tiwanari/emoticon", + "project_name": "emoticon", + "stargazers_count": 39, + "source": "GitHub", + "score": 0.0011200137847850436 + }, + { + "description": "open source mozc dictionaryをMeCab辞書のフォーマットに変換したものです。", + "url": "https://github.com/akirakubo/mecab-mozcdic", + "project_name": "mecab-mozcdic", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "電音IME: Microsoft IMEなどで利用することを想定した「電音部」関連用語の辞書", + "url": "https://github.com/albno273/denonbu-ime-dic", + "project_name": "denonbu-ime-dic", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "Microsoft IMEなどで利用することを想定した「にじさんじ」関連用語の用語辞書です。", + "url": "https://github.com/Umichang/nijisanji-ime-dic", + "project_name": "nijisanji-ime-dic", + "stargazers_count": 24, + "source": "GitHub", + "score": 0.0006892392521754114 + }, + { + "description": "Microsoft IMEなどで利用することを想定した、現状判明している全てのポケモンの名前を網羅した用語辞書です。", + "url": "https://github.com/Umichang/pokemon-ime-dic", + "project_name": "pokemon-ime-dic", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "English-Japanese Dictionary data (Public Domain) EJDict-hand", + "url": "https://github.com/kujirahand/EJDict", + "project_name": "EJDict", + "stargazers_count": 147, + "source": "GitHub", + "score": 0.004221590419574395 + }, + { + "description": "贵樣ばこゐ辞畫を使て正レい日本语を使ラことが出來ゑ。", + "url": "https://github.com/Rinrin0413/Ayashiy-Nipongo-Dic", + "project_name": "Ayashiy-Nipongo-Dic", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "Windows/macOSで使える原神の単語辞書です", + "url": "https://github.com/kotofurumiya/genshin-dict", + "project_name": "genshin-dict", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "JMdict and JMnedict in JSON format", + "url": "https://github.com/scriptin/jmdict-simplified", + "project_name": "jmdict-simplified", + "stargazers_count": 95, + "source": "GitHub", + "score": 0.00272823870652767 + }, + { + "description": "Convert external words into Mozc system dictionary", + "url": "https://github.com/reasonset/mozcdict-ext", + "project_name": "mozcdict-ext", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "MonsterHunterのユーザー辞書を作りたい…", + "url": "https://github.com/utubo/mh-dict-jp", + "project_name": "mh-dict-jp", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Convert data from Japanese dictionary websites and applications into portable file formats", + "url": "https://github.com/stephenmk/jitenbot", + "project_name": "jitenbot", + "stargazers_count": 0, + "source": "GitHub", + "score": 
0.0 + }, + { + "description": "Neologism dictionary based on the language resources on the Web for mecab-unidic", + "url": "https://github.com/neologd/mecab-unidic-neologd", + "project_name": "mecab-unidic-neologd", + "stargazers_count": 70, + "source": "GitHub", + "score": 0.002010281152178283 + }, + { + "description": "ホロライブ(ホロライブプロダクション)に関する辞書ファイルです。./dictionary フォルダ内のテキストファイルを使って、IMEに単語を追加できます。詳細はREADME.mdをご覧ください。", + "url": "https://github.com/heppokofrontend/hololive-dictionary", + "project_name": "hololive-dictionary", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JMdict, JMnedict, KANJIDIC for Yomitan/Yomichan.", + "url": "https://github.com/themoeway/jmdict-yomitan", + "project_name": "jmdict-yomitan", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JLPT level tags for words in Yomichan", + "url": "https://github.com/stephenmk/yomichan-jlpt-vocab", + "project_name": "yomichan-jlpt-vocab", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A free and openly licensed Japanese-to-English dictionary compatible with multiple dictionary clients", + "url": "https://github.com/stephenmk/Jitendex", + "project_name": "Jitendex", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "japanese android/cli/web dictionary based on jmdict/kanjidic — 日本語 辞典 和英辞典 漢英字典 和独辞典 和蘭辞典", + "url": "https://github.com/obfusk/jiten", + "project_name": "jiten", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Pixiv Encyclopedia Dictionary for Yomitan", + "url": "https://github.com/MarvNC/pixiv-yomitan", + "project_name": "pixiv-yomitan", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "うちなーぐち辞典(沖縄語辞典)", + "url": "https://github.com/nanjakkun/uchinaaguchi_dict", + "project_name": "uchinaaguchi_dict", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Wikipediaを用いた日本語の固有表現抽出データセット", + "url": "https://github.com/stockmarkteam/ner-wikipedia-dataset", + "project_name": "ner-wikipedia-dataset", + "stargazers_count": 97, + "source": "GitHub", + "score": 0.002785675310875621 + }, + { + "description": "Japanese IOB2 tagged corpus for Named Entity Recognition.", + "url": "https://github.com/Hironsan/IOB2Corpus", + "project_name": "IOB2Corpus", + "stargazers_count": 58, + "source": "GitHub", + "score": 0.0016656615260905775 + }, + { + "description": "首都大日本語 Twitter コーパス", + "url": "https://github.com/tmu-nlp/TwitterCorpus", + "project_name": "TwitterCorpus", + "stargazers_count": 20, + "source": "GitHub", + "score": 0.0005743660434795095 + }, + { + "description": "Parallel Universal Dependencies.", + "url": "https://github.com/megagonlabs/UD_Japanese-PUD", + "project_name": "UD_Japanese-PUD", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese data from the Google UDT 2.0.", + "url": "https://github.com/megagonlabs/UD_Japanese-GSD", + "project_name": "UD_Japanese-GSD", + "stargazers_count": 26, + "source": "GitHub", + "score": 0.0007466758565233624 + }, + { + "description": "Kyoto University Web Document Leads Corpus", + "url": "https://github.com/ku-nlp/KWDLC", + "project_name": "KWDLC", + "stargazers_count": 69, + "source": "GitHub", + "score": 0.001981562850004308 + }, + { + "description": "Annotated Fuman Kaitori Center Corpus", + "url": "https://github.com/ku-nlp/AnnotatedFKCCorpus", + "project_name": 
"AnnotatedFKCCorpus", + "stargazers_count": 16, + "source": "GitHub", + "score": 0.00045949283478360757 + }, + { + "description": "50k English-Japanese Parallel Corpus for Machine Translation Benchmark.", + "url": "https://github.com/odashi/small_parallel_enja", + "project_name": "small_parallel_enja", + "stargazers_count": 83, + "source": "GitHub", + "score": 0.0023836190804399646 + }, + { + "description": "A Web Crawled Corpus for Japanese-Chinese NMT", + "url": "https://github.com/zhang-jinyi/Web-Crawled-Corpus-for-Japanese-Chinese-NMT", + "project_name": "Web-Crawled-Corpus-for-Japanese-Chinese-NMT", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Coursera Corpus Mining and Multistage Fine-Tuning for Improving Lectures Translation", + "url": "https://github.com/shyyhs/CourseraParallelCorpusMining", + "project_name": "CourseraParallelCorpusMining", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "A large parallel corpus of English and Japanese", + "url": "https://github.com/rpryzant/JESC", + "project_name": "JESC", + "stargazers_count": 66, + "source": "GitHub", + "score": 0.0018954079434823814 + }, + { + "description": "AMI Meeting Parallel Corpus", + "url": "https://github.com/tsuruoka-lab/AMI-Meeting-Parallel-Corpus", + "project_name": "AMI-Meeting-Parallel-Corpus", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "This directory includes a giant Japanese-English subtitle corpus. The raw data comes from the Stanford’s JESC project.", + "url": "https://github.com/DayuanJiang/giant_ja-en_parallel_corpus", + "project_name": "giant_ja-en_parallel_corpus", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "Small Japanese-English Subtitle Corpus", + "url": "https://github.com/yusugomori/jesc_small", + "project_name": "jesc_small", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "禁止用語や単語レベルを考慮した日英対訳コーパスです。", + "url": "https://github.com/marmooo/graded-enja-corpus", + "project_name": "graded-enja-corpus", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "CJK computer science terms comparison / 中日韓電腦科學術語對照 / 日中韓のコンピュータ科学の用語対照 / 한·중·일 전산학 용어 대조", + "url": "https://github.com/dahlia/cjk-compsci-terms", + "project_name": "cjk-compsci-terms", + "stargazers_count": 87, + "source": "GitHub", + "score": 0.0024984922891358665 + }, + { + "description": "Scripts for creating a Japanese-English parallel corpus and training NMT models", + "url": "https://github.com/laboroai/Laboro-ParaCorpus", + "project_name": "Laboro-ParaCorpus", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "google-vs-deepl-je", + "url": "https://github.com/Tzawa/google-vs-deepl-je", + "project_name": "google-vs-deepl-je", + "stargazers_count": 3, + "source": "GitHub", + "score": 8.615490652192643e-05 + }, + { + "description": "Japanese Movie Recommendation Dialogue dataset", + "url": "https://github.com/ku-nlp/JMRD", + "project_name": "JMRD", + "stargazers_count": 16, + "source": "GitHub", + "score": 0.00045949283478360757 + }, + { + "description": "おーぷん2ちゃんねるをクロールして作成した対話コーパス", + "url": "https://github.com/1never/open2ch-dialogue-corpus", + "project_name": "open2ch-dialogue-corpus", + "stargazers_count": 65, + "source": "GitHub", + 
"score": 0.0018666896413084058 + }, + { + "description": "The Business Scene Dialogue corpus", + "url": "https://github.com/tsuruoka-lab/BSD", + "project_name": "BSD", + "stargazers_count": 52, + "source": "GitHub", + "score": 0.0014933517130467247 + }, + { + "description": "Accommodation Search Dialog Corpus (宿泊施設探索対話コーパス)", + "url": "https://github.com/megagonlabs/asdc", + "project_name": "asdc", + "stargazers_count": 23, + "source": "GitHub", + "score": 0.000660520950001436 + }, + { + "description": "日本語の対話データ for seq2seq etc", + "url": "https://github.com/MokkeMeguru/japanese-corpus", + "project_name": "japanese-corpus", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "This repository contains the Japanese–English bilingual chat corpus BPersona-chat published in the paper Chat Translation Error Detection for Assisting Cross-lingual Communications at AACL-IJCNLP 2022's Workshop Eval4NLP 2022.", + "url": "https://github.com/cl-tohoku/BPersona-chat", + "project_name": "BPersona-chat", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Japanese Daily Dialogue, or 日本語日常対話コーパス in Japanese, is a high-quality multi-turn dialogue dataset containing daily conversations on five topics: dailylife, school, travel, health, and entertainment.", + "url": "https://github.com/jqk09a/japanese-daily-dialogue", + "project_name": "japanese-daily-dialogue", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "LLM構築用の日本語チャットデータセット", + "url": "https://github.com/masanorihirano/llm-japanese-dataset", + "project_name": "llm-japanese-dataset", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Realistic Textual Entailment Corpus (NLP 2020, LREC 2020)", + "url": "https://github.com/megagonlabs/jrte-corpus", + "project_name": "jrte-corpus", + "stargazers_count": 70, + "source": "GitHub", + "score": 0.002010281152178283 + }, + { + "description": "A JSON kanji dataset with updated JLPT levels and WaniKani information", + "url": "https://github.com/davidluzgouveia/kanji-data", + "project_name": "kanji-data", + "stargazers_count": 70, + "source": "GitHub", + "score": 0.002010281152178283 + }, + { + "description": "Japanese Word Similarity Dataset", + "url": "https://github.com/tmu-nlp/JapaneseWordSimilarityDataset", + "project_name": "JapaneseWordSimilarityDataset", + "stargazers_count": 94, + "source": "GitHub", + "score": 0.0026995204043536946 + }, + { + "description": "A paraphrase database for Japanese text simplification", + "url": "https://github.com/tmu-nlp/simple-jppdb", + "project_name": "simple-jppdb", + "stargazers_count": 27, + "source": "GitHub", + "score": 0.0007753941586973378 + }, + { + "description": "chakki's Aspect-Based Sentiment Analysis dataset", + "url": "https://github.com/chakki-works/chABSA-dataset", + "project_name": "chABSA-dataset", + "stargazers_count": 118, + "source": "GitHub", + "score": 0.003388759656529106 + }, + { + "description": "JaQuAD: Japanese Question Answering Dataset for Machine Reading Comprehension (2022, Skelter Labs)", + "url": "https://github.com/SkelterLabsInc/JaQuAD", + "project_name": "JaQuAD", + "stargazers_count": 83, + "source": "GitHub", + "score": 0.0023836190804399646 + }, + { + "description": "Japanese Adversarial Natural Language Inference Dataset", + "url": "https://github.com/verypluming/JaNLI", + "project_name": "JaNLI", + "stargazers_count": 15, + "source": 
"GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Evidence-based Explanation Dataset (AACL-IJCNLP 2020)", + "url": "https://github.com/megagonlabs/ebe-dataset", + "project_name": "ebe-dataset", + "stargazers_count": 16, + "source": "GitHub", + "score": 0.00045949283478360757 + }, + { + "description": "UNICODE絵文字の日本語読み/キーワード/分類辞書", + "url": "https://github.com/yagays/emoji-ja", + "project_name": "emoji-ja", + "stargazers_count": 65, + "source": "GitHub", + "score": 0.0018666896413084058 + }, + { + "description": "Wikipediaから作成した日本語名寄せデータセット", + "url": "https://github.com/yagays/nayose-wikipedia-ja", + "project_name": "nayose-wikipedia-ja", + "stargazers_count": 33, + "source": "GitHub", + "score": 0.0009477039717411907 + }, + { + "description": "Japanese text8 corpus for word embedding.", + "url": "https://github.com/Hironsan/ja.text8", + "project_name": "ja.text8", + "stargazers_count": 101, + "source": "GitHub", + "score": 0.002900548519571523 + }, + { + "description": "3行要約データセット", + "url": "https://github.com/KodairaTomonori/ThreeLineSummaryDataset", + "project_name": "ThreeLineSummaryDataset", + "stargazers_count": 24, + "source": "GitHub", + "score": 0.0006892392521754114 + }, + { + "description": "This repo contains a list of the 44,998 most common Japanese words in order of frequency, as determined by the University of Leeds Corpus.", + "url": "https://github.com/hingston/japanese", + "project_name": "japanese", + "stargazers_count": 40, + "source": "GitHub", + "score": 0.001148732086959019 + }, + { + "description": "Kanji usage frequency data collected from various sources", + "url": "https://github.com/scriptin/kanji-frequency", + "project_name": "kanji-frequency", + "stargazers_count": 100, + "source": "GitHub", + "score": 0.0028718302173975476 + }, + { + "description": "TEDxJP-10K ASR Evaluation Dataset", + "url": "https://github.com/laboroai/TEDxJP-10K", + "project_name": "TEDxJP-10K", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Corpus of Annual Reports in Japan", + "url": "https://github.com/chakki-works/CoARiJ", + "project_name": "CoARiJ", + "stargazers_count": 72, + "source": "GitHub", + "score": 0.002067717756526234 + }, + { + "description": "日本語で書かれた技術書を収集した生コーパス/ツール", + "url": "https://github.com/textlint-ja/technological-book-corpus-ja", + "project_name": "technological-book-corpus-ja", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Chunked word annotation for ITA corpus", + "url": "https://github.com/shirayu/ita-corpus-chuwa", + "project_name": "ita-corpus-chuwa", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "Utility scripts for preprocessing Wikipedia texts for NLP", + "url": "https://github.com/singletongue/wikipedia-utils", + "project_name": "wikipedia-utils", + "stargazers_count": 44, + "source": "GitHub", + "score": 0.0012636052956549208 + }, + { + "description": "日本語における不適切表現を収集します。自然言語処理の時のデータクリーニング用等に使えると思います。", + "url": "https://github.com/MosasoM/inappropriate-words-ja", + "project_name": "inappropriate-words-ja", + "stargazers_count": 118, + "source": "GitHub", + "score": 0.003388759656529106 + }, + { + "description": "参議院の公式ウェブサイトから会派、議員、議案、質問主意書のデータを整理しました。", + "url": "https://github.com/smartnews-smri/house-of-councillors", + "project_name": "house-of-councillors", + "stargazers_count": 95, + "source": "GitHub", + "score": 0.00272823870652767 + }, + { + 
"description": "国会議案データベース:衆議院", + "url": "https://github.com/smartnews-smri/house-of-representatives", + "project_name": "house-of-representatives", + "stargazers_count": 155, + "source": "GitHub", + "score": 0.004451336836966199 + }, + { + "description": "STAIR captions: large-scale Japanese image caption dataset", + "url": "https://github.com/STAIR-Lab-CIT/STAIR-captions", + "project_name": "STAIR-captions", + "stargazers_count": 75, + "source": "GitHub", + "score": 0.0021538726630481605 + }, + { + "description": "Japanese Translation of Winograd Schema Challenge", + "url": "https://github.com/ku-nlp/Winograd-Schema-Challenge-Ja", + "project_name": "Winograd-Schema-Challenge-Ja", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "An extension of the BSD corpus with audio and speaker attribute information", + "url": "https://github.com/ku-nlp/speechBSD", + "project_name": "speechBSD", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ITAコーパスの文章リスト", + "url": "https://github.com/mmorise/ita-corpus", + "project_name": "ita-corpus", + "stargazers_count": 118, + "source": "GitHub", + "score": 0.003388759656529106 + }, + { + "description": "モーラバランス型日本語コーパス", + "url": "https://github.com/mmorise/rohan4600", + "project_name": "rohan4600", + "stargazers_count": 27, + "source": "GitHub", + "score": 0.0007753941586973378 + }, + { + "description": "言語処理学会年次大会講演の全リスト・機械可読版など", + "url": "https://github.com/whym/anlp-jp-history", + "project_name": "anlp-jp-history", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "敬語変換タスクにおける評価用データセット", + "url": "https://github.com/cl-tohoku/keigo_transfer_task", + "project_name": "keigo_transfer_task", + "stargazers_count": 17, + "source": "GitHub", + "score": 0.0004882111369575831 + }, + { + "description": "English loanwords in Japanese", + "url": "https://github.com/jamesohortle/loanwords_gairaigo", + "project_name": "loanwords_gairaigo", + "stargazers_count": 12, + "source": "GitHub", + "score": 0.0003446196260877057 + }, + { + "description": "Japanese-Wikipedia Wikification Corpus", + "url": "https://github.com/wikiwikification/jawikicorpus", + "project_name": "jawikicorpus", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "This is the corpus of Japanese Text that general policy speech of prime minister of Japan", + "url": "https://github.com/yuukimiyo/GeneralPolicySpeechOfPrimeMinisterOfJapan", + "project_name": "GeneralPolicySpeechOfPrimeMinisterOfJapan", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "WRIME: 主観と客観の感情分析データセット", + "url": "https://github.com/ids-cv/wrime", + "project_name": "wrime", + "stargazers_count": 85, + "source": "GitHub", + "score": 0.0024410556847879153 + }, + { + "description": "JTubeSpeech: Corpus of Japanese speech collected from YouTube", + "url": "https://github.com/sarulab-speech/jtubespeech", + "project_name": "jtubespeech", + "stargazers_count": 146, + "source": "GitHub", + "score": 0.0041928721174004195 + }, + { + "description": "日本語Wikipediaで使用される頻出単語のリスト", + "url": "https://github.com/maeda6uiui-backup/WikipediaWordFrequencyList", + "project_name": "WikipediaWordFrequencyList", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "車両不具合情報に関するデータセット", + "url": "https://github.com/rindybell/kokkosho_data", 
+ "project_name": "kokkosho_data", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "デジタル化資料OCRテキスト化事業において作成されたOCR学習用データセット", + "url": "https://github.com/ndl-lab/pdmocrdataset-part1", + "project_name": "pdmocrdataset-part1", + "stargazers_count": 47, + "source": "GitHub", + "score": 0.0013497602021768473 + }, + { + "description": "全国書誌データから作成した振り仮名のデータセット", + "url": "https://github.com/ndl-lab/huriganacorpus-ndlbib", + "project_name": "huriganacorpus-ndlbib", + "stargazers_count": 13, + "source": "GitHub", + "score": 0.0003733379282616812 + }, + { + "description": "JVS (Japanese versatile speech) コーパスの自作のラベル", + "url": "https://github.com/Hiroshiba/jvs_hiho", + "project_name": "jvs_hiho", + "stargazers_count": 23, + "source": "GitHub", + "score": 0.000660520950001436 + }, + { + "description": "Allows Sudachi to normalize from hiragana to katakana from any compound word list", + "url": "https://github.com/po3rin/hirakanadic", + "project_name": "hirakanadic", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "約100年に渡るアニメ作品リストデータベース", + "url": "https://github.com/anilogia/animedb", + "project_name": "animedb", + "stargazers_count": 309, + "source": "GitHub", + "score": 0.008873955371758422 + }, + { + "description": "サイバーセキュリティに関連する公的な組織の日英対応", + "url": "https://github.com/SaitoLab/security_words", + "project_name": "security_words", + "stargazers_count": 21, + "source": "GitHub", + "score": 0.0006030843456534849 + }, + { + "description": "日本の国会議員のデータ", + "url": "https://github.com/sugi2000/Data-on-Japanese-Diet-Members", + "project_name": "Data-on-Japanese-Diet-Members", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "歴史資料の市民参加型翻刻プラットフォーム「みんなで翻刻」のテキストデータ置き場です。 / Transcription texts created on Minna de Honkoku (https://honkoku.org), a crowdsourced transcription platform for historical Japanese documents.", + "url": "https://github.com/yuta1984/honkoku-data", + "project_name": "honkoku-data", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "wikiHow dataset (Japanese version)", + "url": "https://github.com/Katsumata420/wikihow_japanese", + "project_name": "wikihow_japanese", + "stargazers_count": 28, + "source": "GitHub", + "score": 0.0008041124608713133 + }, + { + "description": "Engineer Vocabulary List in Japanese/English", + "url": "https://github.com/mercari/engineer-vocabulary-list", + "project_name": "engineer-vocabulary-list", + "stargazers_count": 1574, + "source": "GitHub", + "score": 0.0452026076218374 + }, + { + "description": "Japanese Sentences Involving Compositional Knowledge (JSICK) Dataset/JSICK-stress Test Set", + "url": "https://github.com/verypluming/JSICK", + "project_name": "JSICK", + "stargazers_count": 26, + "source": "GitHub", + "score": 0.0007466758565233624 + }, + { + "description": "Phishing URL dataset from JPCERT/CC", + "url": "https://github.com/JPCERTCC/phishurl-list", + "project_name": "phishurl-list", + "stargazers_count": 102, + "source": "GitHub", + "score": 0.0029292668217454983 + }, + { + "description": "A Japanese Corpus of Many Specialized Domains (JCMS)", + "url": "https://github.com/shigashiyama/jcms", + "project_name": "jcms", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "text-only archives of www.aozora.gr.jp", + "url": 
"https://github.com/aozorahack/aozorabunko_text", + "project_name": "aozorabunko_text", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "friendly_JA is a parallel Japanese-to-Japanese corpus aimed at making Japanese easier by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexicon", + "url": "https://github.com/astremo/friendly_JA-Corpus", + "project_name": "friendly_JA-Corpus", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Topologically ordered lists of kanji for effective learning", + "url": "https://github.com/scriptin/topokanji", + "project_name": "topokanji", + "stargazers_count": 148, + "source": "GitHub", + "score": 0.004250308721748371 + }, + { + "description": "ISBN-13における日本語での出版物 (978-4-XXXXXXXXX) に関するデータ等", + "url": "https://github.com/uribo/isbn4groups", + "project_name": "isbn4groups", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "NMeCab: About Japanese morphological analyzer on .NET", + "url": "https://github.com/komutan/NMeCab", + "project_name": "NMeCab", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "デジタル化資料から作成したOCRテキストデータのngram頻度統計情報のデータセット", + "url": "https://github.com/ndl-lab/ndlngramdata", + "project_name": "ndlngramdata", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "2023年1月にリニューアルしたNDL Ngram Viewerのソースコード等一式", + "url": "https://github.com/ndl-lab/ndlngramviewer_v2", + "project_name": "ndlngramviewer_v2", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "法律・判例関係のデータセット", + "url": "https://github.com/japanese-law-analysis/data_set", + "project_name": "data_set", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "WRIME for huggingface datasets", + "url": "https://github.com/shunk031/huggingface-datasets_wrime", + "project_name": "huggingface-datasets_wrime", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "NDL古典籍OCR学習用データセット(みんなで翻刻加工データ)", + "url": "https://github.com/ndl-lab/ndl-minhon-ocrdataset", + "project_name": "ndl-minhon-ocrdataset", + "stargazers_count": 2, + "source": "GitHub", + "score": 5.7436604347950946e-05 + }, + { + "description": "GIS & Archaeological Simulator. 
2023 in development.", + "url": "https://github.com/AsPJT/PAX_SAPIENTICA", + "project_name": "PAX_SAPIENTICA", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese version of LIWC2015", + "url": "https://github.com/tasukuigarashi/j-liwc2015", + "project_name": "j-liwc2015", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Livedoor news corpus for huggingface datasets", + "url": "https://github.com/shunk031/huggingface-datasets_livedoor-news-corpus", + "project_name": "huggingface-datasets_livedoor-news-corpus", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JGLUE: Japanese General Language Understanding Evaluation for huggingface datasets", + "url": "https://github.com/shunk031/huggingface-datasets_JGLUE", + "project_name": "huggingface-datasets_JGLUE", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "JCommonsenseMorality is a dataset created through crowdsourcing that reflects the commonsense morality of Japanese annotators.", + "url": "https://github.com/Language-Media-Lab/commonsense-moral-ja", + "project_name": "commonsense-moral-ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "COMET-ATOMIC ja", + "url": "https://github.com/nlp-waseda/comet-atomic-ja", + "project_name": "comet-atomic-ja", + "stargazers_count": 15, + "source": "GitHub", + "score": 0.0004307745326096321 + }, + { + "description": "Dialogue Commonsense Graph in Japanese", + "url": "https://github.com/nlp-waseda/dcsg-ja", + "project_name": "dcsg-ja", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "\"Proposal and Evaluation of Japanese Toxicity Schema\" provides a schema and dataset for toxicity in the Japanese language.", + "url": "https://github.com/inspection-ai/japanese-toxic-dataset", + "project_name": "japanese-toxic-dataset", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) is the Japanese ad text generation dataset.", + "url": "https://github.com/CyberAgentAILab/camera", + "project_name": "camera", + "stargazers_count": 9, + "source": "GitHub", + "score": 0.0002584647195657793 + }, + { + "description": "日本語フェイクニュースデータセット", + "url": "https://github.com/tanreinama/Japanese-Fakenews-Dataset", + "project_name": "Japanese-Fakenews-Dataset", + "stargazers_count": 10, + "source": "GitHub", + "score": 0.00028718302173975474 + }, + { + "description": "jpn_explainable_qa_dataset", + "url": "https://github.com/aiishii/jpn_explainable_qa_dataset", + "project_name": "jpn_explainable_qa_dataset", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "COPA Dataset in Japanese", + "url": "https://github.com/nlp-titech/copa-japanese", + "project_name": "copa-japanese", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Word Familiarity Rate for 'Word List by Semantic Principles (WLSP)'", + "url": "https://github.com/masayu-a/WLSP-familiarity", + "project_name": "WLSP-familiarity", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A cross-linguistic study of pronoun substitutes and address terms", + "url": "https://github.com/matbahasa/ProSub", + "project_name": "ProSub", + "stargazers_count": 0, + "source": "GitHub", + "score": 
0.0 + }, + { + "description": "Scraping tools and collected data from the Nantoka database (https://supleks.jp/)", + "url": "https://github.com/nuko-yokohama/ramendb", + "project_name": "ramendb", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "CAMERA (CyberAgent Multimodal Evaluation for Ad Text GeneRAtion) for huggingface datasets", + "url": "https://github.com/shunk031/huggingface-datasets_CAMERA", + "project_name": "huggingface-datasets_CAMERA", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "FactCheckSentenceNLI dataset", + "url": "https://github.com/nlp-waseda/FactCheckSentenceNLI-FCSNLI-", + "project_name": "FactCheckSentenceNLI-FCSNLI-", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A dataset in which databricks-dolly-15k.jsonl, used as training data for databricks/dolly-v2-12b, has been translated into Japanese.", + "url": "https://github.com/kunishou/databricks-dolly-15k-ja", + "project_name": "databricks-dolly-15k-ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "EaST-MELD is an English-Japanese dataset for emotion-aware speech translation based on MELD.", + "url": "https://github.com/ku-nlp/EaST-MELD", + "project_name": "EaST-MELD", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Mecon Audio (Medical Conference Audio) is a read-aloud speech dataset of the minutes of the Advanced Medical Care Council hosted by the Ministry of Health, Labour and Welfare.", + "url": "https://github.com/elith-co-jp/meconaudio", + "project_name": "meconaudio", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Open data of nationwide address data at the town/block level (277,191 entries)", + "url": "https://github.com/geolonia/japanese-addresses", + "project_name": "japanese-addresses", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { +
"description": "The full-text search system for Aozora Bunko by Groonga; a full-text search library and web app for Aozora Bunko.", + "url": "https://github.com/myokoym/aozorasearch", + "project_name": "aozorasearch", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "This repository contains scripts to reproduce the LLM-jp corpus.", + "url": "https://github.com/llm-jp/llm-jp-corpus", + "project_name": "llm-jp-corpus", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A Japanese translation of the alpaca dataset", + "url": "https://github.com/shi3z/alpaca_ja", + "project_name": "alpaca_ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese instruction data", + "url": "https://github.com/megagonlabs/instruction_ja", + "project_name": "instruction_ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Top 5000 Japanese family names, with readings, ordered by frequency.", + "url": "https://github.com/siikamiika/japanese-family-names", + "project_name": "japanese-family-names", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese language data on kanji, radicals, media files, fonts and related resources from Kanji alive", + "url": "https://github.com/kanjialive/kanji-data-media", + "project_name": "kanji-data-media", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Construct large-scale Japanese audio corpus at home", + "url": "https://github.com/reazon-research/reazonspeech", + "project_name": "reazonspeech", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Furigana dataset created from Aozora Bunko and braille data from Sapie", + "url": "https://github.com/ndl-lab/huriganacorpus-aozora", + "project_name": "huriganacorpus-aozora", + "stargazers_count": 6, + "source": "GitHub", + "score": 0.00017230981304385286 + }, + { + "description": "An open collection of annotated voices in Japanese language", + "url": "https://github.com/koniwa/koniwa", + "project_name": "koniwa", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Massive Multitask Language Understanding Benchmark", + "url": "https://github.com/nlp-waseda/JMMLU", + "project_name": "JMMLU", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Dataset of the furigana-annotated Aozora Bunko speech corpus", + "url": "https://github.com/ndl-lab/hurigana-speech-corpus-aozora", + "project_name": "hurigana-speech-corpus-aozora", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Japanese Q&A dataset for evaluating retrieval-augmented generation (RAG)", + "url": "https://github.com/hotchpotch/jqara", + "project_name": "jqara", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "JEMHopQA (Japanese Explainable Multi-hop Question Answering) is a Japanese multi-hop QA dataset that can evaluate internal reasoning.", + "url": "https://github.com/aiishii/jemhopqa", + "project_name": "jemhopqa", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Repository for the Japanese Document-level Relation Extraction Dataset (planned for release in March).", + "url": "https://github.com/youmima/jacred", + "project_name": "jacred", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { +
soon).", + "url": "https://github.com/naist-nlp/jades", + "project_name": "jades", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "2023年8月にメルボルン大学から公開された安全性評価データセット『Do-Not-Answer』を日本語LLMの評価においても使用できるように日本語に自動翻訳し、さらに日本文化も考慮して修正したデータセット。", + "url": "https://github.com/kunishou/do-not-answer-ja", + "project_name": "do-not-answer-ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "OpenAssistant のオープンソースデータ OASST1 を日本語に翻訳したデータセットになります。", + "url": "https://github.com/kunishou/oasst1-89k-ja", + "project_name": "oasst1-89k-ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット", + "url": "https://github.com/hotchpotch/jacwir", + "project_name": "jacwir", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語学習者のための科学技術業界でよく使われる片仮名と元の単語対照表", + "url": "https://github.com/laoshubaby/japanese-technical-dict", + "project_name": "japanese-technical-dict", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Dataset of UniMorph in Japanese", + "url": "https://github.com/cl-tohoku/j-unimorph", + "project_name": "j-unimorph", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "spaCy tutorial in English and Japanese. spacy-transformers, BERT, GiNZA.", + "url": "https://github.com/yuibi/spacy_tutorial", + "project_name": "spacy_tutorial", + "stargazers_count": 59, + "source": "GitHub", + "score": 0.001694379828264553 + }, + { + "description": "Tutorial to train fastText with Japanese corpus", + "url": "https://github.com/icoxfog417/fastTextJapaneseTutorial", + "project_name": "fastTextJapaneseTutorial", + "stargazers_count": 198, + "source": "GitHub", + "score": 0.005686223830447144 + }, + { + "description": "AllenNLP-NER-ja: AllenNLP による日本語を対象とした固有表現抽出", + "url": "https://github.com/shunk031/allennlp-NER-ja", + "project_name": "allennlp-NER-ja", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "Experiment for Japanese Text classification using chariot and PyTorch", + "url": "https://github.com/ymym3412/chariot-PyTorch-Japanese-text-classification", + "project_name": "chariot-PyTorch-Japanese-text-classification", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "日本語NLPライブラリGiNZAのすゝめ", + "url": "https://github.com/poyo46/ginza-examples", + "project_name": "ginza-examples", + "stargazers_count": 13, + "source": "GitHub", + "score": 0.0003733379282616812 + }, + { + "description": "DocumentClassificationUsingBERT-Japanese", + "url": "https://github.com/nekoumei/DocumentClassificationUsingBERT-Japanese", + "project_name": "DocumentClassificationUsingBERT-Japanese", + "stargazers_count": 37, + "source": "GitHub", + "score": 0.0010625771804370927 + }, + { + "description": "Google Colaboratoryで日本語のBERTを動かす方法です。", + "url": "https://github.com/YutaroOgawa/BERT_Japanese_Google_Colaboratory", + "project_name": "BERT_Japanese_Google_Colaboratory", + "stargazers_count": 23, + "source": "GitHub", + "score": 0.000660520950001436 + }, + { + "description": "「BERTによる自然言語処理入門: Transformersを使った実践プログラミング」サポートページ", + "url": "https://github.com/stockmarkteam/bert-book", + "project_name": "bert-book", + "stargazers_count": 185, + "source": "GitHub", + "score": 0.0053128859021854625 + }, + { + "description": "Janome を使ったテキストマイニング入門チュートリアルです。", + 
"url": "https://github.com/mocobeta/janome-tutorial", + "project_name": "janome-tutorial", + "stargazers_count": 27, + "source": "GitHub", + "score": 0.0007753941586973378 + }, + { + "description": "日本語の言語モデルのハンズオン資料です", + "url": "https://github.com/hnishi/handson-language-models", + "project_name": "handson-language-models", + "stargazers_count": 1, + "source": "GitHub", + "score": 2.8718302173975473e-05 + }, + { + "description": "Google Colabで日本語テキスト推論を試す", + "url": "https://github.com/verypluming/JapaneseNLI", + "project_name": "JapaneseNLI", + "stargazers_count": 5, + "source": "GitHub", + "score": 0.00014359151086987737 + }, + { + "description": "deep-learning-with-pytorchの日本語版repositoryです。", + "url": "https://github.com/Gin5050/deep-learning-with-pytorch-ja", + "project_name": "deep-learning-with-pytorch-ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "ディープラーニングによる自然言語処理(共立出版)のサポートページです", + "url": "https://github.com/python-nlp-book/python-nlp-book", + "project_name": "python-nlp-book", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "「大規模言語モデル入門」(技術評論社, 2023)のGitHubリポジトリ", + "url": "https://github.com/ghmagazine/llm-book", + "project_name": "llm-book", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "環境構築手順とソースコード", + "url": "https://github.com/hiroshi-matsuda-rit/nlp2024-tutorial-3", + "project_name": "nlp2024-tutorial-3", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "日本語情報検索チュートリアル", + "url": "https://github.com/mpkato/japanese-ir-tutorial", + "project_name": "japanese-ir-tutorial", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "A list of pre-trained BERT models for Japanese with word/subword tokenization + vocabulary construction algorithm information", + "url": "https://github.com/himkt/awesome-bert-japanese", + "project_name": "awesome-bert-japanese", + "stargazers_count": 125, + "source": "GitHub", + "score": 0.0035897877717469343 + }, + { + "description": "文法誤り訂正に関する日本語文献を収集・分類するためのリポジトリ", + "url": "https://github.com/gotutiyan/GEC-Info-ja", + "project_name": "GEC-Info-ja", + "stargazers_count": 4, + "source": "GitHub", + "score": 0.00011487320869590189 + }, + { + "description": "lists of text corpus and more (mainly Japanese)", + "url": "https://github.com/ikegami-yukino/dataset-list", + "project_name": "dataset-list", + "stargazers_count": 110, + "source": "GitHub", + "score": 0.003159013239137302 + }, + { + "description": "ディープラーニングモデルの性能を体系的に最大化するためのプレイブック", + "url": "https://github.com/Valkyrja3607/tuning_playbook_ja", + "project_name": "tuning_playbook_ja", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "Trying to consolidate japanese phonetic, and in particular pitch accent resources into one list", + "url": "https://github.com/olety/japanese-pitch-accent-resources", + "project_name": "japanese-pitch-accent-resources", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "description": "オープンソースの日本語LLMまとめ", + "url": "https://github.com/llm-jp/awesome-japanese-llm", + "project_name": "awesome-japanese-llm", + "stargazers_count": 0, + "source": "GitHub", + "score": 0.0 + }, + { + "url": "https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-japanese", + "downloads": 2698612, + "description": "Fine-tuned XLSR-53 large model for speech recognition in JapaneseFine-tuned facebook/wav2vec2-large-xlsr-53 
"description": "Fine-tuned XLSR-53 large model for speech recognition in Japanese. Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the train and validation splits of Common Voice 6.1, CSS10 and JSUT. When using this model, make sure that your speech input is sampled at 16kHz.", + "source": "Hugging Face", + "score": 0.3030053832451995, + "project_name": "wav2vec2-large-xlsr-53-japanese" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese", + "downloads": 1661621, + "description": "BERT base Japanese (IPA dictionary). This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.18657002485472962, + "project_name": "bert-base-japanese" + }, + { + "url": "https://huggingface.co/tsmatz/xlm-roberta-ner-japanese", + "downloads": 1093110, + "description": "xlm-roberta-ner-japanese (Japanese caption: a model for Japanese named entity recognition). This model is a fine-tuned version of xlm-roberta-base (a pre-trained cross-lingual RoBERTa model) trained for named entity recognition (NER) token classification.", + "source": "Hugging Face", + "score": 0.12273650842698396, + "project_name": "xlm-roberta-ner-japanese" + }, + { + "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-mean-tokens-v2", + "downloads": 985141, + "description": "This is a Japanese sentence-BERT model.", + "source": "Hugging Face", + "score": 0.11061353994407461, + "project_name": "sentence-bert-base-ja-mean-tokens-v2" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-whole-word-masking", + "downloads": 324187, + "description": "BERT base Japanese (IPA dictionary, whole word masking enabled). This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.036400344391157934, + "project_name": "bert-base-japanese-whole-word-masking" + }, + { + "url": "https://huggingface.co/sonoisa/sentence-luke-japanese-base-lite", + "downloads": 283558, + "description": "This is a Japanese sentence-LUKE model.", + "source": "Hugging Face", + "score": 0.03183844156264119, + "project_name": "sentence-luke-japanese-base-lite" + }, + { + "url": "https://huggingface.co/kha-white/manga-ocr-base", + "downloads": 119002, + "description": "Manga OCR: optical character recognition for Japanese text, with the main focus being Japanese manga.", + "source": "Hugging Face", + "score": 0.013361775096584922, + "project_name": "manga-ocr-base" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char", + "downloads": 116955, + "description": "BERT base Japanese (character tokenization). This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.013131933971034853, + "project_name": "bert-base-japanese-char" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-v2", + "downloads": 112400, + "description": "BERT base Japanese (character-level tokenization with whole word masking, jawiki-20200831). This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.012620489746862617, + "project_name": "bert-base-japanese-char-v2" + }, + { + "url": "https://huggingface.co/augmxnt/shisa-gamma-7b-v1", + "downloads": 110413, + "description": "shisa-gamma-7b-v1: for more information see our main Shisa 7B model. We applied a version of our fine-tune dataset onto Japanese Stable LM Base Gamma 7B and it performed pretty well; just sharing since it might be of interest.", + "source": "Hugging Face", + "score": 0.01239738553754753, + "project_name": "shisa-gamma-7b-v1" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-instruct", +
"downloads": 95472, + "description": "ELYZA-japanese-Llama-2-7bModel DescriptionELYZA-japanese-Llama-2-7b", + "source": "Hugging Face", + "score": 0.010719781113100247, + "project_name": "ELYZA-japanese-Llama-2-7b-instruct" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-whole-word-masking", + "downloads": 95114, + "description": "BERT base Japanese (character tokenization, whole word masking enabled)This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.010679584179564867, + "project_name": "bert-base-japanese-char-whole-word-masking" + }, + { + "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-mean-tokens", + "downloads": 64742, + "description": "This is a Japanese sentence-BERT model.", + "source": "Hugging Face", + "score": 0.007269357181417968, + "project_name": "sentence-bert-base-ja-mean-tokens" + }, + { + "url": "https://huggingface.co/colorfulscoop/sbert-base-ja", + "downloads": 62410, + "description": "Sentence BERT base Japanese modelThis repository contains a Sentence BERT base model for Japanese.", + "source": "Hugging Face", + "score": 0.007007515703751744, + "project_name": "sbert-base-ja" + }, + { + "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-xsmall-v1", + "downloads": 59267, + "description": "hotchpotch/japanese-reranker-cross-encoder-xsmall-v1日本語で学習させた Reranker (CrossEncoder) シリーズです。", + "source": "Hugging Face", + "score": 0.006654613574976039, + "project_name": "japanese-reranker-cross-encoder-xsmall-v1" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-v3", + "downloads": 46245, + "description": "BERT base Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.00519247818811087, + "project_name": "bert-base-japanese-v3" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-ner-wikipedia-dataset", + "downloads": 45039, + "description": "llm-book/bert-base-japanese-v3-ner-wikipedia-dataset「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。", + "source": "Hugging Face", + "score": 0.005057066171787771, + "project_name": "bert-base-japanese-v3-ner-wikipedia-dataset" + }, + { + "url": "https://huggingface.co/pkshatech/GLuCoSE-base-ja", + "downloads": 44822, + "description": "GLuCoSE (General Luke-based Contrastive Sentence Embedding)-base-Japanese日本語のREADME/Japanese READMEGLuCoSE (General LUke-based COntrastive Sentence Embedding, \"glucose\") is a Japanese text embedding model based on LUKE.", + "source": "Hugging Face", + "score": 0.005032700991404593, + "project_name": "GLuCoSE-base-ja" + }, + { + "url": "https://huggingface.co/sazyou-roukaku/BracingEvoMix", + "downloads": 39639, + "description": "License:CreativeML Open RAIL-MAdditional Copyright: sazyou_roukaku (TwitterID @sazyou_roukaku) as of May 31, 2023このモデルは『CreativeML Open RAIL-M』でLicenseそのものに変更はありません。", + "source": "Hugging Face", + "score": 0.004450743710639567, + "project_name": "BracingEvoMix" + }, + { + "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese", + "downloads": 38782, + "description": "Model Card for Japanese DeBERTa V2 baseModel", + "source": "Hugging Face", + "score": 0.004354518090416602, + "project_name": "deberta-v2-base-japanese" + }, + { + "url": "https://huggingface.co/setu4993/LaBSE", + "downloads": 29646, + "description": "LaBSEModel descriptionLanguage-agnostic BERT Sentence Encoder (LaBSE) is a 
"description": "LaBSE: Language-agnostic BERT Sentence Encoder (LaBSE) is a BERT-based model trained for sentence embedding for 109 languages.", + "source": "Hugging Face", + "score": 0.0033287103117036402, + "project_name": "LaBSE" + }, + { + "url": "https://huggingface.co/cyberagent/open-calm-3b", + "downloads": 21301, + "description": "OpenCALM-3B: OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by CyberAgent, Inc.", + "source": "Hugging Face", + "score": 0.0023917175453551658, + "project_name": "open-calm-3b" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-v2", + "downloads": 19955, + "description": "BERT base Japanese (unidic-lite with whole word masking, jawiki-20200831). This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.002240586057817113, + "project_name": "bert-base-japanese-v2" + }, + { + "url": "https://huggingface.co/rinna/llama-3-youko-8b", + "downloads": 19436, + "description": "Llama 3 Youko 8B (rinna/llama-3-youko-8b)", + "source": "Hugging Face", + "score": 0.002182311732384536, + "project_name": "llama-3-youko-8b" + }, + { + "url": "https://huggingface.co/rinna/japanese-clip-vit-b-16", + "downloads": 13420, + "description": "rinna/japanese-clip-vit-b-16: this is a Japanese CLIP (Contrastive Language-Image Pre-Training) model trained by rinna Co.", + "source": "Hugging Face", + "score": 0.0015068235978905368, + "project_name": "japanese-clip-vit-b-16" + }, + { + "url": "https://huggingface.co/christian-phu/bert-finetuned-japanese-sentiment", + "downloads": 13291, + "description": "bert-finetuned-japanese-sentiment: this model is a fine-tuned version of cl-tohoku/bert-base-japanese-v2 on a Japanese Amazon product reviews dataset.", + "source": "Hugging Face", + "score": 0.0014923392279853297, + "project_name": "bert-finetuned-japanese-sentiment" + }, + { + "url": "https://huggingface.co/jarvisx17/japanese-sentiment-analysis", + "downloads": 12002, + "description": "japanese-sentiment-analysis: this model was trained from scratch on the chABSA dataset.", + "source": "Hugging Face", + "score": 0.0013476078108705083, + "project_name": "japanese-sentiment-analysis" + }, + { + "url": "https://huggingface.co/sazyou-roukaku/chilled_remix", + "downloads": 11873, + "description": "[Notice] chilled_remix and reversemix changed versions on May 21, 2023 and migrated to v2.", + "source": "Hugging Face", + "score": 0.0013331234409653012, + "project_name": "chilled_remix" + }, + { + "url": "https://huggingface.co/rinna/japanese-roberta-base", + "downloads": 11138, + "description": "japanese-roberta-base: this repository provides a base-sized Japanese RoBERTa model.", + "source": "Hugging Face", + "score": 0.0012505962170867957, + "project_name": "japanese-roberta-base" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b", + "downloads": 10734, + "description": "japanese-gpt-neox-3.6b: this repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", + "source": "Hugging Face", + "score": 0.00120523431443793, + "project_name": "japanese-gpt-neox-3.6b" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-upos", + "downloads": 9155, + "description": "bert-base-japanese-upos: this is a BERT model pre-trained on Japanese Wikipedia texts for POS-tagging and dependency-parsing, derived from bert-base-japanese-char-extended.", + "source": "Hugging Face", + "score": 0.0010279411355207052, + "project_name": "bert-base-japanese-upos" + }, + { + "url":
"https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-RobinHood-gguf", + "downloads": 8667, + "description": "DataPilot-ArrowPro-7B-RobinHood-ggufDataPilotさんが公開しているArrowPro-7B-RobinHoodのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0009731475501428675, + "project_name": "DataPilot-ArrowPro-7B-RobinHood-gguf" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-instruct-hf", + "downloads": 8662, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.0009725861404566192, + "project_name": "Swallow-70b-instruct-hf" + }, + { + "url": "https://huggingface.co/mmnga/DataPilot-ArrowPro-7B-KUJIRA-gguf", + "downloads": 8552, + "description": "DataPilot-ArrowPro-7B-KUJIRA-ggufDataPilotさんが公開しているArrowPro-7B-KUJIRAのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0009602351273591558, + "project_name": "DataPilot-ArrowPro-7B-KUJIRA-gguf" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-NVE-instruct-hf", + "downloads": 8459, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.0009497929071949367, + "project_name": "Swallow-7b-NVE-instruct-hf" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-instruct", + "downloads": 8227, + "description": "ELYZA-japanese-Llama-2-13bModel DescriptionELYZA-japanese-Llama-2-13b は、 Llama 2をベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。", + "source": "Hugging Face", + "score": 0.0009237434977530139, + "project_name": "ELYZA-japanese-Llama-2-13b-instruct" + }, + { + "url": "https://huggingface.co/MCZK/ArrowPro-7B-RobinHood-GGUF", + "downloads": 7578, + "description": "DataPilot様の ArrowPro-7B-RobinHood をGGUF形式に変換したものです。", + "source": "Hugging Face", + "score": 0.0008508725204779797, + "project_name": "ArrowPro-7B-RobinHood-GGUF" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-fast-instruct", + "downloads": 7410, + "description": "ELYZA-japanese-Llama-2-7bModel DescriptionELYZA-japanese-Llama-2-7b", + "source": "Hugging Face", + "score": 0.0008320091550200356, + "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-v2", + "downloads": 7273, + "description": "BERT large Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.0008166265296168312, + "project_name": "bert-large-japanese-v2" + }, + { + "url": "https://huggingface.co/mmnga/Ninja-v1-gguf", + "downloads": 6985, + "description": "Ninja-v1-ggufLocal-Novel-LLM-projectさんが公開しているNinja-v1のggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.000784289331688927, + "project_name": "Ninja-v1-gguf" + }, + { + "url": "https://huggingface.co/mmnga/Ninja-v1-NSFW-gguf", + "downloads": 6974, + "description": "Ninja-v1-NSFW-ggufLocal-Novel-LLM-projectさんが公開しているNinja-v1-NSFWのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0007830542303791806, + "project_name": "Ninja-v1-NSFW-gguf" + }, + { + "url": "https://huggingface.co/abeja/gpt-neox-japanese-2.7b", + "downloads": 6899, + "description": "gpt-neox-japanese-2.7bThe open PR is merged on 2022/9/14.You can use this model with v4.23 and higher versions of transformers as follows,pip 
"description": "gpt-neox-japanese-2.7b: the open PR was merged on 2022/9/14. You can use this model with transformers v4.23 and higher, as follows: pip install transformers. This repository provides a 2.7B-parameter Japanese GPT-NeoX-based model.", + "source": "Hugging Face", + "score": 0.0007746330850854555, + "project_name": "gpt-neox-japanese-2.7b" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-instruct-hf", + "downloads": 6861, + "description": "Swallow: our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.0007703663714699682, + "project_name": "Swallow-7b-instruct-hf" + }, + { + "url": "https://huggingface.co/sonoisa/t5-base-japanese", + "downloads": 6743, + "description": "Japanese T5 pre-trained model: this is a T5 (Text-to-Text Transfer Transformer) model pretrained on a Japanese corpus.", + "source": "Hugging Face", + "score": 0.0007571171028745074, + "project_name": "t5-base-japanese" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-gamma-7b", + "downloads": 6725, + "description": "Japanese Stable LM Instruct Gamma 7B model", + "source": "Hugging Face", + "score": 0.0007550960280040134, + "project_name": "japanese-stablelm-instruct-gamma-7b" + }, + { + "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-7b-instruct-v0.1-gguf", + "downloads": 6641, + "description": "A gguf-format conversion of Swallow-7b-instruct-v0.1, published by tokyotech-llm.", + "source": "Hugging Face", + "score": 0.0007456643452750414, + "project_name": "tokyotech-llm-Swallow-7b-instruct-v0.1-gguf" + }, + { + "url": "https://huggingface.co/cyberagent/open-calm-medium", + "downloads": 6058, + "description": "OpenCALM-Medium: OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by CyberAgent, Inc.", + "source": "Hugging Face", + "score": 0.0006802039758584853, + "project_name": "open-calm-medium" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-fast-instruct", + "downloads": 6028, + "description": "ELYZA-japanese-Llama-2-13b is a model based on Llama 2 that has undergone additional pre-training to extend its Japanese language capabilities.", + "source": "Hugging Face", + "score": 0.0006768355177409952, + "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b", + "downloads": 6004, + "description": "Model description: ELYZA-japanese-Llama-2-7b", + "source": "Hugging Face", + "score": 0.0006741407512470032, + "project_name": "ELYZA-japanese-Llama-2-7b" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-gamma-7b", + "downloads": 5942, + "description": "Japanese Stable LM Base Gamma 7B model", + "source": "Hugging Face", + "score": 0.0006671792711375238, + "project_name": "japanese-stablelm-base-gamma-7b" + }, + { + "url": "https://huggingface.co/mmnga/Vecteus-v1-gguf", + "downloads": 5901, + "description": "A gguf-format conversion of Vecteus-v1, published by Local-Novel-LLM-project.", + "source": "Hugging Face", + "score": 0.0006625757117102875, + "project_name": "Vecteus-v1-gguf" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt2-xsmall", + "downloads": 5771, + "description": "japanese-gpt2-xsmall: this repository provides an extra-small-sized Japanese GPT-2 model.", + "source": "Hugging Face", + "score": 0.0006479790598678307, + "project_name": "japanese-gpt2-xsmall" + }, + { + "url": "https://huggingface.co/den2nova/FlexDreamHK", + "downloads": 5738, +
"description": "🎈 FlexDreamHK: created with the aim of being a model that does not include the leaked NovelAI model, or that keeps that risk as low as possible.", + "source": "Hugging Face", + "score": 0.0006442737559385917, + "project_name": "FlexDreamHK" + }, + { + "url": "https://huggingface.co/MCZK/ArrowPro-7B-KUJIRA-GGUF", + "downloads": 5717, + "description": "DataPilot's ArrowPro-7B-KUJIRA converted to GGUF format.", + "source": "Hugging Face", + "score": 0.0006419158352563486, + "project_name": "ArrowPro-7B-KUJIRA-GGUF" + }, + { + "url": "https://huggingface.co/mmnga/lightblue-suzume-llama-3-8B-multilingual-gguf", + "downloads": 5679, + "description": "A gguf-format conversion of suzume-llama-3-8B-multilingual, published by lightblue.", + "source": "Hugging Face", + "score": 0.0006376491216408612, + "project_name": "lightblue-suzume-llama-3-8B-multilingual-gguf" + }, + { + "url": "https://huggingface.co/mmnga/Mistral-7B-Instruct-v0.3-gguf", + "downloads": 5678, + "description": "A gguf-format conversion of Mistral-7B-Instruct-v0.3, published by mistralai.", + "source": "Hugging Face", + "score": 0.0006375368397036116, + "project_name": "Mistral-7B-Instruct-v0.3-gguf" + }, + { + "url": "https://huggingface.co/reazon-research/reazonspeech-nemo-v2", + "downloads": 5512, + "description": "reazonspeech-nemo-v2 is an automatic speech recognition model trained on the ReazonSpeech v2.0 corpus.", + "source": "Hugging Face", + "score": 0.0006188980381201668, + "project_name": "reazonspeech-nemo-v2" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese", + "downloads": 5504, + "description": "BERT large Japanese (unidic-lite with whole word masking, jawiki-20200831). This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.0006179997826221695, + "project_name": "bert-large-japanese" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-7b-fast", + "downloads": 5484, + "description": "Model description: ELYZA-japanese-Llama-2-7b", + "source": "Hugging Face", + "score": 0.0006157541438771762, + "project_name": "ELYZA-japanese-Llama-2-7b-fast" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stable-clip-vit-l-16", + "downloads": 5461, + "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", + "source": "Hugging Face", + "score": 0.0006131716593204338, + "project_name": "japanese-stable-clip-vit-l-16" + }, + { + "url": "https://huggingface.co/oshizo/sbert-jsnli-luke-japanese-base-lite", + "downloads": 5406, + "description": "sbert-jsnli-luke-japanese-base-lite: this is a sentence-transformers model. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for tasks like clustering or semantic search.", + "source": "Hugging Face", + "score": 0.000606996152771702, + "project_name": "sbert-jsnli-luke-japanese-base-lite" + }, + { + "url": "https://huggingface.co/mmnga/umiyuki-Umievo-itr012-Gleipnir-7B-gguf", + "downloads": 5328, + "description": "A gguf-format conversion of Umievo-itr012-Gleipnir-7B, published by umiyuki.", + "source": "Hugging Face", + "score": 0.000598238161666228, + "project_name": "umiyuki-Umievo-itr012-Gleipnir-7B-gguf" + }, + { + "url": "https://huggingface.co/mmnga/Meta-Llama-3-8B-Instruct-gguf", + "downloads": 5193, + "description": "A gguf-format conversion of Meta-Llama-3-8B-Instruct, published by meta-llama.", + "source": "Hugging Face", + "score":
0.0005830801001375229, + "project_name": "Meta-Llama-3-8B-Instruct-gguf" + }, + { + "url": "https://huggingface.co/mmnga/ArrowPro-7B-KillerWhale-gguf", + "downloads": 5183, + "description": "A gguf-format conversion of ArrowPro-7B-KillerWhale, published by DataPilot.", + "source": "Hugging Face", + "score": 0.0005819572807650263, + "project_name": "ArrowPro-7B-KillerWhale-gguf" + }, + { + "url": "https://huggingface.co/mmnga/Ninja-v1-NSFW-128k-gguf", + "downloads": 5179, + "description": "A gguf-format conversion of Ninja-v1-NSFW-128k, published by Local-Novel-LLM-project.", + "source": "Hugging Face", + "score": 0.0005815081530160276, + "project_name": "Ninja-v1-NSFW-128k-gguf" + }, + { + "url": "https://huggingface.co/nlp-waseda/roberta-base-japanese-with-auto-jumanpp", + "downloads": 5144, + "description": "nlp-waseda/roberta-base-japanese-with-auto-jumanpp: this is a Japanese RoBERTa base model pretrained on Japanese Wikipedia and the Japanese portion of CC-100. You can use this model for masked language modeling as follows: from transformers import AutoTokenizer, AutoModelForMaskedLM; tokenizer = AutoTokenizer.from_pretrained(\"nlp-waseda/roberta-base-japanese-with-auto-jumanpp\")", + "source": "Hugging Face", + "score": 0.0005775782852122893, + "project_name": "roberta-base-japanese-with-auto-jumanpp" + }, + { + "url": "https://huggingface.co/ku-nlp/deberta-v2-large-japanese-char-wwm", + "downloads": 5118, + "description": "Model card for Japanese character-level DeBERTa V2 large: this is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR. This model is trained with character-level tokenization and whole word masking.", + "source": "Hugging Face", + "score": 0.0005746589548437979, + "project_name": "deberta-v2-large-japanese-char-wwm" + }, + { + "url": "https://huggingface.co/cyberagent/open-calm-7b", + "downloads": 4966, + "description": "OpenCALM-7B: OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by CyberAgent, Inc.", + "source": "Hugging Face", + "score": 0.0005575921003818484, + "project_name": "open-calm-7b" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b", + "downloads": 4950, + "description": "ELYZA-japanese-Llama-2-13b is a model based on Llama 2 that has undergone additional pre-training to extend its Japanese language capabilities.", + "source": "Hugging Face", + "score": 0.0005557955893858537, + "project_name": "ELYZA-japanese-Llama-2-13b" + }, + { + "url": "https://huggingface.co/rinna/youri-7b", + "downloads": 4883, + "description": "rinna/youri-7b: we conduct continual pre-training of llama2-7b on 40B tokens from a mixture of Japanese and English datasets.", + "source": "Hugging Face", + "score": 0.000548272699590126, + "project_name": "youri-7b" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-Llama-2-13b-fast", + "downloads": 4589, + "description": "ELYZA-japanese-Llama-2-13b is a model based on Llama 2 that has undergone additional pre-training to extend its Japanese language capabilities.", + "source": "Hugging Face", + "score": 0.0005152618100387237, + "project_name": "ELYZA-japanese-Llama-2-13b-fast" + }, + { + "url": "https://huggingface.co/line-corporation/line-distilbert-base-japanese", + "downloads": 4482, + "description": "LINE DistilBERT", + "source": "Hugging Face", + "score": 0.0005032476427530094, + "project_name": "line-distilbert-base-japanese" + }, + { +
"url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b", + "downloads": 4400, + "description": "bilingual-gpt-neox-4bOverviewThis repository provides an English-Japanese bilingual GPT-NeoX model of 3.8 billion parameters.", + "source": "Hugging Face", + "score": 0.0004940405238985367, + "project_name": "bilingual-gpt-neox-4b" + }, + { + "url": "https://huggingface.co/FINGU-AI/FinguAI-Chat-v1", + "downloads": 4372, + "description": "FINGU-AI/FinguAI-Chat-v1OverviewThe FINGU-AI/FinguAI-Chat-v1 model offers a specialized curriculum tailored to English, Korean, and Japanese speakers interested in finance, investment, and legal frameworks.", + "source": "Hugging Face", + "score": 0.000490896629655546, + "project_name": "FinguAI-Chat-v1" + }, + { + "url": "https://huggingface.co/cyberagent/open-calm-large", + "downloads": 4290, + "description": "OpenCALM-LargeModel DescriptionOpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by", + "source": "Hugging Face", + "score": 0.00048168951080107325, + "project_name": "open-calm-large" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-hf", + "downloads": 4093, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.0004595699691628888, + "project_name": "Swallow-7b-hf" + }, + { + "url": "https://huggingface.co/mmnga/Phi-3-medium-128k-instruct-gguf", + "downloads": 4009, + "description": "Phi-3-medium-128k-instruct-ggufmicrosoftさんが公開しているPhi-3-medium-128k-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00045013828643391667, + "project_name": "Phi-3-medium-128k-instruct-gguf" + }, + { + "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v1.0", + "downloads": 4005, + "description": "Kotoba-WhisperKotoba-Whisper is a collection of distilled Whisper models for Japanese ASR, developed through the collaboration bewteenAsahi Ushio and Kotoba Technologies.", + "source": "Hugging Face", + "score": 0.00044968915868491804, + "project_name": "kotoba-whisper-v1.0" + }, + { + "url": "https://huggingface.co/cyberagent/calm2-7b", + "downloads": 3930, + "description": "CyberAgentLM2-7B (CALM2-7B)", + "source": "Hugging Face", + "score": 0.00044126801339119297, + "project_name": "calm2-7b" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-fast-instruct-gguf", + "downloads": 3887, + "description": "ELYZA-japanese-Llama-2-7b-fast-instruct-ggufELYZAさんが公開しているELYZA-japanese-Llama-2-7b-fast-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00043643989008945727, + "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct-gguf" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt2-medium", + "downloads": 3784, + "description": "japanese-gpt2-mediumThis repository provides a medium-sized Japanese GPT-2 model.", + "source": "Hugging Face", + "score": 0.0004248748505527415, + "project_name": "japanese-gpt2-medium" + }, + { + "url": "https://huggingface.co/mmnga/japanese-stablelm-2-instruct-1_6b-gguf", + "downloads": 3750, + "description": "japanese-stablelm-2-instruct-1_6b-ggufstabilityaiさんが公開しているjapanese-stablelm-2-instruct-1_6bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0004210572646862528, + "project_name": "japanese-stablelm-2-instruct-1_6b-gguf" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt2-small", + "downloads": 3712, + "description": "japanese-gpt2-smallThis 
"description": "japanese-gpt2-small: this repository provides a small-sized Japanese GPT-2 model.", + "source": "Hugging Face", + "score": 0.00041679055107076546, + "project_name": "japanese-gpt2-small" + }, + { + "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-ginza-510", + "downloads": 3701, + "description": "transformers-ud-japanese-electra-ginza-510 (sudachitra-wordpiece, mC4 Japanese). This is an ELECTRA model pretrained on approximately 200M Japanese sentences extracted from mC4 and finetuned by spaCy v3 on UD_Japanese_BCCWJ r2.8. The base pretrained model is megagonlabs/transformers-ud-japanese-electra-base-discrimininator.", + "source": "Hugging Face", + "score": 0.00041555544976101913, + "project_name": "transformers-ud-japanese-electra-base-ginza-510" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft-v2", + "downloads": 3566, + "description": "japanese-gpt-neox-3.6b-instruction-sft-v2: this repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", + "source": "Hugging Face", + "score": 0.000400397388232314, + "project_name": "japanese-gpt-neox-3.6b-instruction-sft-v2" + }, + { + "url": "https://huggingface.co/cyberagent/open-calm-small", + "downloads": 3543, + "description": "OpenCALM-Small: OpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by CyberAgent, Inc.", + "source": "Hugging Face", + "score": 0.0003978149036755717, + "project_name": "open-calm-small" + }, + { + "url": "https://huggingface.co/kotoba-tech/kotoba-whisper-v1.1", + "downloads": 3529, + "description": "Kotoba-Whisper-v1.1 is a Japanese ASR model based on kotoba-tech/kotoba-whisper-v1.0, with additional postprocessing stacks integrated as a pipeline.", + "source": "Hugging Face", + "score": 0.00039624295655407634, + "project_name": "kotoba-whisper-v1.1" + }, + { + "url": "https://huggingface.co/augmxnt/shisa-7b-v1", + "downloads": 3483, + "description": "Shisa 7B (shisa-7b-v1)", + "source": "Hugging Face", + "score": 0.0003910779874405916, + "project_name": "shisa-7b-v1" + }, + { + "url": "https://huggingface.co/Helsinki-NLP/opus-tatoeba-en-ja", + "downloads": 3474, + "description": "en-ja. Source group: English. Target group: Japanese. OPUS readme: eng-jpn. Model: transformer-align. Source language(s): eng. Target language(s): jpn. Pre-processing: normalization + SentencePiece (spm32k, spm32k).", + "source": "Hugging Face", + "score": 0.00039006745000534464, + "project_name": "opus-tatoeba-en-ja" + }, + { + "url": "https://huggingface.co/augmxnt/shisa-base-7b-v1", + "downloads": 3419, + "description": "shisa-base-7b-v1 takes Mistral 7B and adds an additional 8B tokens of primarily Japanese pre-training.", + "source": "Hugging Face", + "score": 0.00038389194345661294, + "project_name": "shisa-base-7b-v1" + }, + { + "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b-8k", + "downloads": 3317, + "description": "bilingual-gpt-neox-4b-8k. Notice: this model requires transformers>=4.31.0 to work properly.", + "source": "Hugging Face", + "score": 0.00037243918585714686, + "project_name": "bilingual-gpt-neox-4b-8k" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-plus-hf", + "downloads": 3286, + "description": "Swallow: our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.00036895844580240716, + "project_name":
"Swallow-7b-plus-hf" + }, + { + "url": "https://huggingface.co/mmnga/Phi-3-mini-128k-instruct-gguf", + "downloads": 3243, + "description": "Phi-3-mini-128k-instruct-ggufmicrosoftさんが公開しているPhi-3-mini-128k-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00036413032250067146, + "project_name": "Phi-3-mini-128k-instruct-gguf" + }, + { + "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B-instruct", + "downloads": 3224, + "description": "Fugaku-LLM利用規約この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", + "source": "Hugging Face", + "score": 0.00036199696569292776, + "project_name": "Fugaku-LLM-13B-instruct" + }, + { + "url": "https://huggingface.co/haqishen/Llama-3-8B-Japanese-Instruct", + "downloads": 3206, + "description": "IntroductionWho am I: Qishen Ha", + "source": "Hugging Face", + "score": 0.00035997589082243373, + "project_name": "Llama-3-8B-Japanese-Instruct" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-hf", + "downloads": 3197, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.00035896535338718674, + "project_name": "Swallow-70b-hf" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", + "downloads": 3106, + "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-ggufELYZAさんが公開しているELYZA-japanese-Llama-2-13b-fast-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.000348747697097467, + "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf" + }, + { + "url": "https://huggingface.co/cheonboy/sentence_embedding_japanese", + "downloads": 3047, + "description": "This is a Japanese sentence-LUKE model.", + "source": "Hugging Face", + "score": 0.0003421230627997366, + "project_name": "sentence_embedding_japanese" + }, + { + "url": "https://huggingface.co/pkshatech/simcse-ja-bert-base-clcmlp", + "downloads": 2977, + "description": "Japanese SimCSE (BERT-base)", + "source": "Hugging Face", + "score": 0.0003342633271922599, + "project_name": "simcse-ja-bert-base-clcmlp" + }, + { + "url": "https://huggingface.co/mmnga/Llama-3-70B-japanese-suzume-vector-v0.1", + "downloads": 2935, + "description": "Model Card for Model ID実験モデルです /", + "source": "Hugging Face", + "score": 0.00032954748582777387, + "project_name": "Llama-3-70B-japanese-suzume-vector-v0.1" + }, + { + "url": "https://huggingface.co/Aratako/c4ai-command-r-v01-japanese-instruct", + "downloads": 2928, + "description": "c4ai-command-r-v01-japanese-instructGGUF版はこちら/Click here for the GGUF version概要CohereForAI/c4ai-command-r-v01を、ichikara-instructionを使って追加で日本語インストラクションチューニングを施したモデルです。", + "source": "Hugging Face", + "score": 0.0003287615122670262, + "project_name": "c4ai-command-r-v01-japanese-instruct" + }, + { + "url": "https://huggingface.co/bclavie/JaColBERT", + "downloads": 2874, + "description": "このドキュメントの日本語版はまだ作成中です。", + "source": "Hugging Face", + "score": 0.0003226982876555442, + "project_name": "JaColBERT" + }, + { + "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat", + "downloads": 2786, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 | 🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 0.00031281747717757345, + "project_name": "Orion-14B-Chat" + }, + { + 
"url": "https://huggingface.co/ushikado/yuyuyui-chatbot", + "downloads": 2762, + "description": "yuyuyui-chatbotThis model is based on rinna/japanese-gpt2-medium and finetuned on Yuyuyui scenario corpus.", + "source": "Hugging Face", + "score": 0.0003101227106835814, + "project_name": "yuyuyui-chatbot" + }, + { + "url": "https://huggingface.co/Lasorco/lametta", + "downloads": 2761, + "description": "このモデルは何?", + "source": "Hugging Face", + "score": 0.0003100104287463317, + "project_name": "lametta" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-ppo", + "downloads": 2737, + "description": "japanese-gpt-neox-3.6b-instruction-ppoOverviewThis repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", + "source": "Hugging Face", + "score": 0.0003073156622523397, + "project_name": "japanese-gpt-neox-3.6b-instruction-ppo" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-instruct-gguf", + "downloads": 2623, + "description": "ELYZA-japanese-Llama-2-7b-instruct-ggufELYZAさんが公開しているELYZA-japanese-Llama-2-7b-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00029451552140587764, + "project_name": "ELYZA-japanese-Llama-2-7b-instruct-gguf" + }, + { + "url": "https://huggingface.co/rinna/japanese-hubert-base", + "downloads": 2623, + "description": "rinna/japanese-hubert-baseOverviewThis is a Japanese HuBERT Base model trained by rinna Co.", + "source": "Hugging Face", + "score": 0.00029451552140587764, + "project_name": "japanese-hubert-base" + }, + { + "url": "https://huggingface.co/MCZK/Assistance-7B-GGUF", + "downloads": 2577, + "description": "Local-Novel-LLM-project様の Assistance をGGUF形式に変換したものです。", + "source": "Hugging Face", + "score": 0.00028935055229239293, + "project_name": "Assistance-7B-GGUF" + }, + { + "url": "https://huggingface.co/mmnga/Fugaku-LLM-13B-instruct-gguf", + "downloads": 2570, + "description": "Fugaku-LLM-13B-instruct-ggufFugaku-LLMさんが公開しているFugaku-LLM-13B-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0002885645787316453, + "project_name": "Fugaku-LLM-13B-instruct-gguf" + }, + { + "url": "https://huggingface.co/MCZK/Japanese-Chat-Umievo-itr004-7b-GGUF", + "downloads": 2493, + "description": "umiyuki様の Japanese-Chat-Umievo-itr004-7b をGGUF形式に変換したものです。", + "source": "Hugging Face", + "score": 0.0002799188695634209, + "project_name": "Japanese-Chat-Umievo-itr004-7b-GGUF" + }, + { + "url": "https://huggingface.co/mmnga/aibuncho-japanese-novel-gpt-j-6b-gguf", + "downloads": 2485, + "description": "AIBunCho/japanese-novel-gpt-j-6bAI BunChoさんが公開しているjapanese-novel-gpt-j-6bのgguf変換版です。", + "source": "Hugging Face", + "score": 0.00027902061406542356, + "project_name": "aibuncho-japanese-novel-gpt-j-6b-gguf" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1", + "downloads": 2354, + "description": "Swallow-MX-8x7b-NVE-v0.1Our Swallow-MX-8x7b-NVE-v0.1 model has undergone continuous pre-training from the Mixtral-8x7B-Instruct-v0.1, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.0002643116802857171, + "project_name": "Swallow-MX-8x7b-NVE-v0.1" + }, + { + "url": "https://huggingface.co/mmnga/aixsatoshi-Honyaku-13b-gguf", + "downloads": 2305, + "description": "aixsatoshi-Honyaku-13b-ggufaixsatoshiさんが公開しているHonyaku-13bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0002588098653604834, + "project_name": "aixsatoshi-Honyaku-13b-gguf" + }, + { + "url": 
"https://huggingface.co/rinna/japanese-gpt-1b", + "downloads": 2281, + "description": "japanese-gpt-1bThis repository provides a 1.3B-parameter Japanese GPT model.", + "source": "Hugging Face", + "score": 0.0002561150988664914, + "project_name": "japanese-gpt-1b" + }, + { + "url": "https://huggingface.co/OrionStarAI/Orion-14B-Base", + "downloads": 2259, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 |🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 0.0002536448962469987, + "project_name": "Orion-14B-Base" + }, + { + "url": "https://huggingface.co/stockmark/stockmark-13b", + "downloads": 2148, + "description": "stockmark/stockmark-13bStockmark-13b is a 13 billion parameter LLM pretrained from scratch based on Japanese corpus of about 220B tokens.", + "source": "Hugging Face", + "score": 0.0002411816012122856, + "project_name": "stockmark-13b" + }, + { + "url": "https://huggingface.co/karakuri-ai/karakuri-lm-70b-chat-v0.1", + "downloads": 2145, + "description": "KARAKURI LMKARAKURI LM is a pretrained language model that builds upon Llama 2.Our model enhances Llama 2's capabilities by incorporating additional Japanese vocabulary and further pretraining on a mixture of Japanese and multilingual corpora.", + "source": "Hugging Face", + "score": 0.00024084475540053662, + "project_name": "karakuri-lm-70b-chat-v0.1" + }, + { + "url": "https://huggingface.co/rinna/nekomata-14b", + "downloads": 2119, + "description": "rinna/nekomata-14bOverviewWe conduct continual pre-training of qwen-14b on 66B tokens from a mixture of Japanese and English datasets.", + "source": "Hugging Face", + "score": 0.00023792542503204525, + "project_name": "nekomata-14b" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-base-japanese-char-v3", + "downloads": 2072, + "description": "BERT base Japanese (character-level tokenization with whole word masking, CC-100 and jawiki-20230102)This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 0.0002326481739813109, + "project_name": "bert-base-japanese-char-v3" + }, + { + "url": "https://huggingface.co/TKU410410103/hubert-base-japanese-asr", + "downloads": 1999, + "description": "hubert-base-asrThis model is a fine-tuned version of rinna/japanese-hubert-base on the common_voice_11_0 dataset for ASR tasks.", + "source": "Hugging Face", + "score": 0.00022445159256208517, + "project_name": "hubert-base-japanese-asr" + }, + { + "url": "https://huggingface.co/Local-Novel-LLM-project/Vecteus-v1", + "downloads": 1999, + "description": "Our ModelsVecteusNinja-v1Ninja-v1-NSFWNinja-v1-128kNinja-v1-NSFW-128kModel Card for VecTeus-v1.0The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1VecTeus has the following changes compared to Mistral-7B-v0.1.128k context window (8k context in v0.1)Achieving both high quality Japanese and English generationCan be generated NSFWMemory ability that does not forget even after long-context generationThis model was created with the help of GPUs from the f", + "source": "Hugging Face", + "score": 0.00022445159256208517, + "project_name": "Vecteus-v1" + }, + { + "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-GGUF", + "downloads": 1996, + "description": "Ninja-v1-RP-expressive-GGUF概要Aratako/Ninja-v1-RP-expressiveの量子化済みGGUF版です。", + "source": "Hugging Face", + "score": 0.00022411474675033618, + "project_name": "Ninja-v1-RP-expressive-GGUF" + }, + { + "url": "https://huggingface.co/Vsukiyaki/Yaki-Dofu-Mix", + "downloads": 1962, + 
"description": "Yaki-Dofu-Mix概要 / OverviewYaki-Dofu-Mixは、アニメ風の画風に特化したマージモデルです。 ", + "source": "Hugging Face", + "score": 0.00022029716088384747, + "project_name": "Yaki-Dofu-Mix" + }, + { + "url": "https://huggingface.co/mmnga/rinna-llama-3-youko-8b-gguf", + "downloads": 1823, + "description": "rinna-llama-3-youko-8b-ggufrinnaさんが公開しているllama-3-youko-8bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00020468997160614372, + "project_name": "rinna-llama-3-youko-8b-gguf" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-beta-70b", + "downloads": 1766, + "description": "Japanese-StableLM-Instruct-Beta-70BA cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XLModel Descriptionjapanese-stablelm-instruct-beta-70b is a 70B-parameter decoder-only language model based on japanese-stablelm-base-beta-70b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", + "source": "Hugging Face", + "score": 0.00019828990118291268, + "project_name": "japanese-stablelm-instruct-beta-70b" + }, + { + "url": "https://huggingface.co/Lasorco/lametta_old", + "downloads": 1746, + "description": "old?", + "source": "Hugging Face", + "score": 0.0001960442624379193, + "project_name": "lametta_old" + }, + { + "url": "https://huggingface.co/second-state/Llama-3-8B-Japanese-Instruct-GGUF", + "downloads": 1706, + "description": "Llama-3-8B-Japanese-Instruct-GGUFOriginal Modelhaqishen/Llama-3-8B-Japanese-InstructRun with LlamaEdgeLlamaEdge version: v0.10.1 and abovePrompt templatePrompt type: llama-3-chatPrompt string<|begin_of_text|><|start_header_id|>system<|end_header_id|>{{ system_prompt }}<|eot_id|><|start_header_id|>user<|end_header_id|>", + "source": "Hugging Face", + "score": 0.00019155298494793262, + "project_name": "Llama-3-8B-Japanese-Instruct-GGUF" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-instruct-v0.1", + "downloads": 1653, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.00018560204227370024, + "project_name": "Swallow-7b-instruct-v0.1" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-small-medium", + "downloads": 1652, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 0.0001854897603364506, + "project_name": "t5-small-medium" + }, + { + "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-RAG", + "downloads": 1637, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 | 🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 0.00018380553127770558, + "project_name": "Orion-14B-Chat-RAG" + }, + { + "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B-instruct-gguf", + "downloads": 1619, + "description": "Fugaku-LLM利用規約この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散���列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", + "source": "Hugging Face", + "score": 0.00018178445640721155, + "project_name": "Fugaku-LLM-13B-instruct-gguf" + }, + { + "url": "https://huggingface.co/alabnii/jmedroberta-base-sentencepiece-vocab50000", + "downloads": 1596, + "description": "alabnii/jmedroberta-base-sentencepiece-vocab50000Model descriptionThis is a Japanese RoBERTa base model pre-trained on academic 
articles in medical sciences collected by Japan Science and Technology Agency (JST).", + "source": "Hugging Face", + "score": 0.0001792019718504692, + "project_name": "jmedroberta-base-sentencepiece-vocab50000" + }, + { + "url": "https://huggingface.co/TKU410410103/wav2vec2-base-japanese-asr", + "downloads": 1591, + "description": "wav2vec2-base-asr: This model is a fine-tuned version of rinna/japanese-wav2vec2-base on the common_voice_11_0 dataset for ASR tasks.", + "source": "Hugging Face", + "score": 0.00017864056216422087, + "project_name": "wav2vec2-base-japanese-asr" + }, + { + "url": "https://huggingface.co/dahara1/weblab-10b-instruction-sft-GPTQ", + "downloads": 1591, + "description": "weblab-10b-instruction-sft-GPTQ: Original model: weblab-10b-instruction-sft, a Japanese-centric multilingual GPT-NeoX model of 10 billion parameters created by matsuo-lab (Takeshi Kojima).", + "source": "Hugging Face", + "score": 0.00017864056216422087, + "project_name": "weblab-10b-instruction-sft-GPTQ" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-instruct-hf", + "downloads": 1559, + "description": "Swallow: Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.00017504754017223152, + "project_name": "Swallow-13b-instruct-hf" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-beta-70b", + "downloads": 1558, + "description": "Japanese-StableLM-Base-Beta-70B: \"A cute robot wearing a kimono writes calligraphy with one single brush\" (Stable Diffusion XL). Model Description: japanese-stablelm-base-beta-70b is a 70B-parameter decoder-only language model based on Llama-2-70b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", + "source": "Hugging Face", + "score": 0.00017493525823498185, + "project_name": "japanese-stablelm-base-beta-70b" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-large-sentiment-analysis-wrime", + "downloads": 1557, + "description": "This model is a fine-tuned version of Luke-japanese-large-lite.", + "source": "Hugging Face", + "score": 0.00017482297629773218, + "project_name": "luke-japanese-large-sentiment-analysis-wrime" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-wikipedia-ud-head", + "downloads": 1541, + "description": "bert-base-japanese-wikipedia-ud-head: Model", + "source": "Hugging Face", + "score": 0.0001730264653017375, + "project_name": "bert-base-japanese-wikipedia-ud-head" + }, + { + "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-13b-instruct-v0.1-gguf", + "downloads": 1537, + "description": "tokyotech-llm-Swallow-13b-instruct-v0.1-gguf: A gguf-format conversion of Swallow-13b-instruct-v0.1 released by tokyotech-llm.", + "source": "Hugging Face", + "score": 0.00017257733755273884, + "project_name": "tokyotech-llm-Swallow-13b-instruct-v0.1-gguf" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-2-instruct-1_6b", + "downloads": 1534, + "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", + "source": "Hugging Face", + "score": 0.00017224049174098982, + "project_name": "japanese-stablelm-2-instruct-1_6b" + }, + { + "url": "https://huggingface.co/Aratako/Ninja-v1-RP-GGUF", + "downloads": 1517, + "description": "Ninja-v1-RP-GGUF: A quantized GGUF version of Aratako/Ninja-v1-RP.", + "source": "Hugging Face", +
"score": 0.00017033169880774547, + "project_name": "Ninja-v1-RP-GGUF" + }, + { + "url": "https://huggingface.co/KBlueLeaf/guanaco-7b-leh-v2", + "downloads": 1499, + "description": "Guanaco-leh-V2: A Multilingual Instruction-Following Language Model Based on LLaMA", + "source": "Hugging Face", + "score": 0.00016831062393725147, + "project_name": "guanaco-7b-leh-v2" + }, + { + "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf", + "downloads": 1491, + "description": "tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguftokyotech-llmさんが公開しているSwallow-MS-7b-instruct-v0.1のggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00016741236843925412, + "project_name": "tokyotech-llm-Swallow-MS-7b-instruct-v0.1-gguf" + }, + { + "url": "https://huggingface.co/dahara1/ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ", + "downloads": 1477, + "description": "Model Card for Model IDOriginal model elyza/ELYZA-japanese-Llama-2-7b-fast-instruct which is based on Meta's \"Llama 2\" and has undergone additional pre-training in Japanese, and thier original post-training and speed up tuning.", + "source": "Hugging Face", + "score": 0.00016584042131775878, + "project_name": "ELYZA-japanese-Llama-2-7b-fast-instruct-GPTQ" + }, + { + "url": "https://huggingface.co/mmnga/haqishen-Llama-3-8B-Japanese-Instruct-gguf", + "downloads": 1464, + "description": "haqishen-Llama-3-8B-Japanese-Instruct-ggufhaqishenさんが公開しているLlama-3-8B-Japanese-Instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0001643807561335131, + "project_name": "haqishen-Llama-3-8B-Japanese-Instruct-gguf" + }, + { + "url": "https://huggingface.co/mmnga/stockmark-gpt-neox-japanese-1.4b-gguf", + "downloads": 1449, + "description": "stockmark-gpt-neox-japanese-1.4b-ggufstockmarkさんが公開しているgpt-neox-japanese-1.4bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0001626965270747681, + "project_name": "stockmark-gpt-neox-japanese-1.4b-gguf" + }, + { + "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft", + "downloads": 1422, + "description": "japanese-large-lm-3.6b-instruction-sftThis repository provides a 3.6B parameters Japanese language model, fine-tuned and trained by LINE Corporation.", + "source": "Hugging Face", + "score": 0.00015966491476902708, + "project_name": "japanese-large-lm-3.6b-instruction-sft" + }, + { + "url": "https://huggingface.co/ku-nlp/deberta-v2-tiny-japanese-char-wwm", + "downloads": 1421, + "description": "Model Card for Japanese character-level DeBERTa V2 tinyModel descriptionThis is a Japanese DeBERTa V2 tiny model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.This model is trained with character-level tokenization and whole word masking.", + "source": "Hugging Face", + "score": 0.0001595526328317774, + "project_name": "deberta-v2-tiny-japanese-char-wwm" + }, + { + "url": "https://huggingface.co/mmnga/c4ai-command-r-plus-gguf", + "downloads": 1420, + "description": "c4ai-command-r-plus-ggufCohereForAIさんが公開しているc4ai-command-r-plusのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00015944035089452774, + "project_name": "c4ai-command-r-plus-gguf" + }, + { + "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-large-v1", + "downloads": 1358, + "description": "hotchpotch/japanese-reranker-cross-encoder-large-v1日本語で学習させた Reranker (CrossEncoder) シリーズです。", + "source": "Hugging Face", + "score": 0.00015247887078504837, + "project_name": 
"japanese-reranker-cross-encoder-large-v1" + }, + { + "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive-v2-GGUF", + "downloads": 1350, + "description": "Ninja-v1-RP-expressive-GGUF概要Aratako/Ninja-v1-RP-expressive-v2の量子化済みGGUF版です。", + "source": "Hugging Face", + "score": 0.00015158061528705103, + "project_name": "Ninja-v1-RP-expressive-v2-GGUF" + }, + { + "url": "https://huggingface.co/rinna/japanese-cloob-vit-b-16", + "downloads": 1285, + "description": "rinna/japanese-cloob-vit-b-16This is a Japanese CLOOB (Contrastive Leave One Out Boost) model trained by rinna Co.", + "source": "Hugging Face", + "score": 0.00014428228936582264, + "project_name": "japanese-cloob-vit-b-16" + }, + { + "url": "https://huggingface.co/mmnga/aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf", + "downloads": 1285, + "description": "aixsatoshi-Llama-3-8b-Cosmopedia-japanese-ggufaixsatoshiさんが公開しているLlama-3-8b-Cosmopedia-japaneseのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00014428228936582264, + "project_name": "aixsatoshi-Llama-3-8b-Cosmopedia-japanese-gguf" + }, + { + "url": "https://huggingface.co/line-corporation/clip-japanese-base", + "downloads": 1254, + "description": "clip-japanese-baseThis is a Japanese CLIP (Contrastive Language-Image Pre-training) model developed by LY Corporation.", + "source": "Hugging Face", + "score": 0.00014080154931108294, + "project_name": "clip-japanese-base" + }, + { + "url": "https://huggingface.co/TKU410410103/uniTKU-hubert-japanese-asr", + "downloads": 1239, + "description": "uniTKU-hubert-japanese-asrThis model was fine-tuned on a dataset provided by uniTKU, and it has maintained the original performance metrics on the common_voice_11_0 dataset.", + "source": "Hugging Face", + "score": 0.00013911732025233793, + "project_name": "uniTKU-hubert-japanese-asr" + }, + { + "url": "https://huggingface.co/cyberagent/open-calm-1b", + "downloads": 1230, + "description": "OpenCALM-1BModel DescriptionOpenCALM is a suite of decoder-only language models pre-trained on Japanese datasets, developed by CyberAgent, Inc.", + "source": "Hugging Face", + "score": 0.00013810678281709092, + "project_name": "open-calm-1b" + }, + { + "url": "https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B-GGUF", + "downloads": 1185, + "description": "Japanese-Starling-ChatV-7B-GGUFGGUF conversion of \"Japanese-Starling-ChatV-7B\"\"Japanese-Starling-ChatV-7B\" is a Japanese chat model built on top of \"chatntq-ja-7b-v1.0\", originally based on Mistral-7B-v0.1.I applied the chat vector acquired by subtracting the weights of Mistral-7B-v0.1 from the weights of \"Starling-LM-7B-beta\" to this model.", + "source": "Hugging Face", + "score": 0.0001330540956408559, + "project_name": "Japanese-Starling-ChatV-7B-GGUF" + }, + { + "url": "https://huggingface.co/Fugaku-LLM/Fugaku-LLM-13B", + "downloads": 1176, + "description": "Fugaku-LLM利用規約この利用規約(以下「本規約」といいます)は、富士通株式会社、国立研究開発法人理化学研究所、国立大学法人東京工業大学、国立大学法人東北大学、株式会社サイバーエージェント、国立大学法人東海国立大学機構、及び株式会社Kotoba Technologies Japan (以下「開発者」といいます)による、スーパーコンピュータ「富岳」政策対応枠における大規模言語モデル分散並列学習手法の開発の成果物として公開する大規模言語モデル(以下「Fugaku-LLM」といいます)の利用に関する条件を定めるものです。", + "source": "Hugging Face", + "score": 0.0001320435582056089, + "project_name": "Fugaku-LLM-13B" + }, + { + "url": "https://huggingface.co/ken11/albert-base-japanese-v1", + "downloads": 1164, + "description": "albert-base-japanese-v1日本語事前学習済みALBERTモデルですHow to 
use: Fine-tuning: This model is a PreTrained model and is basically intended to be fine-tuned for various tasks before use. Fill-Mask: This model uses SentencePiece for its Tokenizer, and as-is there is an issue where extra tokens get mixed in after the [MASK] token, so when using it you need to do the following. For PyTorch: from transformers import (AlbertForMaskedLM, AlbertTokenizerFast); import torch; tokenizer = AlbertTokenizerFast.from_pretrained(\"ken11/albert-base-japanese-v1\")", + "source": "Hugging Face", + "score": 0.0001306961749586129, + "project_name": "albert-base-japanese-v1" + }, + { + "url": "https://huggingface.co/TKU410410103/hubert-large-japanese-asr", + "downloads": 1139, + "description": "hubert-large-asr: This model is a fine-tuned version of rinna/japanese-hubert-large for ASR.", + "source": "Hugging Face", + "score": 0.0001278891265273712, + "project_name": "hubert-large-japanese-asr" + }, + { + "url": "https://huggingface.co/rinna/bilingual-gpt-neox-4b-instruction-ppo", + "downloads": 1127, + "description": "bilingual-gpt-neox-4b-instruction-ppo: This repository provides an English-Japanese bilingual GPT-NeoX model of 3.8 billion parameters.", + "source": "Hugging Face", + "score": 0.0001265417432803752, + "project_name": "bilingual-gpt-neox-4b-instruction-ppo" + }, + { + "url": "https://huggingface.co/sociocom/MedNER-CR-JA", + "downloads": 1088, + "description": "This is a model for named entity recognition of Japanese medical documents.", + "source": "Hugging Face", + "score": 0.00012216274772763815, + "project_name": "MedNER-CR-JA" + }, + { + "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b", + "downloads": 1068, + "description": "japanese-large-lm-3.6b: This repository provides a 3.6B-parameter Japanese language model, trained by LINE Corporation.", + "source": "Hugging Face", + "score": 0.0001199171089826448, + "project_name": "japanese-large-lm-3.6b" + }, + { + "url": "https://huggingface.co/mmnga/umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf", + "downloads": 1049, + "description": "umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf: A gguf-format conversion of Japanese-Chat-Umievo-itr001-7b released by umiyuki.", + "source": "Hugging Face", + "score": 0.00011778375217490112, + "project_name": "umiyuki-Japanese-Chat-Umievo-itr001-7b-gguf" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stable-diffusion-xl", + "downloads": 1024, + "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", + "source": "Hugging Face", + "score": 0.00011497670374365944, + "project_name": "japanese-stable-diffusion-xl" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-hf", + "downloads": 1013, + "description": "Swallow: Our Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.00011374160243391309, + "project_name": "Swallow-13b-hf" + }, + { + "url": "https://huggingface.co/aken12/splade-japanese-v3", + "downloads": 1007, + "description": "Evaluation on MIRACL Japanese: These models are not trained on the MIRACL training data.", + "source": "Hugging Face", + "score": 0.00011306791081041509, + "project_name": "splade-japanese-v3" + }, + { + "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b", + "downloads": 1004, + "description": "japanese-large-lm-1.7b: This repository provides a 1.7B-parameter Japanese language model, trained by LINE Corporation.", + "source": "Hugging Face", + "score": 0.00011273106499866609, + "project_name": "japanese-large-lm-1.7b" + }, + { + "url":
"https://huggingface.co/hotchpotch/japanese-bge-reranker-v2-m3-v1", + "downloads": 964, + "description": "hotchpotch/japanese-bge-reranker-v2-m3-v1日本語で学習させた Reranker (CrossEncoder) シリーズです。", + "source": "Hugging Face", + "score": 0.0001082397875086794, + "project_name": "japanese-bge-reranker-v2-m3-v1" + }, + { + "url": "https://huggingface.co/mmnga/aixsatoshi-Ex-karakuri-8x12B-chat-v1-gguf", + "downloads": 964, + "description": "aixsatoshi-Ex-karakuri-8x12B-chat-v1-ggufaixsatoshiさんが公開しているEx-karakuri-8x12B-chat-v1のggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.0001082397875086794, + "project_name": "aixsatoshi-Ex-karakuri-8x12B-chat-v1-gguf" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-instruct-v0.1", + "downloads": 962, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 0.00010801522363418006, + "project_name": "Swallow-13b-instruct-v0.1" + }, + { + "url": "https://huggingface.co/ThePioneer/CoolerWaifuDiffusion", + "downloads": 950, + "description": "モデル説明 (model explanation)CoolJapanDiffusion 2.1.1とWaifuDiffusion 1.4 anime epoch2のマージ。", + "source": "Hugging Face", + "score": 0.00010666784038718405, + "project_name": "CoolerWaifuDiffusion" + }, + { + "url": "https://huggingface.co/mmnga/tokyotech-llm-Swallow-70b-instruct-v0.1-gguf", + "downloads": 941, + "description": "tokyotech-llm-Swallow-70b-instruct-v0.1-gguftokyotech-llmさんが公開しているSwallow-70b-instruct-v0.1のggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00010565730295193704, + "project_name": "tokyotech-llm-Swallow-70b-instruct-v0.1-gguf" + }, + { + "url": "https://huggingface.co/studio-ousia/luke-japanese-base-lite", + "downloads": 931, + "description": "luke-japaneseluke-japanese is the Japanese version of LUKE (LanguageUnderstanding with Knowledge-based Embeddings), a pre-trainedknowledge-enhanced contextualized representation of words and entities.", + "source": "Hugging Face", + "score": 0.00010453448357944037, + "project_name": "luke-japanese-base-lite" + }, + { + "url": "https://huggingface.co/ku-nlp/deberta-v2-tiny-japanese", + "downloads": 926, + "description": "Model Card for Japanese DeBERTa V2 tinyModel descriptionThis is a Japanese DeBERTa V2 tiny model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.How to useYou can use this model for masked language modeling as follows:from transformers import AutoTokenizer, AutoModelForMaskedLMtokenizer = AutoTokenizer.from_pretrained('ku-nlp/deberta-v2-tiny-japanese')", + "source": "Hugging Face", + "score": 0.00010397307389319204, + "project_name": "deberta-v2-tiny-japanese" + }, + { + "url": "https://huggingface.co/mmnga/lightblue-suzume-llama-3-8B-japanese-gguf", + "downloads": 905, + "description": "lightblue-suzume-llama-3-8B-japanese-gguflightblueさんが公開しているsuzume-llama-3-8B-japaneseのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 0.00010161515321094901, + "project_name": "lightblue-suzume-llama-3-8B-japanese-gguf" + }, + { + "url": "https://huggingface.co/gaianet/Llama-3-8B-Japanese-Instruct-GGUF", + "downloads": 905, + "description": "Llama-3-8B-Japanese-Instruct-GGUFOriginal Modelhaqishen/Llama-3-8B-Japanese-InstructRun with GaianetPrompt template:prompt template: llama-3-chatContext size:chat_ctx_size: 4096Run with GaiaNet:Quick start: https://docs.gaianet.ai/node-guide/quick-startCustomize your node: 
https://docs.gaianet.ai/node-guide/customize. Quantized GGUF models (Name / Quant method / Bits / Size / Use case): Llama-3-8B-Japanese-Instruct-Q2_K.gguf / Q2_K / 2 / 3.18 GB / smallest, significant quality loss - not recommended for most purposes; Llama-3-8B-Japanese-I", + "source": "Hugging Face", + "score": 0.00010161515321094901, + "project_name": "Llama-3-8B-Japanese-Instruct-GGUF" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-large-long", + "downloads": 891, + "description": "Model card for model ID: This is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 0.00010004320608945367, + "project_name": "t5-large-long" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-small-long", + "downloads": 891, + "description": "Model card for model ID: This is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 0.00010004320608945367, + "project_name": "t5-small-long" + }, + { + "url": "https://huggingface.co/karakuri-ai/karakuri-lm-70b-v0.1", + "downloads": 882, + "description": "KARAKURI LM: KARAKURI LM is a pretrained language model that builds upon Llama 2. Our model enhances Llama 2's capabilities by incorporating additional Japanese vocabulary and further pretraining on a mixture of Japanese and multilingual corpora.", + "source": "Hugging Face", + "score": 9.903266865420667e-05, + "project_name": "karakuri-lm-70b-v0.1" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt-neox-small", + "downloads": 875, + "description": "japanese-gpt-neox-small: This repository provides a small-sized Japanese GPT-NeoX model.", + "source": "Hugging Face", + "score": 9.8246695093459e-05, + "project_name": "japanese-gpt-neox-small" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-fast-gguf", + "downloads": 869, + "description": "ELYZA-japanese-Llama-2-7b-fast-gguf: A gguf-format conversion of ELYZA-japanese-Llama-2-7b-fast released by ELYZA.", + "source": "Hugging Face", + "score": 9.7573003469961e-05, + "project_name": "ELYZA-japanese-Llama-2-7b-fast-gguf" + }, + { + "url": "https://huggingface.co/hajime9652/xlnet-japanese", + "downloads": 867, + "description": "XLNet-japanese: This model requires MeCab and SentencePiece with XLNetTokenizer.", + "source": "Hugging Face", + "score": 9.734843959546165e-05, + "project_name": "xlnet-japanese" + }, + { + "url": "https://huggingface.co/Local-Novel-LLM-project/Ocuteus-v1-gguf", + "downloads": 862, + "description": "A GGUF version of Ocuteus.", + "source": "Hugging Face", + "score": 9.678702990921331e-05, + "project_name": "Ocuteus-v1-gguf" + }, + { + "url": "https://huggingface.co/mmnga/ryota39-Phi-3-mini-4k-instruct-dpo-gguf", + "downloads": 861, + "description": "ryota39-Phi-3-mini-4k-instruct-dpo-gguf: A gguf-format conversion of Phi-3-mini-4k-instruct-dpo released by ryota39.", + "source": "Hugging Face", + "score": 9.667474797196365e-05, + "project_name": "ryota39-Phi-3-mini-4k-instruct-dpo-gguf" + }, + { + "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-7B-GGUF", + "downloads": 852, + "description": "Chat & support: TheBloke's Discord server. Want to contribute?", + "source": "Hugging Face", + "score": 9.566421053671664e-05, + "project_name": "japanese-stablelm-instruct-beta-7B-GGUF" + }, + { + "url": "https://huggingface.co/abeja/gpt2-large-japanese", + "downloads": 841, + "description": "gpt2-large-japanese: This repository provides a large-sized Japanese GPT-2 model.", + "source": "Hugging Face", + "score": 9.44291092269703e-05, + "project_name": "gpt2-large-japanese" + }, + {
"url": "https://huggingface.co/mmnga/Ninja-v1-128k-gguf", + "downloads": 841, + "description": "Ninja-v1-128k-ggufLocal-Novel-LLM-projectさんが公開しているNinja-v1-128kのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 9.44291092269703e-05, + "project_name": "Ninja-v1-128k-gguf" + }, + { + "url": "https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft", + "downloads": 824, + "description": "japanese-gpt-neox-3.6b-instruction-sftOverviewThis repository provides a Japanese GPT-NeoX model of 3.6 billion parameters.", + "source": "Hugging Face", + "score": 9.252031629372595e-05, + "project_name": "japanese-gpt-neox-3.6b-instruction-sft" + }, + { + "url": "https://huggingface.co/ku-nlp/deberta-v3-base-japanese", + "downloads": 806, + "description": "Model Card for Japanese DeBERTa V3 baseModel", + "source": "Hugging Face", + "score": 9.049924142323194e-05, + "project_name": "deberta-v3-base-japanese" + }, + { + "url": "https://huggingface.co/Tanrei/GPTSAN-japanese", + "downloads": 798, + "description": "Model Card for Tanrei/GPTSAN-japaneseGeneral-purpose Swich transformer based Japanese language modelGPTSAN has some unique features.", + "source": "Hugging Face", + "score": 8.96009859252346e-05, + "project_name": "GPTSAN-japanese" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-instruct-v0.1", + "downloads": 796, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 8.937642205073527e-05, + "project_name": "Swallow-70b-instruct-v0.1" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-crf-ner-wikipedia-dataset", + "downloads": 795, + "description": "llm-book/bert-base-japanese-v3-crf-ner-wikipedia-dataset「大規模言語モデル入門」の第6章で紹介している固有表現認識のモデルです。", + "source": "Hugging Face", + "score": 8.92641401134856e-05, + "project_name": "bert-base-japanese-v3-crf-ner-wikipedia-dataset" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-luw-upos", + "downloads": 791, + "description": "roberta-small-japanese-luw-uposModel", + "source": "Hugging Face", + "score": 8.881501236448693e-05, + "project_name": "roberta-small-japanese-luw-upos" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-alpha-7b", + "downloads": 789, + "description": "Japanese-StableLM-Base-Alpha-7B\"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XLModel Descriptionjapanese-stablelm-base-alpha-7b is a 7B-parameter decoder-only language model pre-trained on a diverse collection of Japanese and English datasets which focus on maximizing Japanese language modeling performance and Japanese downstream task performance.", + "source": "Hugging Face", + "score": 8.85904484899876e-05, + "project_name": "japanese-stablelm-base-alpha-7b" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-marc_ja", + "downloads": 778, + "description": "bert-base-japanese-v3-marc_ja「大規模言語モデル入門」の第5章で紹介している(感情分析)のモデルです。", + "source": "Hugging Face", + "score": 8.735534718024125e-05, + "project_name": "bert-base-japanese-v3-marc_ja" + }, + { + "url": "https://huggingface.co/mmnga/Qwen1.5-110B-Chat-gguf", + "downloads": 770, + "description": "Qwen1.5-110B-Chat-ggufQwenさんが公開しているQwen1.5-110B-Chatのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 8.645709168224391e-05, + "project_name": "Qwen1.5-110B-Chat-gguf" + }, + { + "url": 
"https://huggingface.co/sazyou-roukaku/LittleStepMix", + "downloads": 770, + "description": "License:CreativeML Open RAIL-MAdditional Copyright: sazyou_roukaku (TwitterID @sazyou_roukaku) as of June 25, 2023このモデルは『CreativeML Open RAIL-M』でLicenseそのものに変更はありません。", + "source": "Hugging Face", + "score": 8.645709168224391e-05, + "project_name": "LittleStepMix" + }, + { + "url": "https://huggingface.co/rinna/japanese-wav2vec2-base", + "downloads": 769, + "description": "rinna/japanese-wav2vec2-baseOverviewThis is a Japanese wav2vec 2.0 Base model trained by rinna Co.", + "source": "Hugging Face", + "score": 8.634480974499424e-05, + "project_name": "japanese-wav2vec2-base" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-2-base-1_6b", + "downloads": 767, + "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's Privacy Policy.", + "source": "Hugging Face", + "score": 8.612024587049491e-05, + "project_name": "japanese-stablelm-2-base-1_6b" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b-v2", + "downloads": 758, + "description": "Japanese-StableLM-Instruct-Alpha-7B-v2\"A parrot able to speak Japanese, ukiyoe, edo period\" — Stable Diffusion XLModel Descriptionjapanese-stablelm-instruct-alpha-7b-v2 is a 7B parameter decoder-only language models pre-trained built on top of the Japanese-StableLM-Base-Alpha-7B model and further fine-tuned on various instruction-following datasets.", + "source": "Hugging Face", + "score": 8.510970843524791e-05, + "project_name": "japanese-stablelm-instruct-alpha-7b-v2" + }, + { + "url": "https://huggingface.co/rinna/japanese-hubert-large", + "downloads": 750, + "description": "rinna/japanese-hubert-largeOverviewThis is a Japanese HuBERT Large model trained by rinna Co.", + "source": "Hugging Face", + "score": 8.421145293725057e-05, + "project_name": "japanese-hubert-large" + }, + { + "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-gamma-7B-GGUF", + "downloads": 746, + "description": "Chat & support: TheBloke's Discord serverWant to contribute?", + "source": "Hugging Face", + "score": 8.37623251882519e-05, + "project_name": "japanese-stablelm-instruct-gamma-7B-GGUF" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-base-long", + "downloads": 728, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 8.174125031775788e-05, + "project_name": "t5-base-long" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-ner", + "downloads": 706, + "description": "このモデルはluke-japanese-baseをファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。", + "source": "Hugging Face", + "score": 7.92710476982652e-05, + "project_name": "luke-japanese-base-finetuned-ner" + }, + { + "url": "https://huggingface.co/stockmark/gpt-neox-japanese-1.4b", + "downloads": 700, + "description": "stockmark/gpt-neox-japanese-1.4bThis repository provides a GPT-NeoX based model with 1.4B parameters pre-trained on Japanese corpus of about 20B tokens.", + "source": "Hugging Face", + "score": 7.85973560747672e-05, + "project_name": "gpt-neox-japanese-1.4b" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-instruct-gguf", + "downloads": 678, + "description": "ELYZA-japanese-CodeLlama-7b-instruct-ggufELYZAさんが公開しているELYZA-japanese-CodeLlama-7b-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 7.612715345527451e-05, + "project_name": 
"ELYZA-japanese-CodeLlama-7b-instruct-gguf" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-13b-fast-gguf", + "downloads": 637, + "description": "ELYZA-japanese-Llama-2-13b-fast-ggufELYZAさんが公開しているELYZA-japanese-Llama-2-13b-fastのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 7.152359402803814e-05, + "project_name": "ELYZA-japanese-Llama-2-13b-fast-gguf" + }, + { + "url": "https://huggingface.co/nlp-waseda/roberta-base-japanese", + "downloads": 635, + "description": "nlp-waseda/roberta-base-japaneseModel descriptionThis is a Japanese RoBERTa base model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.How to useYou can use this model for masked language modeling as follows:from transformers import AutoTokenizer, AutoModelForMaskedLMtokenizer = AutoTokenizer.from_pretrained(\"nlp-waseda/roberta-base-japanese\")", + "source": "Hugging Face", + "score": 7.129903015353881e-05, + "project_name": "roberta-base-japanese" + }, + { + "url": "https://huggingface.co/alfredplpl/Llama-3-8B-Instruct-Ja", + "downloads": 631, + "description": "日本語向け Llama 3 8BはじめにこのリポジトリはLlama 3を日本語化しようとしたモデルのリポジトリです。", + "source": "Hugging Face", + "score": 7.084990240454014e-05, + "project_name": "Llama-3-8B-Instruct-Ja" + }, + { + "url": "https://huggingface.co/ku-nlp/deberta-v2-base-japanese-char-wwm", + "downloads": 623, + "description": "Model Card for Japanese character-level DeBERTa V2 baseModel", + "source": "Hugging Face", + "score": 6.99516469065428e-05, + "project_name": "deberta-v2-base-japanese-char-wwm" + }, + { + "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-small-v1", + "downloads": 616, + "description": "hotchpotch/japanese-reranker-cross-encoder-small-v1日本語で学習させた Reranker (CrossEncoder) シリーズです。", + "source": "Hugging Face", + "score": 6.916567334579513e-05, + "project_name": "japanese-reranker-cross-encoder-small-v1" + }, + { + "url": "https://huggingface.co/mmnga/YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf", + "downloads": 615, + "description": "YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-ggufyuisekiさんが公開しているYuisekinAIEvol-Mistral-7B-ja-math-v0.1.1のggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 6.905339140854546e-05, + "project_name": "YuisekinAIEvol-Mistral-7B-ja-math-v0.1.1-gguf" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-Llama-2-7b-gguf", + "downloads": 614, + "description": "ELYZA-japanese-Llama-2-7b-ggufELYZAさんが公開しているELYZA-japanese-Llama-2-7bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 6.89411094712958e-05, + "project_name": "ELYZA-japanese-Llama-2-7b-gguf" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-3b-4e1t-base", + "downloads": 597, + "description": "Japanese StableLM-3B-4E1T BaseModel DescriptionThis is a 3B-parameter decoder-only language model with a focus on maximizing Japanese language modeling performance and Japanese downstream task performance.", + "source": "Hugging Face", + "score": 6.703231653805144e-05, + "project_name": "japanese-stablelm-3b-4e1t-base" + }, + { + "url": "https://huggingface.co/jurabi/bert-ner-japanese", + "downloads": 591, + "description": "BERTによる日本語固有表現抽出のモデルBertForTokenClassificationを用いて、日本語の文から固有表現を抽出します。", + "source": "Hugging Face", + "score": 6.635862491455344e-05, + "project_name": "bert-ner-japanese" + }, + { + "url": "https://huggingface.co/hotchpotch/japanese-reranker-cross-encoder-base-v1", + "downloads": 590, + "description": "hotchpotch/japanese-reranker-cross-encoder-base-v1日本語で学習させた Reranker 
(CrossEncoder) trained on Japanese.", + "source": "Hugging Face", + "score": 6.624634297730377e-05, + "project_name": "japanese-reranker-cross-encoder-base-v1" + }, + { + "url": "https://huggingface.co/mmnga/rinna-japanese-gpt-neox-3.6b-gguf", + "downloads": 563, + "description": "rinna/japanese-gpt-neox-3.6b: A gguf conversion of japanese-gpt-neox-3.6b released by rinna.", + "source": "Hugging Face", + "score": 6.321473067156276e-05, + "project_name": "rinna-japanese-gpt-neox-3.6b-gguf" + }, + { + "url": "https://huggingface.co/mmnga/rinna-japanese-gpt-neox-3.6b-instruction-ppo-gguf", + "downloads": 562, + "description": "rinna/japanese-gpt-neox-3.6b-instruction-ppo: A gguf conversion of japanese-gpt-neox-3.6b-instruction-ppo released by rinna.", + "source": "Hugging Face", + "score": 6.310244873431309e-05, + "project_name": "rinna-japanese-gpt-neox-3.6b-instruction-ppo-gguf" + }, + { + "url": "https://huggingface.co/sonoisa/sentence-t5-base-ja-mean-tokens", + "downloads": 559, + "description": "This is a Japanese sentence-T5 model.", + "source": "Hugging Face", + "score": 6.276560292256409e-05, + "project_name": "sentence-t5-base-ja-mean-tokens" + }, + { + "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-70B-GGUF", + "downloads": 556, + "description": "Chat & support: TheBloke's Discord server. Want to contribute?", + "source": "Hugging Face", + "score": 6.242875711081509e-05, + "project_name": "japanese-stablelm-instruct-beta-70B-GGUF" + }, + { + "url": "https://huggingface.co/ku-nlp/deberta-v2-large-japanese", + "downloads": 556, + "description": "Model Card for Japanese DeBERTa V2 large: This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR. How to use: You can use this model for masked language modeling as follows: from transformers import AutoTokenizer, AutoModelForMaskedLM; tokenizer = AutoTokenizer.from_pretrained('ku-nlp/deberta-v2-large-japanese')", + "source": "Hugging Face", + "score": 6.242875711081509e-05, + "project_name": "deberta-v2-large-japanese" + }, + { + "url": "https://huggingface.co/mmnga/stockmark-100b-gguf", + "downloads": 555, + "description": "stockmark-100b-gguf: A gguf-format conversion of stockmark-100b released by stockmark.", + "source": "Hugging Face", + "score": 6.231647517356542e-05, + "project_name": "stockmark-100b-gguf" + }, + { + "url": "https://huggingface.co/dddump/Japanese-TextGen-Kage-v0.1-2x7B-gguf", + "downloads": 538, + "description": "Japanese-TextGen-Kage-v0.1-2x7B: Kage is \"影\" in Japanese or \"Shadow\" in English.", + "source": "Hugging Face", + "score": 6.0407682240321074e-05, + "project_name": "Japanese-TextGen-Kage-v0.1-2x7B-gguf" + }, + { + "url": "https://huggingface.co/studio-ousia/luke-japanese-large", + "downloads": 533, + "description": "luke-japanese-large: luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", + "source": "Hugging Face", + "score": 5.984627255407274e-05, + "project_name": "luke-japanese-large" + }, + { + "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-1.7b-instruction-sft-gguf", + "downloads": 528, + "description": "line-corporation/japanese-large-lm-1.7b-instruction-sft: A gguf conversion of japanese-large-lm-1.7b-instruction-sft released by line-corporation.", + "source": "Hugging Face", + "score": 5.9284862867824396e-05, + "project_name": "line-corp-japanese-large-lm-1.7b-instruction-sft-gguf" + }, + { + "url":
"https://huggingface.co/skytnt/gpt2-japanese-lyric-small", + "downloads": 526, + "description": "Japanese GPT2 Lyric ModelModel descriptionThe model is used to generate Japanese lyrics.", + "source": "Hugging Face", + "score": 5.906029899332506e-05, + "project_name": "gpt2-japanese-lyric-small" + }, + { + "url": "https://huggingface.co/classla/xlm-roberta-base-multilingual-text-genre-classifier", + "downloads": 526, + "description": "X-GENRE classifier - multilingual text genre classifierText classification model based on xlm-roberta-base and fine-tuned on a combination of three genre datasets: Slovene GINCO dataset (Kuzman et al.", + "source": "Hugging Face", + "score": 5.906029899332506e-05, + "project_name": "xlm-roberta-base-multilingual-text-genre-classifier" + }, + { + "url": "https://huggingface.co/umiyuki/Umievo-itr012-Gleipnir-7B", + "downloads": 525, + "description": "Umievo-itr012-Gleipnir-7Bこのモデルは強力な4つの日本語モデルを進化的アルゴリズムで進化的マージしたものです。", + "source": "Hugging Face", + "score": 5.8948017056075396e-05, + "project_name": "Umievo-itr012-Gleipnir-7B" + }, + { + "url": "https://huggingface.co/mmnga/alfredplpl-Llama-3-8B-Instruct-Ja-gguf", + "downloads": 522, + "description": "alfredplpl-Llama-3-8B-Instruct-Ja-ggufalfredplplさんが公開しているLlama-3-8B-Instruct-Jaのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 5.8611171244326396e-05, + "project_name": "alfredplpl-Llama-3-8B-Instruct-Ja-gguf" + }, + { + "url": "https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja", + "downloads": 521, + "description": "Parakeet TDT-CTC 0.6B (ja)||parakeet-tdt_ctc-0.6b-ja is an ASR model that transcribes Japanese speech with Punctuations.", + "source": "Hugging Face", + "score": 5.8498889307076724e-05, + "project_name": "parakeet-tdt_ctc-0.6b-ja" + }, + { + "url": "https://huggingface.co/sonoisa/t5-base-japanese-question-generation", + "downloads": 516, + "description": "回答と回答が出てくるパラグラフを与えると質問文を生成するモデルSEE: https://github.com/sonoisa/deep-question-generation本モデルの作成ステップ概要SQuAD 1.1を日本語に機械翻訳し、不正なデータをクレンジング(有効なデータは約半分)。", + "source": "Hugging Face", + "score": 5.793747962082839e-05, + "project_name": "t5-base-japanese-question-generation" + }, + { + "url": "https://huggingface.co/Aratako/c4ai-command-r-v01-japanese-instruct-GGUF", + "downloads": 502, + "description": "c4ai-command-r-v01-japanese-instruct-GGUF概要Aratako/c4ai-command-r-v01-japanese-instructの量子化済みGGUF版です。", + "source": "Hugging Face", + "score": 5.6365532499333046e-05, + "project_name": "c4ai-command-r-v01-japanese-instruct-GGUF" + }, + { + "url": "https://huggingface.co/mmnga/pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf", + "downloads": 501, + "description": "pfnet-nekomata-14b-pfn-qfin-inst-merge-ggufpfnetさんが公開しているnekomata-14b-pfn-qfin-inst-mergeのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 5.6253250562083375e-05, + "project_name": "pfnet-nekomata-14b-pfn-qfin-inst-merge-gguf" + }, + { + "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-1.7b-gguf", + "downloads": 495, + "description": "line-corporation/japanese-large-lm-1.7bline-corporationさんが公開しているjapanese-large-lm-1.7bのgguf変換版です。", + "source": "Hugging Face", + "score": 5.5579558938585375e-05, + "project_name": "line-corp-japanese-large-lm-1.7b-gguf" + }, + { + "url": "https://huggingface.co/nlp-waseda/bigbird-base-japanese", + "downloads": 494, + "description": "nlp-waseda/bigbird-base-japaneseModel descriptionThis is a Japanese BigBird base model pretrained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.How to useYou can 
use this model for masked language modeling as follows: from transformers import AutoTokenizer, AutoModelForMaskedLM; tokenizer = AutoTokenizer.from_pretrained(\"nlp-waseda/bigbird-base-japanese\")", + "source": "Hugging Face", + "score": 5.5467277001335704e-05, + "project_name": "bigbird-base-japanese" + }, + { + "url": "https://huggingface.co/NTQAI/chatntq-ja-7b-v1.0", + "downloads": 494, + "description": "ChatNTQ JA 7B V1.0: Model", + "source": "Hugging Face", + "score": 5.5467277001335704e-05, + "project_name": "chatntq-ja-7b-v1.0" + }, + { + "url": "https://huggingface.co/studio-ousia/luke-japanese-base", + "downloads": 486, + "description": "luke-japanese: luke-japanese is the Japanese version of LUKE (Language Understanding with Knowledge-based Embeddings), a pre-trained knowledge-enhanced contextualized representation of words and entities.", + "source": "Hugging Face", + "score": 5.456902150333837e-05, + "project_name": "luke-japanese-base" + }, + { + "url": "https://huggingface.co/vumichien/wav2vec2-large-xlsr-japanese-hiragana", + "downloads": 482, + "description": "Wav2Vec2-Large-XLSR-53-Japanese: Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using Common Voice and JSUT, the Japanese speech corpus of Saruwatari-lab, University of Tokyo. When using this model, make sure that your speech input is sampled at 16kHz.", + "source": "Hugging Face", + "score": 5.41198937543397e-05, + "project_name": "wav2vec2-large-xlsr-japanese-hiragana" + }, + { + "url": "https://huggingface.co/llm-book/t5-base-long-livedoor-news-corpus", + "downloads": 466, + "description": "llm-book/t5-base-long-livedoor-news-corpus: The summarization model introduced in Chapter 7 of 「大規模言語モデル入門」 (Introduction to Large Language Models).", + "source": "Hugging Face", + "score": 5.232338275834502e-05, + "project_name": "t5-base-long-livedoor-news-corpus" + }, + { + "url": "https://huggingface.co/NTQAI/wav2vec2-large-japanese", + "downloads": 463, + "description": "Wav2Vec2-Large-Japanese: Fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice, JSUT, TEDxJP and some other data.", + "source": "Hugging Face", + "score": 5.198653694659602e-05, + "project_name": "wav2vec2-large-japanese" + }, + { + "url": "https://huggingface.co/mmnga/aya-23-35B-gguf", + "downloads": 461, + "description": "aya-23-35B-gguf: A gguf-format conversion of aya-23-35B released by CohereForAI.", + "source": "Hugging Face", + "score": 5.176197307209668e-05, + "project_name": "aya-23-35B-gguf" + }, + { + "url": "https://huggingface.co/mmnga/pfnet-nekomata-14b-pfn-qfin-gguf", + "downloads": 455, + "description": "pfnet-nekomata-14b-pfn-qfin-gguf: A gguf-format conversion of nekomata-14b-pfn-qfin released by pfnet.", + "source": "Hugging Face", + "score": 5.1088281448598676e-05, + "project_name": "pfnet-nekomata-14b-pfn-qfin-gguf" + }, + { + "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-seq512", + "downloads": 451, + "description": "nlp-waseda/roberta-large-japanese-seq512: This is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100 with a maximum sequence length of 512. How to use: You can use this model for masked language modeling as follows: from transformers import AutoTokenizer, AutoModelForMaskedLM; tokenizer = AutoTokenizer.from_pretrained(\"nlp-waseda/roberta-large-japanese-seq512\")", + "source": "Hugging Face", + "score": 5.0639153699600005e-05, + "project_name": "roberta-large-japanese-seq512" + }, + { + "url": "https://huggingface.co/tsmatz/mt5_summarize_japanese", + "downloads": 451, + "description": "mt5_summarize_japanese (Japanese caption:
a model for Japanese summarization) This model is a fine-tuned version of google/mt5-small trained for Japanese summarization.", + "source": "Hugging Face", + "score": 5.0639153699600005e-05, + "project_name": "mt5_summarize_japanese" + }, + { + "url": "https://huggingface.co/rinna/nekomata-7b", + "downloads": 442, + "description": "rinna/nekomata-7b: We conduct continual pre-training of qwen-7b on 30B tokens from a mixture of Japanese and English datasets.", + "source": "Hugging Face", + "score": 4.9628616264353e-05, + "project_name": "nekomata-7b" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jnli", + "downloads": 430, + "description": "bert-base-japanese-v3-jnli: The natural language inference model introduced in Chapter 5 of 「大規模言語モデル入門」 (Introduction to Large Language Models).", + "source": "Hugging Face", + "score": 4.828123301735699e-05, + "project_name": "bert-base-japanese-v3-jnli" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-unsup-simcse-jawiki", + "downloads": 428, + "description": "bert-base-japanese-v3-unsup-simcse-jawiki: The unsupervised SimCSE model introduced in Chapter 8 of 「大規模言語モデル入門」 (Introduction to Large Language Models).", + "source": "Hugging Face", + "score": 4.8056669142857655e-05, + "project_name": "bert-base-japanese-v3-unsup-simcse-jawiki" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-CodeLlama-7b-instruct", + "downloads": 425, + "description": "ELYZA-japanese-CodeLlama-7b: ELYZA-japanese-CodeLlama-7b is a model based on Code Llama with additional pre-training to extend its Japanese capabilities.", + "source": "Hugging Face", + "score": 4.7719823331108655e-05, + "project_name": "ELYZA-japanese-CodeLlama-7b-instruct" + }, + { + "url": "https://huggingface.co/second-state/ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF", + "downloads": 410, + "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF: Original model: elyza/ELYZA-japanese-Llama-2-13b-fast-instruct. Run with LlamaEdge, version v0.2.8 and above. Prompt type: llama-2-chat. Prompt string: <s>[INST] <<SYS>>{{ system_prompt }}<</SYS>>{{ user_msg_1 }}", + "source": "Hugging Face", + "score": 4.603559427236364e-05, + "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-GGUF" + }, + { + "url": "https://huggingface.co/maddes8cht/stabilityai-japanese-stablelm-3b-4e1t-instruct-gguf", + "downloads": 403, + "description": "I'm constantly enhancing these model descriptions to provide you with the most relevant and comprehensive information. japanese-stablelm-3b-4e1t-instruct - GGUF. Model creator: stabilityai. Original model: japanese-stablelm-3b-4e1t-instruct. This is a model based on StableLM. StableLM is a family of language models by Stability AI. Note: Current (as of 2023-11-15) implementations of Llama.cpp only support GPU offloading up to 34 layers with these StableLM models.", + "source": "Hugging Face", + "score": 4.524962071161597e-05, + "project_name": "stabilityai-japanese-stablelm-3b-4e1t-instruct-gguf" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-3b-4e1t-instruct", + "downloads": 402, + "description": "Japanese StableLM-3B-4E1T Instruct: This is a 3B-parameter decoder-only Japanese language model fine-tuned on instruction-following datasets, built on top of the base model Japanese StableLM-3B-4E1T Base.", + "source": "Hugging Face", + "score": 4.5137338774366305e-05, + "project_name": "japanese-stablelm-3b-4e1t-instruct" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-beta-7b", + "downloads": 401, + "description": "Japanese-StableLM-Base-Beta-7B: \"A cute robot wearing a kimono writes calligraphy with one single brush\" (Stable Diffusion XL). Model
Descriptionjapanese-stablelm-base-beta-7b is a 7B-parameter decoder-only language model based on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", + "source": "Hugging Face", + "score": 4.5025056837116634e-05, + "project_name": "japanese-stablelm-base-beta-7b" + }, + { + "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft", + "downloads": 399, + "description": "japanese-large-lm-1.7b-instruction-sftThis repository provides a 1.7B parameters Japanese language model, fine-tuned and trained by LINE Corporation.", + "source": "Hugging Face", + "score": 4.48004929626173e-05, + "project_name": "japanese-large-lm-1.7b-instruction-sft" + }, + { + "url": "https://huggingface.co/mmnga/SakanaAI-EvoLLM-JP-A-v1-7B-gguf", + "downloads": 396, + "description": "SakanaAI-EvoLLM-JP-A-v1-7B-ggufSakanaAIさんが公開しているEvoLLM-JP-A-v1-7Bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 4.44636471508683e-05, + "project_name": "SakanaAI-EvoLLM-JP-A-v1-7B-gguf" + }, + { + "url": "https://huggingface.co/mmnga/lightblue-Karasu-Mixtral-8x22B-v0.1-gguf", + "downloads": 395, + "description": "lightblue-Karasu-Mixtral-8x22B-v0.1-gguflightblueさんが公開しているKarasu-Mixtral-8x22B-v0.1のggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 4.4351365213618634e-05, + "project_name": "lightblue-Karasu-Mixtral-8x22B-v0.1-gguf" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-char-v2", + "downloads": 385, + "description": "BERT large Japanese (character-level tokenization with whole word masking, CC-100 and jawiki-20230102)This is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 4.3228545841121956e-05, + "project_name": "bert-large-japanese-char-v2" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-small-short", + "downloads": 381, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 4.2779418092123285e-05, + "project_name": "t5-small-short" + }, + { + "url": "https://huggingface.co/TFMC/Japanese-Starling-ChatV-7B", + "downloads": 380, + "description": "Japanese-Starling-ChatV-7Bこのモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", + "source": "Hugging Face", + "score": 4.266713615487362e-05, + "project_name": "Japanese-Starling-ChatV-7B" + }, + { + "url": "https://huggingface.co/abeja/Mixtral-8x7B-v0.1-japanese", + "downloads": 364, + "description": "Mixtral-8x7B-v0.1-japaneseMixtral-8x7B-v0.1-japaneseはMixtral-8x7B-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", + "source": "Hugging Face", + "score": 4.087062515887894e-05, + "project_name": "Mixtral-8x7B-v0.1-japanese" + }, + { + "url": "https://huggingface.co/rinna/nue-asr", + "downloads": 362, + "description": "rinna/nue-asrOverview[Paper][GitHub]We propose a novel end-to-end speech recognition model, Nue ASR, which integrates pre-trained speech and language models.", + "source": "Hugging Face", + "score": 4.0646061284379606e-05, + "project_name": "nue-asr" + }, + { + "url": "https://huggingface.co/OrionStarAI/Orion-14B-LongChat", + "downloads": 357, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 | 🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 4.008465159813127e-05, + "project_name": "Orion-14B-LongChat" + }, + { + "url": "https://huggingface.co/ku-nlp/bart-base-japanese", + "downloads": 354, + "description": "Model Card for Japanese BART baseModel", + "source": 
"Hugging Face", + "score": 3.9747805786382264e-05, + "project_name": "bart-base-japanese" + }, + { + "url": "https://huggingface.co/mmnga/ELYZA-japanese-CodeLlama-7b-gguf", + "downloads": 343, + "description": "ELYZA-japanese-CodeLlama-7b-ggufELYZAさんが公開しているELYZA-japanese-CodeLlama-7b-instructのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 3.851270447663593e-05, + "project_name": "ELYZA-japanese-CodeLlama-7b-gguf" + }, + { + "url": "https://huggingface.co/ken11/bert-japanese-ner", + "downloads": 339, + "description": "bert-japanese-nerこのモデルは日本語の固有表現抽出タスクを目的として、京都大学 黒橋・褚・村脇研究室が公開しているBERT日本語Pretrainedモデルをベースにストックマーク株式会社が公開しているner-wikipedia-datasetでファインチューニングしたものです。", + "source": "Hugging Face", + "score": 3.806357672763726e-05, + "project_name": "bert-japanese-ner" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jsts", + "downloads": 339, + "description": "bert-base-japanese-v3-jsts「大規模言語モデル入門」の第5章で紹介している(意味類似度計算)のモデルです。", + "source": "Hugging Face", + "score": 3.806357672763726e-05, + "project_name": "bert-base-japanese-v3-jsts" + }, + { + "url": "https://huggingface.co/maddes8cht/stabilityai-japanese-stablelm-3b-4e1t-base-gguf", + "downloads": 332, + "description": "I'm constantly enhancing these model descriptions to provide you with the most relevant and comprehensive informationjapanese-stablelm-3b-4e1t-base - GGUFModel creator: stabilityaiOriginal model: japanese-stablelm-3b-4e1t-baseStableLMThis is a Model based on StableLM.Stablelm is a familiy of Language Models by Stability AI.Note:Current (as of 2023-11-15) implementations of Llama.cpp only support GPU offloading up to 34 Layers with these StableLM Models.", + "source": "Hugging Face", + "score": 3.7277603166889586e-05, + "project_name": "stabilityai-japanese-stablelm-3b-4e1t-base-gguf" + }, + { + "url": "https://huggingface.co/Local-Novel-LLM-project/Ninja-v1", + "downloads": 331, + "description": "Our ModelsVecteusNinja-v1Ninja-v1-NSFWNinja-v1-128kNinja-v1-NSFW-128kModel Card for Ninja-v1.0The Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1Ninja has the following changes compared to Mistral-7B-v0.1.Achieving both high quality Japanese and English generationMemory ability that does not forget even after long-context generationThis model was created with the help of GPUs from the first LocalAI hackathon.", + "source": "Hugging Face", + "score": 3.7165321229639914e-05, + "project_name": "Ninja-v1" + }, + { + "url": "https://huggingface.co/mmnga/SakanaAI-EvoLLM-JP-v1-7B-gguf", + "downloads": 328, + "description": "SakanaAI-EvoLLM-JP-v1-7B-ggufSakanaAIさんが公開しているEvoLLM-JP-v1-7Bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 3.6828475417890914e-05, + "project_name": "SakanaAI-EvoLLM-JP-v1-7B-gguf" + }, + { + "url": "https://huggingface.co/sociocom/MedNERN-CR-JA", + "downloads": 314, + "description": "This is a model for named entity recognition of Japanese medical documents.", + "source": "Hugging Face", + "score": 3.525652829639557e-05, + "project_name": "MedNERN-CR-JA" + }, + { + "url": "https://huggingface.co/TheBloke/japanese-stablelm-base-beta-70B-GGUF", + "downloads": 311, + "description": "Chat & support: TheBloke's Discord serverWant to contribute?", + "source": "Hugging Face", + "score": 3.4919682484646565e-05, + "project_name": "japanese-stablelm-base-beta-70B-GGUF" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-beta-7b", + "downloads": 310, + "description": 
"Japanese-StableLM-Instruct-Beta-7BA cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XLModel Descriptionjapanese-stablelm-instruct-beta-7b is a 7B-parameter decoder-only language model based on", + "source": "Hugging Face", + "score": 3.48074005473969e-05, + "project_name": "japanese-stablelm-instruct-beta-7b" + }, + { + "url": "https://huggingface.co/umiyuki/Japanese-WizardLM2-ChatV-7B-GGUF", + "downloads": 291, + "description": "Japanese-WizardLM2-ChatV-7B-GGUFGGUF conversion of \"Japanese-WizardLM2-ChatV-7B\"This model, Japanese-WizardLM2-ChatV-7B, is based on \"chatntq-ja-7b-v1.0 \", and was created by subtracting \"Mistral-7B-v0.1\" from \"WizardLM-2-7b\" ChatVector was added by a factor of 1.0.We aimed to add the high performance of WizardLM-2 to the Japanese language capability of ChatNTQ.", + "source": "Hugging Face", + "score": 3.267404373965322e-05, + "project_name": "Japanese-WizardLM2-ChatV-7B-GGUF" + }, + { + "url": "https://huggingface.co/bclavie/fio-base-japanese-v0.1", + "downloads": 288, + "description": "fio-base-japanese-v0.1日本語版は近日公開予定です(日本語を勉強中なので、間違いはご容赦ください!", + "source": "Hugging Face", + "score": 3.2337197927904215e-05, + "project_name": "fio-base-japanese-v0.1" + }, + { + "url": "https://huggingface.co/kit-nlp/bert-base-japanese-sentiment-irony", + "downloads": 282, + "description": "BERT Base Japanese for IronyThis is a BERT Base model for sentiment analysis in Japanese additionally finetuned for automatic irony detection.", + "source": "Hugging Face", + "score": 3.1663506304406215e-05, + "project_name": "bert-base-japanese-sentiment-irony" + }, + { + "url": "https://huggingface.co/watashiha/watashiha-gpt-6b", + "downloads": 280, + "description": "モデル概要AWSのtrn1インスタンスを用いて開発した大喜利言語モデルです。", + "source": "Hugging Face", + "score": 3.143894242990688e-05, + "project_name": "watashiha-gpt-6b" + }, + { + "url": "https://huggingface.co/aipib/karasu-lora-jp-qa-chat", + "downloads": 277, + "description": "karasu-lora-jp-qa-chatkarasu fine tuned model by lora method with the original Q&A dataset.", + "source": "Hugging Face", + "score": 3.110209661815787e-05, + "project_name": "karasu-lora-jp-qa-chat" + }, + { + "url": "https://huggingface.co/votepurchase/Yaki-Dofu-Mix", + "downloads": 274, + "description": "Yaki-Dofu-Mix概要 / OverviewYaki-Dofu-Mixは、アニメ風の画風に特化したマージモデルです。 ", + "source": "Hugging Face", + "score": 3.076525080640887e-05, + "project_name": "Yaki-Dofu-Mix" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora-ud-head", + "downloads": 262, + "description": "deberta-base-japanese-aozora-ud-headModel", + "source": "Hugging Face", + "score": 2.9417867559412866e-05, + "project_name": "deberta-base-japanese-aozora-ud-head" + }, + { + "url": "https://huggingface.co/izumi-lab/bert-base-japanese-fin-additional", + "downloads": 259, + "description": "Additional pretrained BERT base Japanese financeThis is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 2.9081021747663862e-05, + "project_name": "bert-base-japanese-fin-additional" + }, + { + "url": "https://huggingface.co/cyberagent/xlm-roberta-large-jnli-jsick", + "downloads": 255, + "description": "Japanese Natural Language Inference ModelThis model was trained using SentenceTransformers Cross-Encoder class, gradient accumulation PR, and the code from CyberAgentAILab/japanese-nli-model.", + "source": "Hugging Face", + "score": 2.863189399866519e-05, + "project_name": "xlm-roberta-large-jnli-jsick" + }, + 
{ + "url": "https://huggingface.co/ku-nlp/gpt2-small-japanese-char", + "downloads": 251, + "description": "Model Card for Japanese character-level GPT-2 SmallModel descriptionThis is a Japanese character-level GPT-2 Small (90M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.How to useYou can use this model directly with a pipeline for text generation.", + "source": "Hugging Face", + "score": 2.8182766249666523e-05, + "project_name": "gpt2-small-japanese-char" + }, + { + "url": "https://huggingface.co/ybelkada/japanese-roberta-question-answering", + "downloads": 250, + "description": "RoBERTa base Japanese - JaQuADDescriptionA Japanese Question Answering model fine-tuned on JaQuAD.Please refer RoBERTa base Japanese for details about the pre-training model.", + "source": "Hugging Face", + "score": 2.8070484312416855e-05, + "project_name": "japanese-roberta-question-answering" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-QA", + "downloads": 240, + "description": "このモデルはluke-japanese-base-liteをファインチューニングして、Question-Answeringに用いれるようにしたものです。", + "source": "Hugging Face", + "score": 2.694766493992018e-05, + "project_name": "luke-japanese-base-finetuned-QA" + }, + { + "url": "https://huggingface.co/taoki/phi3-mini-4k-qlora-jmultiwoz-dolly-amenokaku-alpaca_jp_python-GGUF", + "downloads": 234, + "description": "This repository contains a model trained (QLoRA-SFT)", + "source": "Hugging Face", + "score": 2.6273973316422177e-05, + "project_name": "phi3-mini-4k-qlora-jmultiwoz-dolly-amenokaku-alpaca_jp_python-GGUF" + }, + { + "url": "https://huggingface.co/izumi-lab/bert-small-japanese", + "downloads": 230, + "description": "BERT small Japanese financeThis is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 2.5824845567423506e-05, + "project_name": "bert-small-japanese" + }, + { + "url": "https://huggingface.co/megagonlabs/t5-base-japanese-web", + "downloads": 229, + "description": "t5-base-japanese-web (with Byte-fallback, 32K)Descriptionmegagonlabs/t5-base-japanese-web is a T5 (Text-to-Text Transfer Transformer) model pre-trained on Japanese web texts.", + "source": "Hugging Face", + "score": 2.5712563630173838e-05, + "project_name": "t5-base-japanese-web" + }, + { + "url": "https://huggingface.co/mmnga/shisa-7b-v1-gguf", + "downloads": 229, + "description": "shisa-7b-v1-ggufaugmxntさんが公開しているshisa-7b-v1のggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 2.5712563630173838e-05, + "project_name": "shisa-7b-v1-gguf" + }, + { + "url": "https://huggingface.co/mmnga/Deepreneur-blue-lizard-gguf", + "downloads": 228, + "description": "Deepreneur-blue-lizard-ggufDeepreneurさんが公開しているblue-lizardのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 2.5600281692924174e-05, + "project_name": "Deepreneur-blue-lizard-gguf" + }, + { + "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese", + "downloads": 228, + "description": "nlp-waseda/roberta-large-japaneseModel descriptionThis is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.How to useYou can use this model for masked language modeling as follows:from transformers import AutoTokenizer, AutoModelForMaskedLMtokenizer = AutoTokenizer.from_pretrained(\"nlp-waseda/roberta-large-japanese\")", + "source": "Hugging Face", + "score": 2.5600281692924174e-05, + "project_name": "roberta-large-japanese" + }, + { + "url": 
"https://huggingface.co/sonoisa/clip-vit-b-32-japanese-v1", + "downloads": 226, + "description": "日本語版CLIPモデルThis is a CLIP text/image encoder model for Japanese.", + "source": "Hugging Face", + "score": 2.5375717818424838e-05, + "project_name": "clip-vit-b-32-japanese-v1" + }, + { + "url": "https://huggingface.co/OrionStarAI/Orion-14B-Chat-Int4", + "downloads": 226, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 | 🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 2.5375717818424838e-05, + "project_name": "Orion-14B-Chat-Int4" + }, + { + "url": "https://huggingface.co/turing-motors/heron-chat-git-ja-stablelm-base-7b-v1", + "downloads": 226, + "description": "Heron GIT Japanese StableLM", + "source": "Hugging Face", + "score": 2.5375717818424838e-05, + "project_name": "heron-chat-git-ja-stablelm-base-7b-v1" + }, + { + "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", + "downloads": 225, + "description": "MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUFModel creator: MaziyarPanahiOriginal model: MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1DescriptionMaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1.How to useThanks to TheBloke for preparing an amazing README on how to use GGUF models:About GGUFGGUF is a new format introduced", + "source": "Hugging Face", + "score": 2.526343588117517e-05, + "project_name": "japanese-stablelm-base-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF" + }, + { + "url": "https://huggingface.co/watashiha/Watashiha-Llama-2-13B-Ogiri-sft", + "downloads": 221, + "description": "The English document is here.", + "source": "Hugging Face", + "score": 2.48143081321765e-05, + "project_name": "Watashiha-Llama-2-13B-Ogiri-sft" + }, + { + "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-3.6b-instruction-sft-gguf", + "downloads": 217, + "description": "line-corporation/japanese-large-lm-3.6b-instruction-sftline-corporationさんが公開しているjapanese-large-lm-3.6b-instruction-sftのgguf変換版です。", + "source": "Hugging Face", + "score": 2.436518038317783e-05, + "project_name": "line-corp-japanese-large-lm-3.6b-instruction-sft-gguf" + }, + { + "url": "https://huggingface.co/elyza/ELYZA-japanese-CodeLlama-7b", + "downloads": 204, + "description": "ELYZA-japanese-CodeLlama-7bModel DescriptionELYZA-japanese-CodeLlama-7b は、 Code Llamaをベースとして日本語能力を拡張するために追加事前学習を行ったモデルです。", + "source": "Hugging Face", + "score": 2.2905515198932153e-05, + "project_name": "ELYZA-japanese-CodeLlama-7b" + }, + { + "url": "https://huggingface.co/umiyuki/Japanese-Chat-Umievo-itr004-7b", + "downloads": 202, + "description": "japanese-chat-umievo-itr004-7bThis is a merge of pre-trained language models created using mergekit.", + "source": "Hugging Face", + "score": 2.268095132443282e-05, + "project_name": "Japanese-Chat-Umievo-itr004-7b" + }, + { + "url": "https://huggingface.co/mmnga/Tanuki-ZeRo-gguf", + "downloads": 200, + "description": "Tanuki-ZeRo-ggufkanhatakeyamaさんが公開しているTanuki-ZeRoのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 2.2456387449933485e-05, + "project_name": "Tanuki-ZeRo-gguf" + }, + { + "url": "https://huggingface.co/stanfordnlp/stanza-ja", + "downloads": 199, + "description": "Stanza model for Japanese (ja)Stanza is a collection of accurate and efficient tools for the linguistic analysis of many human languages.", + "source": "Hugging Face", + 
"score": 2.2344105512683817e-05, + "project_name": "stanza-ja" + }, + { + "url": "https://huggingface.co/toshi456/llava-jp-1.3b-v1.0", + "downloads": 185, + "description": "LLaVA-JP Model CardModel detailModel type:LLaVA-JP is a vision-language model that can converse about input images.", + "source": "Hugging Face", + "score": 2.0772158391188474e-05, + "project_name": "llava-jp-1.3b-v1.0" + }, + { + "url": "https://huggingface.co/esnya/japanese_speecht5_tts", + "downloads": 184, + "description": "SpeechT5 (TTS task) for JapaneseSpeechT5 model fine-tuned for Japanese speech synthesis (text-to-speech)", + "source": "Hugging Face", + "score": 2.0659876453938807e-05, + "project_name": "japanese_speecht5_tts" + }, + { + "url": "https://huggingface.co/kit-nlp/bert-base-japanese-sentiment-cyberbullying", + "downloads": 184, + "description": "electra-base-cyberbullyingThis is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", + "source": "Hugging Face", + "score": 2.0659876453938807e-05, + "project_name": "bert-base-japanese-sentiment-cyberbullying" + }, + { + "url": "https://huggingface.co/clu-ling/whisper-large-v2-japanese-5k-steps", + "downloads": 181, + "description": "whisper-large-v2-japanese-5k-stepsThis model is a fine-tuned version of openai/whisper-large-v2 on the Japanese CommonVoice dataset (v11)..", + "source": "Hugging Face", + "score": 2.0323030642189803e-05, + "project_name": "whisper-large-v2-japanese-5k-steps" + }, + { + "url": "https://huggingface.co/ku-nlp/gpt2-large-japanese-char", + "downloads": 179, + "description": "Model Card for Japanese character-level GPT-2 LargeModel descriptionThis is a Japanese character-level GPT-2 Large (717M parameters) language model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.How to useYou can use this model directly with a pipeline for text generation.", + "source": "Hugging Face", + "score": 2.0098466767690468e-05, + "project_name": "gpt2-large-japanese-char" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-xl", + "downloads": 174, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 1.9537057081442132e-05, + "project_name": "t5-xl" + }, + { + "url": "https://huggingface.co/TareHimself/manga-ocr-base", + "downloads": 172, + "description": "Original ModelOptical character recognition for Japanese text, with the main focus being Japanese manga.", + "source": "Hugging Face", + "score": 1.9312493206942796e-05, + "project_name": "manga-ocr-base" + }, + { + "url": "https://huggingface.co/Hemlok/REV-Mix", + "downloads": 166, + "description": "◆REV-Mix\"レボリューション\"なモデルです。", + "source": "Hugging Face", + "score": 1.8638801583444793e-05, + "project_name": "REV-Mix" + }, + { + "url": "https://huggingface.co/MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF", + "downloads": 166, + "description": "MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUFModel creator: MaziyarPanahiOriginal model: MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1DescriptionMaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF contains GGUF format model files for MaziyarPanahi/japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1.How to useThanks to TheBloke for preparing an amazing README on how to use GGUF models:About GGUFGGUF is a new f", + "source": "Hugging Face", + 
"score": 1.8638801583444793e-05, + "project_name": "japanese-stablelm-instruct-gamma-7b-Mistral-7B-Instruct-v0.1-GGUF" + }, + { + "url": "https://huggingface.co/mmnga/aya-23-8B-gguf", + "downloads": 165, + "description": "aya-23-8B-ggufCohereForAIさんが公開しているaya-23-8Bのggufフォーマット変換版です。", + "source": "Hugging Face", + "score": 1.8526519646195125e-05, + "project_name": "aya-23-8B-gguf" + }, + { + "url": "https://huggingface.co/pfnet/plamo-13b-instruct", + "downloads": 163, + "description": "PLaMo-13B-InstructModel DescriptionPLaMo-13B-Instruct is an instruct fine-tuned model built upon the 8192 context length version of PLaMo-13B text generation model.", + "source": "Hugging Face", + "score": 1.830195577169579e-05, + "project_name": "plamo-13b-instruct" + }, + { + "url": "https://huggingface.co/ThePioneer/MoeDiffusionPlusPlus", + "downloads": 162, + "description": "モデル説明 (model explanation)V1 = MoeDiffusion 1.0 + (HassanBlend 1.5 - VMix03) * 0.2V2 = MoeDiffusion 0.6 : HassanBlend 1.5 0.2 : VMix03 : 0.2マージ元のルーツにNAIリークやInsta系モデルが含まれるという噂があるので、NAIリークアンチ・Insta系モデルアンチには非推奨理想の黒髪ポニテ顔が出せるYaguruMagikuを、ある程度顔が近くて制御しやすいAbyssOrangeMix2と混ぜてみた。", + "source": "Hugging Face", + "score": 1.818967383444612e-05, + "project_name": "MoeDiffusionPlusPlus" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-large-short", + "downloads": 159, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 1.785282802269712e-05, + "project_name": "t5-large-short" + }, + { + "url": "https://huggingface.co/sonoisa/t5-base-japanese-title-generation", + "downloads": 158, + "description": "記事本文からタイトルを生成するモデルSEE: https://qiita.com/sonoisa/items/a9af64ff641f0bbfed44", + "source": "Hugging Face", + "score": 1.7740546085447454e-05, + "project_name": "t5-base-japanese-title-generation" + }, + { + "url": "https://huggingface.co/recruit-jp/japanese-clip-vit-b-32-roberta-base", + "downloads": 155, + "description": "recruit-jp/japanese-clip-vit-b-32-roberta-baseOverviewDeveloped by: Recruit Co.", + "source": "Hugging Face", + "score": 1.740370027369845e-05, + "project_name": "japanese-clip-vit-b-32-roberta-base" + }, + { + "url": "https://huggingface.co/aixsatoshi/Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2", + "downloads": 150, + "description": "Swallow-MX-8x7b-NVE-v0.1に対し、Mixtral-8x7B-Instruct-v0.1とMixtral-8x7B-v0.1の差分をマージしたモデルです。", + "source": "Hugging Face", + "score": 1.6842290587450115e-05, + "project_name": "Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2" + }, + { + "url": "https://huggingface.co/cameltech/japanese-gpt-1b-PII-masking", + "downloads": 148, + "description": "japanese-gpt-1b-PII-maskingModel Descriptionjapanese-gpt-1b-PII-masking は、 日本語事前学習済み1B GPTモデルをベースとして、日本語の文章から個人情報をマスキングするように学習したモデルです。", + "source": "Hugging Face", + "score": 1.661772671295078e-05, + "project_name": "japanese-gpt-1b-PII-masking" + }, + { + "url": "https://huggingface.co/TFMC/ChatNTQ-JA-7b-v1.0-GGUF", + "downloads": 147, + "description": "GGUF conversion of NTQAI/chatntq-ja-7b-v1.0ChatNTQ-JA-7b-v1.0 is a Japanese chat fine-tuned model built on top of the stabilityai/japanese-stablelm-base-gamma-7b, which is originally based on Mistral 7B v0.1.", + "source": "Hugging Face", + "score": 1.650544477570111e-05, + "project_name": "ChatNTQ-JA-7b-v1.0-GGUF" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stable-vlm", + "downloads": 145, + "description": "By clicking \"Agree\", you agree to the License Agreement and acknowledge Stability AI's 
Privacy Policy.", + "source": "Hugging Face", + "score": 1.6280880901201775e-05, + "project_name": "japanese-stable-vlm" + }, + { + "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-with-auto-jumanpp", + "downloads": 144, + "description": "nlp-waseda/roberta-large-japanese-with-auto-jumanppModel descriptionThis is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100.How to useYou can use this model for masked language modeling as follows:from transformers import AutoTokenizer, AutoModelForMaskedLMtokenizer = AutoTokenizer.from_pretrained(\"nlp-waseda/roberta-large-japanese-with-auto-jumanpp\")", + "source": "Hugging Face", + "score": 1.6168598963952108e-05, + "project_name": "roberta-large-japanese-with-auto-jumanpp" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-ja_vocab-beta-7b", + "downloads": 144, + "description": "Japanese-StableLM-Instruct-JAVocab-Beta-7BA cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XLModel Descriptionjapanese-stablelm-instruct-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on japanese-stablelm-ja_vocab-beta-7b and further fine tuned on Databricks Dolly-15k, Anthropic HH, and other public data.", + "source": "Hugging Face", + "score": 1.6168598963952108e-05, + "project_name": "japanese-stablelm-instruct-ja_vocab-beta-7b" + }, + { + "url": "https://huggingface.co/stockmark/stockmark-13b-instruct", + "downloads": 143, + "description": "Stockmark-13b-instructStockmark-13b-instruct is an instruction-tuned version of Stockmark-13b, a 13 billion parameter Japanese LLM.", + "source": "Hugging Face", + "score": 1.605631702670244e-05, + "project_name": "stockmark-13b-instruct" + }, + { + "url": "https://huggingface.co/nold/Orion-14B-Base-GGUF", + "downloads": 141, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 |🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 1.5831753152203108e-05, + "project_name": "Orion-14B-Base-GGUF" + }, + { + "url": "https://huggingface.co/reazon-research/reazonspeech-espnet-next", + "downloads": 141, + "description": "reazonspeech-espnet-nextReazonSpeech is a project to maintain freely-available Japanese audiodatasets and ML models.reazonspeech-espnet-next is a \"bleeding-edge\" repository that containslatest ASR models trained by ReazonSpeech team.", + "source": "Hugging Face", + "score": 1.5831753152203108e-05, + "project_name": "reazonspeech-espnet-next" + }, + { + "url": "https://huggingface.co/kotoba-tech/kotoba-speech-v0.1", + "downloads": 139, + "description": "Kotoba-Speech-v0.1Kotoba-Speech v0.1 is a 1.2B Transformer-based speech generative model.", + "source": "Hugging Face", + "score": 1.5607189277703772e-05, + "project_name": "kotoba-speech-v0.1" + }, + { + "url": "https://huggingface.co/izumi-lab/deberta-v2-small-japanese", + "downloads": 135, + "description": "DeBERTa V2 small JapaneseThis is a DeBERTaV2 model pretrained on Japanese texts.", + "source": "Hugging Face", + "score": 1.5158061528705102e-05, + "project_name": "deberta-v2-small-japanese" + }, + { + "url": "https://huggingface.co/Spiral-AI/Spiral-RetNet-3b-base", + "downloads": 130, + "description": "SpiralAI Spiral-RetNet-3b-baseWe have conducted pre-training from scratch on the RetNet (https://arxiv.org/abs/2307.08621)", + "source": "Hugging Face", + "score": 1.4596651842456765e-05, + "project_name": "Spiral-RetNet-3b-base" + }, + { + "url": "https://huggingface.co/sonoisa/t5-base-japanese-adapt", + "downloads": 
129, + "description": "日本語T5 Prefix Language ModelThis is a T5 (Text-to-Text Transfer Transformer)", + "source": "Hugging Face", + "score": 1.4484369905207097e-05, + "project_name": "t5-base-japanese-adapt" + }, + { + "url": "https://huggingface.co/mmnga/line-corp-japanese-large-lm-3.6b-gguf", + "downloads": 129, + "description": "line-corporation/japanese-large-lm-3.6bline-corporationさんが公開しているjapanese-large-lm-3.6bのgguf変換版です。", + "source": "Hugging Face", + "score": 1.4484369905207097e-05, + "project_name": "line-corp-japanese-large-lm-3.6b-gguf" + }, + { + "url": "https://huggingface.co/fukugawa/transformer-lm-japanese-0.1b", + "downloads": 124, + "description": "transformer-lm-japanese-0.1bThis is a JAX/Flax-based transformer language model trained on a Japanese dataset.", + "source": "Hugging Face", + "score": 1.392296021895876e-05, + "project_name": "transformer-lm-japanese-0.1b" + }, + { + "url": "https://huggingface.co/sonoisa/t5-base-japanese-v1.1", + "downloads": 120, + "description": "日本語T5事前学習済みモデルThis is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus.", + "source": "Hugging Face", + "score": 1.347383246996009e-05, + "project_name": "t5-base-japanese-v1.1" + }, + { + "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v1", + "downloads": 120, + "description": "Heron BLIP Japanese StableLM", + "source": "Hugging Face", + "score": 1.347383246996009e-05, + "project_name": "heron-chat-blip-ja-stablelm-base-7b-v1" + }, + { + "url": "https://huggingface.co/tsmatz/roberta_qa_japanese", + "downloads": 120, + "description": "roberta_qa_japanese(Japanese caption : 日本語の (抽出型) 質問応答のモデル)This model is a fine-tuned version of rinna/japanese-roberta-base (pre-trained RoBERTa model provided by rinna Co.", + "source": "Hugging Face", + "score": 1.347383246996009e-05, + "project_name": "roberta_qa_japanese" + }, + { + "url": "https://huggingface.co/sazyou-roukaku/AfterRealXL", + "downloads": 119, + "description": "こちらでアップロードできないので、civitaiにて先に公開しています。", + "source": "Hugging Face", + "score": 1.3361550532710422e-05, + "project_name": "AfterRealXL" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-base-medium", + "downloads": 116, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 1.302470472096142e-05, + "project_name": "t5-base-medium" + }, + { + "url": "https://huggingface.co/sonoisa/t5-base-english-japanese", + "downloads": 115, + "description": "英語+���本語T5事前学習済みモデルThis is a T5 (Text-to-Text Transfer Transformer) model pretrained on English and Japanese balanced corpus.", + "source": "Hugging Face", + "score": 1.2912422783711753e-05, + "project_name": "t5-base-english-japanese" + }, + { + "url": "https://huggingface.co/ysakuramoto/mobilebert-ja", + "downloads": 114, + "description": "MobileBERT 日本語事前学習済みモデル爆誕!!", + "source": "Hugging Face", + "score": 1.2800140846462087e-05, + "project_name": "mobilebert-ja" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-7b-NVE-hf", + "downloads": 114, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 1.2800140846462087e-05, + "project_name": "Swallow-7b-NVE-hf" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-upos", + "downloads": 113, + "description": "bert-large-japanese-uposModel DescriptionThis is a BERT model 
pre-trained on Japanese Wikipedia texts for POS-tagging and dependency-parsing, derived from bert-large-japanese-char-extended.", + "source": "Hugging Face", + "score": 1.2687858909212419e-05, + "project_name": "bert-large-japanese-upos" + }, + { + "url": "https://huggingface.co/Aratako/Ninja-v1-RP-expressive", + "downloads": 112, + "description": "Ninja-v1-RP-expressiveGGUF版はこちら/Click here for the GGUF version概要This is a merge of pre-trained language models created using mergekit.", + "source": "Hugging Face", + "score": 1.2575576971962751e-05, + "project_name": "Ninja-v1-RP-expressive" + }, + { + "url": "https://huggingface.co/minutillamolinara/bert-japanese_finetuned-sentiment-analysis", + "downloads": 112, + "description": "bert-japanese_finetuned-sentiment-analysisThis model was trained from scratch on the Japanese Sentiment Polarity Dictionary dataset.", + "source": "Hugging Face", + "score": 1.2575576971962751e-05, + "project_name": "bert-japanese_finetuned-sentiment-analysis" + }, + { + "url": "https://huggingface.co/rinna/nekomata-14b-instruction-gguf", + "downloads": 106, + "description": "rinna/nekomata-14b-instruction-ggufOverviewThe model is the GGUF version of rinna/nekomata-14b-instruction.", + "source": "Hugging Face", + "score": 1.1901885348464746e-05, + "project_name": "nekomata-14b-instruction-gguf" + }, + { + "url": "https://huggingface.co/inu-ai/dolly-japanese-gpt-1b", + "downloads": 105, + "description": "更新履歴2023年5月7日「oasst1-89k-ja」データセットを追加して対話システムに対応しました。", + "source": "Hugging Face", + "score": 1.178960341121508e-05, + "project_name": "dolly-japanese-gpt-1b" + }, + { + "url": "https://huggingface.co/patrickramos/bert-base-japanese-v2-wrime-fine-tune", + "downloads": 105, + "description": "WRIME-fine-tuned BERT base JapaneseThis model is a Japanese BERTBASE fine-tuned on the WRIME dataset.", + "source": "Hugging Face", + "score": 1.178960341121508e-05, + "project_name": "bert-base-japanese-v2-wrime-fine-tune" + }, + { + "url": "https://huggingface.co/Lasorco/Kokuwa", + "downloads": 104, + "description": "Kokuwalamettaの改良でマージさせるモデル探しをしていたらKiwiMixという面白そうなモデルを見つけました。", + "source": "Hugging Face", + "score": 1.1677321473965412e-05, + "project_name": "Kokuwa" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-instructblip-alpha", + "downloads": 100, + "description": "Japanese InstructBLIP AlphaModel DetailsJapanese InstructBLIP Alpha is a vision-language instruction-following model that can generate Japanese descriptions for input images and optionally input texts such as questions.", + "source": "Hugging Face", + "score": 1.1228193724966742e-05, + "project_name": "japanese-instructblip-alpha" + }, + { + "url": "https://huggingface.co/wolf4032/bert-japanese-token-classification-search-local-cuisine", + "downloads": 100, + "description": "Model Card for Model ID料理を検索するための質問文から、検索用キーワードである固有表現を抽出しますModel DetailsModel Description例えば、「東京の肉料理で、春に食べられる、鶏肉を使った料理を教えてください」という文章を入力すると、「東京 → 都道府県/地方(AREA)」 「肉料理 → 種類(TYPE)」 「春 → 季節(SZN)」 「鶏肉 → 食材(INGR)」のように、固有表現を抽出します抽出対象は、AREA、TYPE、SZN、INGRの4つですLanguage(s) (NLP):", + "source": "Hugging Face", + "score": 1.1228193724966742e-05, + "project_name": "bert-japanese-token-classification-search-local-cuisine" + }, + { + "url": "https://huggingface.co/nlp-waseda/comet-t5-base-japanese", + "downloads": 100, + "description": "COMET-T5 jaFinetuned T5 on ATOMIC ja using a text-to-text language modeling objective.", + "source": "Hugging Face", + "score": 1.1228193724966742e-05, + "project_name": "comet-t5-base-japanese" + 
}, + { + "url": "https://huggingface.co/sappho192/aihub-ja-ko-translator", + "downloads": 99, + "description": "Japanese to Korean translatorJapanese to Korean translator model based on EncoderDecoderModel(bert-japanese+kogpt2)", + "source": "Hugging Face", + "score": 1.1115911787717075e-05, + "project_name": "aihub-ja-ko-translator" + }, + { + "url": "https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.2", + "downloads": 98, + "description": "Llama-3-8B-Instruct-JP-nk2t-v0.2Model Details: Built with Meta Llama 3This is a model that has been fine-tuned (using QLora) on a very small dataset (around 1k) based on Meta's llama-3-8b-instruct.", + "source": "Hugging Face", + "score": 1.1003629850467407e-05, + "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.2" + }, + { + "url": "https://huggingface.co/owner203/japanese-llama-2-13b-gguf", + "downloads": 98, + "description": "Japanese-LLaMA-2-13B-GGUFJapanese-LLaMA-2-13B-GGUFはJapanese-LLaMA-2-13BのGGUF形式です。", + "source": "Hugging Face", + "score": 1.1003629850467407e-05, + "project_name": "japanese-llama-2-13b-gguf" + }, + { + "url": "https://huggingface.co/jweb/japanese-soseki-gpt2-1b", + "downloads": 97, + "description": "japanese-soseki-gpt2-1bThis repository provides a 1.3B-parameter finetuned Japanese GPT2 model.", + "source": "Hugging Face", + "score": 1.0891347913217739e-05, + "project_name": "japanese-soseki-gpt2-1b" + }, + { + "url": "https://huggingface.co/izumi-lab/deberta-v2-base-japanese", + "downloads": 95, + "description": "DeBERTa V2 base JapaneseThis is a DeBERTaV2 model pretrained on Japanese texts.", + "source": "Hugging Face", + "score": 1.0666784038718405e-05, + "project_name": "deberta-v2-base-japanese" + }, + { + "url": "https://huggingface.co/ku-nlp/roberta-base-japanese-char-wwm", + "downloads": 95, + "description": "ku-nlp/roberta-base-japanese-char-wwmModel descriptionThis is a Japanese RoBERTa base model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.This model is trained with character-level tokenization and whole word masking.", + "source": "Hugging Face", + "score": 1.0666784038718405e-05, + "project_name": "roberta-base-japanese-char-wwm" + }, + { + "url": "https://huggingface.co/sonoisa/sentence-bert-base-ja-en-mean-tokens", + "downloads": 94, + "description": "This is a Japanese+English sentence-BERT model.", + "source": "Hugging Face", + "score": 1.0554502101468737e-05, + "project_name": "sentence-bert-base-ja-en-mean-tokens" + }, + { + "url": "https://huggingface.co/ku-nlp/gpt2-medium-japanese-char", + "downloads": 94, + "description": "Model Card for Japanese character-level", + "source": "Hugging Face", + "score": 1.0554502101468737e-05, + "project_name": "gpt2-medium-japanese-char" + }, + { + "url": "https://huggingface.co/vumichien/wav2vec2-large-xlsr-japanese", + "downloads": 88, + "description": "Wav2Vec2-Large-XLSR-53-JapaneseFine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice and Japanese speech corpus of Saruwatari-lab, University of Tokyo JSUT.When using this model, make sure that your speech input is sampled at 16kHz.", + "source": "Hugging Face", + "score": 9.880810477970734e-06, + "project_name": "wav2vec2-large-xlsr-japanese" + }, + { + "url": "https://huggingface.co/ThePioneer/MoeDiffusion", + "downloads": 86, + "description": "モデル説明 (model explanation)YaguruMagiku 0.6 : AbyssOrangeMix2_sfw 0.4マージ元のルーツにNAIリークが含まれるという噂があるので、NAIリークアンチには非推奨理想の黒髪ポニテ顔が出せるYaguruMagikuを、ある程度顔が近くて制御しやすいAbyssOrangeMix2と混ぜてみた。", + "source": "Hugging Face", + 
"score": 9.656246603471398e-06, + "project_name": "MoeDiffusion" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-unidic-luw-upos", + "downloads": 85, + "description": "bert-base-japanese-unidic-luw-uposModel", + "source": "Hugging Face", + "score": 9.54396466622173e-06, + "project_name": "bert-base-japanese-unidic-luw-upos" + }, + { + "url": "https://huggingface.co/ThePioneer/MoeSharpV1", + "downloads": 84, + "description": "モデル説明 (model explanation)MoeDiffusionPlusPlus 0.7 : DreamShaper 3.3 (full) 0.3。", + "source": "Hugging Face", + "score": 9.431682728972064e-06, + "project_name": "MoeSharpV1" + }, + { + "url": "https://huggingface.co/Lasorco/spekulatius", + "downloads": 83, + "description": "spekulatiusマージしているとたまに出てくる「目的の意図とは違うのだけどなんだか消すにはもったいないモデル」をおすそ分けするシリーズです。", + "source": "Hugging Face", + "score": 9.319400791722396e-06, + "project_name": "spekulatius" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-base-japanese-finetuned-QAe", + "downloads": 79, + "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングしてQAタスクに用いれるようにしたものです。", + "source": "Hugging Face", + "score": 8.870273042723727e-06, + "project_name": "deberta-v2-base-japanese-finetuned-QAe" + }, + { + "url": "https://huggingface.co/Miwa-Keita/zenz-v1", + "downloads": 79, + "description": "zenz-v1zenz-v1はGPT-2アーキテクチャに基づくかな漢字変換タスクに特化した言語モデルです。", + "source": "Hugging Face", + "score": 8.870273042723727e-06, + "project_name": "zenz-v1" + }, + { + "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v1-llava-620k", + "downloads": 79, + "description": "Heron BLIP Japanese StableLM", + "source": "Hugging Face", + "score": 8.870273042723727e-06, + "project_name": "heron-chat-blip-ja-stablelm-base-7b-v1-llava-620k" + }, + { + "url": "https://huggingface.co/LoneStriker/SambaLingo-Japanese-Chat-GGUF", + "downloads": 78, + "description": "SambaLingo-Japanese-ChatSambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", + "source": "Hugging Face", + "score": 8.757991105474059e-06, + "project_name": "SambaLingo-Japanese-Chat-GGUF" + }, + { + "url": "https://huggingface.co/rinna/nekomata-7b-instruction-gguf", + "downloads": 78, + "description": "rinna/nekomata-7b-instruction-ggufOverviewThe model is the GGUF version of rinna/nekomata-7b-instruction.", + "source": "Hugging Face", + "score": 8.757991105474059e-06, + "project_name": "nekomata-7b-instruction-gguf" + }, + { + "url": "https://huggingface.co/colorfulscoop/gpt2-small-ja", + "downloads": 75, + "description": "GPT-2 small Japanese modelThis repository contains a GPT2-small model trained on Japanese Wikipedia dataset.", + "source": "Hugging Face", + "score": 8.421145293725057e-06, + "project_name": "gpt2-small-ja" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-char-extended", + "downloads": 73, + "description": "bert-base-japanese-char-extendedModel", + "source": "Hugging Face", + "score": 8.196581419225722e-06, + "project_name": "bert-base-japanese-char-extended" + }, + { + "url": "https://huggingface.co/taishi-i/awesome-japanese-nlp-classification-model", + "downloads": 72, + "description": "Model overviewThis model is the baseline model for awesome-japanese-nlp-classification-dataset.", + "source": "Hugging Face", + "score": 8.084299481976054e-06, + "project_name": "awesome-japanese-nlp-classification-model" + }, + { + "url": "https://huggingface.co/Deepreneur/blue-lizard", + "downloads": 71, + "description": "Deepreneur-blue-lizardModel 
DescriptionDeepreneur-blue-lizardは、MetaのLlama-2-7bに対して、Wikipediaや書籍等の日本語の学習データを用いて追加事前学習と独自データによるファインチューニングを実施したモデルです。", + "source": "Hugging Face", + "score": 7.972017544726388e-06, + "project_name": "blue-lizard" + }, + { + "url": "https://huggingface.co/TeamFnord/manga-ocr", + "downloads": 71, + "description": "Manga OCROptical character recognition for Japanese text, with the main focus being Japanese manga.", + "source": "Hugging Face", + "score": 7.972017544726388e-06, + "project_name": "manga-ocr" + }, + { + "url": "https://huggingface.co/abhishek/autonlp-japanese-sentiment-59363", + "downloads": 71, + "description": "Model Trained Using AutoNLPProblem type: Binary ClassificationModel ID: 59363Validation MetricsLoss: 0.12651239335536957Accuracy: 0.9532079853817648Precision: 0.9729688278823665Recall: 0.9744633462616643AUC: 0.9717333684823413F1: 0.9737155136027014UsageYou can use cURL to access this model:$ curl -X POST -H \"Authorization: Bearer YOUR_API_KEY\" -H \"Content-Type: application/json\" -d '{\"inputs\": \"I love AutoNLP\"}'", + "source": "Hugging Face", + "score": 7.972017544726388e-06, + "project_name": "autonlp-japanese-sentiment-59363" + }, + { + "url": "https://huggingface.co/owner203/japanese-alpaca-2-13b-gguf", + "downloads": 70, + "description": "Japanese-Alpaca-2-13B-GGUFJapanese-Alpaca-2-13B-GGUFはJapanese-Alpaca-2-13BのGGUF形式です。", + "source": "Hugging Face", + "score": 7.85973560747672e-06, + "project_name": "japanese-alpaca-2-13b-gguf" + }, + { + "url": "https://huggingface.co/AIBunCho/japanese-novel-gpt-j-6b", + "downloads": 70, + "description": "AIBunCho/japanese-novel-gpt-j-6bAI BunChoで利用しているモデルです。", + "source": "Hugging Face", + "score": 7.85973560747672e-06, + "project_name": "japanese-novel-gpt-j-6b" + }, + { + "url": "https://huggingface.co/izumi-lab/electra-base-japanese-discriminator", + "downloads": 69, + "description": "ELECTRA base Japanese discriminatorThis is an ELECTRA model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 7.747453670227052e-06, + "project_name": "electra-base-japanese-discriminator" + }, + { + "url": "https://huggingface.co/ku-nlp/bart-large-japanese", + "downloads": 67, + "description": "Model Card for Japanese BART largeModel descriptionThis is a Japanese BART large model pre-trained on Japanese Wikipedia.", + "source": "Hugging Face", + "score": 7.522889795727717e-06, + "project_name": "bart-large-japanese" + }, + { + "url": "https://huggingface.co/zh-plus/faster-whisper-large-v2-japanese-5k-steps", + "downloads": 67, + "description": "Converted from clu-ling/whisper-large-v2-japanese-5k-steps using CTranslate2.Usage:Install pip install faster-whisper (Check faster-whisper for detailed instructions.", + "source": "Hugging Face", + "score": 7.522889795727717e-06, + "project_name": "faster-whisper-large-v2-japanese-5k-steps" + }, + { + "url": "https://huggingface.co/kit-nlp/bert-base-japanese-basic-char-v2-cyberbullying", + "downloads": 67, + "description": "electra-base-cyberbullyingThis is a BERT Base model for the Japanese language finetuned for automatic cyberbullying detection.", + "source": "Hugging Face", + "score": 7.522889795727717e-06, + "project_name": "bert-base-japanese-basic-char-v2-cyberbullying" + }, + { + "url": "https://huggingface.co/Bagus/wav2vec2-xlsr-japanese-speech-emotion-recognition", + "downloads": 66, + "description": "This is for (private) DEMO only.", + "source": "Hugging Face", + "score": 7.4106078584780495e-06, + "project_name": 
"wav2vec2-xlsr-japanese-speech-emotion-recognition" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-lite-jsquad", + "downloads": 66, + "description": "このモデルはluke-japanese-base-liteをファインチューニングして、Question-Answeringに用いれるようにしたものです。", + "source": "Hugging Face", + "score": 7.4106078584780495e-06, + "project_name": "luke-japanese-base-lite-jsquad" + }, + { + "url": "https://huggingface.co/recruit-jp/japanese-typo-detector-roberta-base", + "downloads": 64, + "description": "recruit-jp/japanese-typo-detector-roberta-baseモデルの概要日本語の文章を入力すると各文字ごとに誤字脱字である確率を出力します各ラベルの意味は以下の通りですidlabelmeaning0OK誤字なし1deletion1文字の抜け2insertion_a余分な1文字の挿入3insertion_b直前の文字列と一致する2文字以上の余分な文字の挿入4kanji-conversion_a同一の読みを持つ漢字の入れ替え(誤変換)5kanji-conversion_b近い読みを持つ漢字の入れ替え(誤変換)6substitution1文字の入れ替え7transposition隣接する2文字間の転置8othersその他の入力誤り誤り種類の詳細については学習データセットの元論文をご参照ください日本語 Wikipedia の編集履歴に基づく 入力誤りデータセットと訂正システムの改良その他、モデルの詳細については当社ブログ記事をご参照ください誤字脱字検出モデルをHugging Face Hubに公開しました (Recruit Data Blog)学習データ京都大学大学院情報学研究科知能情", + "source": "Hugging Face", + "score": 7.186043983978715e-06, + "project_name": "japanese-typo-detector-roberta-base" + }, + { + "url": "https://huggingface.co/turing-motors/heron-chat-blip-ja-stablelm-base-7b-v0", + "downloads": 64, + "description": "Heron BLIP Japanese StableLM", + "source": "Hugging Face", + "score": 7.186043983978715e-06, + "project_name": "heron-chat-blip-ja-stablelm-base-7b-v0" + }, + { + "url": "https://huggingface.co/rinna/nekomata-7b-gguf", + "downloads": 61, + "description": "rinna/nekomata-7b-ggufOverviewThe model is the GGUF version of rinna/nekomata-7b.", + "source": "Hugging Face", + "score": 6.849198172229713e-06, + "project_name": "nekomata-7b-gguf" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia", + "downloads": 61, + "description": "deberta-base-japanese-wikipediaModel DescriptionThis is a DeBERTa(V2) model pre-trained on Japanese Wikipedia and 青空文庫 texts.", + "source": "Hugging Face", + "score": 6.849198172229713e-06, + "project_name": "deberta-base-japanese-wikipedia" + }, + { + "url": "https://huggingface.co/izumi-lab/bert-small-japanese-fin", + "downloads": 59, + "description": "BERT small Japanese financeThis is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 6.624634297730378e-06, + "project_name": "bert-small-japanese-fin" + }, + { + "url": "https://huggingface.co/rinna/nekomata-14b-gguf", + "downloads": 58, + "description": "rinna/nekomata-14b-ggufOverviewThe model is the GGUF version of rinna/nekomata-14b.", + "source": "Hugging Face", + "score": 6.51235236048071e-06, + "project_name": "nekomata-14b-gguf" + }, + { + "url": "https://huggingface.co/aerner/lm-v2", + "downloads": 58, + "description": "Aerner LM-v2事前学習から全部日本語で学習させたモデルのバージョン2です。", + "source": "Hugging Face", + "score": 6.51235236048071e-06, + "project_name": "lm-v2" + }, + { + "url": "https://huggingface.co/AndrewMcDowell/wav2vec2-xls-r-300m-japanese", + "downloads": 57, + "description": "This model is a fine-tuned version of facebook/wav2vec2-xls-r-300m on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - JA dataset.", + "source": "Hugging Face", + "score": 6.400070423231043e-06, + "project_name": "wav2vec2-xls-r-300m-japanese" + }, + { + "url": "https://huggingface.co/nlp-waseda/roberta_jtruthfulqa", + "downloads": 56, + "description": "Finetuned Waseda RoBERTa to evaluate the generated answers on JTruthfulQA.", + "source": "Hugging Face", + "score": 6.2877884859813756e-06, + "project_name": 
"roberta_jtruthfulqa" + }, + { + "url": "https://huggingface.co/alabnii/jmedroberta-base-sentencepiece", + "downloads": 55, + "description": "alabnii/jmedroberta-base-sentencepieceModel descriptionThis is a Japanese RoBERTa base model pre-trained on academic articles in medical sciences collected by Japan Science and Technology Agency (JST).", + "source": "Hugging Face", + "score": 6.175506548731708e-06, + "project_name": "jmedroberta-base-sentencepiece" + }, + { + "url": "https://huggingface.co/kz/mt5base-finetuned-ECC-japanese-small", + "downloads": 54, + "description": "Google's mt5-base fine-tuned in Japanese to solve error detection and correction task.", + "source": "Hugging Face", + "score": 6.063224611482041e-06, + "project_name": "mt5base-finetuned-ECC-japanese-small" + }, + { + "url": "https://huggingface.co/sambanovasystems/SambaLingo-Japanese-Chat", + "downloads": 53, + "description": "SambaLingo-Japanese-ChatSambaLingo-Japanese-Chat is a human aligned chat model trained in Japanese and English.", + "source": "Hugging Face", + "score": 5.950942674232373e-06, + "project_name": "SambaLingo-Japanese-Chat" + }, + { + "url": "https://huggingface.co/oshizo/qa-refine-japanese-gpt-1b", + "downloads": 53, + "description": "Model Card for Model IDこのモデルはrinna/japanese-gpt-1bをベースモデルとして、コンテキストからの抽出型QAと、解答を新たなコンテキストでリファインするための学習を行ったモデルです。", + "source": "Hugging Face", + "score": 5.950942674232373e-06, + "project_name": "qa-refine-japanese-gpt-1b" + }, + { + "url": "https://huggingface.co/nk2t/Llama-3-8B-Instruct-japanese-nk2t-v0.3", + "downloads": 50, + "description": "Llama-3-8B-Instruct-JP-nk2t-v0.3Model Details: Built with Meta Llama 3llama-3-8bの日本語継続学習モデルにChatVectorを適用し、さらにQLoraでファインチューニングしたモデルです。", + "source": "Hugging Face", + "score": 5.614096862483371e-06, + "project_name": "Llama-3-8B-Instruct-japanese-nk2t-v0.3" + }, + { + "url": "https://huggingface.co/abhishek/autonlp-japanese-sentiment-59362", + "downloads": 49, + "description": "Model Trained Using AutoNLPProblem type: Binary ClassificationModel ID: 59362Validation MetricsLoss: 0.13092292845249176Accuracy: 0.9527127414314258Precision: 0.9634070704982427Recall: 0.9842171959602166AUC: 0.9667289746092403F1:", + "source": "Hugging Face", + "score": 5.501814925233703e-06, + "project_name": "autonlp-japanese-sentiment-59362" + }, + { + "url": "https://huggingface.co/tohoku-nlp/stable-diffusion-xl-jp-base-1.0", + "downloads": 47, + "description": "(English part follows Japanese one.", + "source": "Hugging Face", + "score": 5.277251050734369e-06, + "project_name": "stable-diffusion-xl-jp-base-1.0" + }, + { + "url": "https://huggingface.co/Local-Novel-LLM-project/Ninja-v1-GGUF", + "downloads": 47, + "description": "Ninja-v1 のGGUF版Our Models for GGUFVecteus-GGUFNinja-v1-GGUFNinja-v1-NSFW-GGUFNinja-v1-128k-GGUFNinja-v1-NSFW-128k-GGUF", + "source": "Hugging Face", + "score": 5.277251050734369e-06, + "project_name": "Ninja-v1-GGUF" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-base-ja_vocab-beta-7b", + "downloads": 47, + "description": "Japanese-StableLM-Base-JAVocab-Beta-7BA cute robot wearing a kimono writes calligraphy with one single brush — Stable Diffusion XLModel Descriptionjapanese-stablelm-base-ja_vocab-beta-7b is a 7B-parameter decoder-only language model based on Llama-2-7b that has been fine-tuned on a diverse collection of Japanese data, with the intent of maximizing downstream performance on Japanese language tasks.", + "source": "Hugging Face", + "score": 5.277251050734369e-06, + "project_name": 
"japanese-stablelm-base-ja_vocab-beta-7b" + }, + { + "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF", + "downloads": 46, + "description": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF概要Aratako/ELYZA-japanese-Llama-2-MoE-2x13B-v0.1の量子化済みGGUF版です。", + "source": "Hugging Face", + "score": 5.164969113484702e-06, + "project_name": "ELYZA-japanese-Llama-2-MoE-2x13B-v0.1-GGUF" + }, + { + "url": "https://huggingface.co/Kendamarron/Tokara-0.5B-Chat-v0.1", + "downloads": 46, + "description": "モデルについてQwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したTokara-0.5B-v0.1にchat vectorで対話能力を加えたモデルになります。", + "source": "Hugging Face", + "score": 5.164969113484702e-06, + "project_name": "Tokara-0.5B-Chat-v0.1" + }, + { + "url": "https://huggingface.co/Formzu/bert-base-japanese-jsnli", + "downloads": 45, + "description": "bert-base-japanese-jsnliThis model is a fine-tuned version of cl-tohoku/bert-base-japanese-v2 on the JSNLI dataset.", + "source": "Hugging Face", + "score": 5.052687176235034e-06, + "project_name": "bert-base-japanese-jsnli" + }, + { + "url": "https://huggingface.co/akiFQC/bert-base-japanese-v3_nli-jsnli", + "downloads": 45, + "description": "Cross-Encoder for Natural Language Inference(NLI) for JapaneseConsidering the results of the JNLI evaluation result, we recommend using akiFQC/bert-base-japanese-v3_nli-jsnli-jnli-jsick for natural language inference in Japanese.", + "source": "Hugging Face", + "score": 5.052687176235034e-06, + "project_name": "bert-base-japanese-v3_nli-jsnli" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-13b-NVE-hf", + "downloads": 45, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 5.052687176235034e-06, + "project_name": "Swallow-13b-NVE-hf" + }, + { + "url": "https://huggingface.co/oshizo/japanese-sexual-moderation-v2", + "downloads": 44, + "description": "japanese-sexual-moderation-v2は、studio-ousia/luke-japanese-large-liteをファインチューニングしたモデルです。", + "source": "Hugging Face", + "score": 4.940405238985367e-06, + "project_name": "japanese-sexual-moderation-v2" + }, + { + "url": "https://huggingface.co/Local-Novel-LLM-project/Ninja-v1-128k", + "downloads": 44, + "description": "Our ModelsVecteusNinja-v1Ninja-v1-NSFWNinja-v1-128kNinja-v1-NSFW-128kModel Card for Ninja-v1-128kThe Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1Ninja-128k has the following changes compared to Mistral-7B-v0.1.128k context window (8k context in v0.1)Achieving both high quality Japanese and English generationMemory ability that does not forget even after long-context generationThis model was created with the help of GPUs from the first LocalAI hack", + "source": "Hugging Face", + "score": 4.940405238985367e-06, + "project_name": "Ninja-v1-128k" + }, + { + "url": "https://huggingface.co/bardsai/finance-sentiment-ja-base", + "downloads": 43, + "description": "Finance Sentiment JA (base)Finance Sentiment JA (base) is a model based on bert-base-japanese for analyzing sentiment of Japanese financial news.", + "source": "Hugging Face", + "score": 4.828123301735699e-06, + "project_name": "finance-sentiment-ja-base" + }, + { + "url": "https://huggingface.co/Kendamarron/Tokara-0.5B-v0.1", + "downloads": 42, + "description": "モデルについてQwen/Qwen1.5-0.5Bを日英データ5Bトークンで継続事前学習したモデルです。", + "source": "Hugging Face", + "score": 4.715841364486032e-06, + "project_name": 
"Tokara-0.5B-v0.1" + }, + { + "url": "https://huggingface.co/slplab/wav2vec2-xls-r-300m-japanese-hiragana", + "downloads": 41, + "description": "Wav2Vec2-XLS-R-300M-Japanese-HiraganaFine-tuned facebook/wav2vec2-xls-r-300m on Japanese Hiragana characters using the Common Voice and JSUT.The sentence outputs do not contain word boundaries.", + "source": "Hugging Face", + "score": 4.603559427236364e-06, + "project_name": "wav2vec2-xls-r-300m-japanese-hiragana" + }, + { + "url": "https://huggingface.co/cinmodel/electra-small-japanese-discriminator", + "downloads": 41, + "description": "Japanese ELECTRA-smallWe provide a Japanese ELECTRA-Small model, as described in ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators.", + "source": "Hugging Face", + "score": 4.603559427236364e-06, + "project_name": "electra-small-japanese-discriminator" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/t5-CAMERA-title-generation", + "downloads": 41, + "description": "sonoisa/t5-base-japaneseをファインチューニングして、タイトル生成に用いれるようにしたモデルです。", + "source": "Hugging Face", + "score": 4.603559427236364e-06, + "project_name": "t5-CAMERA-title-generation" + }, + { + "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-MoE-2x7B-v0.1-GGUF", + "downloads": 40, + "description": "ELYZA-japanese-Llama-2-MoE-2x7B-v0.1-GGUF概要Aratako/ELYZA-japanese-Llama-2-MoE-2x7B-v0.1の量子化済みGGUF版です。", + "source": "Hugging Face", + "score": 4.4912774899866965e-06, + "project_name": "ELYZA-japanese-Llama-2-MoE-2x7B-v0.1-GGUF" + }, + { + "url": "https://huggingface.co/offtoung/tsukuyomi-chan-calm2-7b", + "downloads": 40, + "description": "つくよみちゃんデータセットを用いて calm-2-7b-chat をファインチューニングしたモデルです。", + "source": "Hugging Face", + "score": 4.4912774899866965e-06, + "project_name": "tsukuyomi-chan-calm2-7b" + }, + { + "url": "https://huggingface.co/daisaku-s/medtxt_ner_roberta", + "downloads": 39, + "description": "日本語医療固有表現抽出モデル概要ソーシャル・コンピューティング研究室さまより公開されているMedTxt-CRを用いて、alabniiさまより公開されているRoBERTaをfine-tuningした固有表現抽出モデルです。", + "source": "Hugging Face", + "score": 4.3789955527370295e-06, + "project_name": "medtxt_ner_roberta" + }, + { + "url": "https://huggingface.co/yellowback/gpt-neo-japanese-1.3B", + "downloads": 39, + "description": "GPT-Neo 1.3B pre-trained model for JapaneseModel DescriptionGPT2/GPT3 like model trained on Japanese.corpus.", + "source": "Hugging Face", + "score": 4.3789955527370295e-06, + "project_name": "gpt-neo-japanese-1.3B" + }, + { + "url": "https://huggingface.co/nu-dialogue/sfc2022-stable-diffusion", + "downloads": 39, + "description": "SFCOCO Stable Diffusion Model CardSFCOCO Stable Diffusion is a Japanese-specific latent text-to-image diffusion model capable of generating photo-realistic images given any text input.", + "source": "Hugging Face", + "score": 4.3789955527370295e-06, + "project_name": "sfc2022-stable-diffusion" + }, + { + "url": "https://huggingface.co/SkelterLabsInc/bert-base-japanese-jaquad", + "downloads": 39, + "description": "BERT base Japanese - JaQuADDescriptionA Japanese Question Answering model fine-tuned on JaQuAD.Please refer BERT base Japanese for details about the pre-training model.", + "source": "Hugging Face", + "score": 4.3789955527370295e-06, + "project_name": "bert-base-japanese-jaquad" + }, + { + "url": "https://huggingface.co/ken11/albert-base-japanese-v1-with-japanese-tokenizer", + "downloads": 38, + "description": 
"albert-base-japanese-v1-with-japanese日本語事前学習済みALBERTモデルですこのモデ��ではTokenizerにBertJapaneseTokenizerクラスを利用していますalbert-base-japanese-v1よりトークナイズ処理が楽になっていますHow to useファインチューニングこのモデルはPreTrainedモデルです基本的には各種タスク用にファインチューニングして使用されることを想定していますFill-Maskfor PyTorchfrom transformers import (AutoModelForMaskedLM, AutoTokenizer)tokenizer = AutoTokenizer.from_pretrained(\"ken11/albert-base-japanese-v1-with-japanese-tokenizer\")", + "source": "Hugging Face", + "score": 4.266713615487362e-06, + "project_name": "albert-base-japanese-v1-with-japanese-tokenizer" + }, + { + "url": "https://huggingface.co/haqishen/h2o-Llama-3-8B-Japanese-Instruct", + "downloads": 38, + "description": "IntroductionWho am I: Qishen Ha", + "source": "Hugging Face", + "score": 4.266713615487362e-06, + "project_name": "h2o-Llama-3-8B-Japanese-Instruct" + }, + { + "url": "https://huggingface.co/stockmark/bart-base-japanese-news", + "downloads": 38, + "description": "bart-base-japanese-news(base-sized model)This repository provides a Japanese BART model.", + "source": "Hugging Face", + "score": 4.266713615487362e-06, + "project_name": "bart-base-japanese-news" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-bpr-question-aio", + "downloads": 37, + "description": "bert-base-japanese-v3-bpr-question-aio「大規模言語モデル入門」の第9章で紹介している文書検索モデルBPRの質問エンコーダです。", + "source": "Hugging Face", + "score": 4.154431678237695e-06, + "project_name": "bert-base-japanese-v3-bpr-question-aio" + }, + { + "url": "https://huggingface.co/Helsinki-NLP/opus-mt-ja-nl", + "downloads": 37, + "description": "jpn-nldsource group: Japanesetarget group: DutchOPUS readme: jpn-nldmodel: transformer-alignsource language(s): jpn jpn_Hani jpn_Hira jpn_Kana jpn_Latntarget language(s): nldmodel: transformer-alignpre-processing: normalization + SentencePiece (spm32k,spm32k)", + "source": "Hugging Face", + "score": 4.154431678237695e-06, + "project_name": "opus-mt-ja-nl" + }, + { + "url": "https://huggingface.co/tohoku-nlp/stable-diffusion-xl-jp-refiner-1.0", + "downloads": 36, + "description": "(English part follows Japanese one.", + "source": "Hugging Face", + "score": 4.042149740988027e-06, + "project_name": "stable-diffusion-xl-jp-refiner-1.0" + }, + { + "url": "https://huggingface.co/webbigdata/C3TR-Adapter_gptq", + "downloads": 36, + "description": "Model card英日、日英翻訳用モデルC3TR-AdapterのGPTQ4bit量子化版です。", + "source": "Hugging Face", + "score": 4.042149740988027e-06, + "project_name": "C3TR-Adapter_gptq" + }, + { + "url": "https://huggingface.co/Ivydata/whisper-small-japanese", + "downloads": 36, + "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-smallFine-tuned openai/whisper-small on Japanese using Common Voice, JVS and JSUT.When using this model, make sure that your speech input is sampled at 16kHz.", + "source": "Hugging Face", + "score": 4.042149740988027e-06, + "project_name": "whisper-small-japanese" + }, + { + "url": "https://huggingface.co/snu-nia-12/wav2vec2-xls-r-300m_nia12_phone-hiragana_japanese", + "downloads": 35, + "description": "Wav2Vec2-XLS-R-300M-Japanese-HiraganaFine-tuned facebook/wav2vec2-xls-r-300m on Japanese Hiragana characters using JSUT, JVS, Common Voice, and in-house dataset.", + "source": "Hugging Face", + "score": 3.92986780373836e-06, + "project_name": "wav2vec2-xls-r-300m_nia12_phone-hiragana_japanese" + }, + { + "url": "https://huggingface.co/line-corporation/japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False", + "downloads": 35, + "description": 
"japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_FalseThis repository provides a 1.7B parameters Japanese language quantized model, fine-tuned and trained by LINE Corporation.", + "source": "Hugging Face", + "score": 3.92986780373836e-06, + "project_name": "japanese-large-lm-1.7b-instruction-sft-4bit-128g-actorder_False" + }, + { + "url": "https://huggingface.co/espnet/kan-bayashi_jvs_tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjta-truncated-178804", + "downloads": 34, + "description": "ESPnet2 TTS pretrained modelkan-bayashi/jvs_tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjtalk_accent_with_pause_latest♻", + "source": "Hugging Face", + "score": 3.817585866488692e-06, + "project_name": "kan-bayashi_jvs_tts_finetune_jvs001_jsut_vits_raw_phn_jaconv_pyopenjta-truncated-178804" + }, + { + "url": "https://huggingface.co/Aratako/Ninja-v1-RP", + "downloads": 34, + "description": "Ninja-v1-RPGGUF版はこちら/Click here for the GGUF version概要This is a merge of pre-trained language models created using mergekit.Aratako/Ninja-v1-RP-WIPをベースに、Task Vectorの加算・Model Stockによるマージを行い指示追従能力と表現力を強化したロールプレイ用モデルです。", + "source": "Hugging Face", + "score": 3.817585866488692e-06, + "project_name": "Ninja-v1-RP" + }, + { + "url": "https://huggingface.co/kubota/luke-large-defamation-detection-japanese", + "downloads": 34, + "description": "luke-large-defamation-detection-japanese日本語誹謗中傷検出器This model is a fine-tuned version of studio-ousia/luke-japanese-large for the Japanese language finetuned for automatic defamation detection.", + "source": "Hugging Face", + "score": 3.817585866488692e-06, + "project_name": "luke-large-defamation-detection-japanese" + }, + { + "url": "https://huggingface.co/doc2query/msmarco-japanese-mt5-base-v1", + "downloads": 33, + "description": "doc2query/msmarco-japanese-mt5-base-v1This is a doc2query model based on mT5 (also known as docT5query).", + "source": "Hugging Face", + "score": 3.7053039292390247e-06, + "project_name": "msmarco-japanese-mt5-base-v1" + }, + { + "url": "https://huggingface.co/rinna/japanese-stable-diffusion", + "downloads": 33, + "description": "One more step before getting this model.", + "source": "Hugging Face", + "score": 3.7053039292390247e-06, + "project_name": "japanese-stable-diffusion" + }, + { + "url": "https://huggingface.co/Formzu/roberta-base-japanese-jsnli", + "downloads": 33, + "description": "roberta-base-japanese-jsnliThis model is a fine-tuned version of nlp-waseda/roberta-base-japanese on the JSNLI dataset.", + "source": "Hugging Face", + "score": 3.7053039292390247e-06, + "project_name": "roberta-base-japanese-jsnli" + }, + { + "url": "https://huggingface.co/line-corporation/japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False", + "downloads": 33, + "description": "japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_FalseThis repository provides a 3.6B parameters Japanese language quantized model, fine-tuned and trained by LINE Corporation.", + "source": "Hugging Face", + "score": 3.7053039292390247e-06, + "project_name": "japanese-large-lm-3.6b-instruction-sft-4bit-128g-actorder_False" + }, + { + "url": "https://huggingface.co/ku-nlp/roberta-large-japanese-char-wwm", + "downloads": 33, + "description": "ku-nlp/roberta-large-japanese-char-wwmModel descriptionThis is a Japanese RoBERTa large model pre-trained on Japanese Wikipedia and the Japanese portion of CC-100.This model is trained with character-level tokenization and whole word masking.", + "source": "Hugging Face", + "score": 3.7053039292390247e-06, + 
"project_name": "roberta-large-japanese-char-wwm" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-jsts", + "downloads": 32, + "description": "このモデルはluke-japanese-baseをファインチューニングして、JSTS(文章の類似度計算)に用いれるようにしたものです。", + "source": "Hugging Face", + "score": 3.5930219919893573e-06, + "project_name": "luke-japanese-base-finetuned-jsts" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-marcja", + "downloads": 32, + "description": "このモデルはluke-japanese-baseをファインチューニングして、MARC-ja(positive or negativeの二値分類)に用いれるようにしたものです。", + "source": "Hugging Face", + "score": 3.5930219919893573e-06, + "project_name": "luke-japanese-base-marcja" + }, + { + "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-70B-GPTQ", + "downloads": 32, + "description": "Chat & support: TheBloke's Discord serverWant to contribute?", + "source": "Hugging Face", + "score": 3.5930219919893573e-06, + "project_name": "japanese-stablelm-instruct-beta-70B-GPTQ" + }, + { + "url": "https://huggingface.co/nlp-waseda/gpt2-xl-japanese", + "downloads": 32, + "description": "nlp-waseda/gpt2-xl-japaneseThis is Japanese GPT2 with approximately 1.5B parameters pretrained on Japanese Wikipedia and CC-100The model architecture of the model are based on Radford+ 2019.Intended uses & limitationsYou can use the raw model for text generation or fine-tune it to a downstream task.", + "source": "Hugging Face", + "score": 3.5930219919893573e-06, + "project_name": "gpt2-xl-japanese" + }, + { + "url": "https://huggingface.co/Ivydata/whisper-base-japanese", + "downloads": 31, + "description": "Fine-tuned Japanese Whisper model for speech recognition using whisper-baseFine-tuned openai/whisper-base on Japanese using Common Voice, JVS and JSUT.When using this model, make sure that your speech input is sampled at 16kHz.", + "source": "Hugging Face", + "score": 3.48074005473969e-06, + "project_name": "whisper-base-japanese" + }, + { + "url": "https://huggingface.co/Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF", + "downloads": 31, + "description": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF概要Aratako/ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1の量子化済みGGUF版です。", + "source": "Hugging Face", + "score": 3.48074005473969e-06, + "project_name": "ELYZA-japanese-Llama-2-fast-MoE-2x7B-v0.1-GGUF" + }, + { + "url": "https://huggingface.co/aixsatoshi/Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct", + "downloads": 31, + "description": "更新情報日本語機能とinstructベクトルのバランス調整したver.2をアップロードしましたSwallow-MX-8x7b-NVE-chatvector-Mixtral-instruct-v2モデル概要Swallow-MX-8x7b-NVE-v0.1に対し、Mixtral-8x7B-Instruct-v0.1とMixtral-8x7B-v0.1の差分をマージしたモデルです。", + "source": "Hugging Face", + "score": 3.48074005473969e-06, + "project_name": "Swallow-MX-8x7b-NVE-chatvector-Mixtral-instruct" + }, + { + "url": "https://huggingface.co/sambanovasystems/SambaLingo-Japanese-Base", + "downloads": 31, + "description": "SambaLingo-Japanese-BaseSambaLingo-Japanese-Base is a pretrained Bi-lingual Japanese and English model that adapts Llama-2-7b to Japanese by training on 42 billion tokens from the Japanese split of the Cultura-X dataset.", + "source": "Hugging Face", + "score": 3.48074005473969e-06, + "project_name": "SambaLingo-Japanese-Base" + }, + { + "url": "https://huggingface.co/oshizo/japanese-e5-mistral-7b_slerp", + "downloads": 30, + "description": "This model was created by merging intfloat/e5-mistral-7b-instruct and stabilityai/japanese-stablelm-base-gamma-7b.", + "source": "Hugging Face", + "score": 3.3684581174900226e-06, + 
"project_name": "japanese-e5-mistral-7b_slerp" + }, + { + "url": "https://huggingface.co/Local-Novel-LLM-project/Ocuteus-v1", + "downloads": 30, + "description": "VecteusをベースにLLavaに対応させたモデルです。", + "source": "Hugging Face", + "score": 3.3684581174900226e-06, + "project_name": "Ocuteus-v1" + }, + { + "url": "https://huggingface.co/nlp-waseda/gpt2-small-japanese", + "downloads": 30, + "description": "nlp-waseda/gpt2-small-japaneseThis model is Japanese GPT-2 pretrained on Japanese Wikipedia and CC-100.Intended uses & limitationsYou can use the raw model for text generation or fine-tune it to a downstream task.", + "source": "Hugging Face", + "score": 3.3684581174900226e-06, + "project_name": "gpt2-small-japanese" + }, + { + "url": "https://huggingface.co/OrionStarAI/Orion-14B-Base-Int4", + "downloads": 29, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 |🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 3.256176180240355e-06, + "project_name": "Orion-14B-Base-Int4" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-jcommonsenseqa", + "downloads": 28, + "description": "bert-base-japanese-v3-jcommonsenseqa「大規模言語モデル入門」の第5章で紹介している(多肢選択式質問応答)のモデルです。", + "source": "Hugging Face", + "score": 3.1438942429906878e-06, + "project_name": "bert-base-japanese-v3-jcommonsenseqa" + }, + { + "url": "https://huggingface.co/tokyotech-llm/Swallow-70b-NVE-instruct-hf", + "downloads": 28, + "description": "SwallowOur Swallow model has undergone continual pre-training from the Llama 2 family, primarily with the addition of Japanese language data.", + "source": "Hugging Face", + "score": 3.1438942429906878e-06, + "project_name": "Swallow-70b-NVE-instruct-hf" + }, + { + "url": "https://huggingface.co/schroneko/ELYZA-japanese-Llama-2-13b-fast-instruct-gguf", + "downloads": 28, + "description": "ELYZA-japanese-Llama-2-13b-fast-instruct-ggufELYZA-japanese-Llama-2-13b-fast-instructの GGUF", + "source": "Hugging Face", + "score": 3.1438942429906878e-06, + "project_name": "ELYZA-japanese-Llama-2-13b-fast-instruct-gguf" + }, + { + "url": "https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese", + "downloads": 28, + "description": "Mixtral-8x7B-Instruct-v0.1-japaneseMixtral-8x7B-Instruct-v0.1-japaneseはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施したモデルです。", + "source": "Hugging Face", + "score": 3.1438942429906878e-06, + "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-base-short", + "downloads": 27, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 3.0316123057410204e-06, + "project_name": "t5-base-short" + }, + { + "url": "https://huggingface.co/knok/japanese-distilgpt2", + "downloads": 27, + "description": "日本語 gpt2 蒸留モデルこのモデルはrinna/japanese-gpt2-meduimを教師として蒸留したものです。", + "source": "Hugging Face", + "score": 3.0316123057410204e-06, + "project_name": "japanese-distilgpt2" + }, + { + "url": "https://huggingface.co/Helsinki-NLP/opus-mt-ja-it", + "downloads": 27, + "description": "jpn-itasource group: Japanesetarget group: ItalianOPUS readme: jpn-itamodel: transformer-alignsource language(s): jpn jpn_Hani jpn_Hira jpn_Kana jpn_Latn jpn_Yiiitarget language(s): itamodel: transformer-alignpre-processing: normalization + SentencePiece (spm32k,spm32k)", + "source": "Hugging Face", + "score": 3.0316123057410204e-06, + "project_name": "opus-mt-ja-it" + }, + { + "url": "https://huggingface.co/Tomohiro/RealMedNLP_CR_JA", + "downloads": 26, + 
"description": "This is a model for named entity recognition of Japanese medical documents.", + "source": "Hugging Face", + "score": 2.919330368491353e-06, + "project_name": "RealMedNLP_CR_JA" + }, + { + "url": "https://huggingface.co/studio-ousia/luke-japanese-large-lite", + "downloads": 26, + "description": "luke-japanese-large-liteluke-japanese is the Japanese version of LUKE (LanguageUnderstanding with Knowledge-based Embeddings), a pre-trainedknowledge-enhanced contextualized representation of words and entities.", + "source": "Hugging Face", + "score": 2.919330368491353e-06, + "project_name": "luke-japanese-large-lite" + }, + { + "url": "https://huggingface.co/kz/mt5base-finetuned-patentsum-japanese-small", + "downloads": 25, + "description": "Google's mt5-base fine-tuned in Japanese to summarize patent claims in a limited Pharmaceutical domain.", + "source": "Hugging Face", + "score": 2.8070484312416856e-06, + "project_name": "mt5base-finetuned-patentsum-japanese-small" + }, + { + "url": "https://huggingface.co/sonoisa/t5-base-japanese-mC4-Wikipedia", + "downloads": 25, + "description": "日本語T5事前学習済みモデルThis is a T5 (Text-to-Text Transfer Transformer) model pretrained on Japanese corpus.", + "source": "Hugging Face", + "score": 2.8070484312416856e-06, + "project_name": "t5-base-japanese-mC4-Wikipedia" + }, + { + "url": "https://huggingface.co/akiFQC/bert-base-japanese-v3_nli-jsnli-jnli-jsick", + "downloads": 25, + "description": "Cross-Encoder for Natural Language Inference(NLI) for JapaneseThis model was trained using SentenceTransformers Cross-Encoder class.", + "source": "Hugging Face", + "score": 2.8070484312416856e-06, + "project_name": "bert-base-japanese-v3_nli-jsnli-jnli-jsick" + }, + { + "url": "https://huggingface.co/huranokuma/es", + "downloads": 25, + "description": "ESを書くAIJapanese GPT-2 modelをファインチューニングしましたファインチューニングには、内定者の二万件以上のESを用いました。", + "source": "Hugging Face", + "score": 2.8070484312416856e-06, + "project_name": "es" + }, + { + "url": "https://huggingface.co/colorfulscoop/bert-base-ja", + "downloads": 24, + "description": "BERT base Japanese modelThis repository contains a BERT base model trained on Japanese Wikipedia dataset.", + "source": "Hugging Face", + "score": 2.6947664939920182e-06, + "project_name": "bert-base-ja" + }, + { + "url": "https://huggingface.co/spow12/Visual-novel-transcriptor", + "downloads": 24, + "description": "Model Card for Model IDFine tunned ASR model from distil-whisper/distil-large-v2.This model aimed to transcribe japanese audio especially visual novel.", + "source": "Hugging Face", + "score": 2.6947664939920182e-06, + "project_name": "Visual-novel-transcriptor" + }, + { + "url": "https://huggingface.co/astremo/friendly_JA", + "downloads": 23, + "description": "friendly_JA-Model (T5 fine-tuned model)MT model trained using the friendly_JA Corpus attempting to make Japanese easier/more accessible to occidental people by using the Latin/English derived katakana lexicon instead of the standard Sino-Japanese lexiconExamplesinputoutput最適化を応用した��械翻訳モデルは高精度だオプティマイゼーションを応用したマシントランスレーションモデルは高いアキュラシーだ彼は架空の世界に住んでいる彼はイマジナリー世界に住んでいる新型コロナウイルスに感染してしまったコロナウイルスにかかってしまった深層学習は難しいディープラーニングはむずかしい新たな概念を紹介する新しいコンセプトを紹介する津波の警報が流れたツナミのアラートが流れた南海トラフの災害は震源地による南海トラフのディザスターはエピセンターによる息子は際どい内容の本を", + "source": "Hugging Face", + "score": 2.582484556742351e-06, + "project_name": "friendly_JA" + }, + { + "url": "https://huggingface.co/sonoisa/t5-qiita-title-generation", + "downloads": 23, + "description": "記事本文からタイトルを生成するモデルSEE: 
https://qiita.com/sonoisa/items/30876467ad5a8a81821f", + "source": "Hugging Face", + "score": 2.582484556742351e-06, + "project_name": "t5-qiita-title-generation" + }, + { + "url": "https://huggingface.co/tohoku-nlp/bert-large-japanese-char", + "downloads": 22, + "description": "BERT large Japanese (character-level tokenization with whole word masking, jawiki-20200831): this is a BERT model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 2.4702026194926834e-06, + "project_name": "bert-large-japanese-char" + }, + { + "url": "https://huggingface.co/nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp", + "downloads": 22, + "description": "nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp. Model description: this is a Japanese RoBERTa large model pretrained on Japanese Wikipedia and the Japanese portion of CC-100 with a maximum sequence length of 512. How to use: you can use this model for masked language modeling as follows: from transformers import AutoTokenizer, AutoModelForMaskedLM; tokenizer = AutoTokenizer.from_pretrained(\"nlp-waseda/roberta-large-japanese-seq512-with-auto-jumanpp\")", + "source": "Hugging Face", + "score": 2.4702026194926834e-06, + "project_name": "roberta-large-japanese-seq512-with-auto-jumanpp" + }, + { + "url": "https://huggingface.co/alabnii/jmedroberta-base-manbyo-wordpiece", + "downloads": 22, + "description": "alabnii/jmedroberta-base-manbyo-wordpiece. Model description: this is a Japanese RoBERTa base model pre-trained on academic articles in the medical sciences collected by the Japan Science and Technology Agency (JST).", + "source": "Hugging Face", + "score": 2.4702026194926834e-06, + "project_name": "jmedroberta-base-manbyo-wordpiece" + }, + { + "url": "https://huggingface.co/drewschaub/whisper-large-v3-japanese-4k-steps", + "downloads": 22, + "description": "whisper-large-v3-japanese-4k-steps: this model is a fine-tuned version of openai/whisper-large-v3 on the Common Voice 16.1 dataset.", + "source": "Hugging Face", + "score": 2.4702026194926834e-06, + "project_name": "whisper-large-v3-japanese-4k-steps" + }, + { + "url": "https://huggingface.co/Formzu/bart-base-japanese", + "downloads": 22, + "description": "bart-base-japanese: this model is converted from the original Japanese BART pretrained model released by Kyoto University.", + "source": "Hugging Face", + "score": 2.4702026194926834e-06, + "project_name": "bart-base-japanese" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora-char", + "downloads": 21, + "description": "roberta-base-japanese-aozora-char. Model description: this is a RoBERTa model pre-trained on 青空文庫 texts with a character tokenizer.", + "source": "Hugging Face", + "score": 2.357920682243016e-06, + "project_name": "roberta-base-japanese-aozora-char" + }, + { + "url": "https://huggingface.co/alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000", + "downloads": 21, + "description": "alabnii/jmedroberta-base-manbyo-wordpiece-vocab50000. Model description: this is a Japanese RoBERTa base model pre-trained on academic articles in the medical sciences collected by the Japan Science and Technology Agency (JST).", + "source": "Hugging Face", + "score": 2.357920682243016e-06, + "project_name": "jmedroberta-base-manbyo-wordpiece-vocab50000" + }, + { + "url": "https://huggingface.co/DavidAU/alpaca-guanaco-japanese-gpt-1b-Q8_0-GGUF", + "downloads": 21, + "description": "DavidAU/alpaca-guanaco-japanese-gpt-1b-Q8_0-GGUF: this model was converted to GGUF format from inu-ai/alpaca-guanaco-japanese-gpt-1b
using llama.cpp via ggml.ai's GGUF-my-repo space.", + "source": "Hugging Face", + "score": 2.357920682243016e-06, + "project_name": "alpaca-guanaco-japanese-gpt-1b-Q8_0-GGUF" + }, + { + "url": "https://huggingface.co/thefrigidliquidation/nllb-jaen-1.3B-lightnovels", + "downloads": 20, + "description": "NLLB 1.3B fine-tuned on Japanese-to-English light novel translation: this model was fine-tuned on light and web novels for Japanese-to-English translation.", + "source": "Hugging Face", + "score": 2.2456387449933482e-06, + "project_name": "nllb-jaen-1.3B-lightnovels" + }, + { + "url": "https://huggingface.co/pfnet/plamo-13b-instruct-nc", + "downloads": 20, + "description": "PLaMo-13B-Instruct-NC. Model description: PLaMo-13B-Instruct-NC is a noncommercial instruct fine-tuned model built upon the 8192-context-length version of the PLaMo-13B text generation model.", + "source": "Hugging Face", + "score": 2.2456387449933482e-06, + "project_name": "plamo-13b-instruct-nc" + }, + { + "url": "https://huggingface.co/ptaszynski/yacis-electra-small-japanese-cyberbullying", + "downloads": 20, + "description": "yacis-electra-small-cyberbullying: this is an ELECTRA Small model for the Japanese language finetuned for automatic cyberbullying detection.", + "source": "Hugging Face", + "score": 2.2456387449933482e-06, + "project_name": "yacis-electra-small-japanese-cyberbullying" + }, + { + "url": "https://huggingface.co/NovelAI/genji-jp", + "downloads": 20, + "description": "Genji-JP 6B. Please check our blog post for more details, samples, evaluations and more: Blogpost. Model description: Genji-JP 6B is a model finetuned on our Japanese storytelling dataset, based on EleutherAI's GPT-J 6B model.", + "source": "Hugging Face", + "score": 2.2456387449933482e-06, + "project_name": "genji-jp" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-unidic-luw-upos", + "downloads": 19, + "description": "bert-large-japanese-unidic-luw-upos. Model description: this is a BERT model pre-trained on Japanese Wikipedia texts for POS-tagging and dependency-parsing, derived from bert-large-japanese.", + "source": "Hugging Face", + "score": 2.133356807743681e-06, + "project_name": "bert-large-japanese-unidic-luw-upos" + }, + { + "url": "https://huggingface.co/hiroshi-matsuda-rit/bert-base-japanese-basic-char-v2", + "downloads": 19, + "description": "BERT base Japanese (character-level tokenization with whole word masking, jawiki-20200831): this pretrained model is almost the same as cl-tohoku/bert-base-japanese-char-v2 but does not need fugashi or unidic_lite.", + "source": "Hugging Face", + "score": 2.133356807743681e-06, + "project_name": "bert-base-japanese-basic-char-v2" + }, + { + "url": "https://huggingface.co/sosoai/Orion-14B-Chat-RAG-safetensors", + "downloads": 19, + "description": "Orion-14B🌐English | 🇨🇳中文 | 🇯🇵日本語 | 🇰🇷한국어🤗", + "source": "Hugging Face", + "score": 2.133356807743681e-06, + "project_name": "Orion-14B-Chat-RAG-safetensors" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-aozora-ud-head", + "downloads": 19, + "description": "roberta-base-japanese-aozora-ud-headModel", + "source": "Hugging Face", + "score": 2.133356807743681e-06, + "project_name": "roberta-base-japanese-aozora-ud-head" + }, + { + "url": "https://huggingface.co/espnet/kan-bayashi_jsut_transformer_accent_with_pause", + "downloads": 18, + "description": "Example ESPnet2 TTS model kan-bayashi/jsut_transformer_accent_with_pause♻", + "source": "Hugging Face", + "score": 2.0210748704940135e-06, + "project_name":
"kan-bayashi_jsut_transformer_accent_with_pause" + }, + { + "url": "https://huggingface.co/astremo/JAINU", + "downloads": 18, + "description": "JAINU-Model (T5 fine-tuned model)JAINU is a Japanese - Ainu language machine translation model.", + "source": "Hugging Face", + "score": 2.0210748704940135e-06, + "project_name": "JAINU" + }, + { + "url": "https://huggingface.co/eepj/wstcg-mt-ja-en", + "downloads": 18, + "description": "WS TCG Card Text TranslatorA Japanese-English machine translation model specifically trained for translating card text from the Weiss Schwarz (WS) Trading Card Game, fine-tuned on Helsinki-NLP/opus-mt-ja-en.", + "source": "Hugging Face", + "score": 2.0210748704940135e-06, + "project_name": "wstcg-mt-ja-en" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-ud-head", + "downloads": 18, + "description": "roberta-large-japanese-aozora-ud-headModel", + "source": "Hugging Face", + "score": 2.0210748704940135e-06, + "project_name": "roberta-large-japanese-aozora-ud-head" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic-ud-head", + "downloads": 18, + "description": "deberta-large-japanese-unidic-ud-headModel", + "source": "Hugging Face", + "score": 2.0210748704940135e-06, + "project_name": "deberta-large-japanese-unidic-ud-head" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-luw-upos", + "downloads": 18, + "description": "roberta-large-japanese-luw-uposModel", + "source": "Hugging Face", + "score": 2.0210748704940135e-06, + "project_name": "roberta-large-japanese-luw-upos" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-char-extended", + "downloads": 17, + "description": "bert-large-japanese-char-extendedModel DescriptionThis is a BERT model pre-trained on Japanese Wikipedia texts, derived from bert-large-japanese-char.", + "source": "Hugging Face", + "score": 1.908792933244346e-06, + "project_name": "bert-large-japanese-char-extended" + }, + { + "url": "https://huggingface.co/TylorShine/distilhubert-ft-japanese-50k", + "downloads": 17, + "description": "distilhubert-ft-japanese-50kFine-tuned (more precisely, continue trained)", + "source": "Hugging Face", + "score": 1.908792933244346e-06, + "project_name": "distilhubert-ft-japanese-50k" + }, + { + "url": "https://huggingface.co/Local-Novel-LLM-project/Vecteus-Constant", + "downloads": 17, + "description": "Our ModelsVecteusNinja-v1Ninja-v1-NSFWNinja-v1-128kNinja-v1-NSFW-128kThis is a prototype of Vecteus-v1Model Card for VecTeus-ConstantThe Mistral-7B--based Large Language Model (LLM) is an noveldataset fine-tuned version of the Mistral-7B-v0.1VecTeus has the following changes compared to Mistral-7B-v0.1.Achieving both high quality Japanese and English generationCan be generated NSFWMemory ability that does not forget even after long-context generationThis model was created with the help of GPUs from the firs", + "source": "Hugging Face", + "score": 1.908792933244346e-06, + "project_name": "Vecteus-Constant" + }, + { + "url": "https://huggingface.co/izumi-lab/electra-small-japanese-fin-generator", + "downloads": 17, + "description": "ELECTRA small Japanese finance generatorThis is a ELECTRA model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 1.908792933244346e-06, + "project_name": "electra-small-japanese-fin-generator" + }, + { + "url": "https://huggingface.co/izumi-lab/electra-base-japanese-generator", + "downloads": 17, + "description": "ELECTRA 
base Japanese generator: this is an ELECTRA model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 1.908792933244346e-06, + "project_name": "electra-base-japanese-generator" + }, + { + "url": "https://huggingface.co/Momerio/meigen_generate_Japanese", + "downloads": 17, + "description": "Famous-quote inference model", + "source": "Hugging Face", + "score": 1.908792933244346e-06, + "project_name": "meigen_generate_Japanese" + }, + { + "url": "https://huggingface.co/llm-book/bert-base-japanese-v3-bpr-passage-aio", + "downloads": 16, + "description": "bert-base-japanese-v3-bpr-passage-aio: the passage encoder of the BPR document retrieval model introduced in Chapter 9 of the book 「大規模言語モデル入門」.", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "bert-base-japanese-v3-bpr-passage-aio" + }, + { + "url": "https://huggingface.co/Elizezen/Phos-7B", + "downloads": 16, + "description": "Phos 7B「どうかお慈悲を もう 疲れ果てました」Generation example (the text after the bold is AI-generated):「どうか」”それ”は懇願した。", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "Phos-7B" + }, + { + "url": "https://huggingface.co/kurogane/Llama3-BioYouri-8B-instruct-chatvector-mergetest", + "downloads": 16, + "description": "kurogane/Llama3-BioYouri-8B-mergetest: this model is based on OpenBioLLM-8B, which is well versed in biology and medicine, merged with Llama-3-youko-8b-instruct-chatvector to improve its Japanese ability.", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "Llama3-BioYouri-8B-instruct-chatvector-mergetest" + }, + { + "url": "https://huggingface.co/oshizo/japanese-e5-mistral-1.9b", + "downloads": 16, + "description": "Model trained on 800,000 Japanese sentences after reducing oshizo/japanese-e5-mistral-7b_slerp to 8 layers.", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "japanese-e5-mistral-1.9b" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-ud-goeswith", + "downloads": 16, + "description": "deberta-base-japanese-wikipedia-ud-goeswith. Model description: this is a DeBERTa(V2) model pretrained on Japanese Wikipedia and 青空文庫 texts for POS-tagging and dependency-parsing (using goeswith for subwords), derived from deberta-base-japanese-wikipedia-luw-upos and UD_Japanese-GSDLUW. How to use: class UDgoeswith(object):def __init__(self,bert):", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "deberta-base-japanese-wikipedia-ud-goeswith" + }, + { + "url": "https://huggingface.co/ttop324/wav2vec2-live-japanese", + "downloads": 16, + "description": "wav2vec2-live-japanese (https://github.com/ttop32/wav2vec2-live-japanese-translator): fine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese hiragana using common_voice, JSUT, CSS10, TEDxJP-10K, JVS, and JSSS. Inference (usage): import torch; import torchaudio; from datasets import load_dataset; from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor; model = Wav2Vec2ForCTC.from_pretrained(\"ttop324/wav2vec2-live-japanese\")", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "wav2vec2-live-japanese" + }, + { + "url": "https://huggingface.co/izumi-lab/electra-small-paper-japanese-generator", + "downloads": 16, + "description": "ELECTRA small Japanese generator: this is an ELECTRA model pretrained on texts in the Japanese language.", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "electra-small-paper-japanese-generator" + }, + { + "url": "https://huggingface.co/nitky/Oumuamua-7b-instruct", + "downloads": 16, + "description": "Oumuamua-7b-instruct: this is a merge of pre-trained language models created using
mergekit. Output example: [INST] <<SYS>>あなたは日本語を話す優秀なアシスタントです。", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "Oumuamua-7b-instruct" + }, + { + "url": "https://huggingface.co/c299m/japanese_stock_sentiment", + "downloads": 15, + "description": "Japanese Stock Comment Sentiment Model: this model is a sentiment analysis tool specifically trained to analyze comments and discussions related to Japanese stocks.", + "source": "Hugging Face", + "score": 1.6842290587450113e-06, + "project_name": "japanese_stock_sentiment" + }, + { + "url": "https://huggingface.co/megagonlabs/transformers-ud-japanese-electra-base-discriminator", + "downloads": 15, + "description": "transformers-ud-japanese-electra-ginza (sudachitra-wordpiece, mC4 Japanese) -", + "source": "Hugging Face", + "score": 1.6842290587450113e-06, + "project_name": "transformers-ud-japanese-electra-base-discriminator" + }, + { + "url": "https://huggingface.co/minkhantycc/translation-en-ja", + "downloads": 15, + "description": "This model is a fine-tuned version of Helsinki-NLP/opus-mt-ja-en on the bsd_ja_en dataset.", + "source": "Hugging Face", + "score": 1.6842290587450113e-06, + "project_name": "translation-en-ja" + }, + { + "url": "https://huggingface.co/uzabase/luke-japanese-wordpiece-base", + "downloads": 15, + "description": "A model with the following changes applied to studio-ousia/luke-japanese-base.", + "source": "Hugging Face", + "score": 1.6842290587450113e-06, + "project_name": "luke-japanese-wordpiece-base" + }, + { + "url": "https://huggingface.co/arc-r/faster-whisper-large-v2-mix-jp", + "downloads": 15, + "description": "whisper-large-v2-mix-jp model for CTranslate2: this repository contains the conversion of vumichien/whisper-large-v2-mix-jp to the CTranslate2 model format.", + "source": "Hugging Face", + "score": 1.6842290587450113e-06, + "project_name": "faster-whisper-large-v2-mix-jp" + }, + { + "url": "https://huggingface.co/ptaszynski/yacis-electra-small-japanese", + "downloads": 15, + "description": "yacis-electra-small: this is an ELECTRA Small model for Japanese pretrained on 354 million sentences / 5.6 billion words of the YACIS blog corpus.", + "source": "Hugging Face", + "score": 1.6842290587450113e-06, + "project_name": "yacis-electra-small-japanese" + }, + { + "url": "https://huggingface.co/nlp-waseda/gpt2-small-japanese-wikipedia", + "downloads": 15, + "description": "nlp-waseda/gpt2-small-japanese-wikipedia: this model is a Japanese GPT-2 pretrained on Japanese Wikipedia.", + "source": "Hugging Face", + "score": 1.6842290587450113e-06, + "project_name": "gpt2-small-japanese-wikipedia" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-base-japanese-luw-upos", + "downloads": 14, + "description": "bert-base-japanese-luw-uposModel", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "bert-base-japanese-luw-upos" + }, + { + "url": "https://huggingface.co/megagonlabs/t5-base-japanese-web-8k", + "downloads": 14, + "description": "t5-base-japanese-web-8k (with byte-fallback, 8K). Description: megagonlabs/t5-base-japanese-web-8k is a T5 (Text-to-Text Transfer Transformer) model pre-trained on Japanese web texts.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "t5-base-japanese-web-8k" + }, + { + "url": "https://huggingface.co/ClassCat/gpt2-base-japanese-v2", + "downloads": 14, + "description": "GPT2 Japanese base model version 2. Prerequisites: transformers==4.19.2. Model", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name":
"gpt2-base-japanese-v2" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-large-finetuned-QA", + "downloads": 14, + "description": "このモデルはluke-japanese-large-liteをファインチューニングして、Question-Answeringに用いれるようにしたものです。", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "luke-japanese-large-finetuned-QA" + }, + { + "url": "https://huggingface.co/omzn/facemark_detection", + "downloads": 14, + "description": "Facemark DetectionThis model classifies given text into facemark(1) or not(0).", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "facemark_detection" + }, + { + "url": "https://huggingface.co/okazaki-lab/japanese-gpt2-medium-unidic", + "downloads": 14, + "description": "japanese-gpt2-medium-unidicThis is a medium-sized Japanese GPT-2 model using BERT-like tokenizer.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "japanese-gpt2-medium-unidic" + }, + { + "url": "https://huggingface.co/NilanE/tinyllama-en_ja-translation-v2", + "downloads": 14, + "description": "In-progess long-context Japanese-English translation model based on tinyllama.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "tinyllama-en_ja-translation-v2" + }, + { + "url": "https://huggingface.co/Gustav114514/work", + "downloads": 14, + "description": "Fine-tuned XLSR-53 large model for speech recognition in JapaneseFine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the train and validation splits of Common Voice 6.1, CSS10 and JSUT.When using this model, make sure that your speech input is sampled at 16kHz.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "work" + }, + { + "url": "https://huggingface.co/TheBloke/japanese-stablelm-instruct-beta-7B-GPTQ", + "downloads": 14, + "description": "Chat & support: TheBloke's Discord serverWant to contribute?", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "japanese-stablelm-instruct-beta-7B-GPTQ" + }, + { + "url": "https://huggingface.co/haih2/open-calm-7b-summarizer-lora", + "downloads": 14, + "description": "Fine-tuned OpenCALM-7B Adapters for Meeting SummarizationDescriptionThese are weights for LoRA adapters fine-tuned on the OpenCALM-7B (Andonian et al.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "open-calm-7b-summarizer-lora" + }, + { + "url": "https://huggingface.co/stabilityai/japanese-stablelm-instruct-alpha-7b", + "downloads": 14, + "description": "This repository is publicly accessible, but you have to accept the conditions to access its files and content.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "japanese-stablelm-instruct-alpha-7b" + }, + { + "url": "https://huggingface.co/thefrigidliquidation/nllb-200-distilled-1.3B-bookworm", + "downloads": 14, + "description": "NLLB-200 1.3B fine-tuned on Ascendance of a BookwormThis model was fine-tuned on Ascendance of a Bookworm to translate the web novel in Japanese to English.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "nllb-200-distilled-1.3B-bookworm" + }, + { + "url": "https://huggingface.co/microsoft/unihanlm-base", + "downloads": 14, + "description": "Unihan LM: Coarse-to-Fine Chinese-Japanese Language Model Pretraining with the Unihan DatabaseModel descriptionChinese and Japanese share many characters with similar surface morphology.", + "source": "Hugging Face", + "score": 
1.5719471214953439e-06, + "project_name": "unihanlm-base" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/bert-large-japanese-luw-upos", + "downloads": 13, + "description": "bert-large-japanese-luw-uposModel", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "bert-large-japanese-luw-upos" + }, + { + "url": "https://huggingface.co/wietsedv/xlm-roberta-base-ft-udpos28-ja", + "downloads": 13, + "description": "XLM-RoBERTa base Universal Dependencies v2.8 POS tagging:", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "xlm-roberta-base-ft-udpos28-ja" + }, + { + "url": "https://huggingface.co/Nikolajvestergaard/Japanese_Fine_Tuned_Whisper_Model", + "downloads": 13, + "description": "Japanese_Fine_Tuned_Whisper_ModelThis model is a fine-tuned version of openai/whisper-tiny on the Common Voice dataset.", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "Japanese_Fine_Tuned_Whisper_Model" + }, + { + "url": "https://huggingface.co/knosing/japanese_ner_model", + "downloads": 13, + "description": "Model DescriptionThis model is a fine-tuned version of the tohoku-nlp/bert-base-japanese-v3, specifically optimized for Named Entity Recognition (NER) tasks.", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "japanese_ner_model" + }, + { + "url": "https://huggingface.co/jovyan/Swallow-MS-7b-v0.1-ChatVector", + "downloads": 13, + "description": "Swallow-MS-7b-v0.1-ChatVectorJapanese \"instruction tuned\" model made by the technique of Chat VectorThe weights of this model are obtained not by any instruction tuning but by the following arithmetic:Swallow-MS-7b-v0.1 + Mistral-7B-Instruct-v0.2 - Mistral-7B-v0.1Chat Vectorの手法を使って、学習済み重みの足し引きのみでSwallow-MS-7b-v0.1モデルにチャット形式の対話能力を与えたモデルです。", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "Swallow-MS-7b-v0.1-ChatVector" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/bert-large-japanese-v2-finetuned-ner", + "downloads": 13, + "description": "このモデルはcl-tohoku/bert-large-japanese-v2をファインチューニングして、固有表現抽出(NER)に用いれるようにしたものです。", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "bert-large-japanese-v2-finetuned-ner" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-large-japanese-finetuned-ner", + "downloads": 13, + "description": "このモデルはdeberta-v2-large-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "deberta-v2-large-japanese-finetuned-ner" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-aozora-char", + "downloads": 13, + "description": "roberta-small-japanese-aozora-charModel DescriptionThis is a RoBERTa model pre-trained on 青空文庫 texts with character tokenizer.", + "source": "Hugging Face", + "score": 1.4596651842456765e-06, + "project_name": "roberta-small-japanese-aozora-char" + }, + { + "url": "https://huggingface.co/Helsinki-NLP/opus-mt-ja-bg", + "downloads": 12, + "description": "jpn-bulsource group: Japanesetarget group: BulgarianOPUS readme: jpn-bulmodel: transformer-alignsource language(s): jpn jpn_Hani jpn_Hira jpn_Kanatarget language(s): bulmodel: transformer-alignpre-processing: normalization + SentencePiece (spm32k,spm32k)", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "opus-mt-ja-bg" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-luw-upos", + "downloads": 
12, + "description": "roberta-base-japanese-luw-uposModel", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "roberta-base-japanese-luw-upos" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-base-japanese-char-luw-upos", + "downloads": 12, + "description": "roberta-base-japanese-char-luw-uposModel", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "roberta-base-japanese-char-luw-upos" + }, + { + "url": "https://huggingface.co/espnet/kan-bayashi_jsut_vits_prosody", + "downloads": 12, + "description": "ESPnet2 TTS pretrained modelkan-bayashi/jsut_vits_prosody♻", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "kan-bayashi_jsut_vits_prosody" + }, + { + "url": "https://huggingface.co/Language-Media-Lab/byt5-small-ain-jpn-mt", + "downloads": 12, + "description": "Byt5-small-ain-jpn-mt is a machine translation model pretrained with Google's ByT5-small and fine-tuned on bilingual datasets crawled from the Web.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "byt5-small-ain-jpn-mt" + }, + { + "url": "https://huggingface.co/sonoisa/byt5-small-japanese", + "downloads": 12, + "description": "日本語ByT5事前学習済みモデルThis is a ByT5 (a tokenizer-free extension of the Text-to-Text Transfer Transformer) model pretrained on Japanese corpus.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "byt5-small-japanese" + }, + { + "url": "https://huggingface.co/megagonlabs/roberta-long-japanese", + "downloads": 12, + "description": "roberta-long-japanese (jumanpp + sentencepiece, mC4 Japanese)This is the longer input version of RoBERTa Japanese model pretrained on approximately 200", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "roberta-long-japanese" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-aozora-ud-goeswith", + "downloads": 12, + "description": "roberta-large-japanese-aozora-ud-goeswithModel DescriptionThis is a RoBERTa model pretrained on 青空文庫 texts for POS-tagging and dependency-parsing (using goeswith for subwords), derived from roberta-large-japanese-aozora and UD_Japanese-GSDLUW.How to Useclass UDgoeswith(object):def __init__(self,bert):", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "roberta-large-japanese-aozora-ud-goeswith" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-aozora-ud-goeswith", + "downloads": 12, + "description": "deberta-large-japanese-aozora-ud-goeswithModel DescriptionThis is a DeBERTa(V2) model pretrained on 青空文庫 texts for POS-tagging and dependency-parsing (using goeswith for subwords), derived from deberta-large-japanese-luw-upos and UD_Japanese-GSDLUW.How to Useclass UDgoeswith(object):def __init__(self,bert):", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "deberta-large-japanese-aozora-ud-goeswith" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-aozora-ud-goeswith", + "downloads": 12, + "description": "deberta-base-japanese-aozora-ud-goeswithModel DescriptionThis is a DeBERTa(V2) model pretrained on 青空文庫 texts for POS-tagging and dependency-parsing (using goeswith for subwords), derived from deberta-base-japanese-aozora and UD_Japanese-GSDLUW.How to Useclass UDgoeswith(object):def __init__(self,bert):", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": 
"deberta-base-japanese-aozora-ud-goeswith" + }, + { + "url": "https://huggingface.co/TomokiFujihara/twhin-bert-base-japanese-offensiveness-estimation", + "downloads": 12, + "description": "モデル概要このモデルは、 Twitter/twhin-bert-base をSNS上のコメントに人手で攻撃性評価を行ったデータセットでFine-tuningすることで作成しました。", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "twhin-bert-base-japanese-offensiveness-estimation" + }, + { + "url": "https://huggingface.co/AbeShinzo0708/Japanese-Starling-ChatV-7B-exl2", + "downloads": 12, + "description": "Japanese-Starling-ChatV-7Bこのモデルは\"chatntq-ja-7b-v1.0\"をベースにした7Bパラメータの日本語チャットモデルです。", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "Japanese-Starling-ChatV-7B-exl2" + }, + { + "url": "https://huggingface.co/retrieva-jp/t5-large-medium", + "downloads": 12, + "description": "Model card for model IDThis is a T5 v1.1 model, pre-trained on a Japanese corpus.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "t5-large-medium" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-juman-ud-goeswith", + "downloads": 12, + "description": "deberta-large-japanese-juman-ud-goeswithModel DescriptionThis is a DeBERTa(V2) model pretrained on Japanese Wikipedia, CC-100, and OSCAR texts for POS-tagging and dependency-parsing (using goeswith for subwords), derived from deberta-v2-large-japanese.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "deberta-large-japanese-juman-ud-goeswith" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-juman-ud-goeswith", + "downloads": 12, + "description": "deberta-base-japanese-juman-ud-goeswithModel DescriptionThis is a DeBERTa(V2) model pretrained on Japanese Wikipedia, CC-100, and OSCAR texts for POS-tagging and dependency-parsing (using goeswith for subwords), derived from deberta-v2-base-japanese.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "deberta-base-japanese-juman-ud-goeswith" + }, + { + "url": "https://huggingface.co/Formzu/bart-large-japanese", + "downloads": 12, + "description": "bart-large-japaneseThis model is converted from the original Japanese BART Pretrained model released by Kyoto University.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "bart-large-japanese" + }, + { + "url": "https://huggingface.co/kit-nlp/electra-small-japanese-discriminator-cyberbullying", + "downloads": 12, + "description": "electra-base-cyberbullyingThis is an ELECTRA Small model for the Japanese language finetuned for automatic cyberbullying detection.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "electra-small-japanese-discriminator-cyberbullying" + }, + { + "url": "https://huggingface.co/skytnt/gpt2-japanese-lyric-medium", + "downloads": 12, + "description": "Japanese GPT2 Lyric ModelModel descriptionThe model is used to generate Japanese lyrics.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "gpt2-japanese-lyric-medium" + }, + { + "url": "https://huggingface.co/qqpann/wav2vec2-large-xlsr-japanese-0325-1200", + "downloads": 12, + "description": "Wav2Vec2-Large-XLSR-53-{language} #TODO: replace language with your {language}, e.g. 
", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "wav2vec2-large-xlsr-japanese-0325-1200" + }, + { + "url": "https://huggingface.co/qqpann/w2v_hf_jsut_xlsr53", + "downloads": 12, + "description": "Wav2Vec2-Large-XLSR-53-JapaneseFine-tuned facebook/wav2vec2-large-xlsr-53 on Japanese using the Common Voice, and JSUT", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "w2v_hf_jsut_xlsr53" + }, + { + "url": "https://huggingface.co/espnet/kan-bayashi_jsut_conformer_fastspeech2", + "downloads": 12, + "description": "Example ESPnet2 TTS modelkan-bayashi/jsut_conformer_fastspeech2♻", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "kan-bayashi_jsut_conformer_fastspeech2" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-large-japanese-char-luw-upos", + "downloads": 12, + "description": "roberta-large-japanese-char-luw-uposModel", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "roberta-large-japanese-char-luw-upos" + }, + { + "url": "https://huggingface.co/Helsinki-NLP/opus-mt-ja-pl", + "downloads": 12, + "description": "jpn-polsource group: Japanesetarget group: PolishOPUS readme: jpn-polmodel: transformer-alignsource language(s): jpn jpn_Bopo jpn_Hani jpn_Hira jpn_Kana jpn_Latntarget language(s): polmodel: transformer-alignpre-processing: normalization + SentencePiece (spm32k,spm32k)", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "opus-mt-ja-pl" + }, + { + "url": "https://huggingface.co/AndrewMcDowell/wav2vec2-xls-r-1b-japanese-hiragana-katakana", + "downloads": 12, + "description": "This model is a fine-tuned version of facebook/wav2vec2-xls-r-1b on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - JA dataset.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "wav2vec2-xls-r-1b-japanese-hiragana-katakana" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic-ud-head", + "downloads": 11, + "description": "deberta-base-japanese-unidic-ud-headModel", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-base-japanese-unidic-ud-head" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-unidic-luw-upos", + "downloads": 11, + "description": "deberta-large-japanese-unidic-luw-uposModel", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-large-japanese-unidic-luw-upos" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-unidic-luw-upos", + "downloads": 11, + "description": "deberta-base-japanese-unidic-luw-uposModel", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-base-japanese-unidic-luw-upos" + }, + { + "url": "https://huggingface.co/kit-nlp/bert-base-japanese-basic-char-v2-irony", + "downloads": 11, + "description": "bert-base-ironyThis is a BERT Base model for the Japanese language finetuned for automatic irony detection.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "bert-base-japanese-basic-char-v2-irony" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/luke-japanese-base-finetuned-jnli", + "downloads": 11, + "description": "このモデルはluke-japanese-baseをファインチューニングして、JNLI(文章の関係性判別)に用いれるようにしたものです。", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "luke-japanese-base-finetuned-jnli" + }, + { + "url": 
"https://huggingface.co/ku-accms/bert-base-japanese-ssuw", + "downloads": 11, + "description": "ku-accms/bert-base-japanese-ssuwModel descriptionThis is a pre-trained Japanese BERT base model for super short unit words (SSUW).", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "bert-base-japanese-ssuw" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-tiny-japanese-finetuned-QA", + "downloads": 11, + "description": "このモデルはdeberta-v2-tiny-japaneseをファインチューニングしてQAタスクに用いれるようにしたものです。", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-v2-tiny-japanese-finetuned-QA" + }, + { + "url": "https://huggingface.co/MuneK/bert-large-japanese-v2-finetuned-jed", + "downloads": 11, + "description": "bert-large-japanese-v2-finetuned-wrimeThis model is finetuned from cl-tohoku/bert-large-japanese-v2 by JEmpatheticDialogues.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "bert-large-japanese-v2-finetuned-jed" + }, + { + "url": "https://huggingface.co/abeja/Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged", + "downloads": 11, + "description": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha-mergedMixtral-8x7B-Instruct-v0.1-japanese-alpha-mergedはMixtral-8x7B-Instruct-v0.1をベースに日本語の語彙拡張継続事前学習を実施した学習途中のモデルに対して、差分マージを実施したモデルです。", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "Mixtral-8x7B-Instruct-v0.1-japanese-alpha-merged" + }, + { + "url": "https://huggingface.co/AdapterHub/bert-base-multilingual-cased-ja-wiki_pfeiffer", + "downloads": 11, + "description": "Adapter bert-base-multilingual-cased-ja-wiki_pfeiffer for bert-base-multilingual-casedPfeiffer Adapter trained with Masked Language Modelling on Japanese Wikipedia Articles for 250k steps and a batch size of 64.This adapter was created for usage with the Adapters library.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "bert-base-multilingual-cased-ja-wiki_pfeiffer" + }, + { + "url": "https://huggingface.co/youhansun/Llama-3-70B-japanese-suzume-vector-v0.1-Q2_K-GGUF", + "downloads": 11, + "description": "youhansun/Llama-3-70B-japanese-suzume-vector-v0.1-Q2_K-GGUFThis model was converted to GGUF format from mmnga/Llama-3-70B-japanese-suzume-vector-v0.1 using llama.cpp via the ggml.ai's GGUF-my-repo space.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "Llama-3-70B-japanese-suzume-vector-v0.1-Q2_K-GGUF" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-v3-base-japanese-ud-goeswith", + "downloads": 11, + "description": "deberta-v3-base-japanese-ud-goeswithModel DescriptionThis is a DeBERTa(V3) model pretrained on LLM-jp corpus v1.0 for POS-tagging and dependency-parsing (using goeswith for subwords), derived from deberta-v3-base-japanese and UD_Japanese-GSDLUW.How to Useclass UDgoeswith(object):def __init__(self,bert):", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-v3-base-japanese-ud-goeswith" + }, + { + "url": "https://huggingface.co/Mizuiro-sakura/deberta-v2-base-japanese-finetuned-ner", + "downloads": 11, + "description": "このモデルはdeberta-v2-base-japaneseをファインチューニングして固有表現抽出(NER)に用いれるようにしたものです。", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-v2-base-japanese-finetuned-ner" + }, + { + "url": "https://huggingface.co/kit-nlp/transformers-ud-japanese-electra-base-discriminator-cyberbullying", + "downloads": 11, + "description": 
"electra-base-cyberbullyingThis is an ELECTRA Base model for the Japanese language finetuned for automatic cyberbullying detection.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "transformers-ud-japanese-electra-base-discriminator-cyberbullying" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-wikipedia-ud-head", + "downloads": 11, + "description": "deberta-base-japanese-wikipedia-ud-headModel", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-base-japanese-wikipedia-ud-head" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-large-japanese-luw-upos", + "downloads": 11, + "description": "deberta-large-japanese-luw-uposModel", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-large-japanese-luw-upos" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-base-japanese-luw-upos", + "downloads": 11, + "description": "deberta-base-japanese-luw-uposModel", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-base-japanese-luw-upos" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/deberta-small-japanese-upos", + "downloads": 11, + "description": "deberta-small-japanese-uposModel DescriptionThis is a DeBERTa(V2) model pre-trained on 青空文庫 texts for POS-tagging and dependency-parsing, derived from deberta-small-japanese-aozora.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "deberta-small-japanese-upos" + }, + { + "url": "https://huggingface.co/KoichiYasuoka/roberta-small-japanese-char-luw-upos", + "downloads": 11, + "description": "roberta-small-japanese-char-luw-uposModel", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "roberta-small-japanese-char-luw-upos" + }, + { + "url": "https://huggingface.co/datasets/shunk031/JGLUE", + "downloads": 34408, + "description": "Please feel free to open an issue or pull request.", + "source": "Hugging Face", + "score": 0.0038633968968865566, + "project_name": "JGLUE" + }, + { + "url": "https://huggingface.co/datasets/nlp-waseda/JMMLU", + "downloads": 6849, + "description": "JMMLUJapanese Massive Multitask Language Understanding BenchmarkJMMLU is a four-choice question set consisting of Japanese-translated questions of a portion of MMLU (Paper, Github) (Translated questions) and questions based on unique Japanese cultural context (Japanese questions).", + "source": "Hugging Face", + "score": 0.0007690189882229722, + "project_name": "JMMLU" + }, + { + "url": "https://huggingface.co/datasets/elyza/ELYZA-tasks-100", + "downloads": 5869, + "description": "ELYZA-tasks-100: 日本語instructionモデル評価データセットData Description本データセットはinstruction-tuningを行ったモデルの評価用データセットです。", + "source": "Hugging Face", + "score": 0.0006589826897182981, + "project_name": "ELYZA-tasks-100" + }, + { + "url": "https://huggingface.co/datasets/sbintuitions/JMTEB", + "downloads": 5598, + "description": "JMTEB:", + "source": "Hugging Face", + "score": 0.0006285542847236382, + "project_name": "JMTEB" + }, + { + "url": "https://huggingface.co/datasets/kunishou/databricks-dolly-15k-ja", + "downloads": 3055, + "description": "This dataset was created by automatically translating \"databricks-dolly-15k\" into Japanese.", + "source": "Hugging Face", + "score": 0.000343021318297734, + "project_name": "databricks-dolly-15k-ja" + }, + { + "url": "https://huggingface.co/datasets/llm-jp/hh-rlhf-12k-ja", + "downloads": 1896, + 
"description": "hh-rlhf-12k-jaThis repository provides a human preference dataset developed by LLM-jp, a collaborative project launched in Japan.", + "source": "Hugging Face", + "score": 0.00021288655302536942, + "project_name": "hh-rlhf-12k-ja" + }, + { + "url": "https://huggingface.co/datasets/llm-jp/databricks-dolly-15k-ja", + "downloads": 1841, + "description": "databricks-dolly-15k-jaThis repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", + "source": "Hugging Face", + "score": 0.00020671104647663772, + "project_name": "databricks-dolly-15k-ja" + }, + { + "url": "https://huggingface.co/datasets/mkshing/xlsum_ja", + "downloads": 1746, + "description": "This is the filtered Japanese subset of XL-Sum followed by PaLM 2filters15-gram overlap* code: https://gist.github.com/mkshing/d6371cbfdd50d4f352cee247fd4dd86anumber of examplestrain: 4215 (before: 7113)validation: 758 (before: 889)test: 766 (before: 889)", + "source": "Hugging Face", + "score": 0.0001960442624379193, + "project_name": "xlsum_ja" + }, + { + "url": "https://huggingface.co/datasets/range3/wiki40b-ja", + "downloads": 1518, + "description": "range3/wiki40b-jaThis dataset consists of three parquet files from the wiki40b dataset with only Japanese data extracted.", + "source": "Hugging Face", + "score": 0.00017044398074499514, + "project_name": "wiki40b-ja" + }, + { + "url": "https://huggingface.co/datasets/kumapo/JAQKET", + "downloads": 1196, + "description": "作成するデータセットは,既存研究 [7] に倣い,Wikipedia2 の記事名を答えとした,日本語のオープンドメイン QA タスクのデータセットである.", + "source": "Hugging Face", + "score": 0.00013428919695060223, + "project_name": "JAQKET" + }, + { + "url": "https://huggingface.co/datasets/llm-book/wrime-sentiment", + "downloads": 1183, + "description": "GitHub リポジトリ ids-cv/wrime で公開されているデータセットを利用しています。", + "source": "Hugging Face", + "score": 0.00013282953176635656, + "project_name": "wrime-sentiment" + }, + { + "url": "https://huggingface.co/datasets/turing-motors/LLaVA-Instruct-150K-JA", + "downloads": 1053, + "description": "Dataset DetailsDataset Type:Japanese LLaVA Instruct 150K is a localized version of the original LLaVA Visual Instruct 150K dataset.", + "source": "Hugging Face", + "score": 0.00011823287992389979, + "project_name": "LLaVA-Instruct-150K-JA" + }, + { + "url": "https://huggingface.co/datasets/seungwon929/Ja-miracl", + "downloads": 772, + "description": "Ja-miraclThis dataset represents a conversion of the Japanese (Ja) section from the miracl dataset into the BeIR format, making it compatible for use with mteb.", + "source": "Hugging Face", + "score": 8.668165555674325e-05, + "project_name": "Ja-miracl" + }, + { + "url": "https://huggingface.co/datasets/fujiki/japanese_hh-rlhf-49k", + "downloads": 757, + "description": "This is a little bit different version of kunishou/hh-rlhf-49k-ja without ng_translation == 1 examples.", + "source": "Hugging Face", + "score": 8.499742649799824e-05, + "project_name": "japanese_hh-rlhf-49k" + }, + { + "url": "https://huggingface.co/datasets/izumi-lab/llm-japanese-dataset", + "downloads": 655, + "description": "llm-japanese-datasetLLM構築用の日本語インストラクション(チャット)データセット主に,英語で構築されたLLMモデルなどに対して,チャット(Instruction)応答タスクに関してLoRAなどでチューニングするために使用できます.", + "source": "Hugging Face", + "score": 7.354466889853216e-05, + "project_name": "llm-japanese-dataset" + }, + { + "url": "https://huggingface.co/datasets/neulab/odex", + "downloads": 607, + "description": "ODEX is an Open-Domain EXecution-based NL-to-Code generation data benchmark.", + 
"source": "Hugging Face", + "score": 6.815513591054813e-05, + "project_name": "odex" + }, + { + "url": "https://huggingface.co/datasets/llm-book/ner-wikipedia-dataset", + "downloads": 569, + "description": "Githubリポジトリstockmarkteam/ner-wikipedia-datasetで公開されているデータセットを利用しています。", + "source": "Hugging Face", + "score": 6.388842229506076e-05, + "project_name": "ner-wikipedia-dataset" + }, + { + "url": "https://huggingface.co/datasets/kogi-jwu/jhumaneval", + "downloads": 509, + "description": "LLM のコード生成能力の標準ベンチマーク HumanEval の日本語翻訳版です。", + "source": "Hugging Face", + "score": 5.715150606008072e-05, + "project_name": "jhumaneval" + }, + { + "url": "https://huggingface.co/datasets/llm-book/livedoor-news-corpus", + "downloads": 490, + "description": "オリジナルのサイトと同じものを使用しています。", + "source": "Hugging Face", + "score": 5.501814925233704e-05, + "project_name": "livedoor-news-corpus" + }, + { + "url": "https://huggingface.co/datasets/hatakeyama-llm-team/AutoGeneratedJapaneseQA", + "downloads": 438, + "description": "自動生成Q&A種々のデータソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。CC-BY系またはApatch-2.0のデータソースを改変して生成しています。", + "source": "Hugging Face", + "score": 4.917948851535433e-05, + "project_name": "AutoGeneratedJapaneseQA" + }, + { + "url": "https://huggingface.co/datasets/hotchpotch/JQaRA", + "downloads": 419, + "description": "JQaRA : Japanese Question Answering with Retrieval Augmentation - 検索拡張(RAG)評価のための日本語 Q&A データセット高性能な LLM の台頭に伴い、LLM を用いた質疑応答のユースケースが増加しています。", + "source": "Hugging Face", + "score": 4.704613170761065e-05, + "project_name": "JQaRA" + }, + { + "url": "https://huggingface.co/datasets/llm-jp/oasst1-21k-ja", + "downloads": 411, + "description": "oasst1-21k-jaThis repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", + "source": "Hugging Face", + "score": 4.614787620961331e-05, + "project_name": "oasst1-21k-ja" + }, + { + "url": "https://huggingface.co/datasets/globis-university/aozorabunko-clean", + "downloads": 372, + "description": "OverviewThis dataset provides a convenient and user-friendly format of data from Aozora Bunko (青空文庫), a website that compiles public-domain books in Japan, ideal for Machine Learning applications.", + "source": "Hugging Face", + "score": 4.176888065687628e-05, + "project_name": "aozorabunko-clean" + }, + { + "url": "https://huggingface.co/datasets/SkelterLabsInc/JaQuAD", + "downloads": 335, + "description": "JaQuAD is developed to provide a SQuAD-like QA dataset in Japanese.", + "source": "Hugging Face", + "score": 3.7614448978638586e-05, + "project_name": "JaQuAD" + }, + { + "url": "https://huggingface.co/datasets/yuzuai/rakuda-questions", + "downloads": 260, + "description": "Rakuda - Questions for Japanese modelsRepository:", + "source": "Hugging Face", + "score": 2.919330368491353e-05, + "project_name": "rakuda-questions" + }, + { + "url": "https://huggingface.co/datasets/llm-book/jawiki-sentences", + "downloads": 255, + "description": "GitHub リポジトリ singletongue/wikipedia-utils で公開されているデータセットを利用しています。", + "source": "Hugging Face", + "score": 2.863189399866519e-05, + "project_name": "jawiki-sentences" + }, + { + "url": "https://huggingface.co/datasets/taishi-i/nagisa_stopwords", + "downloads": 249, + "description": "Japanese stopwords for nagisaThis is a stopword list of frequently used words in the Japanese language, created according to the tokenization rules of the Japanese text analysis library, nagisa.", + "source": "Hugging Face", + "score": 2.7958202375167187e-05, 
+ "project_name": "nagisa_stopwords" + }, + { + "url": "https://huggingface.co/datasets/llm-jp/oasst2-33k-ja", + "downloads": 240, + "description": "oasst2-33k-jaThis repository provides an instruction tuning dataset developed by LLM-jp, a collaborative project launched in Japan.", + "source": "Hugging Face", + "score": 2.694766493992018e-05, + "project_name": "oasst2-33k-ja" + }, + { + "url": "https://huggingface.co/datasets/DataPilot/databricks-dolly-15k-Nyan-ja", + "downloads": 232, + "description": "このデータセットはkunishou氏が公開している\"databricks-dolly-15k\"を日本語訳したkunishou/databricks-dolly-15k-jaデータセットの語尾をArrowPro-7B-KUJIRAを用いて「にゃん!", + "source": "Hugging Face", + "score": 2.604940944192284e-05, + "project_name": "databricks-dolly-15k-Nyan-ja" + }, + { + "url": "https://huggingface.co/datasets/taishi-i/awesome-japanese-nlp-classification-dataset", + "downloads": 229, + "description": "Dataset overviewThis dataset identifies whether a GitHub repository description pertains to Japanese natural language processing (NLP).", + "source": "Hugging Face", + "score": 2.5712563630173838e-05, + "project_name": "awesome-japanese-nlp-classification-dataset" + }, + { + "url": "https://huggingface.co/datasets/GENIAC-Team-Ozaki/chatbot-arena-ja-calm2-7b-chat-experimental_deduped", + "downloads": 196, + "description": "chatbot-arena-ja-calm2-7b-chatからpromptが一致するデータを削除したデータセットです。", + "source": "Hugging Face", + "score": 2.2007259700934814e-05, + "project_name": "chatbot-arena-ja-calm2-7b-chat-experimental_deduped" + }, + { + "url": "https://huggingface.co/datasets/yongtae-jp/orca_dpo_pairs_ja", + "downloads": 187, + "description": "About this datasetThis dataset is a machine translation of the Intel/orca_dpo_pairs dataset with Palm 2 (prompt for translation is pasted below).", + "source": "Hugging Face", + "score": 2.0996722265687807e-05, + "project_name": "orca_dpo_pairs_ja" + }, + { + "url": "https://huggingface.co/datasets/range3/wikipedia-ja-20230101", + "downloads": 181, + "description": "range3/wikipedia-ja-20230101This dataset consists of a parquet file from the wikipedia dataset with only Japanese data extracted.", + "source": "Hugging Face", + "score": 2.0323030642189803e-05, + "project_name": "wikipedia-ja-20230101" + }, + { + "url": "https://huggingface.co/datasets/llm-book/aio-retriever", + "downloads": 178, + "description": "GitHub リポジトリ cl-tohoku/quiz-datasets で公開されているデータセットを利用しています。", + "source": "Hugging Face", + "score": 1.99861848304408e-05, + "project_name": "aio-retriever" + }, + { + "url": "https://huggingface.co/datasets/llm-book/jsnli", + "downloads": 167, + "description": "JSNLI Version 1.1 のデータセットのうち、フィルタリング後の訓練セット (train_w_filtering)", + "source": "Hugging Face", + "score": 1.875108352069446e-05, + "project_name": "jsnli" + }, + { + "url": "https://huggingface.co/datasets/SakanaAI/JA-VG-VQA-500", + "downloads": 167, + "description": "JA-VG-VQA-500Dataset DescriptionJA-VG-VQA-500 is a 500-sample subset of Japanese Visual Genome VQA dataset.", + "source": "Hugging Face", + "score": 1.875108352069446e-05, + "project_name": "JA-VG-VQA-500" + }, + { + "url": "https://huggingface.co/datasets/fujiki/japanese_alpaca_data", + "downloads": 158, + "description": "[github].", + "source": "Hugging Face", + "score": 1.7740546085447454e-05, + "project_name": "japanese_alpaca_data" + }, + { + "url": "https://huggingface.co/datasets/cl-nagoya/auto-wiki-qa", + "downloads": 153, + "description": 
"AutoWikiQA東工大が公開しているSwallow-MXを用いて、Wikipedia中のテキストを入力として「質問(query)」と「回答(answer)」を生成し、生成された質問と回答についてフィルタリングを行ったデータセットです。", + "source": "Hugging Face", + "score": 1.7179136399199115e-05, + "project_name": "auto-wiki-qa" + }, + { + "url": "https://huggingface.co/datasets/p1atdev/ichikara-instruction", + "downloads": 148, + "description": "ichikara-instruction (Non Commercial)LLMのための日本語インストラクションデータ 公開ページ公開ページより、本データに関して、言語処理学会第30回年次大会において発表を行います。", + "source": "Hugging Face", + "score": 1.661772671295078e-05, + "project_name": "ichikara-instruction" + }, + { + "url": "https://huggingface.co/datasets/shunk031/jsnli", + "downloads": 145, + "description": "Dataset PreprocessingSupported Tasks and LeaderboardsLanguages注釈はすべて日本語を主要言語としています。", + "source": "Hugging Face", + "score": 1.6280880901201775e-05, + "project_name": "jsnli" + }, + { + "url": "https://huggingface.co/datasets/HachiML/Evol-Alpaca-gen3-500", + "downloads": 143, + "description": "Evol-Alpaca-gen3-500Evol-Alpaca-gen3-500は、Stanford Alpacaのseed tasksを日本語化Evol-Instructionの手法mistralai/Mixtral-8x22B-Instruct-v0.1で作った合成データ(Synthetic data)です。", + "source": "Hugging Face", + "score": 1.605631702670244e-05, + "project_name": "Evol-Alpaca-gen3-500" + }, + { + "url": "https://huggingface.co/datasets/stockmark/ner-wikipedia-dataset", + "downloads": 142, + "description": "Wikipediaを用いた日本語の固有表現抽出データセットGitHub: https://github.com/stockmarkteam/ner-wikipedia-dataset/LICENSE: CC-BY-SA 3.0Developed by Stockmark Inc.", + "source": "Hugging Face", + "score": 1.5944035089452775e-05, + "project_name": "ner-wikipedia-dataset" + }, + { + "url": "https://huggingface.co/datasets/izumi-lab/llm-japanese-dataset-vanilla", + "downloads": 140, + "description": "llm-japanese-dataset-vanillaLLM構築用の日本語チャットデータセットizumi-lab/llm-japanese-dataset から,日英翻訳のデータセット等を抜いたものです.", + "source": "Hugging Face", + "score": 1.571947121495344e-05, + "project_name": "llm-japanese-dataset-vanilla" + }, + { + "url": "https://huggingface.co/datasets/matsuxr/JaGovFaqs-22k", + "downloads": 135, + "description": "このデータセットについてこのデータは、日本の官公庁のWebサイトに掲載されている「よくある質問」を手作業で抽出し、インストラクション用のデータセットとしたものです。", + "source": "Hugging Face", + "score": 1.5158061528705102e-05, + "project_name": "JaGovFaqs-22k" + }, + { + "url": "https://huggingface.co/datasets/baobab-trees/wikipedia-human-retrieval-ja", + "downloads": 130, + "description": "Japanese Wikipedia Human Retrieval datasetThis is a Japanese question answereing dataset with retrieval on Wikipedia articlesby trained human workers.", + "source": "Hugging Face", + "score": 1.4596651842456765e-05, + "project_name": "wikipedia-human-retrieval-ja" + }, + { + "url": "https://huggingface.co/datasets/HachiML/alpaca_jp_python", + "downloads": 130, + "description": "alpaca_jp_pythonalpaca_jp_pythonは、Stanford Alpacaの手法mistralai/Mixtral-8x22B-Instruct-v0.1で作った合成データ(Synthetic data)です。", + "source": "Hugging Face", + "score": 1.4596651842456765e-05, + "project_name": "alpaca_jp_python" + }, + { + "url": "https://huggingface.co/datasets/nyanko7/danbooru2023", + "downloads": 123, + "description": "Danbooru2023:", + "source": "Hugging Face", + "score": 1.3810678281709092e-05, + "project_name": "danbooru2023" + }, + { + "url": "https://huggingface.co/datasets/kunishou/J-ResearchCorpus", + "downloads": 118, + "description": "J-ResearchCorpusUpdate:2024/3/16言語処理学会第30回年次大会(NLP2024)を含む、論文 1,343 本のデータを追加2024/2/25言語処理学会誌「自然言語処理」のうち CC-BY-4.0 で公開されている論文 360 本のデータを追加概要CC-BY-* ライセンスで公開されている日本語論文や学会誌等から抜粋した高品質なテキストのデータセットです。", + "source": "Hugging Face", + "score": 
1.3249268595460756e-05, + "project_name": "J-ResearchCorpus" + }, + { + "url": "https://huggingface.co/datasets/Kendamarron/jimba-instuction-1k-beta", + "downloads": 111, + "description": "cyberagent/calm2-7b-chatの出力を人手でチェック・修正することで作成した日本語Instructionデータセットです。", + "source": "Hugging Face", + "score": 1.2463295034713083e-05, + "project_name": "jimba-instuction-1k-beta" + }, + { + "url": "https://huggingface.co/datasets/ryota39/Aya_ja", + "downloads": 100, + "description": "Aya_jaこのデータセットはCohereForAI/aya_datasetの日本語インストラクションデータのみを抽出したデータセットです。", + "source": "Hugging Face", + "score": 1.1228193724966742e-05, + "project_name": "Aya_ja" + }, + { + "url": "https://huggingface.co/datasets/hpprc/en-ja-align", + "downloads": 99, + "description": "en-ja-align日英対訳文対応付けデータ(内山ら, 2003)として公開されている日英対訳文データセットです。", + "source": "Hugging Face", + "score": 1.1115911787717075e-05, + "project_name": "en-ja-align" + }, + { + "url": "https://huggingface.co/datasets/Verah/JParaCrawl-Filtered-English-Japanese-Parallel-Corpus", + "downloads": 92, + "description": "IntroductionThis is an LLM-filtered set of the first 1M rows from ntt's JParaCrawl v3 large English-Japanese parallel corpus.", + "source": "Hugging Face", + "score": 1.0329938226969403e-05, + "project_name": "JParaCrawl-Filtered-English-Japanese-Parallel-Corpus" + }, + { + "url": "https://huggingface.co/datasets/kunishou/amenokaku-code-instruct", + "downloads": 92, + "description": "Amenokaku-Code-InstructUpdate: 2023/12/27 データセットに JaxTon , プロになるJava のコードデータ 180 レコードを追加しました。", + "source": "Hugging Face", + "score": 1.0329938226969403e-05, + "project_name": "amenokaku-code-instruct" + }, + { + "url": "https://huggingface.co/datasets/p1atdev/ja-stackoverflow", + "downloads": 87, + "description": "ja-stackoverflow日本語版 Stack Overflow の スタック・オーバーフロー のデータダンプ をもとにデータを加工し、質問文と回答文のペアになるように調整した QA データセット。", + "source": "Hugging Face", + "score": 9.768528540721066e-06, + "project_name": "ja-stackoverflow" + }, + { + "url": "https://huggingface.co/datasets/tohoku-nlp/abc-multiple-choice", + "downloads": 87, + "description": "abc-multiple-choice Datasetabc-multiple-choice は、競技クイズの大会「abc」で使用された4択問題を元に作成された、多肢選択式の質問応答データセットです。", + "source": "Hugging Face", + "score": 9.768528540721066e-06, + "project_name": "abc-multiple-choice" + }, + { + "url": "https://huggingface.co/datasets/saldra/sakura_japanese_dataset", + "downloads": 87, + "description": "Sakura_dataset商用利用可能な超小規模高品質日本語データセット。", + "source": "Hugging Face", + "score": 9.768528540721066e-06, + "project_name": "sakura_japanese_dataset" + }, + { + "url": "https://huggingface.co/datasets/NilanE/ParallelFiction-Ja_En-100k", + "downloads": 78, + "description": "Dataset detailsEach entry in this dataset is a sentence-aligned Japanese web novel chapter and English fan translation.", + "source": "Hugging Face", + "score": 8.757991105474059e-06, + "project_name": "ParallelFiction-Ja_En-100k" + }, + { + "url": "https://huggingface.co/datasets/Mitsua/wikidata-parallel-descriptions-en-ja", + "downloads": 78, + "description": "Wikidata parallel descriptions en-jaParallel corpus for machine translation generated from wikidata dump (2024-05-06).", + "source": "Hugging Face", + "score": 8.757991105474059e-06, + "project_name": "wikidata-parallel-descriptions-en-ja" + }, + { + "url": "https://huggingface.co/datasets/yulanfmy/databricks-qa-ja", + "downloads": 77, + "description": "データセット概要手動で作成したDatabricksに関する質問と回答ペアの日本語データセットです。", + "source": "Hugging Face", + "score": 8.645709168224391e-06, + "project_name": "databricks-qa-ja" + }, +
{ + "url": "https://huggingface.co/datasets/hatakeyama-llm-team/AutoGeneratedJapaneseQA-CC", + "downloads": 74, + "description": "自動生成Q&Aデータソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。Common Crawlをもとに生成しています。 ", + "source": "Hugging Face", + "score": 8.30886335647539e-06, + "project_name": "AutoGeneratedJapaneseQA-CC" + }, + { + "url": "https://huggingface.co/datasets/range3/cc100-ja", + "downloads": 71, + "description": "range3/cc100-jaThis dataset consists of parquet files from the cc100 dataset with only the Japanese language extracted and sharded.", + "source": "Hugging Face", + "score": 7.972017544726388e-06, + "project_name": "cc100-ja" + }, + { + "url": "https://huggingface.co/datasets/snow_simplified_japanese_corpus", + "downloads": 70, + "description": "The corpus has 50,000 manually simplified and aligned sentences.", + "source": "Hugging Face", + "score": 7.85973560747672e-06, + "project_name": "snow_simplified_japanese_corpus" + }, + { + "url": "https://huggingface.co/datasets/llm-book/jawiki-paragraphs", + "downloads": 70, + "description": "GitHub リポジトリ singletongue/wikipedia-utils で公開されているデータセットを利用しています。", + "source": "Hugging Face", + "score": 7.85973560747672e-06, + "project_name": "jawiki-paragraphs" + }, + { + "url": "https://huggingface.co/datasets/hatakeyama-llm-team/AutoGeneratedJapaneseQA-other", + "downloads": 69, + "description": "自動生成Q&Aデータソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。チームで作成したデータおよび「Common Crawlをもとに生成しています。 ", + "source": "Hugging Face", + "score": 7.747453670227052e-06, + "project_name": "AutoGeneratedJapaneseQA-other" + }, + { + "url": "https://huggingface.co/datasets/larryvrh/WikiMatrix-v1-Ja_Zh-filtered", + "downloads": 67, + "description": "Filtered and modified version of Japanese/Chinese language pair data from WikiMatrix v1.Process steps:1.", + "source": "Hugging Face", + "score": 7.522889795727717e-06, + "project_name": "WikiMatrix-v1-Ja_Zh-filtered" + }, + { + "url": "https://huggingface.co/datasets/kanhatakeyama/AutoMultiTurnByMixtral8x22b", + "downloads": 67, + "description": "自動生成のマルチターンデータセットオープンなデータソースから、MaziyarPanahi/Mixtral-8x22B-Instruct-v0.1-GGUFを使ってQ&Aを自動生成したものです。関連コード一部の計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。データソースはじめの質問(q1)を、種々のデータソースから収集しました。その後のやりとりはすべて、Mixtralが生成しました。質問文については、元データのライセンスに準拠します。oasst2-33k-jaapache 2.0databricks-dolly-15k-jacc-by-sa-3.0minnadeCC0cyberagent/chatbot-arena-ja-calm2-7b-chat-experimentalcc-by-4.0", + "source": "Hugging Face", + "score": 7.522889795727717e-06, + "project_name": "AutoMultiTurnByMixtral8x22b" + }, + { + "url": "https://huggingface.co/datasets/kanhatakeyama/AutoWikiQA", + "downloads": 64, + "description": "Wikipedia日本語版からのQ&Aの自動生成Mixtral 8x22bのGGUF(5bit)をベースに、Wikipedia日本語版の記事から、自動生成コード1自動生成���ード2を使ってQ&Aを作成しました。計算には東京工業大学のスーパーコンピュータTSUBAME4.0を利用しました。注意回答にハルシネーション等が含まれている可能性があるので、フィルタリングをかける必要があるかもしれません。", + "source": "Hugging Face", + "score": 7.186043983978715e-06, + "project_name": "AutoWikiQA" + }, + { + "url": "https://huggingface.co/datasets/sudy-super/CoTangent", + "downloads": 57, + "description": "CoTangentは人手で作成された高品質でクリーンな100セットの日本語CoT用データセットです。", + "source": "Hugging Face", + "score": 6.400070423231043e-06, + "project_name": "CoTangent" + }, + { + "url": "https://huggingface.co/datasets/shi3z/ja_conv_wikipedia_orion14B_100K", + "downloads": 54, + "description": "AbstructThis is a multi-turn conversation dataset generated from the Japanese Wikipedia dataset using Orion14B-Chat.", + "source": "Hugging Face", + "score": 
6.063224611482041e-06, + "project_name": "ja_conv_wikipedia_orion14B_100K" + }, + { + "url": "https://huggingface.co/datasets/joujiboi/japanese-anime-speech", + "downloads": 53, + "description": "Japanese Anime Speech Dataset日本語はこちらjapanese-anime-speech is an audio-text dataset designed for the training of automatic speech recognition models.", + "source": "Hugging Face", + "score": 5.950942674232373e-06, + "project_name": "japanese-anime-speech" + }, + { + "url": "https://huggingface.co/datasets/kunishou/oasst2-chat-68k-ja", + "downloads": 52, + "description": "oasst2-135k-jaをチャット形式に変換したデータセットになります。", + "source": "Hugging Face", + "score": 5.838660736982706e-06, + "project_name": "oasst2-chat-68k-ja" + }, + { + "url": "https://huggingface.co/datasets/kunishou/oasst1-89k-ja", + "downloads": 52, + "description": "This dataset was created by automatically translating \"OpenAssistant/oasst1\" into Japanese.", + "source": "Hugging Face", + "score": 5.838660736982706e-06, + "project_name": "oasst1-89k-ja" + }, + { + "url": "https://huggingface.co/datasets/kubota/defamation-japanese-twitter", + "downloads": 50, + "description": "defamation_japanese_twitterTwitter日本語誹謗中傷検出データセットDataset SummarySNSにおける誹謗中傷検出のためのデータセットです.", + "source": "Hugging Face", + "score": 5.614096862483371e-06, + "project_name": "defamation-japanese-twitter" + }, + { + "url": "https://huggingface.co/datasets/hotchpotch/JaCWIR", + "downloads": 50, + "description": "JaCWIR: Japanese Casual Web IR - 日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット近年、大規模言語モデル(LLM)の台頭により、一般的な日本語を用いた自然な検索クエリで質問するユースケースが増えています。", + "source": "Hugging Face", + "score": 5.614096862483371e-06, + "project_name": "JaCWIR" + }, + { + "url": "https://huggingface.co/datasets/GENIAC-Team-Ozaki/WikiHowNFQA-ja_cleaned", + "downloads": 48, + "description": "Lurunchik/WikiHowNFQAを日本語に翻訳し、人手でクリーニングしたデータセットです。", + "source": "Hugging Face", + "score": 5.3895329879840364e-06, + "project_name": "WikiHowNFQA-ja_cleaned" + }, + { + "url": "https://huggingface.co/datasets/hpprc/jawiki", + "downloads": 42, + "description": "JaWikiWikipediaのHTML形式のダンプファイルから抽出したテキストデータセットです。", + "source": "Hugging Face", + "score": 4.715841364486032e-06, + "project_name": "jawiki" + }, + { + "url": "https://huggingface.co/datasets/turing-motors/Japanese-Heron-Bench", + "downloads": 41, + "description": "Japanese-Heron-BenchDataset DescriptionJapanese-Heron-Bench is a benchmark for evaluating Japanese VLMs (Vision-Language Models).", + "source": "Hugging Face", + "score": 4.603559427236364e-06, + "project_name": "Japanese-Heron-Bench" + }, + { + "url": "https://huggingface.co/datasets/bclavie/mmarco-japanese-hard-negatives", + "downloads": 38, + "description": "[Under Construction]This is a repository containing all the queries from the Japanese part of the MMarco dataset, the multilingual version of the MSMarco dataset.", + "source": "Hugging Face", + "score": 4.266713615487362e-06, + "project_name": "mmarco-japanese-hard-negatives" + }, + { + "url": "https://huggingface.co/datasets/kunishou/OpenMathInstruct-1-1.8m-ja", + "downloads": 36, + "description": "OpenMathInstruct-1 を日本語に自動翻訳した商用利用可能な180万件の指示チューニングデータセットになります。", + "source": "Hugging Face", + "score": 4.042149740988027e-06, + "project_name": "OpenMathInstruct-1-1.8m-ja" + }, + { + "url": "https://huggingface.co/datasets/dichmau/ja_vi_translation", + "downloads": 36, + "description": "Japanese-Vietnamese Translated Sentence Pairs.", + "source": "Hugging Face", + "score": 4.042149740988027e-06, + "project_name": "ja_vi_translation" + }, + { 
+ "url": "https://huggingface.co/datasets/kunishou/jp-effective-instructions", + "downloads": 35, + "description": "oasst1-89k-ja , databricks-dolly-15k-ja , hh-rlhf-49k-ja の中から JGLUE( JcommonsenseQA , MARC-ja , JSQuAD )の観点で高品質なデータセットに絞り込んだデータセットです。", + "source": "Hugging Face", + "score": 3.92986780373836e-06, + "project_name": "jp-effective-instructions" + }, + { + "url": "https://huggingface.co/datasets/llm-book/aio-passages-bpr-bert-base-japanese-v3", + "downloads": 26, + "description": "llm-book/aio-passages のデータセットに対して、llm-book/bert-base-japanese-v3-bpr-passage-encoder によるパッセージのバイナリベクトルが embeddings フィールドに追加されています。", + "source": "Hugging Face", + "score": 2.919330368491353e-06, + "project_name": "aio-passages-bpr-bert-base-japanese-v3" + }, + { + "url": "https://huggingface.co/datasets/augmxnt/shisa-pretrain-en-ja-v1", + "downloads": 25, + "description": "This pre-training dataset was created for shisa-base-7b-v1.It is primarily composed of a DSIR sampling of MADLAD-400 JA/EN tokens in a 90%/10% ratio.", + "source": "Hugging Face", + "score": 2.8070484312416856e-06, + "project_name": "shisa-pretrain-en-ja-v1" + }, + { + "url": "https://huggingface.co/datasets/oshizo/japanese-wikipedia-paragraphs", + "downloads": 22, + "description": "A slightly modified version of the parsing and chunking method for singletongue/wikipedia-utils.", + "source": "Hugging Face", + "score": 2.4702026194926834e-06, + "project_name": "japanese-wikipedia-paragraphs" + }, + { + "url": "https://huggingface.co/datasets/recruit-jp/japanese-image-classification-evaluation-dataset", + "downloads": 21, + "description": "recruit-jp/japanese-image-classification-evaluation-datasetOverviewDeveloped by: Recruit Co.", + "source": "Hugging Face", + "score": 2.357920682243016e-06, + "project_name": "japanese-image-classification-evaluation-dataset" + }, + { + "url": "https://huggingface.co/datasets/llm-book/ner-wikinews-dataset", + "downloads": 20, + "description": "固有表現ラベルはllm-book/ner-wikipedia-datasetと同様のものを採用しており、全部で8種類 (人名、法人名、地名、製品名、政治的組織名、施設名、その他の組織名、イベント名)あります。", + "source": "Hugging Face", + "score": 2.2456387449933482e-06, + "project_name": "ner-wikinews-dataset" + }, + { + "url": "https://huggingface.co/datasets/Kendamarron/pret-a-porter-instruction-v0.1", + "downloads": 19, + "description": "データセットについてオープンソースLLMの出力を人手でチェック・修正したinstructionにSwallow-MXでoutputを生成したデータセットです。", + "source": "Hugging Face", + "score": 2.133356807743681e-06, + "project_name": "pret-a-porter-instruction-v0.1" + }, + { + "url": "https://huggingface.co/datasets/hpprc/mmarco-ja", + "downloads": 16, + "description": "mmarcoデータセットのquery--passageのペアについて、queryをkeyとして重複を削除したデータセットです。", + "source": "Hugging Face", + "score": 1.7965109959946787e-06, + "project_name": "mmarco-ja" + }, + { + "url": "https://huggingface.co/datasets/hpprc/jsick", + "downloads": 14, + "description": "Dataset.", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "jsick" + }, + { + "url": "https://huggingface.co/datasets/polm-stability/jblimp", + "downloads": 14, + "description": "JBLiMPThis is the data from \"JBLiMP: Japanese Benchmark of Linguistic Minimal Pairs\" (Someya and Oseki, 2023).", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "jblimp" + }, + { + "url": "https://huggingface.co/datasets/llm-book/aio-passages", + "downloads": 14, + "description": "GitHub リポジトリ cl-tohoku/quiz-datasets で公開されているデータセットを利用しています。", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": 
"aio-passages" + }, + { + "url": "https://huggingface.co/datasets/svjack/pokemon-blip-captions-en-ja", + "downloads": 14, + "description": "Dataset used to train Pokémon text to image model, add a Japanese Column of Pokémon BLIP captionsBLIP generated captions for Pokémon images from Few Shot Pokémon dataset introduced by Towards Faster and Stabilized GAN Training for High-fidelity Few-shot Image Synthesis (FastGAN).", + "source": "Hugging Face", + "score": 1.5719471214953439e-06, + "project_name": "pokemon-blip-captions-en-ja" + }, + { + "url": "https://huggingface.co/datasets/fujiki/guanaco_ja", + "downloads": 12, + "description": "This is a Japanese portion of the Guanaco dataset.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "guanaco_ja" + }, + { + "url": "https://huggingface.co/datasets/y2lan/japan-law", + "downloads": 12, + "description": "Japanese LawsThis dataset comprises 8.75K law records retrieved from the official Japanese government website e-Gov. ", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "japan-law" + }, + { + "url": "https://huggingface.co/datasets/llm-jp/mbpp-ja", + "downloads": 12, + "description": "mbpp-jaThis repository provides a mbpp dataset translated from English into Japanese by LLM-jp, a collaborative project launched in Japan.", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "mbpp-ja" + }, + { + "url": "https://huggingface.co/datasets/Nexdata/English-Japanese_Parallel_Corpus_Data", + "downloads": 12, + "description": "It covers multiple fields such as tourism, medical treatment, daily life, news, etc. ", + "source": "Hugging Face", + "score": 1.3473832469960091e-06, + "project_name": "English-Japanese_Parallel_Corpus_Data" + }, + { + "url": "https://huggingface.co/datasets/Fhrozen/CABankSakuraCHJP", + "downloads": 11, + "description": "CABank Japanese CallHome CorpusParticipants: 120Type of Study: phone callLocation: United StatesMedia type: audioDOI: doi:10.21415/T5H59VWeb: https://ca.talkbank.org/access/CallHome/jpn.htmlCitation informationSome citation here.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "CABankSakuraCHJP" + }, + { + "url": "https://huggingface.co/datasets/sakusakumura/databricks-dolly-15k-ja-scored", + "downloads": 11, + "description": "For the English version, please click here.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "databricks-dolly-15k-ja-scored" + }, + { + "url": "https://huggingface.co/datasets/covid_tweets_japanese", + "downloads": 11, + "description": "The annotation is by majority decision by 5 - 10 crowd workers.", + "source": "Hugging Face", + "score": 1.2351013097463417e-06, + "project_name": "covid_tweets_japanese" + } +] \ No newline at end of file