using Microsoft.ML.Tokenizers;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

namespace StableDiffusionV2
{
    /// <summary>
    /// BPE tokenizer for CLIP text prompts (Stable Diffusion v2 text encoder).
    /// Downloads the laion CLIP-ViT-H-14 vocabulary and merge rules on first use
    /// if they are not already present on disk.
    /// </summary>
    internal class ClipTokenizer
    {
        private readonly Tokenizer _tokenizer;
        private readonly int _startToken;
        private readonly int _endToken;

        /// <summary>
        /// Creates the tokenizer, downloading <c>vocab.json</c> and <c>merges.txt</c>
        /// from Hugging Face when either file is missing.
        /// </summary>
        /// <param name="vocabPath">Path to the vocab.json file (created if absent).</param>
        /// <param name="mergesPath">Path to the merges.txt file (created if absent).</param>
        /// <param name="startToken">Id of the start-of-text marker (CLIP default 49406).</param>
        /// <param name="endToken">Id of the end-of-text marker (CLIP default 49407).</param>
        public ClipTokenizer(string vocabPath, string mergesPath, int startToken = 49406, int endToken = 49407)
        {
            if (!File.Exists(vocabPath) || !File.Exists(mergesPath))
            {
                // One-time download from:
                // https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/vocab.json
                // https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/merges.txt
                using (var client = new HttpClient())
                {
                    Console.WriteLine("download vocab.json and merges.txt");

                    // Constructors cannot be async, so block on the download here.
                    // GetAwaiter().GetResult() (instead of .Result) surfaces the original
                    // exception rather than an AggregateException, and we no longer
                    // wrap the Task itself in a `using` (disposing a Task is not meaningful).
                    string vocab = client
                        .GetStringAsync("https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/vocab.json")
                        .GetAwaiter().GetResult();
                    File.WriteAllText(vocabPath, vocab);

                    string merges = client
                        .GetStringAsync("https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/merges.txt")
                        .GetAwaiter().GetResult();
                    File.WriteAllText(mergesPath, merges);
                }
            }

            // NOTE(review): endOfWordSuffix is "" here, but CLIP vocabularies conventionally
            // mark word endings with "</w>" — confirm this matches the reference tokenizer.
            _tokenizer = new Tokenizer(new Bpe(vocabPath, mergesPath, endOfWordSuffix: ""));
            _startToken = startToken;
            _endToken = endToken;
        }

        /// <summary>
        /// Encodes <paramref name="text"/> into a fixed-length token sequence:
        /// [start, ids..., 0-padding..., end], always exactly <paramref name="maxTokens"/> long.
        /// Prompts that encode to more than <c>maxTokens - 2</c> ids are truncated.
        /// </summary>
        /// <param name="text">The prompt to tokenize.</param>
        /// <param name="maxTokens">Total sequence length including start/end markers (CLIP default 77).</param>
        /// <returns>An int array of length <paramref name="maxTokens"/>.</returns>
        public int[] Tokenize(string text, int maxTokens = 77)
        {
            var ids = _tokenizer.Encode(text).Ids;

            // Reserve two slots for the start/end markers. The original code computed
            // Enumerable.Repeat(0, maxTokens - ids.Count - 2), which throws
            // ArgumentOutOfRangeException for prompts longer than maxTokens - 2 ids
            // and never truncated them; clamp and truncate instead.
            int capacity = Math.Max(maxTokens - 2, 0);
            var body = ids.Take(capacity).ToList();
            int padCount = capacity - body.Count;

            // Layout preserved from the original: padding (token 0) precedes the end marker.
            return new[] { _startToken }
                .Concat(body)
                .Concat(Enumerable.Repeat(0, padCount))
                .Concat(new[] { _endToken })
                .ToArray();
        }
    }
}