using Microsoft.ML.Tokenizers;
using System;
using System.IO;
using System.Linq;
using System.Net.Http;

namespace StableDiffusionV2
{
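    /// <summary>
    /// Wraps the Microsoft.ML.Tokenizers BPE tokenizer with the conventions CLIP
    /// expects for Stable Diffusion v2 text prompts: a start-of-text token, an
    /// end-of-text token, and zero-padding to a fixed context length.
    /// </summary>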
    internal class ClipTokenizer
    {
        private readonly Tokenizer _tokenizer;
        private readonly int _startToken;
        private readonly int _endToken;

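        /// <summary>
        /// Loads the BPE vocabulary and merge rules, downloading them from the
        /// Hugging Face Hub on first use if they are not already on disk. The
        /// default ids 49406 and 49407 are CLIP's start-of-text and end-of-text
        /// tokens.
        /// </summary>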
        public ClipTokenizer(string vocabPath, string mergesPath, int startToken = 49406, int endToken = 49407)
        {
            if (!File.Exists(vocabPath) || !File.Exists(mergesPath))
            {
                // Download the vocabulary and merge rules for the OpenCLIP model used
                // by Stable Diffusion v2:
                // https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/vocab.json
                // https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/merges.txt
                using (var client = new HttpClient())
                {
                    Console.WriteLine("Downloading vocab.json and merges.txt...");

                    var vocab = client.GetStringAsync("https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/vocab.json").GetAwaiter().GetResult();
                    File.WriteAllText(vocabPath, vocab);

                    var merges = client.GetStringAsync("https://huggingface.co/laion/CLIP-ViT-H-14-laion2B-s32B-b79K/raw/main/merges.txt").GetAwaiter().GetResult();
                    File.WriteAllText(mergesPath, merges);
                }
            }

            // CLIP's BPE vocabulary marks word boundaries with a "</w>" suffix.
            _tokenizer = new Tokenizer(new Bpe(vocabPath, mergesPath, endOfWordSuffix: "</w>"));
            _startToken = startToken;
            _endToken = endToken;
        }

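        /// <summary>
        /// Encodes the prompt into CLIP token ids, bracketed by the start and end
        /// tokens and zero-padded to maxTokens (77 is CLIP's context length).
        /// </summary>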
        public int[] Tokenize(string text, int maxTokens = 77)
        {
            var res = _tokenizer.Encode(text);

            // Truncate long prompts to leave room for the start and end tokens,
            // then build [startToken, ...ids..., endToken] and zero-pad to maxTokens.
            // (Padding goes after the end token, not before it.)
            var ids = res.Ids.Take(maxTokens - 2).ToArray();
            var tokens = new[] { _startToken }
                .Concat(ids)
                .Concat(new[] { _endToken })
                .Concat(Enumerable.Repeat(0, maxTokens - ids.Length - 2))
                .ToArray();
            return tokens;
        }
    }
}
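
// A minimal usage sketch (illustrative only; the file names and prompt below are
// assumptions, not part of the class):
//
//   var tokenizer = new StableDiffusionV2.ClipTokenizer("vocab.json", "merges.txt");
//   int[] ids = tokenizer.Tokenize("a photo of an astronaut riding a horse");
//   // ids.Length == 77: { 49406, ...prompt token ids..., 49407, 0, 0, ... }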