File size: 3,185 Bytes
01f754d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b802856
 
 
 
 
01f754d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/**
 * Posts an embedding request to `worker` and resolves with the worker's
 * final message.
 *
 * The request object sent to the worker is
 * `{ weightsURL, tokenizerURL, configURL, modelID, sentences }`.
 * Every message received while the request is pending is forwarded to
 * `updateStatus` (when provided), including the final "complete" or error
 * message.
 *
 * @param {object} worker - Object exposing `postMessage`, `addEventListener`
 *   and `removeEventListener` (presumably a Web Worker — confirm with caller).
 * @param {string} weightsURL - Forwarded verbatim to the worker.
 * @param {string} tokenizerURL - Forwarded verbatim to the worker.
 * @param {string} configURL - Forwarded verbatim to the worker.
 * @param {string} modelID - Forwarded verbatim to the worker.
 * @param {*} sentences - Forwarded verbatim to the worker.
 * @param {?function(object): void} [updateStatus=null] - Called with
 *   `event.data` for each message received.
 * @returns {Promise<object>} Resolves with `event.data` of the message whose
 *   `status` is `"complete"`; rejects with an `Error` built from
 *   `event.data.error` when a message carries an `error` key.
 */
export async function getEmbeddings(
  worker,
  weightsURL,
  tokenizerURL,
  configURL,
  modelID,
  sentences,
  updateStatus = null
) {
  return new Promise((resolve, reject) => {
    function messageHandler(event) {
      if ("error" in event.data) {
        worker.removeEventListener("message", messageHandler);
        reject(new Error(event.data.error));
      }
      if (event.data.status === "complete") {
        worker.removeEventListener("message", messageHandler);
        resolve(event.data);
      }
      if (updateStatus) updateStatus(event.data);
    }

    // Subscribe before posting so that a reply can never be missed, even if
    // the worker (or a test double) answers synchronously from postMessage.
    worker.addEventListener("message", messageHandler);
    try {
      worker.postMessage({
        weightsURL,
        tokenizerURL,
        configURL,
        modelID,
        sentences,
      });
    } catch (error) {
      // The request never left; do not leave a dangling listener behind.
      worker.removeEventListener("message", messageHandler);
      throw error;
    }
  });
}

// Registry of supported embedding models, keyed by the id passed to
// getModelInfo(). Each entry holds:
//   base_url        - URL prefix to which file names such as "config.json"
//                     are appended (must end with "/").
//   search_prefix   - text associated with search queries for this model
//                     (presumably prepended by the caller — confirm).
//   document_prefix - text associated with documents/passages for this model
//                     (presumably prepended by the caller — confirm).
// The E5 entries use "query: " / "passage: " including the trailing space,
// so the prefix is separated from the text that follows it.
const MODELS = {
  intfloat_e5_small_v2: {
    base_url: "https://huggingface.co/intfloat/e5-small-v2/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  intfloat_e5_base_v2: {
    base_url: "https://huggingface.co/intfloat/e5-base-v2/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  intfloat_multilingual_e5_small: {
    base_url:
      "https://huggingface.co/intfloat/multilingual-e5-small/resolve/main/",
    search_prefix: "query: ",
    document_prefix: "passage: ",
  },
  // The entries below resolve against a pull-request ref (refs/pr/N)
  // instead of "main".
  sentence_transformers_all_MiniLM_L6_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/refs%2Fpr%2F21/",
    search_prefix: "",
    document_prefix: "",
  },
  sentence_transformers_all_MiniLM_L12_v2: {
    base_url:
      "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/refs%2Fpr%2F4/",
    search_prefix: "",
    document_prefix: "",
  },
  gte_tiny: {
    base_url: "https://huggingface.co/TaylorAI/gte-tiny/resolve/refs%2Fpr%2F2/",
    search_prefix: "",
    document_prefix: "",
  },
};
/**
 * Builds the download URLs and text prefixes for one entry of MODELS.
 *
 * @param {string} id - Key of the model in MODELS.
 * @returns {{modelURL: string, configURL: string, tokenizerURL: string,
 *   search_prefix: string, document_prefix: string}} The entry's base URL
 *   joined with "model.safetensors", "config.json" and "tokenizer.json",
 *   plus its two prefixes copied as-is.
 * @throws {TypeError} When `id` is not a key of MODELS (the lookup yields
 *   undefined and its properties cannot be read).
 */
export function getModelInfo(id) {
  const { base_url, search_prefix, document_prefix } = MODELS[id];
  const fileURL = (fileName) => base_url + fileName;

  return {
    modelURL: fileURL("model.safetensors"),
    configURL: fileURL("config.json"),
    tokenizerURL: fileURL("tokenizer.json"),
    search_prefix,
    document_prefix,
  };
}

/**
 * Computes the cosine similarity of two numeric vectors: their dot product
 * divided by the product of their Euclidean norms.
 *
 * The dot product is taken over the indices of `vec1`; the caller is
 * expected to pass vectors of equal length.
 *
 * @param {ArrayLike<number>} vec1 - First vector (must provide `forEach`).
 * @param {ArrayLike<number>} vec2 - Second vector (must provide `forEach`).
 * @returns {number} The similarity; NaN when either norm is zero.
 */
export function cosineSimilarity(vec1, vec2) {
  let dotProduct = 0;
  let squaredNorm1 = 0;
  let squaredNorm2 = 0;

  vec1.forEach((component, index) => {
    dotProduct += component * vec2[index];
    squaredNorm1 += component * component;
  });
  vec2.forEach((component) => {
    squaredNorm2 += component * component;
  });

  const norm1 = Math.sqrt(squaredNorm1);
  const norm2 = Math.sqrt(squaredNorm2);
  return dotProduct / (norm1 * norm2);
}
/**
 * Fetches the plain-text extract of an English Wikipedia article through
 * the MediaWiki `action=query&prop=extracts` API.
 *
 * Failures (network error, unexpected response shape, missing or empty
 * extract) are logged with `console.error` and are NOT rethrown; the
 * returned promise then resolves with `undefined`.
 *
 * @param {string} article - Raw article title. It is percent-encoded here,
 *   so titles containing `&`, `#`, `+`, `?` or spaces are sent intact; do
 *   not pre-encode it.
 * @returns {Promise<string|undefined>} The extract text, or `undefined`
 *   when it could not be obtained.
 */
export async function getWikiText(article) {
  // thanks to wikipedia for the API
  // Encode the title so reserved characters cannot terminate or split the
  // `titles` parameter. The variable is deliberately not named `URL`, which
  // would shadow the global URL constructor.
  const title = encodeURIComponent(article);
  const url = `https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exlimit=1&titles=${title}&explaintext=1&exsectionformat=plain&format=json&origin=*`;

  try {
    const response = await fetch(url, {
      method: "GET",
      headers: {
        Accept: "application/json",
      },
    });
    const data = await response.json();

    // Only the first page of the result is used (the request asks for
    // exlimit=1).
    const pages = data.query.pages;
    const pageId = Object.keys(pages)[0];
    const extract = pages[pageId].extract;
    if (extract === undefined || extract === "") {
      throw new Error("No article found");
    }
    return extract;
  } catch (error) {
    // Best-effort contract kept from the original: log and yield undefined.
    console.error("Error:", error);
    return undefined;
  }
}