File size: 2,407 Bytes
828c90a
 
 
 
0d78964
828c90a
0d78964
 
828c90a
 
 
 
 
 
 
0d78964
 
 
 
828c90a
 
 
 
 
bbe9324
 
 
 
0d78964
828c90a
 
 
0d78964
828c90a
 
 
 
 
 
 
 
 
 
 
0d78964
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
828c90a
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os

import cohere
import gradio as gr
import numpy as np
import pinecone
import torch
from transformers import AutoModel, AutoTokenizer

# Initialize the Cohere client and the Pinecone connection from environment
# variables.  Empty-string fallbacks mean a missing key fails at call time
# rather than at import time.
co = cohere.Client(os.environ.get('COHERE_API', ''))
pinecone.init(
    api_key=os.environ.get('PINECONE_API', ''),
    environment=os.environ.get('PINECONE_ENV', '')
)

# Encoder + tokenizer used for the SGPT-style (weighted mean pooled) embedding.
model = AutoModel.from_pretrained('monsoon-nlp/gpt-nyc')
tokenizer = AutoTokenizer.from_pretrained('monsoon-nlp/gpt-nyc')
# Zero padding appended to the pooled embedding so its length matches the
# 4096-dim vectors stored in the Pinecone index.
# NOTE(review): assumes the model's hidden size is 1024 — confirm.
zos = np.zeros(4096-1024).tolist()

def list_me(matches):
    """Render Pinecone query matches as HTML ``<li>`` links to AskNYC posts.

    Args:
        matches: iterable of match dicts, each with an ``'id'`` and a
            ``'metadata'`` dict containing ``'question'`` and optionally
            ``'body'``.

    Returns:
        Concatenated ``<li>...</li>`` HTML string.  Every ``'/mini'``
        substring is rewritten to ``'/'`` so ids stored under the "mini"
        namespace resolve to real reddit comment URLs.
    """
    parts = []
    for match in matches:
        # NOTE(review): question/body are interpolated into HTML without
        # escaping — confirm the indexed metadata can never contain markup.
        item = (
            '<li><a target="_blank" href="https://reddit.com/r/AskNYC/comments/'
            + match['id'] + '">'
            + match['metadata']['question']
            + '</a>'
        )
        if 'body' in match['metadata']:
            item += '<br/>' + match['metadata']['body']
        parts.append(item + '</li>')
    # Join once instead of quadratic += string concatenation.
    return ''.join(parts).replace('/mini', '/')


def query(question):
    """Embed *question* two ways and return the nearest AskNYC posts as HTML.

    Runs a Cohere 'large' embedding and an SGPT-style position-weighted
    mean-pooled embedding of the question, queries the same Pinecone index
    with each (the SGPT query hits the "mini" namespace), and renders the
    top-2 matches of both as an HTML fragment via ``list_me``.
    """
    # Cohere search
    response = co.embed(
        model='large',
        texts=[question],
    )
    index = pinecone.Index("gptnyc")
    closest = index.query(
        top_k=2,
        include_metadata=True,
        vector=response.embeddings[0],
    )

    # SGPT search
    batch_tokens = tokenizer(
        [question],
        padding=True,
        truncation=True,
        return_tensors="pt"
    )
    with torch.no_grad():
        last_hidden_state = model(**batch_tokens, output_hidden_states=True, return_dict=True).last_hidden_state
    # Position weights 1..seq_len broadcast over the hidden states: later
    # tokens count more in the pooled embedding (SGPT weighted mean pooling).
    weights = (
        torch.arange(start=1, end=last_hidden_state.shape[1] + 1)
        .unsqueeze(0)
        .unsqueeze(-1)
        .expand(last_hidden_state.size())
        .float().to(last_hidden_state.device)
    )
    # Attention mask expanded to hidden size so padding tokens contribute 0.
    input_mask_expanded = (
        batch_tokens["attention_mask"]
        .unsqueeze(-1)
        .expand(last_hidden_state.size())
        .float()
    )
    # Weighted mean over the sequence dimension.
    sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded * weights, dim=1)
    sum_mask = torch.sum(input_mask_expanded * weights, dim=1)
    embeddings = sum_embeddings / sum_mask
    # Pad with zeros (``zos``) to match the index dimension and query the
    # "mini" namespace, where SGPT vectors are stored.
    closest_sgpt = index.query(
        top_k=2,
        include_metadata=True,
        namespace="mini",
        vector=embeddings[0].tolist() + zos,
    )

    return '<h3>Cohere</h3><ul>' + list_me(closest['matches']) + '</ul><h3>SGPT</h3><ul>' + list_me(closest_sgpt['matches']) + '</ul>'


# Minimal Gradio UI: free-text question in, HTML search results out.
iface = gr.Interface(
    fn=query,
    inputs="text",
    outputs="html"
)
# launch() starts the web server (this file runs as a script).
iface.launch()