rrg92 committed on
Commit
51833bf
1 Parent(s): 75128ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -3,14 +3,14 @@ import torch.nn.functional as F
3
  from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
4
  import gradio as gr
5
  import spaces
 
6
 
7
  # neuralmind/bert-base-portuguese-cased
8
- ModelName = "papluca/xlm-roberta-base-language-detection"
9
  model = AutoModel.from_pretrained(ModelName)
10
  tokenizer = AutoTokenizer.from_pretrained(ModelName, do_lower_case=False)
11
-
12
- # processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
13
- # vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
14
 
15
  # tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
16
  # text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
@@ -23,7 +23,7 @@ def mean_pooling(model_output, attention_mask):
23
 
24
  @spaces.GPU
25
  def TxtEmbed(text):
26
- import torch
27
 
28
 
29
  input_ids = tokenizer.encode(text, return_tensors='pt')
@@ -31,20 +31,20 @@ def TxtEmbed(text):
31
  with torch.no_grad():
32
  outs = model(input_ids)
33
  encoded = outs[0][0, 1:-1] # Ignore [CLS] and [SEP] special tokens
34
-
35
 
36
 
37
- # sentences = [text]
38
- # encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
 
 
 
 
 
 
 
39
  #
40
- # with torch.no_grad():
41
- # model_output = text_model(**encoded_input)
42
- #
43
- # text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
44
- # text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
45
- # text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
46
-
47
- return (encoded.tolist())[0];
48
 
49
 
50
 
 
3
  from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
4
  import gradio as gr
5
  import spaces
6
+ import torch
7
 
8
  # neuralmind/bert-base-portuguese-cased
9
+ ModelName = "neuralmind/bert-base-portuguese-cased"
10
  model = AutoModel.from_pretrained(ModelName)
11
  tokenizer = AutoTokenizer.from_pretrained(ModelName, do_lower_case=False)
12
+ processor = AutoImageProcessor.from_pretrained("nomic-ai/nomic-embed-vision-v1.5")
13
+ vision_model = AutoModel.from_pretrained("nomic-ai/nomic-embed-vision-v1.5", trust_remote_code=True)
 
14
 
15
  # tokenizer = AutoTokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1.5')
16
  # text_model = AutoModel.from_pretrained('nomic-ai/nomic-embed-text-v1.5', trust_remote_code=True)
 
23
 
24
  @spaces.GPU
25
  def TxtEmbed(text):
26
+
27
 
28
 
29
  input_ids = tokenizer.encode(text, return_tensors='pt')
 
31
  with torch.no_grad():
32
  outs = model(input_ids)
33
  encoded = outs[0][0, 1:-1] # Ignore [CLS] and [SEP] special tokens
34
+ return (encoded.tolist())[0];
35
 
36
 
37
+ #sentences = [text]
38
+ #encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
39
+ #
40
+ #with torch.no_grad():
41
+ # model_output = text_model(**encoded_input)
42
+ #
43
+ #text_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
44
+ #text_embeddings = F.layer_norm(text_embeddings, normalized_shape=(text_embeddings.shape[1],))
45
+ #text_embeddings = F.normalize(text_embeddings, p=2, dim=1)
46
  #
47
+ # return (text_embeddings.tolist)[0]
 
 
 
 
 
 
 
48
 
49
 
50