File size: 2,557 Bytes
7a3bfd1
df38f5d
 
7a3bfd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import gradio as gr
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

_NER_PIPELINE = None  # built on first use; loading the model is slow


def _get_ner_pipeline():
    """Return the token-level NER pipeline, loading the model only once."""
    global _NER_PIPELINE
    if _NER_PIPELINE is None:
        model_name = "browndw/docusco-bert"
        tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForTokenClassification.from_pretrained(model_name)
        _NER_PIPELINE = pipeline("ner", model=model, tokenizer=tokenizer)
    return _NER_PIPELINE


def ner_tagging(text):
    """Tag each space-separated word of ``text`` with its predicted entity.

    The text is run through the ``browndw/docusco-bert`` "ner" pipeline and
    every word is labelled with the entity of the first labelled token that
    starts inside that word.

    Args:
        text: The raw input string.

    Returns:
        A list of ``(fragment, label)`` tuples: each word is followed by a
        ``(" ", None)`` separator, and ``label`` is ``None`` for words
        without an entity.  An empty ``text`` yields an empty list.
    """
    if not text:
        return []

    ner_results = _get_ner_pipeline()(text)

    # The pipeline reports one entry per (sub)token and, by default, leaves
    # out tokens labelled "O", so entries cannot be indexed by word position.
    # Align them with the words through their character offsets instead.
    # Offsets may be missing when the tokenizer is not a "fast" one.
    labelled = [tok for tok in ner_results if tok.get("start") is not None]

    output = []
    word_start = 0
    k = 0
    for word in text.split(" "):
        word_end = word_start + len(word)

        # Drop tokens that begin before this word (pieces of earlier words).
        while k < len(labelled) and labelled[k]["start"] < word_start:
            k += 1

        ent = None
        if k < len(labelled) and labelled[k]["start"] < word_end:
            ent = labelled[k]["entity"]
        if ent == "O":
            ent = None

        output.extend([(word, ent), (" ", None)])
        word_start = word_end + 1  # step over the single separating space

    return output

_TOKEN_CLASSIFIER = None  # built on first use; loading the model is slow


def _get_token_classifier():
    """Return the aggregated token-classification pipeline, built only once."""
    global _TOKEN_CLASSIFIER
    if _TOKEN_CLASSIFIER is None:
        model_name = "browndw/docusco-bert"
        tokenizer = AutoTokenizer.from_pretrained(model_name, add_prefix_space=True)
        model = AutoModelForTokenClassification.from_pretrained(model_name)
        _TOKEN_CLASSIFIER = pipeline(
            "token-classification",
            aggregation_strategy="simple",
            model=model,
            tokenizer=tokenizer,
        )
    return _TOKEN_CLASSIFIER


def get_entities(example):
    """Split ``example`` into consecutive labelled and unlabelled segments.

    The text is run through the ``browndw/docusco-bert`` token-classification
    pipeline (``aggregation_strategy="simple"``).  Each detected entity group
    becomes one segment labelled with its ``entity_group``; the text before,
    between and after the groups is returned as segments labelled ``None``.

    Args:
        example: The raw input string.

    Returns:
        A list of ``(text, label)`` tuples in reading order.  An empty
        ``example`` yields an empty list; text with no detected entities is
        returned as a single unlabelled segment.
    """
    if not example:
        return []

    results = _get_token_classifier()(example)

    output = []
    cursor = 0  # index of the first character not yet emitted
    for item in results:
        start, end = item["start"], item["end"]
        # Emit the unlabelled gap between the previous entity (or the start
        # of the text) and this entity.
        if start > cursor:
            output.append((example[cursor:start], None))
        output.append((example[start:end], item["entity_group"]))
        cursor = max(cursor, end)

    # Trailing text after the last entity, or the whole text if there is none.
    if cursor < len(example):
        output.append((example[cursor:], None))

    return output

def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for the given name."""
    prefix = "Hello "
    suffix = "!!"
    return prefix + name + suffix

# Demo UI: a text box feeds get_entities and its segments are shown through
# the 'highlight' output.  Two (identical) sample inputs are offered.
iface = gr.Interface(
    fn=get_entities,
    inputs="text",
    outputs=["highlight"],
    examples=[
        ["Jaws is a splendidly shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it."],
        ["Jaws is a splendidly shrewd cinematic equation which not only gives you one or two very nasty turns when you least expect them but, possibly more important, knows when to make you think another is coming without actually providing it."],
    ],
    title="Test of docusco-bert ",
)
# Launch the interface as soon as the module is executed.
iface.launch()