
MetaIE

MetaIE is a meta-model distilled from GPT-4 for information extraction. It is an intermediate checkpoint (based on roberta-large, ~354M parameters) that transfers well to a wide range of downstream information extraction tasks. The model can also be tested directly with different label-to-span matchings, as shown in the following example:

from transformers import AutoModelForTokenClassification, AutoTokenizer
import torch

# Fall back to CPU if no GPU is available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
path = "KomeijiForce/roberta-large-metaie-gpt4"
tokenizer = AutoTokenizer.from_pretrained(path)
tagger = AutoModelForTokenClassification.from_pretrained(path).to(device)

def find_sequences(lst):
    # Decode (start, end) token spans from the predicted tag ids:
    # 0 marks the beginning of a span, 1 marks its continuation.
    sequences = []
    i = 0
    while i < len(lst):
        if lst[i] == 0:
            start = i
            end = i
            i += 1
            while i < len(lst) and lst[i] == 1:
                end = i
                i += 1
            sequences.append((start, end + 1))
        else:
            i += 1
    return sequences

def is_sublst(lst1, lst2):
    # Return True if lst2 occurs as a contiguous sublist of lst1.
    for idx in range(len(lst1) - len(lst2) + 1):
        if lst1[idx:idx + len(lst2)] == lst2:
            return True
    return False

words = ["John", "Smith", "loves", "his", "hometown", ",", "Los", "Angeles", "."]

for prefix in ["Person", "Location", "John Smith births in", "Positive opinion"]:

    # Prepend the label as a prefix, e.g. "Person : John Smith loves ..."
    sentence = " ".join([prefix, ":"] + words)

    inputs = tokenizer(sentence, return_tensors="pt").to(device)
    tag_predictions = tagger(**inputs).logits[0].argmax(-1)

    # Decode the predicted spans, then keep only spans that align with whole input words
    predictions = [tokenizer.decode(inputs.input_ids[0, seq[0]:seq[1]]).strip() for seq in find_sequences(tag_predictions)]
    predictions = [prediction for prediction in predictions if is_sublst(words, prediction.split())]

    print(prefix, predictions)

The output will be:

Person ['John Smith']
Location ['Los Angeles']
John Smith births in ['Los Angeles']
Positive opinion ['loves his hometown']