PereLluis13 committed on
Commit
bccc38b
1 Parent(s): ab249d6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -6
README.md CHANGED
@@ -1,6 +1,8 @@
1
  ---
2
  language:
3
  - en
 
 
4
  tags:
5
  - seq2seq
6
  license: cc-by-nc-sa-4.0
@@ -9,6 +11,11 @@ To use the model with a pipeline:
9
  ```python3
10
  from transformers import pipeline
11
 
 
 
 
 
 
12
  def extract_triplets(text):
13
  triplets = []
14
  relation = ''
@@ -36,10 +43,6 @@ def extract_triplets(text):
36
  relation += ' ' + token
37
  triplets.append((subject, relation, object_))
38
  return triplets
39
-
40
- triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
41
- extracted_text = triplet_extractor("Punta Cana is a resort town in the municipality of Higüey, in La Altagracia Province, the easternmost province of the Dominican Republic.Punta Cana is a resort town in the municipality of Higüey, in La Altagracia Province, the easternmost province of the Dominican Republic.")["generated_text"]
42
-
43
  extracted_triplets = extract_triplets(extracted_text)
44
  print(extracted_triplets)
45
  ```
@@ -78,8 +81,8 @@ def extract_triplets(text):
78
  return triplets
79
 
80
  # Load model and tokenizer
81
- tokenizer = AutoTokenizer.from_pretrained("model/rebel-large")
82
- model = AutoModelForSeq2SeqLM.from_pretrained("model/rebel-large")
83
  gen_kwargs = {
84
  "max_length": 256,
85
  "length_penalty": 0,
 
1
  ---
2
  language:
3
  - en
4
+ widget:
5
+ - text: "generate question: <hl> 42 <hl> is the answer to life, the universe and everything. </s>"
6
  tags:
7
  - seq2seq
8
  license: cc-by-nc-sa-4.0
 
11
  ```python3
12
  from transformers import pipeline
13
 
14
+ triplet_extractor = pipeline('text2text-generation', model='Babelscape/rebel-large', tokenizer='Babelscape/rebel-large')
15
+ # Decode the generated token ids with the tokenizer ourselves, since the special tokens must be kept in the text that gets parsed.
16
+ extracted_text = triplet_extractor.tokenizer.decode(triplet_extractor("Punta Cana is a resort town in the municipality of Higuey, in La Altagracia Province, the eastern most province of the Dominican Republic", return_tensors=True, return_text=False)[0]["generated_token_ids"])
17
+ print(extracted_text)
18
+ # Function to parse the generated text and extract the triplets
19
  def extract_triplets(text):
20
  triplets = []
21
  relation = ''
 
43
  relation += ' ' + token
44
  triplets.append((subject, relation, object_))
45
  return triplets
 
 
 
 
46
  extracted_triplets = extract_triplets(extracted_text)
47
  print(extracted_triplets)
48
  ```
 
81
  return triplets
82
 
83
  # Load model and tokenizer
84
+ tokenizer = AutoTokenizer.from_pretrained("Babelscape/rebel-large")
85
+ model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/rebel-large")
86
  gen_kwargs = {
87
  "max_length": 256,
88
  "length_penalty": 0,