gaduhhartawan
committed on
Commit
•
6704d43
1
Parent(s):
db6ec24
Update README.md
Browse files
README.md
CHANGED
@@ -35,4 +35,36 @@ See demo model here [notebook](https://colab.research.google.com/drive/1bcqS42M3
|
|
35 |
- Transformers 4.40.0
|
36 |
- Pytorch 2.2.1+cu121
|
37 |
- Datasets 2.19.0
|
38 |
-
- Tokenizers 0.19.1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
- Transformers 4.40.0
|
36 |
- Pytorch 2.2.1+cu121
|
37 |
- Datasets 2.19.0
|
38 |
+
- Tokenizers 0.19.1
|
39 |
+
|
40 |
+
## Usage
|
41 |
+
```python
|
42 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
43 |
+
|
44 |
+
# Load model and tokenizer
|
45 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("gaduhhartawan/indobart-base")
|
46 |
+
tokenizer = AutoTokenizer.from_pretrained("gaduhhartawan/indobart-base")
|
47 |
+
|
48 |
+
# Input article for summarization
|
49 |
+
ARTICLE_TO_SUMMARIZE = "lorem ipsum..."
|
50 |
+
|
51 |
+
# Generate summary
|
52 |
+
input_ids = tokenizer.encode(ARTICLE_TO_SUMMARIZE, return_tensors='pt')
|
53 |
+
summary_ids = model.generate(input_ids,
|
54 |
+
min_length=30,
|
55 |
+
max_length=150,
|
56 |
+
num_beams=2,
|
57 |
+
repetition_penalty=2.0,
|
58 |
+
length_penalty=0.8,
|
59 |
+
early_stopping=True,
|
60 |
+
no_repeat_ngram_size=2,
|
61 |
+
use_cache=True,
|
62 |
+
do_sample=True,
|
63 |
+
temperature=0.7,
|
64 |
+
top_k=50,
|
65 |
+
top_p=0.95)
|
66 |
+
|
67 |
+
# Decode the summary
|
68 |
+
summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
69 |
+
print("Summary: ", summary_text)
|
70 |
+
```
|