initial commit

Browse files

Files changed (7) hide show

README.md +59 -0
config.json +58 -0
merges.txt +0 -0
pytorch_model.bin +3 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
vocab.json +0 -0

README.md CHANGED Viewed

@@ -1,3 +1,62 @@
 ---
 license: mit
 ---

 ---
+language: en
+datasets:
+- vblagoje/lfqa
+- vblagoje/lfqa_support_docs
 license: mit
 ---
+## Introduction
+See the [blog post](https://towardsdatascience.com/long-form-qa-beyond-eli5-an-updated-dataset-and-approach-319cb841aabb) for more details.
+## Usage
+```python
+import torch
+from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM
+model_name = "vblagoje/bart_lfqa"
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+model = model.to(device)
+# it all starts with a question/query
+query = "Why does water heated to room temperature feel colder than the air around it?"
+# given the question above, suppose the documents below were retrieved from some document store
+documents = ["when the skin is completely wet. The body continuously loses water by...",
+             "at greater pressures. There is an ambiguity, however, as to the meaning of the terms 'heating' and 'cooling'...",
+             "are not in a relation of thermal equilibrium, heat will flow from the hotter to the colder, by whatever pathway...",
+             "air condition and moving along a line of constant enthalpy toward a state of higher humidity. A simple example ...",
+             "Thermal contact conductance In physics, thermal contact conductance is the study of heat conduction between solid ..."]
+# concatenate question and support documents into BART input
+conditioned_doc = "<P> " + " <P> ".join([d for d in documents])
+query_and_docs = "question: {} context: {}".format(query, conditioned_doc)
+model_input = tokenizer(query_and_docs, truncation=True, padding=True, return_tensors="pt")
+generated_answers_encoded = model.generate(input_ids=model_input["input_ids"].to(device),
+                                           attention_mask=model_input["attention_mask"].to(device),
+                                           min_length=64,
+                                           max_length=256,
+                                           do_sample=False,
+                                           early_stopping=True,
+                                           num_beams=8,
+                                           temperature=1.0,
+                                           top_k=None,
+                                           top_p=None,
+                                           eos_token_id=tokenizer.eos_token_id,
+                                           no_repeat_ngram_size=3,
+                                           num_return_sequences=1)
+tokenizer.batch_decode(generated_answers_encoded, skip_special_tokens=True,clean_up_tokenization_spaces=True)
+# below is the abstractive answer generated by the model
+["When you heat water to room temperature, it loses heat to the air around it. When you cool it down, it gains heat back from the air, which is why it feels colder than the air surrounding it. It's the same reason why you feel cold when you turn on a fan. The air around you is losing heat, and the water is gaining heat."]
+```
+## Author
+- Vladimir Blagojevic: `dovlex [at] gmail.com` | [Twitter](https://twitter.com/vladblagoje) | [LinkedIn](https://www.linkedin.com/in/blagojevicvladimir/)

config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_position_embeddings": 1024,
+  "model_type": "bart",
+  "normalize_before": false,
+  "normalize_embedding": true,
+  "num_hidden_layers": 12,
+  "output_past": false,
+  "pad_token_id": 1,
+  "prefix": " ",
+  "scale_embedding": false,
+  "static_position_embeddings": false,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 142,
+      "min_length": 56,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4
+    }
+  },
+  "vocab_size": 50265
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75eaab4cbd1dac20d21abb3ed2be6464a761983b79aad307ac38c39e7b22296b
+size 1625557313

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


+{"model_max_length": 1024}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff