dthulke committed on
Commit
95bbefd
1 Parent(s): ffc253b

adds chat template and updates README

Browse files
Files changed (2) hide show
  1. README.md +8 -2
  2. tokenizer_config.json +2 -1
README.md CHANGED
@@ -12,6 +12,9 @@ co2_eq_emissions:
12
  training_type: "pre-training"
13
  geographical_location: "Washington, USA"
14
  hardware_used: "8x NVIDIA H100 HBM"
 
 
 
15
  ---
16
  # ClimateGPT-7B-FSC
17
 
@@ -38,7 +41,7 @@ Explore the model lineage [here](https://huggingface.co/spaces/EQTYLab/lineage-e
38
  - **Context length:** 4K tokens
39
  - **Input:** Text-only data
40
  - **Output:** Model generates text only
41
- - **Paper:** [Download](https://shareddatastgacct.blob.core.windows.net/shared-data/climategpt-v1-publication.pdf)
42
  - **Website:** [eci.io](https://eci.io)
43
 
44
  ## Uses
@@ -72,7 +75,7 @@ The model was trained using ChatML so the following format should be followed wh
72
 
73
  ## Evaluation
74
 
75
- Detailed evaluation results are presented in our [paper](https://shareddatastgacct.blob.core.windows.net/shared-data/climategpt-v1-publication.pdf) on our model card website: [eci.io/model-card](https://eci.io/model-card)
76
 
77
  ## Environmental Impact
78
  - **Hardware Type:** 8x NVIDIA H100 HBM
@@ -92,5 +95,8 @@ If you find ClimateGPT is useful in your work, please cite it with:
92
  title={ClimateGPT: Towards AI Synthesizing Interdisciplinary Research on Climate Change},
93
  author={David Thulke and Yingbo Gao and Petrus Pelser and Rein Brune and Rricha Jalota and Floris Fok and Michael Ramos and Ian van Wyk and Abdallah Nasir and Hayden Goldstein and Taylor Tragemann and Katie Nguyen and Ariana Fowler and Andrew Stanco and Jon Gabriel and Jordan Taylor and Dean Moro and Evgenii Tsymbalov and Juliette de Waal and Evgeny Matusov and Mudar Yaghi and Mohammad Shihadah and Hermann Ney and Christian Dugast and Jonathan Dotan and Daniel Erasmus},
94
  year={2024},
 
 
 
95
  }
96
  ```
 
12
  training_type: "pre-training"
13
  geographical_location: "Washington, USA"
14
  hardware_used: "8x NVIDIA H100 HBM"
15
+ license: other
16
+ license_name: climategpt-community-license
17
+ license_link: https://huggingface.co/eci-io/climategpt-7b-fsc/blob/main/LICENSE.txt
18
  ---
19
  # ClimateGPT-7B-FSC
20
 
 
41
  - **Context length:** 4K tokens
42
  - **Input:** Text-only data
43
  - **Output:** Model generates text only
44
+ - **Paper:** [arXiv:2401.09646](https://arxiv.org/abs/2401.09646)
45
  - **Website:** [eci.io](https://eci.io)
46
 
47
  ## Uses
 
75
 
76
  ## Evaluation
77
 
78
+ Detailed evaluation results are presented in our [paper](https://arxiv.org/abs/2401.09646) and on our model card website: [eci.io/model-card](https://eci.io/model-card)
79
 
80
  ## Environmental Impact
81
  - **Hardware Type:** 8x NVIDIA H100 HBM
 
95
  title={ClimateGPT: Towards AI Synthesizing Interdisciplinary Research on Climate Change},
96
  author={David Thulke and Yingbo Gao and Petrus Pelser and Rein Brune and Rricha Jalota and Floris Fok and Michael Ramos and Ian van Wyk and Abdallah Nasir and Hayden Goldstein and Taylor Tragemann and Katie Nguyen and Ariana Fowler and Andrew Stanco and Jon Gabriel and Jordan Taylor and Dean Moro and Evgenii Tsymbalov and Juliette de Waal and Evgeny Matusov and Mudar Yaghi and Mohammad Shihadah and Hermann Ney and Christian Dugast and Jonathan Dotan and Daniel Erasmus},
97
  year={2024},
98
+ eprint={2401.09646},
99
+ archivePrefix={arXiv},
100
+ primaryClass={cs.LG}
101
  }
102
  ```
tokenizer_config.json CHANGED
@@ -145,5 +145,6 @@
145
  "spaces_between_special_tokens": false,
146
  "tokenizer_class": "LlamaTokenizer",
147
  "unk_token": "<unk>",
148
- "use_default_system_prompt": false
 
149
  }
 
145
  "spaces_between_special_tokens": false,
146
  "tokenizer_class": "LlamaTokenizer",
147
  "unk_token": "<unk>",
148
+ "use_default_system_prompt": false,
149
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
150
  }