Text Generation
Transformers
English
Polish
gpt2
cosy
mini
nice
helping
simple
creative
demo
friendly
conversational
Inference Endpoints
text-generation-inference
mICHPl committed on
Commit 77887d8
1 Parent(s): 4968dd6

Update config.json

Files changed (1)
  1. config.json +38 -33
config.json CHANGED
@@ -1,33 +1,38 @@
- import sagemaker
- import boto3
- from sagemaker.huggingface import HuggingFace
-
- # gets role for executing training job
- iam_client = boto3.client('iam')
- role = iam_client.get_role(RoleName='{IAM_ROLE_WITH_SAGEMAKER_PERMISSIONS}')['Role']['Arn']
- hyperparameters = {
-     'model_name_or_path':'mICHPl/MINI_AI',
-     'output_dir':'/opt/ml/model'
-     # add your remaining hyperparameters
-     # more info here https://github.com/huggingface/transformers/tree/v4.17.0/examples/pytorch/language-modeling
- }
-
- # git configuration to download our fine-tuning script
- git_config = {'repo': 'https://github.com/huggingface/transformers.git','branch': 'v4.17.0'}
-
- # creates Hugging Face estimator
- huggingface_estimator = HuggingFace(
-     entry_point='run_clm.py',
-     source_dir='./examples/pytorch/language-modeling',
-     instance_type='ml.p3.2xlarge',
-     instance_count=1,
-     role=role,
-     git_config=git_config,
-     transformers_version='4.17.0',
-     pytorch_version='1.10.2',
-     py_version='py38',
-     hyperparameters = hyperparameters
- )
-
- # starting the train job
- huggingface_estimator.fit()
+ {
+   "_name_or_path": "./output-MINI_AI",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "conversational": {
+       "max_length": 1000
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
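
With the boilerplate SageMaker training snippet gone, config.json now parses as a standard GPT-2 configuration, so the repo should load through the usual transformers API. A minimal sketch, assuming the repo's weights match this config; the prompt and max_new_tokens value below are illustrative:

from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

# Inspect the committed config without downloading any weights.
config = AutoConfig.from_pretrained("mICHPl/MINI_AI")
print(config.model_type)            # "gpt2"
print(config.task_specific_params)  # {'conversational': {'max_length': 1000}}

# AutoModelForCausalLM dispatches on model_type ("gpt2" -> GPT2LMHeadModel),
# so the unusual "architectures": ["GPT2"] entry does not block loading here.
tokenizer = AutoTokenizer.from_pretrained("mICHPl/MINI_AI")
model = AutoModelForCausalLM.from_pretrained("mICHPl/MINI_AI")

inputs = tokenizer("Hello! How are you today?", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=50)  # illustrative length
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Note that "architectures" normally names a full class such as "GPT2LMHeadModel"; AutoModelForCausalLM side-steps the bare "GPT2" entry here by resolving the class from model_type instead.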