rtferraz committed on
Commit
af3b720
·
verified ·
1 Parent(s): 6ccb9e6

Update package to v0.3.0 with training exports

Browse files
Files changed (1) hide show
  1. src/domain_tokenizer/__init__.py +6 -7
src/domain_tokenizer/__init__.py CHANGED
@@ -6,21 +6,20 @@ Core components:
6
  - tokenizers: DomainTokenizerBuilder, per-field tokenizers
7
  - schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
8
  - models: DomainTransformerForCausalLM, PLR, JointFusion
 
9
  """
10
 
11
  from .schema import DomainSchema, FieldSpec, FieldType
12
  from .tokenizers.domain_tokenizer import DomainTokenizerBuilder
13
  from .tokenizers.field_tokenizers import (
14
- BaseFieldTokenizer,
15
- CalendarTokenizer,
16
- CategoricalTokenizer,
17
- DiscreteNumericalTokenizer,
18
- MagnitudeBucketTokenizer,
19
- SignTokenizer,
20
  )
21
  from .models.configuration import DomainTransformerConfig
22
  from .models.modeling import DomainTransformerForCausalLM, DomainTransformerModel
23
  from .models.plr_embeddings import PeriodicLinearReLU
24
  from .models.joint_fusion import JointFusionModel, DCNv2
 
 
25
 
26
- __version__ = "0.2.0"
 
6
  - tokenizers: DomainTokenizerBuilder, per-field tokenizers
7
  - schemas: Predefined schemas (FINANCE, ECOMMERCE, HEALTHCARE)
8
  - models: DomainTransformerForCausalLM, PLR, JointFusion
9
+ - training: prepare_clm_dataset, pretrain_domain_model
10
  """
11
 
12
  from .schema import DomainSchema, FieldSpec, FieldType
13
  from .tokenizers.domain_tokenizer import DomainTokenizerBuilder
14
  from .tokenizers.field_tokenizers import (
15
+ BaseFieldTokenizer, CalendarTokenizer, CategoricalTokenizer,
16
+ DiscreteNumericalTokenizer, MagnitudeBucketTokenizer, SignTokenizer,
 
 
 
 
17
  )
18
  from .models.configuration import DomainTransformerConfig
19
  from .models.modeling import DomainTransformerForCausalLM, DomainTransformerModel
20
  from .models.plr_embeddings import PeriodicLinearReLU
21
  from .models.joint_fusion import JointFusionModel, DCNv2
22
+ from .training.data_pipeline import prepare_clm_dataset, pack_sequences
23
+ from .training.pretrain import pretrain_domain_model
24
 
25
+ __version__ = "0.3.0"