{
  "architectures": [
    "LeanAlbertForPretraining",
    "LeanAlbertForTokenClassification",
    "LeanAlbertForSequenceClassification"
  ],
  "model_type": "lean_albert",
  "num_hidden_layers": 32,
  "num_hidden_groups": 32,
  "num_inner_groups": 1,
  "share_large_matrices": true,
  "adapter_dim": 32,
  "hidden_size": 2560,
  "intermediate_size": 10240,
  "embedding_size": 256,
  "num_attention_heads": 64,
  "vocab_size": 999,
  "hidden_act": "gelu_new",
  "hidden_act_gated": true,
  "sandwich_norm": true,
  "position_embedding_type": "rotary",
  "hidden_dropout_prob": 0,
  "classifier_dropout_prob": 0.1,
  "attention_probs_dropout_prob": 0,
  "layer_norm_eps": 1e-12,
  "type_vocab_size": 2,
  "pad_token_id": 0,
  "bos_token_id": 2,
  "eos_token_id": 3
}