test-003 / config.json

Upload initial model version

ab5fe31 verified 10 months ago

17.5 kB

	{
	"return_dict": true,
	"torchscript": false,
	"torch_dtype": null,
	"use_bfloat16": false,
	"tf_legacy_loss": false,
	"pruned_heads": {},
	"tie_word_embeddings": true,
	"chunk_size_feed_forward": 0,
	"is_encoder_decoder": false,
	"is_decoder": true,
	"cross_attention_hidden_size": null,
	"add_cross_attention": false,
	"tie_encoder_decoder": false,
	"max_length": 20,
	"min_length": 0,
	"do_sample": false,
	"early_stopping": false,
	"num_beams": 1,
	"num_beam_groups": 1,
	"diversity_penalty": 0.0,
	"temperature": 1.0,
	"top_k": 50,
	"top_p": 1.0,
	"typical_p": 1.0,
	"repetition_penalty": 1.0,
	"length_penalty": 1.0,
	"no_repeat_ngram_size": 0,
	"encoder_no_repeat_ngram_size": 0,
	"bad_words_ids": null,
	"num_return_sequences": 1,
	"output_scores": false,
	"return_dict_in_generate": false,
	"forced_bos_token_id": null,
	"forced_eos_token_id": null,
	"remove_invalid_values": false,
	"exponential_decay_length_penalty": null,
	"suppress_tokens": null,
	"begin_suppress_tokens": null,
	"architectures": null,
	"finetuning_task": null,
	"id2label": {
	"0": "LABEL_0",
	"1": "LABEL_1"
	},
	"label2id": {
	"LABEL_0": 0,
	"LABEL_1": 1
	},
	"tokenizer_class": null,
	"prefix": null,
	"bos_token_id": null,
	"pad_token_id": null,
	"eos_token_id": null,
	"sep_token_id": null,
	"task_specific_params": null,
	"problem_type": null,
	"_name_or_path": "",
	"_attn_implementation_autoset": false,
	"transformers_version": "4.51.3",
	"target_dim": 1,
	"static_dim": 0,
	"dynamic_dim": 0,
	"past_dynamic_dim": 0,
	"static_cardinalities": null,
	"dynamic_cardinalities": null,
	"past_dynamic_cardinalities": null,
	"static_embedding_dim": null,
	"dynamic_embedding_dim": null,
	"past_dynamic_embedding_dim": null,
	"time_features": null,
	"scaling": true,
	"decoder_start_token_value": 0.0,
	"feature_size": 1,
	"context_length": 1024,
	"prediction_length": 256,
	"quantiles": [
	0.005,
	0.015,
	0.025,
	0.034999999999999996,
	0.045,
	0.055,
	0.065,
	0.07500000000000001,
	0.085,
	0.095,
	0.10500000000000001,
	0.115,
	0.125,
	0.135,
	0.14500000000000002,
	0.155,
	0.165,
	0.17500000000000002,
	0.185,
	0.195,
	0.20500000000000002,
	0.215,
	0.225,
	0.23500000000000001,
	0.245,
	0.255,
	0.265,
	0.275,
	0.28500000000000003,
	0.295,
	0.305,
	0.315,
	0.325,
	0.335,
	0.34500000000000003,
	0.35500000000000004,
	0.365,
	0.375,
	0.385,
	0.395,
	0.405,
	0.41500000000000004,
	0.425,
	0.435,
	0.445,
	0.455,
	0.465,
	0.47500000000000003,
	0.485,
	0.495,
	0.505,
	0.515,
	0.525,
	0.535,
	0.545,
	0.555,
	0.5650000000000001,
	0.5750000000000001,
	0.585,
	0.595,
	0.605,
	0.615,
	0.625,
	0.635,
	0.645,
	0.655,
	0.665,
	0.675,
	0.685,
	0.6950000000000001,
	0.7050000000000001,
	0.715,
	0.725,
	0.735,
	0.745,
	0.755,
	0.765,
	0.775,
	0.785,
	0.795,
	0.805,
	0.8150000000000001,
	0.8250000000000001,
	0.8350000000000001,
	0.845,
	0.855,
	0.865,
	0.875,
	0.885,
	0.895,
	0.905,
	0.915,
	0.925,
	0.935,
	0.9450000000000001,
	0.9550000000000001,
	0.965,
	0.975,
	0.985,
	0.995
	],
	"output_token_lengths": 1,
	"loss_type": "quantile",
	"use_dynamic_features": false,
	"use_static_features": false,
	"autoregressive": true,
	"gradient_checkpointing": true,
	"model_type": "transformer",
	"d_model": 768,
	"hidden_dropout_prob": 0.1,
	"max_position_embeddings": 4096,
	"architecture": {
	"layout": "decoder",
	"num_encoder_layers": 0,
	"num_decoder_layers": 16,
	"share_weights": false
	},
	"value_embedding_config": {
	"type": "value",
	"dropout": 0.1,
	"embedding_dim": null,
	"kwargs": {
	"feature_size": 1,
	"d_model": 768
	}
	},
	"positional_embedding_config": {
	"type": "stacked_embedding",
	"dropout": 0.1,
	"embedding_dim": null,
	"kwargs": {
	"embedding_configs": [
	{
	"type": "fourier",
	"args": {
	"feature_size": 64
	}
	}
	],
	"max_seq_len": 4096
	}
	},
	"encoder_blocks": null,
	"decoder_blocks": [
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	},
	{
	"block_type": "default_decoder",
	"attention_config": {
	"attention_type": "full",
	"num_heads": 12,
	"dropout": 0.1,
	"bias": true,
	"use_rope": true,
	"use_alibi": true,
	"rope_base": 10000,
	"kwargs": {}
	},
	"cross_attention_config": null,
	"ffn_config": {
	"type": "standard",
	"intermediate_size": 3072,
	"activation": "gelu",
	"dropout": 0.1,
	"bias": true,
	"num_experts": null,
	"top_k": null,
	"expert_intermediate_size": null,
	"load_balancing_coef": 0.01,
	"kwargs": {}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"kwargs": {}
	}
	],
	"output_head_config": {
	"type": "distpred",
	"output_size": 100,
	"kwargs": {
	"num_outputs": 100,
	"feature_size": 1
	}
	},
	"norm_config": {
	"norm_type": "layer",
	"eps": 1e-05,
	"kwargs": {}
	},
	"head_agg_config": {
	"type": "mean",
	"kwargs": {}
	},
	"loss_config": {
	"type": "crps",
	"kwargs": {
	"scaling_type": "minmax",
	"scaling_dim": 1,
	"scaling_eps": 1e-08
	}
	},
	"output_attentions": false,
	"output_hidden_states": false,
	"use_teacher_forcing": true,
	"quantizer_config": null,
	"vocab_size": null,
	"decoder_start_token_id": null,
	"num_quantiles": 100
	}