# indextts2_decoder / config.yaml
# Uploaded by hongyun1021 using huggingface_hub (commit 2c346af, verified).
# Semantic codec: model hyperparameters plus the weights file to load.
semantic_codec:
  # NOTE(review): nesting reconstructed from a flattened copy. The size and
  # vocos_* keys are assumed to belong to `conf`, with `checkpoint` as a
  # sibling of `conf`; confirm against the code that reads this file.
  conf:
    codebook_size: 8192
    hidden_size: 1024
    codebook_dim: 8
    vocos_dim: 384
    vocos_intermediate_dim: 2048
    vocos_num_layers: 12
  # Bare file name; presumably resolved relative to the model directory.
  checkpoint: semantic_codec.safetensors
# Settings grouped under `s2mel`.
# NOTE(review): the nesting below is reconstructed. In the flattened form
# several keys (in_channels, content_codebook_size, f0_condition, n_f0_bins,
# hidden_dim, style_condition) appeared twice in one mapping, which YAML
# forbids and most parsers resolve by silently keeping the last value.
# Confirm the grouping against the code that reads this file.
s2mel:
  # Audio front-end parameters.
  preprocess_params:
    # 22050 / 80 / 256 below line up with the "22khz_80band_256x" suffix of
    # the vocoder name configured elsewhere in this file; presumably they
    # must be changed together.
    sr: 22050
    spect_params:
      n_fft: 1024
      win_length: 1024
      hop_length: 256
      n_mels: 80
      fmin: 0
      # Quoted, so this is the four-character string None, not a YAML null.
      # NOTE(review): presumably the loader checks for this exact text;
      # confirm before replacing it with `null`.
      fmax: 'None'

  dit_type: 'DiT'
  reg_loss_type: 'l1'

  style_encoder:
    dim: 192

  length_regulator:
    channels: 512
    is_discrete: false
    in_channels: 1024
    # Differs from DiT.content_codebook_size (1024) below.
    content_codebook_size: 2048
    sampling_ratios: [1, 1, 1, 1]
    vector_quantize: false
    n_codebooks: 1
    quantizer_dropout: 0.0
    f0_condition: false
    n_f0_bins: 512

  DiT:
    hidden_dim: 512
    num_heads: 8
    depth: 13
    class_dropout_prob: 0.1
    block_size: 8192
    # Same value as spect_params.n_mels; presumably the mel channel count.
    in_channels: 80
    style_condition: true
    final_layer_type: 'wavenet'
    target: 'mel'
    # Same value as length_regulator.channels; presumably must match it.
    content_dim: 512
    content_codebook_size: 1024
    content_type: 'discrete'
    f0_condition: false
    n_f0_bins: 512
    content_codebooks: 1
    is_causal: false
    long_skip_connection: true
    zero_prompt_speech_token: false
    time_as_token: false
    style_as_token: false
    uvit_skip_connection: true
    add_resblock_in_transformer: false

  # NOTE(review): placed as a sibling of `DiT` (DiT.final_layer_type is
  # 'wavenet'); confirm it is not expected inside the `DiT` mapping.
  wavenet:
    hidden_dim: 512
    num_layers: 8
    kernel_size: 5
    dilation_rate: 1
    p_dropout: 0.2
    style_condition: true
# Checkpoint file names for the GPT and s2mel models.
# NOTE(review): bare file names; presumably resolved relative to the model
# directory by the loader. Kept as top-level keys.
gpt_checkpoint: 'gpt.pth'
s2mel_checkpoint: 's2mel.pth'
# Vocoder selection. `type` and `name` belong to this mapping; left at the
# top level they would leave `vocoder` null.
vocoder:
  type: 'bigvgan'
  # Presumably a Hugging Face Hub repo id. The "22khz_80band_256x" suffix
  # lines up with sr: 22050, n_mels: 80 and hop_length: 256 in this file.
  name: 'nvidia/bigvgan_v2_22khz_80band_256x'