hongyun1021 committed on
Commit
2c346af
·
verified ·
1 Parent(s): 00b69ac

Upload folder using huggingface_hub

Browse files
bigvgan_generator.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95ba25972d3de0628d99cd156e9315a9c018899bf739988959ebe3544080ced
3
+ size 449228171
config.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ semantic_codec:
2
+ conf:
3
+ codebook_size: 8192
4
+ hidden_size: 1024
5
+ codebook_dim: 8
6
+ vocos_dim: 384
7
+ vocos_intermediate_dim: 2048
8
+ vocos_num_layers: 12
9
+ checkpoint: semantic_codec.safetensors
10
+
11
+ s2mel:
12
+ preprocess_params:
13
+ sr: 22050
14
+ spect_params:
15
+ n_fft: 1024
16
+ win_length: 1024
17
+ hop_length: 256
18
+ n_mels: 80
19
+ fmin: 0
20
+ fmax: "None"
21
+
22
+ dit_type: "DiT"
23
+ reg_loss_type: "l1"
24
+ style_encoder:
25
+ dim: 192
26
+ length_regulator:
27
+ channels: 512
28
+ is_discrete: false
29
+ in_channels: 1024
30
+ content_codebook_size: 2048
31
+ sampling_ratios: [1, 1, 1, 1]
32
+ vector_quantize: false
33
+ n_codebooks: 1
34
+ quantizer_dropout: 0.0
35
+ f0_condition: false
36
+ n_f0_bins: 512
37
+ DiT:
38
+ hidden_dim: 512
39
+ num_heads: 8
40
+ depth: 13
41
+ class_dropout_prob: 0.1
42
+ block_size: 8192
43
+ in_channels: 80
44
+ style_condition: true
45
+ final_layer_type: 'wavenet'
46
+ target: 'mel'
47
+ content_dim: 512
48
+ content_codebook_size: 1024
49
+ content_type: 'discrete'
50
+ f0_condition: false
51
+ n_f0_bins: 512
52
+ content_codebooks: 1
53
+ is_causal: false
54
+ long_skip_connection: true
55
+ zero_prompt_speech_token: false
56
+ time_as_token: false
57
+ style_as_token: false
58
+ uvit_skip_connection: true
59
+ add_resblock_in_transformer: false
60
+ wavenet:
61
+ hidden_dim: 512
62
+ num_layers: 8
63
+ kernel_size: 5
64
+ dilation_rate: 1
65
+ p_dropout: 0.2
66
+ style_condition: true
67
+
68
+ gpt_checkpoint: gpt.pth
69
+ s2mel_checkpoint: s2mel.pth
70
+ vocoder:
71
+ type: "bigvgan"
72
+ name: "nvidia/bigvgan_v2_22khz_80band_256x"
prompt_condition.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:929741bfc6dde9350f13e2664f56cf166a58b2a964b6505a97cd8fbb8cdbf58a
3
+ size 1388136
ref_mel.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d17af867cef9c6f48b1cb5e57e65a890456c57348dfaa36fa4a9f7223e651438
3
+ size 218217
s2mel.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aae1bb12017cbb47e7a5ce537fc82f40b6b1deb71acdb9b8f25686f32714b636
3
+ size 1202198223
semantic_codec.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec947271175d8cad75ec37e83aa487e27c97a0f72a303393772da5ffa84bddf2
3
+ size 177183712
style.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f8800c757c2f3bb33ed654ba4c7fd3b4b9184d4b8f318a86c1a09e7da4ae069
3
+ size 2267