johnrachwanpruna committed on
Commit
5137c65
·
verified ·
1 Parent(s): e482afc

Add files using upload-large-folder tool

Browse files
safety_checker/config.json CHANGED
@@ -8,30 +8,52 @@
8
  "model_type": "clip",
9
  "projection_dim": 64,
10
  "text_config": {
 
11
  "attention_dropout": 0.1,
12
  "bos_token_id": 0,
 
 
13
  "dropout": 0.1,
14
  "dtype": "float32",
15
  "eos_token_id": 2,
 
16
  "gradient_checkpointing": false,
17
  "hidden_act": "quick_gelu",
18
  "hidden_size": 32,
19
  "initializer_factor": 1.0,
20
  "initializer_range": 0.02,
21
  "intermediate_size": 37,
 
22
  "layer_norm_eps": 1e-05,
23
  "max_position_embeddings": 512,
24
  "model_type": "clip_text_model",
25
  "num_attention_heads": 4,
26
  "num_hidden_layers": 5,
 
 
27
  "projection_dim": 512,
 
 
 
 
 
 
 
 
 
28
  "vocab_size": 99
29
  },
30
- "transformers_version": "4.57.6",
31
  "vision_config": {
 
32
  "attention_dropout": 0.1,
 
 
 
33
  "dropout": 0.1,
34
  "dtype": "float32",
 
 
35
  "gradient_checkpointing": false,
36
  "hidden_act": "quick_gelu",
37
  "hidden_size": 32,
@@ -39,13 +61,25 @@
39
  "initializer_factor": 1.0,
40
  "initializer_range": 0.02,
41
  "intermediate_size": 37,
 
42
  "layer_norm_eps": 1e-05,
43
  "model_type": "clip_vision_model",
44
  "num_attention_heads": 4,
45
  "num_channels": 3,
46
  "num_hidden_layers": 5,
 
47
  "patch_size": 2,
48
- "projection_dim": 512
 
 
 
 
 
 
 
 
 
 
49
  },
50
  "vocab_size": 1000
51
  }
 
8
  "model_type": "clip",
9
  "projection_dim": 64,
10
  "text_config": {
11
+ "add_cross_attention": false,
12
  "attention_dropout": 0.1,
13
  "bos_token_id": 0,
14
+ "cross_attention_hidden_size": null,
15
+ "decoder_start_token_id": null,
16
  "dropout": 0.1,
17
  "dtype": "float32",
18
  "eos_token_id": 2,
19
+ "finetuning_task": null,
20
  "gradient_checkpointing": false,
21
  "hidden_act": "quick_gelu",
22
  "hidden_size": 32,
23
  "initializer_factor": 1.0,
24
  "initializer_range": 0.02,
25
  "intermediate_size": 37,
26
+ "is_decoder": false,
27
  "layer_norm_eps": 1e-05,
28
  "max_position_embeddings": 512,
29
  "model_type": "clip_text_model",
30
  "num_attention_heads": 4,
31
  "num_hidden_layers": 5,
32
+ "pad_token_id": 1,
33
+ "prefix": null,
34
  "projection_dim": 512,
35
+ "pruned_heads": {},
36
+ "sep_token_id": null,
37
+ "task_specific_params": null,
38
+ "tf_legacy_loss": false,
39
+ "tie_encoder_decoder": false,
40
+ "tie_word_embeddings": true,
41
+ "tokenizer_class": null,
42
+ "torchscript": false,
43
+ "use_bfloat16": false,
44
  "vocab_size": 99
45
  },
46
+ "transformers_version": "5.1.0",
47
  "vision_config": {
48
+ "add_cross_attention": false,
49
  "attention_dropout": 0.1,
50
+ "bos_token_id": null,
51
+ "cross_attention_hidden_size": null,
52
+ "decoder_start_token_id": null,
53
  "dropout": 0.1,
54
  "dtype": "float32",
55
+ "eos_token_id": null,
56
+ "finetuning_task": null,
57
  "gradient_checkpointing": false,
58
  "hidden_act": "quick_gelu",
59
  "hidden_size": 32,
 
61
  "initializer_factor": 1.0,
62
  "initializer_range": 0.02,
63
  "intermediate_size": 37,
64
+ "is_decoder": false,
65
  "layer_norm_eps": 1e-05,
66
  "model_type": "clip_vision_model",
67
  "num_attention_heads": 4,
68
  "num_channels": 3,
69
  "num_hidden_layers": 5,
70
+ "pad_token_id": null,
71
  "patch_size": 2,
72
+ "prefix": null,
73
+ "projection_dim": 512,
74
+ "pruned_heads": {},
75
+ "sep_token_id": null,
76
+ "task_specific_params": null,
77
+ "tf_legacy_loss": false,
78
+ "tie_encoder_decoder": false,
79
+ "tie_word_embeddings": true,
80
+ "tokenizer_class": null,
81
+ "torchscript": false,
82
+ "use_bfloat16": false
83
  },
84
  "vocab_size": 1000
85
  }
text_encoder/config.json CHANGED
@@ -19,6 +19,6 @@
19
  "num_hidden_layers": 5,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
- "transformers_version": "4.57.6",
23
  "vocab_size": 1000
24
  }
 
19
  "num_hidden_layers": 5,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
+ "transformers_version": "5.1.0",
23
  "vocab_size": 1000
24
  }
tokenizer/tokenizer.json ADDED
@@ -0,0 +1,3679 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<|startoftext|>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": true,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "<|endoftext|>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ }
24
+ ],
25
+ "normalizer": {
26
+ "type": "Sequence",
27
+ "normalizers": [
28
+ {
29
+ "type": "NFC"
30
+ },
31
+ {
32
+ "type": "Replace",
33
+ "pattern": {
34
+ "Regex": "\\s+"
35
+ },
36
+ "content": " "
37
+ },
38
+ {
39
+ "type": "Lowercase"
40
+ }
41
+ ]
42
+ },
43
+ "pre_tokenizer": {
44
+ "type": "Sequence",
45
+ "pretokenizers": [
46
+ {
47
+ "type": "Split",
48
+ "pattern": {
49
+ "Regex": "<\\|startoftext\\|>|<\\|endoftext\\|>|'s|'t|'re|'ve|'m|'ll|'d|[\\p{L}]+|[\\p{N}]|[^\\s\\p{L}\\p{N}]+"
50
+ },
51
+ "behavior": "Removed",
52
+ "invert": true
53
+ },
54
+ {
55
+ "type": "ByteLevel",
56
+ "add_prefix_space": false,
57
+ "trim_offsets": true,
58
+ "use_regex": true
59
+ }
60
+ ]
61
+ },
62
+ "post_processor": {
63
+ "type": "RobertaProcessing",
64
+ "sep": [
65
+ "<|endoftext|>",
66
+ 1
67
+ ],
68
+ "cls": [
69
+ "<|startoftext|>",
70
+ 0
71
+ ],
72
+ "trim_offsets": false,
73
+ "add_prefix_space": false
74
+ },
75
+ "decoder": {
76
+ "type": "ByteLevel",
77
+ "add_prefix_space": true,
78
+ "trim_offsets": true,
79
+ "use_regex": true
80
+ },
81
+ "model": {
82
+ "type": "BPE",
83
+ "dropout": null,
84
+ "unk_token": "<|endoftext|>",
85
+ "continuing_subword_prefix": "",
86
+ "end_of_word_suffix": "</w>",
87
+ "fuse_unk": false,
88
+ "byte_fallback": false,
89
+ "ignore_merges": false,
90
+ "vocab": {
91
+ "<|startoftext|>": 0,
92
+ "<|endoftext|>": 1,
93
+ "!": 2,
94
+ "\"": 3,
95
+ "#": 4,
96
+ "$": 5,
97
+ "%": 6,
98
+ "&": 7,
99
+ "'": 8,
100
+ "(": 9,
101
+ ")": 10,
102
+ "*": 11,
103
+ "+": 12,
104
+ ",": 13,
105
+ "-": 14,
106
+ ".": 15,
107
+ "/": 16,
108
+ "0": 17,
109
+ "1": 18,
110
+ "2": 19,
111
+ "3": 20,
112
+ "4": 21,
113
+ "5": 22,
114
+ "6": 23,
115
+ "7": 24,
116
+ "8": 25,
117
+ "9": 26,
118
+ ":": 27,
119
+ ";": 28,
120
+ "<": 29,
121
+ "=": 30,
122
+ ">": 31,
123
+ "?": 32,
124
+ "@": 33,
125
+ "A": 34,
126
+ "B": 35,
127
+ "C": 36,
128
+ "D": 37,
129
+ "E": 38,
130
+ "F": 39,
131
+ "G": 40,
132
+ "H": 41,
133
+ "I": 42,
134
+ "J": 43,
135
+ "K": 44,
136
+ "L": 45,
137
+ "M": 46,
138
+ "N": 47,
139
+ "O": 48,
140
+ "P": 49,
141
+ "Q": 50,
142
+ "R": 51,
143
+ "S": 52,
144
+ "T": 53,
145
+ "U": 54,
146
+ "V": 55,
147
+ "W": 56,
148
+ "X": 57,
149
+ "Y": 58,
150
+ "Z": 59,
151
+ "[": 60,
152
+ "\\": 61,
153
+ "]": 62,
154
+ "^": 63,
155
+ "_": 64,
156
+ "`": 65,
157
+ "a": 66,
158
+ "b": 67,
159
+ "c": 68,
160
+ "d": 69,
161
+ "e": 70,
162
+ "f": 71,
163
+ "g": 72,
164
+ "h": 73,
165
+ "i": 74,
166
+ "j": 75,
167
+ "k": 76,
168
+ "l": 77,
169
+ "m": 78,
170
+ "n": 79,
171
+ "o": 80,
172
+ "p": 81,
173
+ "q": 82,
174
+ "r": 83,
175
+ "s": 84,
176
+ "t": 85,
177
+ "u": 86,
178
+ "v": 87,
179
+ "w": 88,
180
+ "x": 89,
181
+ "y": 90,
182
+ "z": 91,
183
+ "|": 92,
184
+ "}": 93,
185
+ "~": 94,
186
+ "¡": 95,
187
+ "¢": 96,
188
+ "£": 97,
189
+ "¤": 98,
190
+ "¥": 99,
191
+ "¦": 100,
192
+ "§": 101,
193
+ "¨": 102,
194
+ "©": 103,
195
+ "ª": 104,
196
+ "«": 105,
197
+ "¬": 106,
198
+ "®": 107,
199
+ "¯": 108,
200
+ "°": 109,
201
+ "±": 110,
202
+ "²": 111,
203
+ "³": 112,
204
+ "´": 113,
205
+ "µ": 114,
206
+ "¶": 115,
207
+ "·": 116,
208
+ "¸": 117,
209
+ "¹": 118,
210
+ "º": 119,
211
+ "»": 120,
212
+ "¼": 121,
213
+ "½": 122,
214
+ "¾": 123,
215
+ "¿": 124,
216
+ "Â": 125,
217
+ "Ã": 126,
218
+ "Ä": 127,
219
+ "Å": 128,
220
+ "Æ": 129,
221
+ "Ç": 130,
222
+ "È": 131,
223
+ "É": 132,
224
+ "Ê": 133,
225
+ "Ë": 134,
226
+ "Ì": 135,
227
+ "Í": 136,
228
+ "Î": 137,
229
+ "Ï": 138,
230
+ "Ð": 139,
231
+ "Ñ": 140,
232
+ "Ö": 141,
233
+ "×": 142,
234
+ "Ø": 143,
235
+ "Ù": 144,
236
+ "Ü": 145,
237
+ "à": 146,
238
+ "á": 147,
239
+ "â": 148,
240
+ "ã": 149,
241
+ "ä": 150,
242
+ "å": 151,
243
+ "æ": 152,
244
+ "ç": 153,
245
+ "è": 154,
246
+ "é": 155,
247
+ "ë": 156,
248
+ "ì": 157,
249
+ "ï": 158,
250
+ "Ċ": 159,
251
+ "Ġ": 160,
252
+ "Ģ": 161,
253
+ "ģ": 162,
254
+ "Ĥ": 163,
255
+ "ĥ": 164,
256
+ "Ħ": 165,
257
+ "ħ": 166,
258
+ "Ĩ": 167,
259
+ "ĩ": 168,
260
+ "Ī": 169,
261
+ "ī": 170,
262
+ "Ĭ": 171,
263
+ "ĭ": 172,
264
+ "Į": 173,
265
+ "į": 174,
266
+ "İ": 175,
267
+ "ı": 176,
268
+ "Ĳ": 177,
269
+ "ĳ": 178,
270
+ "Ĵ": 179,
271
+ "ĵ": 180,
272
+ "Ķ": 181,
273
+ "ķ": 182,
274
+ "ĸ": 183,
275
+ "Ĺ": 184,
276
+ "ĺ": 185,
277
+ "Ļ": 186,
278
+ "ļ": 187,
279
+ "Ľ": 188,
280
+ "ľ": 189,
281
+ "Ŀ": 190,
282
+ "ŀ": 191,
283
+ "Ł": 192,
284
+ "ł": 193,
285
+ "Ń": 194,
286
+ "e</w>": 195,
287
+ "d</w>": 196,
288
+ "a</w>": 197,
289
+ "o</w>": 198,
290
+ "n</w>": 199,
291
+ "±</w>": 200,
292
+ "l</w>": 201,
293
+ "m</w>": 202,
294
+ "h</w>": 203,
295
+ "r</w>": 204,
296
+ "i</w>": 205,
297
+ "s</w>": 206,
298
+ "Z</w>": 207,
299
+ "t</w>": 208,
300
+ "f</w>": 209,
301
+ "k</w>": 210,
302
+ "y</w>": 211,
303
+ "b</w>": 212,
304
+ "F</w>": 213,
305
+ "g</w>": 214,
306
+ "7</w>": 215,
307
+ "0</w>": 216,
308
+ "p</w>": 217,
309
+ "L</w>": 218,
310
+ "H</w>": 219,
311
+ "¡</w>": 220,
312
+ "Ī</w>": 221,
313
+ "1</w>": 222,
314
+ "Ģ</w>": 223,
315
+ "c</w>": 224,
316
+ "ĩ</w>": 225,
317
+ "6</w>": 226,
318
+ "A</w>": 227,
319
+ "z</w>": 228,
320
+ "u</w>": 229,
321
+ "S</w>": 230,
322
+ "2</w>": 231,
323
+ "v</w>": 232,
324
+ "4</w>": 233,
325
+ "M</w>": 234,
326
+ "T</w>": 235,
327
+ "8</w>": 236,
328
+ "I</w>": 237,
329
+ "N</w>": 238,
330
+ "C</w>": 239,
331
+ "5</w>": 240,
332
+ "¹</w>": 241,
333
+ "9</w>": 242,
334
+ "3</w>": 243,
335
+ "ī</w>": 244,
336
+ "P</w>": 245,
337
+ "E</w>": 246,
338
+ "»</w>": 247,
339
+ "V</w>": 248,
340
+ "İ</w>": 249,
341
+ "w</w>": 250,
342
+ "J</w>": 251,
343
+ "ł</w>": 252,
344
+ ".</w>": 253,
345
+ "K</w>": 254,
346
+ "D</w>": 255,
347
+ "Ķ</w>": 256,
348
+ "¸</w>": 257,
349
+ "B</w>": 258,
350
+ "©</w>": 259,
351
+ "º</w>": 260,
352
+ "µ</w>": 261,
353
+ "Ĥ</w>": 262,
354
+ "X</w>": 263,
355
+ "R</w>": 264,
356
+ "O</w>": 265,
357
+ "«</w>": 266,
358
+ "Ļ</w>": 267,
359
+ "U</w>": 268,
360
+ "x</w>": 269,
361
+ "[</w>": 270,
362
+ "¿</w>": 271,
363
+ "³</w>": 272,
364
+ "ģ</w>": 273,
365
+ "W</w>": 274,
366
+ "§</w>": 275,
367
+ "-</w>": 276,
368
+ "ĸ</w>": 277,
369
+ "Ħ</w>": 278,
370
+ ",</w>": 279,
371
+ "q</w>": 280,
372
+ "ħ</w>": 281,
373
+ "¨</w>": 282,
374
+ "G</w>": 283,
375
+ "²</w>": 284,
376
+ "ĺ</w>": 285,
377
+ "ª</w>": 286,
378
+ "¯</w>": 287,
379
+ "j</w>": 288,
380
+ "]</w>": 289,
381
+ "ļ</w>": 290,
382
+ "Ŀ</w>": 291,
383
+ "¤</w>": 292,
384
+ "ŀ</w>": 293,
385
+ "½</w>": 294,
386
+ "Ĳ</w>": 295,
387
+ "'</w>": 296,
388
+ "Ń</w>": 297,
389
+ "°</w>": 298,
390
+ "ľ</w>": 299,
391
+ "></w>": 300,
392
+ "¶</w>": 301,
393
+ "į</w>": 302,
394
+ "¦</w>": 303,
395
+ "|</w>": 304,
396
+ "¼</w>": 305,
397
+ "¢</w>": 306,
398
+ "´</w>": 307,
399
+ "Ĩ</w>": 308,
400
+ "Q</w>": 309,
401
+ "Y</w>": 310,
402
+ "Ľ</w>": 311,
403
+ "ĵ</w>": 312,
404
+ "ĳ</w>": 313,
405
+ "ķ</w>": 314,
406
+ "Ĭ</w>": 315,
407
+ "¾</w>": 316,
408
+ ";</w>": 317,
409
+ "(</w>": 318,
410
+ "¬</w>": 319,
411
+ "@</w>": 320,
412
+ "ĭ</w>": 321,
413
+ "Ĺ</w>": 322,
414
+ "£</w>": 323,
415
+ "Į</w>": 324,
416
+ "#</w>": 325,
417
+ "·</w>": 326,
418
+ "*</w>": 327,
419
+ "Ĵ</w>": 328,
420
+ "®</w>": 329,
421
+ ")</w>": 330,
422
+ "^</w>": 331,
423
+ "ı</w>": 332,
424
+ "Ġ</w>": 333,
425
+ "_</w>": 334,
426
+ "Ł</w>": 335,
427
+ "}</w>": 336,
428
+ "ĥ</w>": 337,
429
+ "\\</w>": 338,
430
+ "¥</w>": 339,
431
+ "<</w>": 340,
432
+ "+</w>": 341,
433
+ "=</w>": 342,
434
+ "~</w>": 343,
435
+ "\"</w>": 344,
436
+ "!</w>": 345,
437
+ "?</w>": 346,
438
+ "`</w>": 347,
439
+ "$</w>": 348,
440
+ "Ċ</w>": 349,
441
+ "/</w>": 350,
442
+ "%</w>": 351,
443
+ "&</w>": 352,
444
+ ":</w>": 353,
445
+ "Ġt": 354,
446
+ "Ġth": 355,
447
+ "Ġa": 356,
448
+ "Ġthe</w>": 357,
449
+ "in": 358,
450
+ "Ġo": 359,
451
+ "Ġ,</w>": 360,
452
+ "Ġs": 361,
453
+ "ed</w>": 362,
454
+ "Ġw": 363,
455
+ "er": 364,
456
+ "Ġ.</w>": 365,
457
+ "Ġi": 366,
458
+ "re": 367,
459
+ "Ġc": 368,
460
+ "nd</w>": 369,
461
+ "Ġf": 370,
462
+ "Ġb": 371,
463
+ "at": 372,
464
+ "Ġof</w>": 373,
465
+ "er</w>": 374,
466
+ "en": 375,
467
+ "ar": 376,
468
+ "or": 377,
469
+ "it": 378,
470
+ "Ġp": 379,
471
+ "Ġh": 380,
472
+ "Ġand</w>": 381,
473
+ "on": 382,
474
+ "ing</w>": 383,
475
+ "an": 384,
476
+ "ro": 385,
477
+ "Ġm": 386,
478
+ "Ġd": 387,
479
+ "es</w>": 388,
480
+ "Ġin</w>": 389,
481
+ "on</w>": 390,
482
+ "Ġto</w>": 391,
483
+ "ou": 392,
484
+ "is": 393,
485
+ "Ġa</w>": 394,
486
+ "ic": 395,
487
+ "ĠT": 396,
488
+ "al": 397,
489
+ "Ġl": 398,
490
+ "Ġ=</w>": 399,
491
+ "Ġre": 400,
492
+ "Ġ\"</w>": 401,
493
+ "es": 402,
494
+ "ĠS": 403,
495
+ "as</w>": 404,
496
+ "al</w>": 405,
497
+ "il": 406,
498
+ "el": 407,
499
+ "ion</w>": 408,
500
+ "ĠA": 409,
501
+ "ĠC": 410,
502
+ "Ġ1": 411,
503
+ "ĠĊ</w>": 412,
504
+ "ur": 413,
505
+ "ĠTh": 414,
506
+ "Ġn": 415,
507
+ "as": 416,
508
+ "Ġ@": 417,
509
+ "ec": 418,
510
+ "om": 419,
511
+ "ac": 420,
512
+ "Ġe": 421,
513
+ "Ġwas</w>": 422,
514
+ "ĠM": 423,
515
+ "or</w>": 424,
516
+ "an</w>": 425,
517
+ "am": 426,
518
+ "en</w>": 427,
519
+ "ol": 428,
520
+ "Ġin": 429,
521
+ "Ġg": 430,
522
+ "Ġ'</w>": 431,
523
+ "ĠB": 432,
524
+ "ly</w>": 433,
525
+ "at</w>": 434,
526
+ "iv": 435,
527
+ "ts</w>": 436,
528
+ "ĠThe</w>": 437,
529
+ "us": 438,
530
+ "-@</w>": 439,
531
+ "Ġ@-@</w>": 440,
532
+ "is</w>": 441,
533
+ "ĠI": 442,
534
+ "Ġwh": 443,
535
+ "ig": 444,
536
+ "ĠH": 445,
537
+ "Ġst": 446,
538
+ "os": 447,
539
+ "un": 448,
540
+ "th": 449,
541
+ "ĠP": 450,
542
+ "Ġwit": 451,
543
+ "Ġthat</w>": 452,
544
+ "ir": 453,
545
+ "Ġas</w>": 454,
546
+ "em": 455,
547
+ "Ġon</w>": 456,
548
+ "ra": 457,
549
+ "Ġfor</w>": 458,
550
+ "ĠR": 459,
551
+ "et": 460,
552
+ "ow": 461,
553
+ "Ġ2": 462,
554
+ "id": 463,
555
+ "ĠD": 464,
556
+ "le</w>": 465,
557
+ "Ġwith</w>": 466,
558
+ "la": 467,
559
+ "ent</w>": 468,
560
+ "im": 469,
561
+ "ĠF": 470,
562
+ "ea": 471,
563
+ "ion": 472,
564
+ "Ġby</w>": 473,
565
+ "Ġ)</w>": 474,
566
+ "Ġ(</w>": 475,
567
+ "Ġal": 476,
568
+ "Ġcon": 477,
569
+ "ent": 478,
570
+ "ĠW": 479,
571
+ "Ġis</w>": 480,
572
+ "ere</w>": 481,
573
+ "ĠG": 482,
574
+ "ĠN": 483,
575
+ "ĠL": 484,
576
+ "Ġha": 485,
577
+ "ers</w>": 486,
578
+ "ri": 487,
579
+ "th</w>": 488,
580
+ "ted</w>": 489,
581
+ "uc": 490,
582
+ "ĠJ": 491,
583
+ "Ġ19": 492,
584
+ "ev": 493,
585
+ "ul": 494,
586
+ "Ġv": 495,
587
+ "ce</w>": 496,
588
+ "ation</w>": 497,
589
+ "rom</w>": 498,
590
+ "Ġbe": 499,
591
+ "ĠE": 500,
592
+ "in</w>": 501,
593
+ "Ġthe": 502,
594
+ "Ġfrom</w>": 503,
595
+ "ĠO": 504,
596
+ "ter</w>": 505,
597
+ "Ġpro": 506,
598
+ "Ġar": 507,
599
+ "ad": 508,
600
+ "Ġcom": 509,
601
+ "ic</w>": 510,
602
+ "ag": 511,
603
+ "Ġhis</w>": 512,
604
+ "Ġsh": 513,
605
+ "Ġat</w>": 514,
606
+ "ov": 515,
607
+ "ies</w>": 516,
608
+ "oo": 517,
609
+ "pp": 518,
610
+ "st": 519,
611
+ "ch": 520,
612
+ "Ġr": 521,
613
+ "Ġ20": 522,
614
+ "ay</w>": 523,
615
+ "if": 524,
616
+ "Ġwere</w>": 525,
617
+ "Ġch": 526,
618
+ "ut</w>": 527,
619
+ "st</w>": 528,
620
+ "ut": 529,
621
+ "ds</w>": 530,
622
+ "op": 531,
623
+ "um": 532,
624
+ "Ġit</w>": 533,
625
+ "oc": 534,
626
+ "ter": 535,
627
+ "le": 536,
628
+ "igh": 537,
629
+ "ud": 538,
630
+ "Ġex": 539,
631
+ "ions</w>": 540,
632
+ "ate</w>": 541,
633
+ "ity</w>": 542,
634
+ "ated</w>": 543,
635
+ "Ġun": 544,
636
+ "ep": 545,
637
+ "qu": 546,
638
+ "Ġno": 547,
639
+ "ĠK": 548,
640
+ "ive</w>": 549,
641
+ "ist": 550,
642
+ "Ġon": 551,
643
+ "ame</w>": 552,
644
+ "oun": 553,
645
+ "ir</w>": 554,
646
+ "ab": 555,
647
+ "Ġâ": 556,
648
+ "ing": 557,
649
+ "Ġhe</w>": 558,
650
+ "ld</w>": 559,
651
+ "ug": 560,
652
+ "ich</w>": 561,
653
+ "Ġan</w>": 562,
654
+ "ed": 563,
655
+ "Ġk": 564,
656
+ "ĠâĢ": 565,
657
+ "Ġhad</w>": 566,
658
+ "ve</w>": 567,
659
+ "ain": 568,
660
+ "Ġse": 569,
661
+ "tion</w>": 570,
662
+ "ore</w>": 571,
663
+ "res": 572,
664
+ "Ġwhich</w>": 573,
665
+ "ĠIn</w>": 574,
666
+ "od": 575,
667
+ "ther</w>": 576,
668
+ "ak": 577,
669
+ "Ġsp": 578,
670
+ "ar</w>": 579,
671
+ "Ġy": 580,
672
+ "ĠCh": 581,
673
+ "ong</w>": 582,
674
+ "Ġac": 583,
675
+ "est</w>": 584,
676
+ "ĠU": 585,
677
+ "ap": 586,
678
+ "ff": 587,
679
+ "ally</w>": 588,
680
+ "rit": 589,
681
+ "ĠSt": 590,
682
+ "ub": 591,
683
+ "ge</w>": 592,
684
+ "ber</w>": 593,
685
+ "et</w>": 594,
686
+ "Ġbe</w>": 595,
687
+ "ear": 596,
688
+ "Ġrec": 597,
689
+ "ers": 598,
690
+ "Ġfir": 599,
691
+ "ot": 600,
692
+ "Ġare</w>": 601,
693
+ "Ġan": 602,
694
+ "ch</w>": 603,
695
+ "og": 604,
696
+ "ia</w>": 605,
697
+ "est": 606,
698
+ "ine</w>": 607,
699
+ "ill": 608,
700
+ "and": 609,
701
+ "el</w>": 610,
702
+ "ary</w>": 611,
703
+ "ew</w>": 612,
704
+ "id</w>": 613,
705
+ "Ġfor": 614,
706
+ "Ġ;</w>": 615,
707
+ "Ġcomp": 616,
708
+ "ĠV": 617,
709
+ "Ġinc": 618,
710
+ "tr": 619,
711
+ "Ġ200": 620,
712
+ "Ġtheir</w>": 621,
713
+ "us</w>": 622,
714
+ "Ġbut</w>": 623,
715
+ "ran": 624,
716
+ "ical</w>": 625,
717
+ "Ġfirst</w>": 626,
718
+ "Ġde": 627,
719
+ "Ġint": 628,
720
+ "Ġro": 629,
721
+ "so</w>": 630,
722
+ "ĠâĢĵ</w>": 631,
723
+ "Ġnot</w>": 632,
724
+ "ding</w>": 633,
725
+ "fter</w>": 634,
726
+ "ure</w>": 635,
727
+ "Ġpar": 636,
728
+ "Ġ:</w>": 637,
729
+ "ian</w>": 638,
730
+ "Ġtw": 639,
731
+ "ould</w>": 640,
732
+ "Ġalso</w>": 641,
733
+ "Ġits</w>": 642,
734
+ "Ġwor": 643,
735
+ "um</w>": 644,
736
+ "Ġor</w>": 645,
737
+ "ost</w>": 646,
738
+ "00</w>": 647,
739
+ "our": 648,
740
+ "ard</w>": 649,
741
+ "Ġres": 650,
742
+ "mp": 651,
743
+ "ue</w>": 652,
744
+ "Ġab": 653,
745
+ "ish</w>": 654,
746
+ "Ġcont": 655,
747
+ "Ġad": 656,
748
+ "own</w>": 657,
749
+ "all</w>": 658,
750
+ "oug": 659,
751
+ "Ġher</w>": 660,
752
+ "ast</w>": 661,
753
+ "Ġen": 662,
754
+ "ome</w>": 663,
755
+ "all": 664,
756
+ "ded</w>": 665,
757
+ "ow</w>": 666,
758
+ "Ġhave</w>": 667,
759
+ "Ġus": 668,
760
+ "ear</w>": 669,
761
+ "ack</w>": 670,
762
+ "duc": 671,
763
+ "ial</w>": 672,
764
+ "ss": 673,
765
+ "ents</w>": 674,
766
+ "ain</w>": 675,
767
+ "ting</w>": 676,
768
+ "Ġone</w>": 677,
769
+ "ess": 678,
770
+ "Ġhas</w>": 679,
771
+ "ight</w>": 680,
772
+ "av": 681,
773
+ "Ġev": 682,
774
+ "out</w>": 683,
775
+ "ay": 684,
776
+ "ence</w>": 685,
777
+ "Ġbeen</w>": 686,
778
+ "ew": 687,
779
+ "Ġtwo</w>": 688,
780
+ "Ġcl": 689,
781
+ "der</w>": 690,
782
+ "ime</w>": 691,
783
+ "ks</w>": 692,
784
+ "ess</w>": 693,
785
+ "ish": 694,
786
+ ".@</w>": 695,
787
+ "Ġ@.@</w>": 696,
788
+ "Ġpla": 697,
789
+ "Ġpl": 698,
790
+ "Ġor": 699,
791
+ "up</w>": 700,
792
+ "ment</w>": 701,
793
+ "uring</w>": 702,
794
+ "oll": 703,
795
+ "ĠIn": 704,
796
+ "Ġthis</w>": 705,
797
+ "Ġbec": 706,
798
+ "Ġcomm": 707,
799
+ "Ġdis": 708,
800
+ "ater</w>": 709,
801
+ "age</w>": 710,
802
+ "Ġapp": 711,
803
+ "ous</w>": 712,
804
+ "ey</w>": 713,
805
+ "il</w>": 714,
806
+ "per": 715,
807
+ "ĠAl": 716,
808
+ "ional</w>": 717,
809
+ "lud": 718,
810
+ "ely</w>": 719,
811
+ "tt": 720,
812
+ "ile</w>": 721,
813
+ "iz": 722,
814
+ "Ġj": 723,
815
+ "Ġwho</w>": 724,
816
+ "Ġag": 725,
817
+ "ib": 726,
818
+ "Ġthey</w>": 727,
819
+ "for": 728,
820
+ "Ġov": 729,
821
+ "ath": 730,
822
+ "eg": 731,
823
+ "Ġsc": 732,
824
+ "ip": 733,
825
+ "Ġ201": 734,
826
+ "Ġ3": 735,
827
+ "Ġper": 736,
828
+ "ory</w>": 737,
829
+ "Ġdes": 738,
830
+ "ide</w>": 739,
831
+ "Ġser": 740,
832
+ "se</w>": 741,
833
+ "ĠHe</w>": 742,
834
+ "land</w>": 743,
835
+ "ations</w>": 744,
836
+ "ric": 745,
837
+ "it</w>": 746,
838
+ "res</w>": 747,
839
+ "ered</w>": 748,
840
+ "Ġpre": 749,
841
+ "ĠSh": 750,
842
+ "ance</w>": 751,
843
+ "ort</w>": 752,
844
+ "ant</w>": 753,
845
+ ",@</w>": 754,
846
+ "Ġ@,@</w>": 755,
847
+ "ell</w>": 756,
848
+ "ĠY": 757,
849
+ "ned</w>": 758,
850
+ "ell": 759,
851
+ "ite</w>": 760,
852
+ "Ġinclud": 761,
853
+ "Ġrep": 762,
854
+ "Ġafter</w>": 763,
855
+ "Ġsuc": 764,
856
+ "ree</w>": 765,
857
+ "any</w>": 766,
858
+ "im</w>": 767,
859
+ "ort": 768,
860
+ "Ġ18": 769,
861
+ "Ġsu": 770,
862
+ "ade</w>": 771,
863
+ "our</w>": 772,
864
+ "ĠUn": 773,
865
+ "ĠIt</w>": 774,
866
+ "ik": 775,
867
+ "ĠMar": 776,
868
+ "ember</w>": 777,
869
+ "Ġ1</w>": 778,
870
+ "een</w>": 779,
871
+ "and</w>": 780,
872
+ "Ġsec": 781,
873
+ "ice</w>": 782,
874
+ "Ġtime</w>": 783,
875
+ "ĠAn": 784,
876
+ "Ġinto</w>": 785,
877
+ "Ġfin": 786,
878
+ "Ġother</w>": 787,
879
+ "Ġatt": 788,
880
+ "ill</w>": 789,
881
+ "ren": 790,
882
+ "ach": 791,
883
+ "ass": 792,
884
+ "eral</w>": 793,
885
+ "ese</w>": 794,
886
+ "sh": 795,
887
+ "als</w>": 796,
888
+ "ition</w>": 797,
889
+ "ough</w>": 798,
890
+ "les</w>": 799,
891
+ "amp": 800,
892
+ "Ġwould</w>": 801,
893
+ "Ġmore</w>": 802,
894
+ "roug": 803,
895
+ "rib": 804,
896
+ "ery</w>": 805,
897
+ "ace</w>": 806,
898
+ "ĠA</w>": 807,
899
+ "Ġplay": 808,
900
+ "ited</w>": 809,
901
+ "ked</w>": 810,
902
+ "ist</w>": 811,
903
+ "ied</w>": 812,
904
+ "Ġ2</w>": 813,
905
+ "ased</w>": 814,
906
+ "ings</w>": 815,
907
+ "ang": 816,
908
+ "am</w>": 817,
909
+ "ip</w>": 818,
910
+ "Ġbo": 819,
911
+ "able</w>": 820,
912
+ "ty</w>": 821,
913
+ "Ġchar": 822,
914
+ "Ġcent": 823,
915
+ "etw": 824,
916
+ "ates</w>": 825,
917
+ "rop": 826,
918
+ "ĠI</w>": 827,
919
+ "und</w>": 828,
920
+ "ĠAm": 829,
921
+ "ces</w>": 830,
922
+ "oin": 831,
923
+ "Ġinter": 832,
924
+ "up": 833,
925
+ "ct": 834,
926
+ "one</w>": 835,
927
+ "Ġtra": 836,
928
+ "ant": 837,
929
+ "ect": 838,
930
+ "Ġall</w>": 839,
931
+ "ef": 840,
932
+ "Ġcons": 841,
933
+ "ubl": 842,
934
+ "ning</w>": 843,
935
+ "ans</w>": 844,
936
+ "Ġfe": 845,
937
+ "ust</w>": 846,
938
+ "Ġ0": 847,
939
+ "Ġrem": 848,
940
+ "ase</w>": 849,
941
+ "ong": 850,
942
+ "Ġwhen</w>": 851,
943
+ "eb": 852,
944
+ "ĠWh": 853,
945
+ "Ġear": 854,
946
+ "ever</w>": 855,
947
+ "Ġover</w>": 856,
948
+ "Ġkn": 857,
949
+ "aus": 858,
950
+ "Ġpos": 859,
951
+ "ad</w>": 860,
952
+ "erm": 861,
953
+ "Ġshe</w>": 862,
954
+ "Ġra": 863,
955
+ "Ġduring</w>": 864,
956
+ "ason</w>": 865,
957
+ "vi": 866,
958
+ "Ġexp": 867,
959
+ "Ġlea": 868,
960
+ "Ġel": 869,
961
+ "Ġ4": 870,
962
+ "Ġonly</w>": 871,
963
+ "ond</w>": 872,
964
+ "Ġdec": 873,
965
+ "Ġacc": 874,
966
+ "Ġoff": 875,
967
+ "iss": 876,
968
+ "Ġfl": 877,
969
+ "ĠEn": 878,
970
+ "ot</w>": 879,
971
+ "ens": 880,
972
+ "ose</w>": 881,
973
+ "ake</w>": 882,
974
+ "om</w>": 883,
975
+ "Ġsev": 884,
976
+ "ach</w>": 885,
977
+ "etween</w>": 886,
978
+ "ern": 887,
979
+ "Ġ3</w>": 888,
980
+ "Ġpr": 889,
981
+ "Ġgro": 890,
982
+ "ruc": 891,
983
+ "Ġdi": 892,
984
+ "Ġ199": 893,
985
+ "ĠAr": 894,
986
+ "Ġgame</w>": 895,
987
+ "Ġhim</w>": 896,
988
+ "ook</w>": 897,
989
+ "Ġup</w>": 898,
990
+ "Ġabout</w>": 899,
991
+ "Ġrel": 900,
992
+ "form": 901,
993
+ "Ġthree</w>": 902,
994
+ "att": 903,
995
+ "ĠCom": 904,
996
+ "Ġsa": 905,
997
+ "ears</w>": 906,
998
+ "Ġ5": 907,
999
+ "ry</w>": 908,
1000
+ "Ġimp": 909,
1001
+ "Ġmost</w>": 910,
1002
+ "fer": 911,
1003
+ "Ġpres": 912,
1004
+ "Ġfil": 913,
1005
+ "Ġbetween</w>": 914,
1006
+ "Ġbeg": 915,
1007
+ "ph": 916,
1008
+ "ors</w>": 917,
1009
+ "Ġthan</w>": 918,
1010
+ "Ġrecor": 919,
1011
+ "ob": 920,
1012
+ "eric": 921,
1013
+ "ating</w>": 922,
1014
+ "Ġthroug": 923,
1015
+ "king</w>": 924,
1016
+ "Ġout</w>": 925,
1017
+ "Ġnum": 926,
1018
+ "ood</w>": 927,
1019
+ "ollow": 928,
1020
+ "act": 929,
1021
+ "uil": 930,
1022
+ "Ġcre": 931,
1023
+ "olog": 932,
1024
+ "ational</w>": 933,
1025
+ "Ġproduc": 934,
1026
+ "Ġwhile</w>": 935,
1027
+ "Ġlater</w>": 936,
1028
+ "Ġwrit": 937,
1029
+ "ex": 938,
1030
+ "Ġstar": 939,
1031
+ "Ġspec": 940,
1032
+ "ee": 941,
1033
+ "ished</w>": 942,
1034
+ "Ġreg": 943,
1035
+ "ision</w>": 944,
1036
+ "outh</w>": 945,
1037
+ "Ġrele": 946,
1038
+ "Ġass": 947,
1039
+ "Ġseason</w>": 948,
1040
+ "Ġmade</w>": 949,
1041
+ "ily</w>": 950,
1042
+ "ru": 951,
1043
+ "oy": 952,
1044
+ "tur": 953,
1045
+ "te</w>": 954,
1046
+ "Ġqu": 955,
1047
+ "Ġmov": 956,
1048
+ "ury</w>": 957,
1049
+ "ĠAmeric": 958,
1050
+ "ement</w>": 959,
1051
+ "cc": 960,
1052
+ "ound</w>": 961,
1053
+ "Ġlar": 962,
1054
+ "Ġform": 963,
1055
+ "ect</w>": 964,
1056
+ "Ġdef": 965,
1057
+ "Ġmus": 966,
1058
+ "ĠPar": 967,
1059
+ "Ġme": 968,
1060
+ "Ġsub": 969,
1061
+ "way</w>": 970,
1062
+ "op</w>": 971,
1063
+ "oh": 972,
1064
+ "eld</w>": 973,
1065
+ "ie</w>": 974,
1066
+ "emp": 975,
1067
+ "ames</w>": 976,
1068
+ "ern</w>": 977,
1069
+ "Ġnor": 978,
1070
+ "ived</w>": 979,
1071
+ "evel": 980,
1072
+ "Ġsuch</w>": 981,
1073
+ "ards</w>": 982,
1074
+ "Ġind": 983,
1075
+ "ike</w>": 984,
1076
+ "Ġgen": 985,
1077
+ "ert": 986,
1078
+ "Ġyear</w>": 987,
1079
+ "Ġused</w>": 988,
1080
+ "Ġnew</w>": 989,
1081
+ "Ġ5</w>": 990,
1082
+ "Ġalb": 991,
1083
+ "sp": 992,
1084
+ "yp": 993,
1085
+ "Ġwith": 994,
1086
+ "Ġwhere</w>": 995,
1087
+ "ics</w>": 996,
1088
+ "ĠThis</w>": 997,
1089
+ "Ġthem</w>": 998,
1090
+ "wn</w>": 999
1091
+ },
1092
+ "merges": [
1093
+ [
1094
+ "Ġ",
1095
+ "t"
1096
+ ],
1097
+ [
1098
+ "Ġt",
1099
+ "h"
1100
+ ],
1101
+ [
1102
+ "Ġ",
1103
+ "a"
1104
+ ],
1105
+ [
1106
+ "Ġth",
1107
+ "e</w>"
1108
+ ],
1109
+ [
1110
+ "i",
1111
+ "n"
1112
+ ],
1113
+ [
1114
+ "Ġ",
1115
+ "o"
1116
+ ],
1117
+ [
1118
+ "Ġ",
1119
+ ",</w>"
1120
+ ],
1121
+ [
1122
+ "Ġ",
1123
+ "s"
1124
+ ],
1125
+ [
1126
+ "e",
1127
+ "d</w>"
1128
+ ],
1129
+ [
1130
+ "Ġ",
1131
+ "w"
1132
+ ],
1133
+ [
1134
+ "e",
1135
+ "r"
1136
+ ],
1137
+ [
1138
+ "Ġ",
1139
+ ".</w>"
1140
+ ],
1141
+ [
1142
+ "Ġ",
1143
+ "i"
1144
+ ],
1145
+ [
1146
+ "r",
1147
+ "e"
1148
+ ],
1149
+ [
1150
+ "Ġ",
1151
+ "c"
1152
+ ],
1153
+ [
1154
+ "n",
1155
+ "d</w>"
1156
+ ],
1157
+ [
1158
+ "Ġ",
1159
+ "f"
1160
+ ],
1161
+ [
1162
+ "Ġ",
1163
+ "b"
1164
+ ],
1165
+ [
1166
+ "a",
1167
+ "t"
1168
+ ],
1169
+ [
1170
+ "Ġo",
1171
+ "f</w>"
1172
+ ],
1173
+ [
1174
+ "e",
1175
+ "r</w>"
1176
+ ],
1177
+ [
1178
+ "e",
1179
+ "n"
1180
+ ],
1181
+ [
1182
+ "a",
1183
+ "r"
1184
+ ],
1185
+ [
1186
+ "o",
1187
+ "r"
1188
+ ],
1189
+ [
1190
+ "i",
1191
+ "t"
1192
+ ],
1193
+ [
1194
+ "Ġ",
1195
+ "p"
1196
+ ],
1197
+ [
1198
+ "Ġ",
1199
+ "h"
1200
+ ],
1201
+ [
1202
+ "Ġa",
1203
+ "nd</w>"
1204
+ ],
1205
+ [
1206
+ "o",
1207
+ "n"
1208
+ ],
1209
+ [
1210
+ "in",
1211
+ "g</w>"
1212
+ ],
1213
+ [
1214
+ "a",
1215
+ "n"
1216
+ ],
1217
+ [
1218
+ "r",
1219
+ "o"
1220
+ ],
1221
+ [
1222
+ "Ġ",
1223
+ "m"
1224
+ ],
1225
+ [
1226
+ "Ġ",
1227
+ "d"
1228
+ ],
1229
+ [
1230
+ "e",
1231
+ "s</w>"
1232
+ ],
1233
+ [
1234
+ "Ġi",
1235
+ "n</w>"
1236
+ ],
1237
+ [
1238
+ "o",
1239
+ "n</w>"
1240
+ ],
1241
+ [
1242
+ "Ġt",
1243
+ "o</w>"
1244
+ ],
1245
+ [
1246
+ "o",
1247
+ "u"
1248
+ ],
1249
+ [
1250
+ "i",
1251
+ "s"
1252
+ ],
1253
+ [
1254
+ "Ġ",
1255
+ "a</w>"
1256
+ ],
1257
+ [
1258
+ "i",
1259
+ "c"
1260
+ ],
1261
+ [
1262
+ "Ġ",
1263
+ "T"
1264
+ ],
1265
+ [
1266
+ "a",
1267
+ "l"
1268
+ ],
1269
+ [
1270
+ "Ġ",
1271
+ "l"
1272
+ ],
1273
+ [
1274
+ "Ġ",
1275
+ "=</w>"
1276
+ ],
1277
+ [
1278
+ "Ġ",
1279
+ "re"
1280
+ ],
1281
+ [
1282
+ "Ġ",
1283
+ "\"</w>"
1284
+ ],
1285
+ [
1286
+ "e",
1287
+ "s"
1288
+ ],
1289
+ [
1290
+ "Ġ",
1291
+ "S"
1292
+ ],
1293
+ [
1294
+ "a",
1295
+ "s</w>"
1296
+ ],
1297
+ [
1298
+ "a",
1299
+ "l</w>"
1300
+ ],
1301
+ [
1302
+ "i",
1303
+ "l"
1304
+ ],
1305
+ [
1306
+ "e",
1307
+ "l"
1308
+ ],
1309
+ [
1310
+ "i",
1311
+ "on</w>"
1312
+ ],
1313
+ [
1314
+ "Ġ",
1315
+ "A"
1316
+ ],
1317
+ [
1318
+ "Ġ",
1319
+ "C"
1320
+ ],
1321
+ [
1322
+ "Ġ",
1323
+ "1"
1324
+ ],
1325
+ [
1326
+ "Ġ",
1327
+ "Ċ</w>"
1328
+ ],
1329
+ [
1330
+ "u",
1331
+ "r"
1332
+ ],
1333
+ [
1334
+ "ĠT",
1335
+ "h"
1336
+ ],
1337
+ [
1338
+ "Ġ",
1339
+ "n"
1340
+ ],
1341
+ [
1342
+ "a",
1343
+ "s"
1344
+ ],
1345
+ [
1346
+ "Ġ",
1347
+ "@"
1348
+ ],
1349
+ [
1350
+ "e",
1351
+ "c"
1352
+ ],
1353
+ [
1354
+ "o",
1355
+ "m"
1356
+ ],
1357
+ [
1358
+ "a",
1359
+ "c"
1360
+ ],
1361
+ [
1362
+ "Ġ",
1363
+ "e"
1364
+ ],
1365
+ [
1366
+ "Ġw",
1367
+ "as</w>"
1368
+ ],
1369
+ [
1370
+ "Ġ",
1371
+ "M"
1372
+ ],
1373
+ [
1374
+ "o",
1375
+ "r</w>"
1376
+ ],
1377
+ [
1378
+ "a",
1379
+ "n</w>"
1380
+ ],
1381
+ [
1382
+ "a",
1383
+ "m"
1384
+ ],
1385
+ [
1386
+ "e",
1387
+ "n</w>"
1388
+ ],
1389
+ [
1390
+ "o",
1391
+ "l"
1392
+ ],
1393
+ [
1394
+ "Ġ",
1395
+ "in"
1396
+ ],
1397
+ [
1398
+ "Ġ",
1399
+ "g"
1400
+ ],
1401
+ [
1402
+ "Ġ",
1403
+ "'</w>"
1404
+ ],
1405
+ [
1406
+ "Ġ",
1407
+ "B"
1408
+ ],
1409
+ [
1410
+ "l",
1411
+ "y</w>"
1412
+ ],
1413
+ [
1414
+ "a",
1415
+ "t</w>"
1416
+ ],
1417
+ [
1418
+ "i",
1419
+ "v"
1420
+ ],
1421
+ [
1422
+ "t",
1423
+ "s</w>"
1424
+ ],
1425
+ [
1426
+ "ĠTh",
1427
+ "e</w>"
1428
+ ],
1429
+ [
1430
+ "u",
1431
+ "s"
1432
+ ],
1433
+ [
1434
+ "-",
1435
+ "@</w>"
1436
+ ],
1437
+ [
1438
+ "Ġ@",
1439
+ "-@</w>"
1440
+ ],
1441
+ [
1442
+ "i",
1443
+ "s</w>"
1444
+ ],
1445
+ [
1446
+ "Ġ",
1447
+ "I"
1448
+ ],
1449
+ [
1450
+ "Ġw",
1451
+ "h"
1452
+ ],
1453
+ [
1454
+ "i",
1455
+ "g"
1456
+ ],
1457
+ [
1458
+ "Ġ",
1459
+ "H"
1460
+ ],
1461
+ [
1462
+ "Ġs",
1463
+ "t"
1464
+ ],
1465
+ [
1466
+ "o",
1467
+ "s"
1468
+ ],
1469
+ [
1470
+ "u",
1471
+ "n"
1472
+ ],
1473
+ [
1474
+ "t",
1475
+ "h"
1476
+ ],
1477
+ [
1478
+ "Ġ",
1479
+ "P"
1480
+ ],
1481
+ [
1482
+ "Ġw",
1483
+ "it"
1484
+ ],
1485
+ [
1486
+ "Ġth",
1487
+ "at</w>"
1488
+ ],
1489
+ [
1490
+ "i",
1491
+ "r"
1492
+ ],
1493
+ [
1494
+ "Ġa",
1495
+ "s</w>"
1496
+ ],
1497
+ [
1498
+ "e",
1499
+ "m"
1500
+ ],
1501
+ [
1502
+ "Ġo",
1503
+ "n</w>"
1504
+ ],
1505
+ [
1506
+ "r",
1507
+ "a"
1508
+ ],
1509
+ [
1510
+ "Ġf",
1511
+ "or</w>"
1512
+ ],
1513
+ [
1514
+ "Ġ",
1515
+ "R"
1516
+ ],
1517
+ [
1518
+ "e",
1519
+ "t"
1520
+ ],
1521
+ [
1522
+ "o",
1523
+ "w"
1524
+ ],
1525
+ [
1526
+ "Ġ",
1527
+ "2"
1528
+ ],
1529
+ [
1530
+ "i",
1531
+ "d"
1532
+ ],
1533
+ [
1534
+ "Ġ",
1535
+ "D"
1536
+ ],
1537
+ [
1538
+ "l",
1539
+ "e</w>"
1540
+ ],
1541
+ [
1542
+ "Ġwit",
1543
+ "h</w>"
1544
+ ],
1545
+ [
1546
+ "l",
1547
+ "a"
1548
+ ],
1549
+ [
1550
+ "en",
1551
+ "t</w>"
1552
+ ],
1553
+ [
1554
+ "i",
1555
+ "m"
1556
+ ],
1557
+ [
1558
+ "Ġ",
1559
+ "F"
1560
+ ],
1561
+ [
1562
+ "e",
1563
+ "a"
1564
+ ],
1565
+ [
1566
+ "i",
1567
+ "on"
1568
+ ],
1569
+ [
1570
+ "Ġb",
1571
+ "y</w>"
1572
+ ],
1573
+ [
1574
+ "Ġ",
1575
+ ")</w>"
1576
+ ],
1577
+ [
1578
+ "Ġ",
1579
+ "(</w>"
1580
+ ],
1581
+ [
1582
+ "Ġa",
1583
+ "l"
1584
+ ],
1585
+ [
1586
+ "Ġc",
1587
+ "on"
1588
+ ],
1589
+ [
1590
+ "en",
1591
+ "t"
1592
+ ],
1593
+ [
1594
+ "Ġ",
1595
+ "W"
1596
+ ],
1597
+ [
1598
+ "Ġi",
1599
+ "s</w>"
1600
+ ],
1601
+ [
1602
+ "er",
1603
+ "e</w>"
1604
+ ],
1605
+ [
1606
+ "Ġ",
1607
+ "G"
1608
+ ],
1609
+ [
1610
+ "Ġ",
1611
+ "N"
1612
+ ],
1613
+ [
1614
+ "Ġ",
1615
+ "L"
1616
+ ],
1617
+ [
1618
+ "Ġh",
1619
+ "a"
1620
+ ],
1621
+ [
1622
+ "er",
1623
+ "s</w>"
1624
+ ],
1625
+ [
1626
+ "r",
1627
+ "i"
1628
+ ],
1629
+ [
1630
+ "t",
1631
+ "h</w>"
1632
+ ],
1633
+ [
1634
+ "t",
1635
+ "ed</w>"
1636
+ ],
1637
+ [
1638
+ "u",
1639
+ "c"
1640
+ ],
1641
+ [
1642
+ "Ġ",
1643
+ "J"
1644
+ ],
1645
+ [
1646
+ "Ġ1",
1647
+ "9"
1648
+ ],
1649
+ [
1650
+ "e",
1651
+ "v"
1652
+ ],
1653
+ [
1654
+ "u",
1655
+ "l"
1656
+ ],
1657
+ [
1658
+ "Ġ",
1659
+ "v"
1660
+ ],
1661
+ [
1662
+ "c",
1663
+ "e</w>"
1664
+ ],
1665
+ [
1666
+ "at",
1667
+ "ion</w>"
1668
+ ],
1669
+ [
1670
+ "ro",
1671
+ "m</w>"
1672
+ ],
1673
+ [
1674
+ "Ġb",
1675
+ "e"
1676
+ ],
1677
+ [
1678
+ "Ġ",
1679
+ "E"
1680
+ ],
1681
+ [
1682
+ "i",
1683
+ "n</w>"
1684
+ ],
1685
+ [
1686
+ "Ġth",
1687
+ "e"
1688
+ ],
1689
+ [
1690
+ "Ġf",
1691
+ "rom</w>"
1692
+ ],
1693
+ [
1694
+ "Ġ",
1695
+ "O"
1696
+ ],
1697
+ [
1698
+ "t",
1699
+ "er</w>"
1700
+ ],
1701
+ [
1702
+ "Ġp",
1703
+ "ro"
1704
+ ],
1705
+ [
1706
+ "Ġa",
1707
+ "r"
1708
+ ],
1709
+ [
1710
+ "a",
1711
+ "d"
1712
+ ],
1713
+ [
1714
+ "Ġc",
1715
+ "om"
1716
+ ],
1717
+ [
1718
+ "i",
1719
+ "c</w>"
1720
+ ],
1721
+ [
1722
+ "a",
1723
+ "g"
1724
+ ],
1725
+ [
1726
+ "Ġh",
1727
+ "is</w>"
1728
+ ],
1729
+ [
1730
+ "Ġs",
1731
+ "h"
1732
+ ],
1733
+ [
1734
+ "Ġa",
1735
+ "t</w>"
1736
+ ],
1737
+ [
1738
+ "o",
1739
+ "v"
1740
+ ],
1741
+ [
1742
+ "i",
1743
+ "es</w>"
1744
+ ],
1745
+ [
1746
+ "o",
1747
+ "o"
1748
+ ],
1749
+ [
1750
+ "p",
1751
+ "p"
1752
+ ],
1753
+ [
1754
+ "s",
1755
+ "t"
1756
+ ],
1757
+ [
1758
+ "c",
1759
+ "h"
1760
+ ],
1761
+ [
1762
+ "Ġ",
1763
+ "r"
1764
+ ],
1765
+ [
1766
+ "Ġ2",
1767
+ "0"
1768
+ ],
1769
+ [
1770
+ "a",
1771
+ "y</w>"
1772
+ ],
1773
+ [
1774
+ "i",
1775
+ "f"
1776
+ ],
1777
+ [
1778
+ "Ġw",
1779
+ "ere</w>"
1780
+ ],
1781
+ [
1782
+ "Ġc",
1783
+ "h"
1784
+ ],
1785
+ [
1786
+ "u",
1787
+ "t</w>"
1788
+ ],
1789
+ [
1790
+ "s",
1791
+ "t</w>"
1792
+ ],
1793
+ [
1794
+ "u",
1795
+ "t"
1796
+ ],
1797
+ [
1798
+ "d",
1799
+ "s</w>"
1800
+ ],
1801
+ [
1802
+ "o",
1803
+ "p"
1804
+ ],
1805
+ [
1806
+ "u",
1807
+ "m"
1808
+ ],
1809
+ [
1810
+ "Ġi",
1811
+ "t</w>"
1812
+ ],
1813
+ [
1814
+ "o",
1815
+ "c"
1816
+ ],
1817
+ [
1818
+ "t",
1819
+ "er"
1820
+ ],
1821
+ [
1822
+ "l",
1823
+ "e"
1824
+ ],
1825
+ [
1826
+ "ig",
1827
+ "h"
1828
+ ],
1829
+ [
1830
+ "u",
1831
+ "d"
1832
+ ],
1833
+ [
1834
+ "Ġe",
1835
+ "x"
1836
+ ],
1837
+ [
1838
+ "ion",
1839
+ "s</w>"
1840
+ ],
1841
+ [
1842
+ "at",
1843
+ "e</w>"
1844
+ ],
1845
+ [
1846
+ "it",
1847
+ "y</w>"
1848
+ ],
1849
+ [
1850
+ "at",
1851
+ "ed</w>"
1852
+ ],
1853
+ [
1854
+ "Ġ",
1855
+ "un"
1856
+ ],
1857
+ [
1858
+ "e",
1859
+ "p"
1860
+ ],
1861
+ [
1862
+ "q",
1863
+ "u"
1864
+ ],
1865
+ [
1866
+ "Ġn",
1867
+ "o"
1868
+ ],
1869
+ [
1870
+ "Ġ",
1871
+ "K"
1872
+ ],
1873
+ [
1874
+ "iv",
1875
+ "e</w>"
1876
+ ],
1877
+ [
1878
+ "is",
1879
+ "t"
1880
+ ],
1881
+ [
1882
+ "Ġo",
1883
+ "n"
1884
+ ],
1885
+ [
1886
+ "am",
1887
+ "e</w>"
1888
+ ],
1889
+ [
1890
+ "ou",
1891
+ "n"
1892
+ ],
1893
+ [
1894
+ "i",
1895
+ "r</w>"
1896
+ ],
1897
+ [
1898
+ "a",
1899
+ "b"
1900
+ ],
1901
+ [
1902
+ "Ġ",
1903
+ "â"
1904
+ ],
1905
+ [
1906
+ "in",
1907
+ "g"
1908
+ ],
1909
+ [
1910
+ "Ġh",
1911
+ "e</w>"
1912
+ ],
1913
+ [
1914
+ "l",
1915
+ "d</w>"
1916
+ ],
1917
+ [
1918
+ "u",
1919
+ "g"
1920
+ ],
1921
+ [
1922
+ "ic",
1923
+ "h</w>"
1924
+ ],
1925
+ [
1926
+ "Ġa",
1927
+ "n</w>"
1928
+ ],
1929
+ [
1930
+ "e",
1931
+ "d"
1932
+ ],
1933
+ [
1934
+ "Ġ",
1935
+ "k"
1936
+ ],
1937
+ [
1938
+ "Ġâ",
1939
+ "Ģ"
1940
+ ],
1941
+ [
1942
+ "Ġha",
1943
+ "d</w>"
1944
+ ],
1945
+ [
1946
+ "v",
1947
+ "e</w>"
1948
+ ],
1949
+ [
1950
+ "a",
1951
+ "in"
1952
+ ],
1953
+ [
1954
+ "Ġs",
1955
+ "e"
1956
+ ],
1957
+ [
1958
+ "t",
1959
+ "ion</w>"
1960
+ ],
1961
+ [
1962
+ "or",
1963
+ "e</w>"
1964
+ ],
1965
+ [
1966
+ "re",
1967
+ "s"
1968
+ ],
1969
+ [
1970
+ "Ġwh",
1971
+ "ich</w>"
1972
+ ],
1973
+ [
1974
+ "ĠI",
1975
+ "n</w>"
1976
+ ],
1977
+ [
1978
+ "o",
1979
+ "d"
1980
+ ],
1981
+ [
1982
+ "th",
1983
+ "er</w>"
1984
+ ],
1985
+ [
1986
+ "a",
1987
+ "k"
1988
+ ],
1989
+ [
1990
+ "Ġs",
1991
+ "p"
1992
+ ],
1993
+ [
1994
+ "a",
1995
+ "r</w>"
1996
+ ],
1997
+ [
1998
+ "Ġ",
1999
+ "y"
2000
+ ],
2001
+ [
2002
+ "ĠC",
2003
+ "h"
2004
+ ],
2005
+ [
2006
+ "on",
2007
+ "g</w>"
2008
+ ],
2009
+ [
2010
+ "Ġa",
2011
+ "c"
2012
+ ],
2013
+ [
2014
+ "es",
2015
+ "t</w>"
2016
+ ],
2017
+ [
2018
+ "Ġ",
2019
+ "U"
2020
+ ],
2021
+ [
2022
+ "a",
2023
+ "p"
2024
+ ],
2025
+ [
2026
+ "f",
2027
+ "f"
2028
+ ],
2029
+ [
2030
+ "al",
2031
+ "ly</w>"
2032
+ ],
2033
+ [
2034
+ "r",
2035
+ "it"
2036
+ ],
2037
+ [
2038
+ "ĠS",
2039
+ "t"
2040
+ ],
2041
+ [
2042
+ "u",
2043
+ "b"
2044
+ ],
2045
+ [
2046
+ "g",
2047
+ "e</w>"
2048
+ ],
2049
+ [
2050
+ "b",
2051
+ "er</w>"
2052
+ ],
2053
+ [
2054
+ "e",
2055
+ "t</w>"
2056
+ ],
2057
+ [
2058
+ "Ġb",
2059
+ "e</w>"
2060
+ ],
2061
+ [
2062
+ "e",
2063
+ "ar"
2064
+ ],
2065
+ [
2066
+ "Ġre",
2067
+ "c"
2068
+ ],
2069
+ [
2070
+ "er",
2071
+ "s"
2072
+ ],
2073
+ [
2074
+ "Ġf",
2075
+ "ir"
2076
+ ],
2077
+ [
2078
+ "o",
2079
+ "t"
2080
+ ],
2081
+ [
2082
+ "Ġar",
2083
+ "e</w>"
2084
+ ],
2085
+ [
2086
+ "Ġa",
2087
+ "n"
2088
+ ],
2089
+ [
2090
+ "c",
2091
+ "h</w>"
2092
+ ],
2093
+ [
2094
+ "o",
2095
+ "g"
2096
+ ],
2097
+ [
2098
+ "i",
2099
+ "a</w>"
2100
+ ],
2101
+ [
2102
+ "es",
2103
+ "t"
2104
+ ],
2105
+ [
2106
+ "in",
2107
+ "e</w>"
2108
+ ],
2109
+ [
2110
+ "il",
2111
+ "l"
2112
+ ],
2113
+ [
2114
+ "an",
2115
+ "d"
2116
+ ],
2117
+ [
2118
+ "e",
2119
+ "l</w>"
2120
+ ],
2121
+ [
2122
+ "ar",
2123
+ "y</w>"
2124
+ ],
2125
+ [
2126
+ "e",
2127
+ "w</w>"
2128
+ ],
2129
+ [
2130
+ "i",
2131
+ "d</w>"
2132
+ ],
2133
+ [
2134
+ "Ġf",
2135
+ "or"
2136
+ ],
2137
+ [
2138
+ "Ġ",
2139
+ ";</w>"
2140
+ ],
2141
+ [
2142
+ "Ġcom",
2143
+ "p"
2144
+ ],
2145
+ [
2146
+ "Ġ",
2147
+ "V"
2148
+ ],
2149
+ [
2150
+ "Ġin",
2151
+ "c"
2152
+ ],
2153
+ [
2154
+ "t",
2155
+ "r"
2156
+ ],
2157
+ [
2158
+ "Ġ20",
2159
+ "0"
2160
+ ],
2161
+ [
2162
+ "Ġthe",
2163
+ "ir</w>"
2164
+ ],
2165
+ [
2166
+ "u",
2167
+ "s</w>"
2168
+ ],
2169
+ [
2170
+ "Ġb",
2171
+ "ut</w>"
2172
+ ],
2173
+ [
2174
+ "r",
2175
+ "an"
2176
+ ],
2177
+ [
2178
+ "ic",
2179
+ "al</w>"
2180
+ ],
2181
+ [
2182
+ "Ġfir",
2183
+ "st</w>"
2184
+ ],
2185
+ [
2186
+ "Ġd",
2187
+ "e"
2188
+ ],
2189
+ [
2190
+ "Ġin",
2191
+ "t"
2192
+ ],
2193
+ [
2194
+ "Ġ",
2195
+ "ro"
2196
+ ],
2197
+ [
2198
+ "s",
2199
+ "o</w>"
2200
+ ],
2201
+ [
2202
+ "ĠâĢ",
2203
+ "ĵ</w>"
2204
+ ],
2205
+ [
2206
+ "Ġno",
2207
+ "t</w>"
2208
+ ],
2209
+ [
2210
+ "d",
2211
+ "ing</w>"
2212
+ ],
2213
+ [
2214
+ "f",
2215
+ "ter</w>"
2216
+ ],
2217
+ [
2218
+ "ur",
2219
+ "e</w>"
2220
+ ],
2221
+ [
2222
+ "Ġp",
2223
+ "ar"
2224
+ ],
2225
+ [
2226
+ "Ġ",
2227
+ ":</w>"
2228
+ ],
2229
+ [
2230
+ "i",
2231
+ "an</w>"
2232
+ ],
2233
+ [
2234
+ "Ġt",
2235
+ "w"
2236
+ ],
2237
+ [
2238
+ "ou",
2239
+ "ld</w>"
2240
+ ],
2241
+ [
2242
+ "Ġal",
2243
+ "so</w>"
2244
+ ],
2245
+ [
2246
+ "Ġi",
2247
+ "ts</w>"
2248
+ ],
2249
+ [
2250
+ "Ġw",
2251
+ "or"
2252
+ ],
2253
+ [
2254
+ "u",
2255
+ "m</w>"
2256
+ ],
2257
+ [
2258
+ "Ġo",
2259
+ "r</w>"
2260
+ ],
2261
+ [
2262
+ "os",
2263
+ "t</w>"
2264
+ ],
2265
+ [
2266
+ "0",
2267
+ "0</w>"
2268
+ ],
2269
+ [
2270
+ "ou",
2271
+ "r"
2272
+ ],
2273
+ [
2274
+ "ar",
2275
+ "d</w>"
2276
+ ],
2277
+ [
2278
+ "Ġre",
2279
+ "s"
2280
+ ],
2281
+ [
2282
+ "m",
2283
+ "p"
2284
+ ],
2285
+ [
2286
+ "u",
2287
+ "e</w>"
2288
+ ],
2289
+ [
2290
+ "Ġa",
2291
+ "b"
2292
+ ],
2293
+ [
2294
+ "is",
2295
+ "h</w>"
2296
+ ],
2297
+ [
2298
+ "Ġcon",
2299
+ "t"
2300
+ ],
2301
+ [
2302
+ "Ġa",
2303
+ "d"
2304
+ ],
2305
+ [
2306
+ "ow",
2307
+ "n</w>"
2308
+ ],
2309
+ [
2310
+ "al",
2311
+ "l</w>"
2312
+ ],
2313
+ [
2314
+ "ou",
2315
+ "g"
2316
+ ],
2317
+ [
2318
+ "Ġh",
2319
+ "er</w>"
2320
+ ],
2321
+ [
2322
+ "as",
2323
+ "t</w>"
2324
+ ],
2325
+ [
2326
+ "Ġ",
2327
+ "en"
2328
+ ],
2329
+ [
2330
+ "om",
2331
+ "e</w>"
2332
+ ],
2333
+ [
2334
+ "al",
2335
+ "l"
2336
+ ],
2337
+ [
2338
+ "d",
2339
+ "ed</w>"
2340
+ ],
2341
+ [
2342
+ "o",
2343
+ "w</w>"
2344
+ ],
2345
+ [
2346
+ "Ġha",
2347
+ "ve</w>"
2348
+ ],
2349
+ [
2350
+ "Ġ",
2351
+ "us"
2352
+ ],
2353
+ [
2354
+ "ea",
2355
+ "r</w>"
2356
+ ],
2357
+ [
2358
+ "ac",
2359
+ "k</w>"
2360
+ ],
2361
+ [
2362
+ "d",
2363
+ "uc"
2364
+ ],
2365
+ [
2366
+ "i",
2367
+ "al</w>"
2368
+ ],
2369
+ [
2370
+ "s",
2371
+ "s"
2372
+ ],
2373
+ [
2374
+ "en",
2375
+ "ts</w>"
2376
+ ],
2377
+ [
2378
+ "a",
2379
+ "in</w>"
2380
+ ],
2381
+ [
2382
+ "t",
2383
+ "ing</w>"
2384
+ ],
2385
+ [
2386
+ "Ġon",
2387
+ "e</w>"
2388
+ ],
2389
+ [
2390
+ "es",
2391
+ "s"
2392
+ ],
2393
+ [
2394
+ "Ġh",
2395
+ "as</w>"
2396
+ ],
2397
+ [
2398
+ "igh",
2399
+ "t</w>"
2400
+ ],
2401
+ [
2402
+ "a",
2403
+ "v"
2404
+ ],
2405
+ [
2406
+ "Ġe",
2407
+ "v"
2408
+ ],
2409
+ [
2410
+ "ou",
2411
+ "t</w>"
2412
+ ],
2413
+ [
2414
+ "a",
2415
+ "y"
2416
+ ],
2417
+ [
2418
+ "en",
2419
+ "ce</w>"
2420
+ ],
2421
+ [
2422
+ "Ġbe",
2423
+ "en</w>"
2424
+ ],
2425
+ [
2426
+ "e",
2427
+ "w"
2428
+ ],
2429
+ [
2430
+ "Ġtw",
2431
+ "o</w>"
2432
+ ],
2433
+ [
2434
+ "Ġc",
2435
+ "l"
2436
+ ],
2437
+ [
2438
+ "d",
2439
+ "er</w>"
2440
+ ],
2441
+ [
2442
+ "im",
2443
+ "e</w>"
2444
+ ],
2445
+ [
2446
+ "k",
2447
+ "s</w>"
2448
+ ],
2449
+ [
2450
+ "es",
2451
+ "s</w>"
2452
+ ],
2453
+ [
2454
+ "is",
2455
+ "h"
2456
+ ],
2457
+ [
2458
+ ".",
2459
+ "@</w>"
2460
+ ],
2461
+ [
2462
+ "Ġ@",
2463
+ ".@</w>"
2464
+ ],
2465
+ [
2466
+ "Ġp",
2467
+ "la"
2468
+ ],
2469
+ [
2470
+ "Ġp",
2471
+ "l"
2472
+ ],
2473
+ [
2474
+ "Ġo",
2475
+ "r"
2476
+ ],
2477
+ [
2478
+ "u",
2479
+ "p</w>"
2480
+ ],
2481
+ [
2482
+ "m",
2483
+ "ent</w>"
2484
+ ],
2485
+ [
2486
+ "ur",
2487
+ "ing</w>"
2488
+ ],
2489
+ [
2490
+ "ol",
2491
+ "l"
2492
+ ],
2493
+ [
2494
+ "ĠI",
2495
+ "n"
2496
+ ],
2497
+ [
2498
+ "Ġth",
2499
+ "is</w>"
2500
+ ],
2501
+ [
2502
+ "Ġb",
2503
+ "ec"
2504
+ ],
2505
+ [
2506
+ "Ġcom",
2507
+ "m"
2508
+ ],
2509
+ [
2510
+ "Ġd",
2511
+ "is"
2512
+ ],
2513
+ [
2514
+ "at",
2515
+ "er</w>"
2516
+ ],
2517
+ [
2518
+ "ag",
2519
+ "e</w>"
2520
+ ],
2521
+ [
2522
+ "Ġa",
2523
+ "pp"
2524
+ ],
2525
+ [
2526
+ "ou",
2527
+ "s</w>"
2528
+ ],
2529
+ [
2530
+ "e",
2531
+ "y</w>"
2532
+ ],
2533
+ [
2534
+ "i",
2535
+ "l</w>"
2536
+ ],
2537
+ [
2538
+ "p",
2539
+ "er"
2540
+ ],
2541
+ [
2542
+ "ĠA",
2543
+ "l"
2544
+ ],
2545
+ [
2546
+ "ion",
2547
+ "al</w>"
2548
+ ],
2549
+ [
2550
+ "l",
2551
+ "ud"
2552
+ ],
2553
+ [
2554
+ "el",
2555
+ "y</w>"
2556
+ ],
2557
+ [
2558
+ "t",
2559
+ "t"
2560
+ ],
2561
+ [
2562
+ "il",
2563
+ "e</w>"
2564
+ ],
2565
+ [
2566
+ "i",
2567
+ "z"
2568
+ ],
2569
+ [
2570
+ "Ġ",
2571
+ "j"
2572
+ ],
2573
+ [
2574
+ "Ġwh",
2575
+ "o</w>"
2576
+ ],
2577
+ [
2578
+ "Ġa",
2579
+ "g"
2580
+ ],
2581
+ [
2582
+ "i",
2583
+ "b"
2584
+ ],
2585
+ [
2586
+ "Ġthe",
2587
+ "y</w>"
2588
+ ],
2589
+ [
2590
+ "f",
2591
+ "or"
2592
+ ],
2593
+ [
2594
+ "Ġo",
2595
+ "v"
2596
+ ],
2597
+ [
2598
+ "at",
2599
+ "h"
2600
+ ],
2601
+ [
2602
+ "e",
2603
+ "g"
2604
+ ],
2605
+ [
2606
+ "Ġs",
2607
+ "c"
2608
+ ],
2609
+ [
2610
+ "i",
2611
+ "p"
2612
+ ],
2613
+ [
2614
+ "Ġ20",
2615
+ "1"
2616
+ ],
2617
+ [
2618
+ "Ġ",
2619
+ "3"
2620
+ ],
2621
+ [
2622
+ "Ġp",
2623
+ "er"
2624
+ ],
2625
+ [
2626
+ "or",
2627
+ "y</w>"
2628
+ ],
2629
+ [
2630
+ "Ġd",
2631
+ "es"
2632
+ ],
2633
+ [
2634
+ "id",
2635
+ "e</w>"
2636
+ ],
2637
+ [
2638
+ "Ġs",
2639
+ "er"
2640
+ ],
2641
+ [
2642
+ "s",
2643
+ "e</w>"
2644
+ ],
2645
+ [
2646
+ "ĠH",
2647
+ "e</w>"
2648
+ ],
2649
+ [
2650
+ "la",
2651
+ "nd</w>"
2652
+ ],
2653
+ [
2654
+ "at",
2655
+ "ions</w>"
2656
+ ],
2657
+ [
2658
+ "r",
2659
+ "ic"
2660
+ ],
2661
+ [
2662
+ "i",
2663
+ "t</w>"
2664
+ ],
2665
+ [
2666
+ "re",
2667
+ "s</w>"
2668
+ ],
2669
+ [
2670
+ "er",
2671
+ "ed</w>"
2672
+ ],
2673
+ [
2674
+ "Ġp",
2675
+ "re"
2676
+ ],
2677
+ [
2678
+ "ĠS",
2679
+ "h"
2680
+ ],
2681
+ [
2682
+ "an",
2683
+ "ce</w>"
2684
+ ],
2685
+ [
2686
+ "or",
2687
+ "t</w>"
2688
+ ],
2689
+ [
2690
+ "an",
2691
+ "t</w>"
2692
+ ],
2693
+ [
2694
+ ",",
2695
+ "@</w>"
2696
+ ],
2697
+ [
2698
+ "Ġ@",
2699
+ ",@</w>"
2700
+ ],
2701
+ [
2702
+ "el",
2703
+ "l</w>"
2704
+ ],
2705
+ [
2706
+ "Ġ",
2707
+ "Y"
2708
+ ],
2709
+ [
2710
+ "n",
2711
+ "ed</w>"
2712
+ ],
2713
+ [
2714
+ "el",
2715
+ "l"
2716
+ ],
2717
+ [
2718
+ "it",
2719
+ "e</w>"
2720
+ ],
2721
+ [
2722
+ "Ġinc",
2723
+ "lud"
2724
+ ],
2725
+ [
2726
+ "Ġre",
2727
+ "p"
2728
+ ],
2729
+ [
2730
+ "Ġa",
2731
+ "fter</w>"
2732
+ ],
2733
+ [
2734
+ "Ġs",
2735
+ "uc"
2736
+ ],
2737
+ [
2738
+ "re",
2739
+ "e</w>"
2740
+ ],
2741
+ [
2742
+ "an",
2743
+ "y</w>"
2744
+ ],
2745
+ [
2746
+ "i",
2747
+ "m</w>"
2748
+ ],
2749
+ [
2750
+ "or",
2751
+ "t"
2752
+ ],
2753
+ [
2754
+ "Ġ1",
2755
+ "8"
2756
+ ],
2757
+ [
2758
+ "Ġs",
2759
+ "u"
2760
+ ],
2761
+ [
2762
+ "ad",
2763
+ "e</w>"
2764
+ ],
2765
+ [
2766
+ "ou",
2767
+ "r</w>"
2768
+ ],
2769
+ [
2770
+ "ĠU",
2771
+ "n"
2772
+ ],
2773
+ [
2774
+ "ĠI",
2775
+ "t</w>"
2776
+ ],
2777
+ [
2778
+ "i",
2779
+ "k"
2780
+ ],
2781
+ [
2782
+ "ĠM",
2783
+ "ar"
2784
+ ],
2785
+ [
2786
+ "em",
2787
+ "ber</w>"
2788
+ ],
2789
+ [
2790
+ "Ġ",
2791
+ "1</w>"
2792
+ ],
2793
+ [
2794
+ "e",
2795
+ "en</w>"
2796
+ ],
2797
+ [
2798
+ "a",
2799
+ "nd</w>"
2800
+ ],
2801
+ [
2802
+ "Ġs",
2803
+ "ec"
2804
+ ],
2805
+ [
2806
+ "ic",
2807
+ "e</w>"
2808
+ ],
2809
+ [
2810
+ "Ġt",
2811
+ "ime</w>"
2812
+ ],
2813
+ [
2814
+ "ĠA",
2815
+ "n"
2816
+ ],
2817
+ [
2818
+ "Ġint",
2819
+ "o</w>"
2820
+ ],
2821
+ [
2822
+ "Ġf",
2823
+ "in"
2824
+ ],
2825
+ [
2826
+ "Ġo",
2827
+ "ther</w>"
2828
+ ],
2829
+ [
2830
+ "Ġa",
2831
+ "tt"
2832
+ ],
2833
+ [
2834
+ "il",
2835
+ "l</w>"
2836
+ ],
2837
+ [
2838
+ "re",
2839
+ "n"
2840
+ ],
2841
+ [
2842
+ "ac",
2843
+ "h"
2844
+ ],
2845
+ [
2846
+ "as",
2847
+ "s"
2848
+ ],
2849
+ [
2850
+ "er",
2851
+ "al</w>"
2852
+ ],
2853
+ [
2854
+ "es",
2855
+ "e</w>"
2856
+ ],
2857
+ [
2858
+ "s",
2859
+ "h"
2860
+ ],
2861
+ [
2862
+ "al",
2863
+ "s</w>"
2864
+ ],
2865
+ [
2866
+ "it",
2867
+ "ion</w>"
2868
+ ],
2869
+ [
2870
+ "oug",
2871
+ "h</w>"
2872
+ ],
2873
+ [
2874
+ "l",
2875
+ "es</w>"
2876
+ ],
2877
+ [
2878
+ "am",
2879
+ "p"
2880
+ ],
2881
+ [
2882
+ "Ġw",
2883
+ "ould</w>"
2884
+ ],
2885
+ [
2886
+ "Ġm",
2887
+ "ore</w>"
2888
+ ],
2889
+ [
2890
+ "ro",
2891
+ "ug"
2892
+ ],
2893
+ [
2894
+ "ri",
2895
+ "b"
2896
+ ],
2897
+ [
2898
+ "er",
2899
+ "y</w>"
2900
+ ],
2901
+ [
2902
+ "ac",
2903
+ "e</w>"
2904
+ ],
2905
+ [
2906
+ "Ġ",
2907
+ "A</w>"
2908
+ ],
2909
+ [
2910
+ "Ġpla",
2911
+ "y"
2912
+ ],
2913
+ [
2914
+ "it",
2915
+ "ed</w>"
2916
+ ],
2917
+ [
2918
+ "k",
2919
+ "ed</w>"
2920
+ ],
2921
+ [
2922
+ "is",
2923
+ "t</w>"
2924
+ ],
2925
+ [
2926
+ "i",
2927
+ "ed</w>"
2928
+ ],
2929
+ [
2930
+ "Ġ",
2931
+ "2</w>"
2932
+ ],
2933
+ [
2934
+ "as",
2935
+ "ed</w>"
2936
+ ],
2937
+ [
2938
+ "ing",
2939
+ "s</w>"
2940
+ ],
2941
+ [
2942
+ "an",
2943
+ "g"
2944
+ ],
2945
+ [
2946
+ "a",
2947
+ "m</w>"
2948
+ ],
2949
+ [
2950
+ "i",
2951
+ "p</w>"
2952
+ ],
2953
+ [
2954
+ "Ġb",
2955
+ "o"
2956
+ ],
2957
+ [
2958
+ "ab",
2959
+ "le</w>"
2960
+ ],
2961
+ [
2962
+ "t",
2963
+ "y</w>"
2964
+ ],
2965
+ [
2966
+ "Ġch",
2967
+ "ar"
2968
+ ],
2969
+ [
2970
+ "Ġc",
2971
+ "ent"
2972
+ ],
2973
+ [
2974
+ "et",
2975
+ "w"
2976
+ ],
2977
+ [
2978
+ "at",
2979
+ "es</w>"
2980
+ ],
2981
+ [
2982
+ "ro",
2983
+ "p"
2984
+ ],
2985
+ [
2986
+ "Ġ",
2987
+ "I</w>"
2988
+ ],
2989
+ [
2990
+ "u",
2991
+ "nd</w>"
2992
+ ],
2993
+ [
2994
+ "ĠA",
2995
+ "m"
2996
+ ],
2997
+ [
2998
+ "c",
2999
+ "es</w>"
3000
+ ],
3001
+ [
3002
+ "o",
3003
+ "in"
3004
+ ],
3005
+ [
3006
+ "Ġin",
3007
+ "ter"
3008
+ ],
3009
+ [
3010
+ "u",
3011
+ "p"
3012
+ ],
3013
+ [
3014
+ "c",
3015
+ "t"
3016
+ ],
3017
+ [
3018
+ "on",
3019
+ "e</w>"
3020
+ ],
3021
+ [
3022
+ "Ġt",
3023
+ "ra"
3024
+ ],
3025
+ [
3026
+ "an",
3027
+ "t"
3028
+ ],
3029
+ [
3030
+ "ec",
3031
+ "t"
3032
+ ],
3033
+ [
3034
+ "Ġal",
3035
+ "l</w>"
3036
+ ],
3037
+ [
3038
+ "e",
3039
+ "f"
3040
+ ],
3041
+ [
3042
+ "Ġcon",
3043
+ "s"
3044
+ ],
3045
+ [
3046
+ "ub",
3047
+ "l"
3048
+ ],
3049
+ [
3050
+ "n",
3051
+ "ing</w>"
3052
+ ],
3053
+ [
3054
+ "an",
3055
+ "s</w>"
3056
+ ],
3057
+ [
3058
+ "Ġf",
3059
+ "e"
3060
+ ],
3061
+ [
3062
+ "us",
3063
+ "t</w>"
3064
+ ],
3065
+ [
3066
+ "Ġ",
3067
+ "0"
3068
+ ],
3069
+ [
3070
+ "Ġre",
3071
+ "m"
3072
+ ],
3073
+ [
3074
+ "as",
3075
+ "e</w>"
3076
+ ],
3077
+ [
3078
+ "on",
3079
+ "g"
3080
+ ],
3081
+ [
3082
+ "Ġwh",
3083
+ "en</w>"
3084
+ ],
3085
+ [
3086
+ "e",
3087
+ "b"
3088
+ ],
3089
+ [
3090
+ "ĠW",
3091
+ "h"
3092
+ ],
3093
+ [
3094
+ "Ġe",
3095
+ "ar"
3096
+ ],
3097
+ [
3098
+ "ev",
3099
+ "er</w>"
3100
+ ],
3101
+ [
3102
+ "Ġov",
3103
+ "er</w>"
3104
+ ],
3105
+ [
3106
+ "Ġk",
3107
+ "n"
3108
+ ],
3109
+ [
3110
+ "a",
3111
+ "us"
3112
+ ],
3113
+ [
3114
+ "Ġp",
3115
+ "os"
3116
+ ],
3117
+ [
3118
+ "a",
3119
+ "d</w>"
3120
+ ],
3121
+ [
3122
+ "er",
3123
+ "m"
3124
+ ],
3125
+ [
3126
+ "Ġsh",
3127
+ "e</w>"
3128
+ ],
3129
+ [
3130
+ "Ġ",
3131
+ "ra"
3132
+ ],
3133
+ [
3134
+ "Ġd",
3135
+ "uring</w>"
3136
+ ],
3137
+ [
3138
+ "as",
3139
+ "on</w>"
3140
+ ],
3141
+ [
3142
+ "v",
3143
+ "i"
3144
+ ],
3145
+ [
3146
+ "Ġex",
3147
+ "p"
3148
+ ],
3149
+ [
3150
+ "Ġl",
3151
+ "ea"
3152
+ ],
3153
+ [
3154
+ "Ġ",
3155
+ "el"
3156
+ ],
3157
+ [
3158
+ "Ġ",
3159
+ "4"
3160
+ ],
3161
+ [
3162
+ "Ġon",
3163
+ "ly</w>"
3164
+ ],
3165
+ [
3166
+ "o",
3167
+ "nd</w>"
3168
+ ],
3169
+ [
3170
+ "Ġd",
3171
+ "ec"
3172
+ ],
3173
+ [
3174
+ "Ġac",
3175
+ "c"
3176
+ ],
3177
+ [
3178
+ "Ġo",
3179
+ "ff"
3180
+ ],
3181
+ [
3182
+ "is",
3183
+ "s"
3184
+ ],
3185
+ [
3186
+ "Ġf",
3187
+ "l"
3188
+ ],
3189
+ [
3190
+ "ĠE",
3191
+ "n"
3192
+ ],
3193
+ [
3194
+ "o",
3195
+ "t</w>"
3196
+ ],
3197
+ [
3198
+ "en",
3199
+ "s"
3200
+ ],
3201
+ [
3202
+ "os",
3203
+ "e</w>"
3204
+ ],
3205
+ [
3206
+ "ak",
3207
+ "e</w>"
3208
+ ],
3209
+ [
3210
+ "o",
3211
+ "m</w>"
3212
+ ],
3213
+ [
3214
+ "Ġs",
3215
+ "ev"
3216
+ ],
3217
+ [
3218
+ "ac",
3219
+ "h</w>"
3220
+ ],
3221
+ [
3222
+ "etw",
3223
+ "een</w>"
3224
+ ],
3225
+ [
3226
+ "er",
3227
+ "n"
3228
+ ],
3229
+ [
3230
+ "Ġ",
3231
+ "3</w>"
3232
+ ],
3233
+ [
3234
+ "Ġp",
3235
+ "r"
3236
+ ],
3237
+ [
3238
+ "Ġg",
3239
+ "ro"
3240
+ ],
3241
+ [
3242
+ "r",
3243
+ "uc"
3244
+ ],
3245
+ [
3246
+ "Ġd",
3247
+ "i"
3248
+ ],
3249
+ [
3250
+ "Ġ19",
3251
+ "9"
3252
+ ],
3253
+ [
3254
+ "ĠA",
3255
+ "r"
3256
+ ],
3257
+ [
3258
+ "Ġg",
3259
+ "ame</w>"
3260
+ ],
3261
+ [
3262
+ "Ġh",
3263
+ "im</w>"
3264
+ ],
3265
+ [
3266
+ "oo",
3267
+ "k</w>"
3268
+ ],
3269
+ [
3270
+ "Ġ",
3271
+ "up</w>"
3272
+ ],
3273
+ [
3274
+ "Ġab",
3275
+ "out</w>"
3276
+ ],
3277
+ [
3278
+ "Ġre",
3279
+ "l"
3280
+ ],
3281
+ [
3282
+ "for",
3283
+ "m"
3284
+ ],
3285
+ [
3286
+ "Ġth",
3287
+ "ree</w>"
3288
+ ],
3289
+ [
3290
+ "at",
3291
+ "t"
3292
+ ],
3293
+ [
3294
+ "ĠC",
3295
+ "om"
3296
+ ],
3297
+ [
3298
+ "Ġs",
3299
+ "a"
3300
+ ],
3301
+ [
3302
+ "ear",
3303
+ "s</w>"
3304
+ ],
3305
+ [
3306
+ "Ġ",
3307
+ "5"
3308
+ ],
3309
+ [
3310
+ "r",
3311
+ "y</w>"
3312
+ ],
3313
+ [
3314
+ "Ġi",
3315
+ "mp"
3316
+ ],
3317
+ [
3318
+ "Ġm",
3319
+ "ost</w>"
3320
+ ],
3321
+ [
3322
+ "f",
3323
+ "er"
3324
+ ],
3325
+ [
3326
+ "Ġp",
3327
+ "res"
3328
+ ],
3329
+ [
3330
+ "Ġf",
3331
+ "il"
3332
+ ],
3333
+ [
3334
+ "Ġb",
3335
+ "etween</w>"
3336
+ ],
3337
+ [
3338
+ "Ġbe",
3339
+ "g"
3340
+ ],
3341
+ [
3342
+ "p",
3343
+ "h"
3344
+ ],
3345
+ [
3346
+ "or",
3347
+ "s</w>"
3348
+ ],
3349
+ [
3350
+ "Ġth",
3351
+ "an</w>"
3352
+ ],
3353
+ [
3354
+ "Ġrec",
3355
+ "or"
3356
+ ],
3357
+ [
3358
+ "o",
3359
+ "b"
3360
+ ],
3361
+ [
3362
+ "er",
3363
+ "ic"
3364
+ ],
3365
+ [
3366
+ "at",
3367
+ "ing</w>"
3368
+ ],
3369
+ [
3370
+ "Ġth",
3371
+ "roug"
3372
+ ],
3373
+ [
3374
+ "k",
3375
+ "ing</w>"
3376
+ ],
3377
+ [
3378
+ "Ġo",
3379
+ "ut</w>"
3380
+ ],
3381
+ [
3382
+ "Ġn",
3383
+ "um"
3384
+ ],
3385
+ [
3386
+ "oo",
3387
+ "d</w>"
3388
+ ],
3389
+ [
3390
+ "oll",
3391
+ "ow"
3392
+ ],
3393
+ [
3394
+ "ac",
3395
+ "t"
3396
+ ],
3397
+ [
3398
+ "u",
3399
+ "il"
3400
+ ],
3401
+ [
3402
+ "Ġc",
3403
+ "re"
3404
+ ],
3405
+ [
3406
+ "ol",
3407
+ "og"
3408
+ ],
3409
+ [
3410
+ "at",
3411
+ "ional</w>"
3412
+ ],
3413
+ [
3414
+ "Ġpro",
3415
+ "duc"
3416
+ ],
3417
+ [
3418
+ "Ġwh",
3419
+ "ile</w>"
3420
+ ],
3421
+ [
3422
+ "Ġl",
3423
+ "ater</w>"
3424
+ ],
3425
+ [
3426
+ "Ġw",
3427
+ "rit"
3428
+ ],
3429
+ [
3430
+ "e",
3431
+ "x"
3432
+ ],
3433
+ [
3434
+ "Ġst",
3435
+ "ar"
3436
+ ],
3437
+ [
3438
+ "Ġsp",
3439
+ "ec"
3440
+ ],
3441
+ [
3442
+ "e",
3443
+ "e"
3444
+ ],
3445
+ [
3446
+ "ish",
3447
+ "ed</w>"
3448
+ ],
3449
+ [
3450
+ "Ġre",
3451
+ "g"
3452
+ ],
3453
+ [
3454
+ "is",
3455
+ "ion</w>"
3456
+ ],
3457
+ [
3458
+ "ou",
3459
+ "th</w>"
3460
+ ],
3461
+ [
3462
+ "Ġre",
3463
+ "le"
3464
+ ],
3465
+ [
3466
+ "Ġa",
3467
+ "ss"
3468
+ ],
3469
+ [
3470
+ "Ġse",
3471
+ "ason</w>"
3472
+ ],
3473
+ [
3474
+ "Ġm",
3475
+ "ade</w>"
3476
+ ],
3477
+ [
3478
+ "il",
3479
+ "y</w>"
3480
+ ],
3481
+ [
3482
+ "r",
3483
+ "u"
3484
+ ],
3485
+ [
3486
+ "o",
3487
+ "y"
3488
+ ],
3489
+ [
3490
+ "t",
3491
+ "ur"
3492
+ ],
3493
+ [
3494
+ "t",
3495
+ "e</w>"
3496
+ ],
3497
+ [
3498
+ "Ġ",
3499
+ "qu"
3500
+ ],
3501
+ [
3502
+ "Ġm",
3503
+ "ov"
3504
+ ],
3505
+ [
3506
+ "ur",
3507
+ "y</w>"
3508
+ ],
3509
+ [
3510
+ "ĠAm",
3511
+ "eric"
3512
+ ],
3513
+ [
3514
+ "em",
3515
+ "ent</w>"
3516
+ ],
3517
+ [
3518
+ "c",
3519
+ "c"
3520
+ ],
3521
+ [
3522
+ "ou",
3523
+ "nd</w>"
3524
+ ],
3525
+ [
3526
+ "Ġl",
3527
+ "ar"
3528
+ ],
3529
+ [
3530
+ "Ġfor",
3531
+ "m"
3532
+ ],
3533
+ [
3534
+ "ec",
3535
+ "t</w>"
3536
+ ],
3537
+ [
3538
+ "Ġde",
3539
+ "f"
3540
+ ],
3541
+ [
3542
+ "Ġm",
3543
+ "us"
3544
+ ],
3545
+ [
3546
+ "ĠP",
3547
+ "ar"
3548
+ ],
3549
+ [
3550
+ "Ġm",
3551
+ "e"
3552
+ ],
3553
+ [
3554
+ "Ġs",
3555
+ "ub"
3556
+ ],
3557
+ [
3558
+ "w",
3559
+ "ay</w>"
3560
+ ],
3561
+ [
3562
+ "o",
3563
+ "p</w>"
3564
+ ],
3565
+ [
3566
+ "o",
3567
+ "h"
3568
+ ],
3569
+ [
3570
+ "el",
3571
+ "d</w>"
3572
+ ],
3573
+ [
3574
+ "i",
3575
+ "e</w>"
3576
+ ],
3577
+ [
3578
+ "em",
3579
+ "p"
3580
+ ],
3581
+ [
3582
+ "am",
3583
+ "es</w>"
3584
+ ],
3585
+ [
3586
+ "er",
3587
+ "n</w>"
3588
+ ],
3589
+ [
3590
+ "Ġn",
3591
+ "or"
3592
+ ],
3593
+ [
3594
+ "iv",
3595
+ "ed</w>"
3596
+ ],
3597
+ [
3598
+ "ev",
3599
+ "el"
3600
+ ],
3601
+ [
3602
+ "Ġsuc",
3603
+ "h</w>"
3604
+ ],
3605
+ [
3606
+ "ar",
3607
+ "ds</w>"
3608
+ ],
3609
+ [
3610
+ "Ġin",
3611
+ "d"
3612
+ ],
3613
+ [
3614
+ "ik",
3615
+ "e</w>"
3616
+ ],
3617
+ [
3618
+ "Ġg",
3619
+ "en"
3620
+ ],
3621
+ [
3622
+ "er",
3623
+ "t"
3624
+ ],
3625
+ [
3626
+ "Ġy",
3627
+ "ear</w>"
3628
+ ],
3629
+ [
3630
+ "Ġus",
3631
+ "ed</w>"
3632
+ ],
3633
+ [
3634
+ "Ġn",
3635
+ "ew</w>"
3636
+ ],
3637
+ [
3638
+ "Ġ",
3639
+ "5</w>"
3640
+ ],
3641
+ [
3642
+ "Ġal",
3643
+ "b"
3644
+ ],
3645
+ [
3646
+ "s",
3647
+ "p"
3648
+ ],
3649
+ [
3650
+ "y",
3651
+ "p"
3652
+ ],
3653
+ [
3654
+ "Ġwit",
3655
+ "h"
3656
+ ],
3657
+ [
3658
+ "Ġwh",
3659
+ "ere</w>"
3660
+ ],
3661
+ [
3662
+ "ic",
3663
+ "s</w>"
3664
+ ],
3665
+ [
3666
+ "ĠTh",
3667
+ "is</w>"
3668
+ ],
3669
+ [
3670
+ "Ġthe",
3671
+ "m</w>"
3672
+ ],
3673
+ [
3674
+ "w",
3675
+ "n</w>"
3676
+ ]
3677
+ ]
3678
+ }
3679
+ }
tokenizer/tokenizer_config.json CHANGED
@@ -1,29 +1,11 @@
1
  {
2
  "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<|startoftext|>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<|endoftext|>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- }
20
- },
21
  "bos_token": "<|startoftext|>",
22
- "clean_up_tokenization_spaces": false,
23
  "do_lower_case": true,
24
  "eos_token": "<|endoftext|>",
25
  "errors": "replace",
26
- "extra_special_tokens": {},
27
  "model_max_length": 77,
28
  "pad_token": "<|endoftext|>",
29
  "tokenizer_class": "CLIPTokenizer",
 
1
  {
2
  "add_prefix_space": false,
3
+ "backend": "tokenizers",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  "bos_token": "<|startoftext|>",
 
5
  "do_lower_case": true,
6
  "eos_token": "<|endoftext|>",
7
  "errors": "replace",
8
+ "is_local": true,
9
  "model_max_length": 77,
10
  "pad_token": "<|endoftext|>",
11
  "tokenizer_class": "CLIPTokenizer",