File size: 2,173 Bytes
c03dbc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
{
  "training_metadata": {
    "timestamp": "20251016_174948",
    "training_date": "2025-10-17",
    "training_time": "00:42:52",
    "final_epoch": 3.0382470119521914,
    "total_steps": 1431,
    "status": "completed"
  },
  "model_config": {
    "base_model": "Qwen/Qwen2.5-Coder-32B-Instruct",
    "model_type": "causal_lm",
    "architecture": "Qwen2ForCausalLM"
  },
  "lora_config": {
    "r": 64,
    "lora_alpha": 128,
    "lora_dropout": 0.05,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ]
  },
  "training_config": {
    "num_epochs": 5,
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "effective_batch_size": 32,
    "learning_rate": 5e-05,
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.02,
    "weight_decay": 0.1,
    "max_grad_norm": 0.5,
    "bf16": true,
    "gradient_checkpointing": true,
    "optim": "adamw_torch",
    "logging_steps": 10,
    "save_steps": 50,
    "eval_steps": 25
  },
  "dataset_info": {
    "train_samples": 15057,
    "eval_samples": 1674,
    "max_seq_length": 8192,
    "sample_packing": false
  },
  "hardware_config": {
    "num_gpus": 2,
    "gpu_model": "Unknown",
    "distributed_strategy": "DeepSpeed ZeRO-2",
    "flash_attention": "2.8.3"
  },
  "performance_metrics": {
    "final_train_loss": 0.3949,
    "final_eval_loss": 0.4636613428592682,
    "final_train_perplexity": 1.4842357599234954,
    "final_eval_perplexity": 1.5898844535357601,
    "final_token_accuracy": 0.8872479304671288,
    "initial_loss": 1.724,
    "initial_perplexity": 5.606911313988792,
    "initial_accuracy": 0.5987553134560585
  },
  "framework_versions": {
    "torch": "2.4.1+cu124",
    "transformers": "4.57.1",
    "peft": "0.17.1",
    "trl": "0.23.1",
    "deepspeed": "0.18.0",
    "flash_attn": "2.8.3",
    "python": "3.12.3"
  },
  "special_features": {
    "flash_attention_2": true,
    "gradient_checkpointing": true,
    "bf16_training": true,
    "sample_packing": false,
    "deepspeed_zero2": true,
    "distributed_training": true
  }
}