Skip to content

Commit 5e81239

Browse files
authored
[npu] add npu fsdp example (#6697)
1 parent 6cfaeaa commit 5e81239

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
{
  "compute_environment": "LOCAL_MACHINE",
  "debug": false,
  "distributed_type": "FSDP",
  "downcast_bf16": "no",
  "mixed_precision": "bf16",
  "num_machines": 1,
  "num_processes": 8,
  "machine_rank": 0,
  "rdzv_backend": "static",
  "same_network": true,
  "use_cpu": false,
  "fsdp_config": {
    "fsdp_auto_wrap_policy": "TRANSFORMER_BASED_WRAP",
    "fsdp_transformer_cls_names_to_wrap": "Qwen3DecoderLayer",
    "fsdp_sharding_strategy": "FULL_SHARD",
    "fsdp_backward_prefetch": "BACKWARD_PRE",
    "fsdp_forward_prefetch": true,
    "fsdp_limit_all_gathers": true,
    "fsdp_state_dict_type": "FULL_STATE_DICT",
    "fsdp_sync_module_states": true,
    "fsdp_cpu_ram_efficient_loading": true,
    "fsdp_use_orig_params": true,
    "fsdp_offload_params": false
  }
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env bash
# Example: LoRA fine-tuning of Qwen/Qwen3-32B with FSDP via `accelerate launch`
# on Ascend NPU. Run from the repository root so the relative config path and
# swift/cli/sft.py resolve.
# hardware: Atlas 900 A2

# Abort on the first failing command, unset variable, or failed pipeline stage,
# instead of silently continuing with a half-configured environment.
set -euo pipefail

# Ascend CANN runtime tuning knobs.
# NOTE(review): level-2 operator task-queue dispatch and CPU core binding —
# presumably host-side performance optimizations; confirm against the CANN
# environment-variable documentation for the installed toolkit version.
export TASK_QUEUE_ENABLE=2
export CPU_AFFINITY_CONF=2

# Expose all 8 NPUs on the node (matches "num_processes": 8 in fsdp.json,
# which also supplies the FSDP sharding/wrapping settings).
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
accelerate launch --config_file "./examples/ascend/train/qwen3_lora_fsdp/fsdp.json" \
  swift/cli/sft.py \
  --model 'Qwen/Qwen3-32B' \
  --train_type lora \
  --dataset 'swift/self-cognition#1000' \
  --torch_dtype bfloat16 \
  --per_device_train_batch_size 10 \
  --gradient_accumulation_steps 2 \
  --gradient_checkpointing true \
  --gradient_checkpointing_kwargs '{"use_reentrant": false}' \
  --max_length 1200 \
  --num_train_epochs 2 \
  --eval_strategy no \
  --save_steps 500 \
  --logging_steps 1 \
  --dataloader_num_workers 8 \
  --dataset_num_proc 8 \
  --save_total_limit 2 \
  --save_only_model true \
  --output_dir output \
  --attn_impl 'flash_attention_2' \
  --packing true

0 commit comments

Comments
 (0)