[npu] add npu deepspeed example (#6716)

addsubmuldiv · web-flow · commit 57cd99f81dde · 2025-11-22T22:13:16.000+08:00
diff --git a/examples/ascend/train/qwen3_lora_deepspeed/train.sh b/examples/ascend/train/qwen3_lora_deepspeed/train.sh
@@ -0,0 +1,43 @@
+# Qwen3-32B LoRA fine-tuning with DeepSpeed ZeRO-3 on Ascend NPUs.
+# hardware: Atlas 900 A2
+#
+# Runs `swift sft` with nproc_per_node processes on the eight listed NPUs.
+# When changing the device count, update nproc_per_node and
+# ASCEND_RT_VISIBLE_DEVICES together.
+
+# Ascend runtime settings (presumably torch_npu task-queue and CPU-affinity
+# modes; confirm value semantics against the Ascend documentation).
+export TASK_QUEUE_ENABLE=2
+export CPU_AFFINITY_CONF=2
+
+nproc_per_node=8
+
+# gradient_accumulation_steps is 16 / nproc_per_node (integer division), so
+# processes x accumulation steps stays at 16 when nproc_per_node divides 16.
+ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
+NPROC_PER_NODE="$nproc_per_node" \
+swift sft \
+    --model 'Qwen/Qwen3-32B' \
+    --train_type lora \
+    --dataset 'swift/self-cognition#1000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 10 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps "$((16 / nproc_per_node))" \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 1 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot \
+    --deepspeed zero3