Files
heterogeneous-distributed-t…/examples/multimodal/text_generation_mistral_clip.sh
tianyutong d6ce507681 Initial Commit of Megatron-LM-0.8.0
Change-Id: Ifb4c061207ee2644a21e161ad52fc6ff40564e39
2025-05-23 09:54:48 +08:00

118 lines
3.0 KiB
Bash

#!/bin/bash
# Environment exported to every process started by this script.
# NOTE(review): the names indicate NCCL, CUDA and Transformer Engine tuning
# knobs respectively; confirm the chosen values against those projects' docs.
export NCCL_IB_SL=1 \
  CUDA_DEVICE_MAX_CONNECTIONS=1 \
  NVTE_APPLY_QK_LAYER_SCALING=0

# Fallback values; the command-line options handled below overwrite them
# when --groundtruth-path/--gt-path or --input-metadata-path are supplied.
GROUNDTRUTH_PATH="placeholder"
INPUT_METADATA_PATH="placeholder"
#######################################
# Parse the launcher's command-line options into global variables.
#
# Every option takes exactly one value:
#   --input-image-path PATH                  -> INPUT_IMAGE_PATH
#   --input-metadata-path PATH               -> INPUT_METADATA_PATH
#   -g | --groundtruth-path | --gt-path PATH -> GROUNDTRUTH_PATH
#   -o | --output-path PATH                  -> OUTPUT_PATH
#   -m | --model-path PATH                   -> MODEL_PATH
#   -t | --tokenizer-path PATH               -> TOKENIZER_PATH
#   --task NAME                              -> TASK
#
# Globals:   writes the variables listed above.
# Arguments: the script's argument vector ("$@").
# Outputs:   an error message on stderr for an unknown or incomplete option.
# Returns:   0 on success; exits the shell with status 1 on a bad argument.
#######################################
parse_args() {
  local target
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --input-image-path)
        target=INPUT_IMAGE_PATH
        ;;
      --input-metadata-path)
        target=INPUT_METADATA_PATH
        ;;
      -g | --groundtruth-path | --gt-path)
        target=GROUNDTRUTH_PATH
        ;;
      -o | --output-path)
        target=OUTPUT_PATH
        ;;
      -m | --model-path)
        target=MODEL_PATH
        ;;
      -t | --tokenizer-path)
        target=TOKENIZER_PATH
        ;;
      --task)
        target=TASK
        ;;
      *)
        # Catch-all: also rejects bare positional words, which previously
        # matched no arm, were never shifted, and made the loop spin forever.
        echo "Invalid option $1" >&2
        exit 1
        ;;
    esac

    # An option given as the very last word has no value to consume.
    if [[ $# -lt 2 ]]; then
      echo "Option $1 requires a value" >&2
      exit 1
    fi

    # Assign to the variable named by $target without resorting to eval.
    printf -v "$target" '%s' "$2"
    shift 2
  done
}

parse_args "$@"
# Please modify these as needed.
# NUM_PARTITIONS is forwarded as --num-partitions; START and END are the
# inclusive bounds of the partition IDs this invocation processes.
NUM_PARTITIONS=100
START=2
END=0

#######################################
# Launch one torchrun text-generation job per partition ID.
#
# Partition IDs are visited from START to END inclusive, counting down when
# START is greater than END (the same order the brace expansion
# {START..END} produces). Each job writes its results to
# "${OUTPUT_PATH}-${TASK}-<partition id>.jsonl".
#
# Globals (read): START, END, NUM_PARTITIONS, INPUT_IMAGE_PATH,
#                 INPUT_METADATA_PATH, GROUNDTRUTH_PATH, OUTPUT_PATH,
#                 MODEL_PATH, TOKENIZER_PATH, TASK
# Outputs:   one line on stderr for every required setting that is empty.
# Returns:   1 if a required setting is missing (nothing is launched);
#            otherwise 0 when every job succeeded, or the status of the most
#            recent failing job.
#######################################
run_partitions() {
  local required missing=0
  # Refuse to start with empty settings: torchrun would otherwise be handed a
  # malformed command line (an option immediately followed by the next one).
  for required in INPUT_IMAGE_PATH OUTPUT_PATH MODEL_PATH TOKENIZER_PATH TASK; do
    if [[ -z "${!required:-}" ]]; then
      echo "Missing required setting: ${required}" >&2
      missing=1
    fi
  done
  if ((missing)); then
    return 1
  fi

  # Walk the range in whichever direction START..END points.
  local step=1
  if ((START > END)); then
    step=-1
  fi

  local partition_id status=0
  for ((partition_id = START; step > 0 ? partition_id <= END : partition_id >= END; partition_id += step)); do
    # All expansions are quoted so paths containing spaces or glob characters
    # reach the Python entry point as single arguments.
    torchrun --nproc_per_node 4 examples/multimodal/run_text_generation.py \
      --img-embedding-idx 1 \
      --apply-layernorm-1p \
      --attention-softmax-in-fp32 \
      --use-flash-attn \
      --transformer-impl transformer_engine \
      --use-te \
      --use-checkpoint-args \
      --normalization RMSNorm \
      --language-model-type mistral_7b \
      --untie-embeddings-and-output-weights \
      --disable-bias-linear \
      --position-embedding-type rope \
      --rotary-percent 1.0 \
      --rotary-base 1000000 \
      --swiglu \
      --attention-dropout 0.0 \
      --hidden-dropout 0.0 \
      --tensor-model-parallel-size 4 \
      --pipeline-model-parallel-size 1 \
      --group-query-attention \
      --num-query-groups 8 \
      --num-layers 32 \
      --hidden-size 4096 \
      --ffn-hidden-size 14336 \
      --num-attention-heads 32 \
      --max-position-embeddings 4096 \
      --no-masked-softmax-fusion \
      --load "${MODEL_PATH}" \
      --tokenizer-type MistralTokenizer \
      --tokenizer-model "${TOKENIZER_PATH}" \
      --bf16 \
      --micro-batch-size 1 \
      --seq-length 2048 \
      --out-seq-length 700 \
      --temperature 1.0 \
      --img-h 336 \
      --img-w 336 \
      --patch-dim 14 \
      --seed 153 \
      --top_k 1 \
      --no-load-rng \
      --no-load-optim \
      --input-image-path "${INPUT_IMAGE_PATH}" \
      --input-metadata-path "${INPUT_METADATA_PATH}" \
      --num-partitions "${NUM_PARTITIONS}" \
      --partition-id "${partition_id}" \
      --output-path "${OUTPUT_PATH}-${TASK}-${partition_id}.jsonl" \
      --gt-path "${GROUNDTRUTH_PATH}" \
      --task "${TASK}" \
      --disable-vision-class-token ||
      status=$? # keep going with the remaining partitions, remember the failure
  done

  return "${status}"
}

# Last command of the script: its status becomes the script's exit status.
run_partitions