# ----------------------------------------------------------------------
# Fine-grained locator: train + eval + test with CodeT5-large.
# Reads dataset_fine_grain/<lang>/{train,dev,test}.json and writes the
# model under model_<label_num>/<lang>.
# ----------------------------------------------------------------------
lang=all
lr=5e-5
batch_size=4
source_length=512
data_dir=dataset_fine_grain
label_num=6
output_dir=model_${label_num}/$lang
train_file=$data_dir/$lang/train.json
dev_file=$data_dir/$lang/dev.json
test_file=$data_dir/$lang/test.json
epochs=2
debug_size=10    # NOTE(review): defined but never passed to run.py — confirm it is still needed
delete_weight=4
replace_weight=4
insert_weight=4
block_split_weight=12
select_method=bm25
model_type=codet5
# The canonical Hugging Face org name is capitalized ("Salesforce"),
# matching the codet5-base tag used elsewhere in this file.
pretrained_model=Salesforce/codet5-large
load_locator_model_path=model/$lang/checkpoint-last/pytorch_model.bin  # NOTE(review): unused in the command below

python run.py \
  --lang "${lang}" \
  --do_test --do_train --do_eval \
  --model_type "${model_type}" \
  --model_name_or_path "${pretrained_model}" \
  --train_filename "${train_file}" \
  --dev_filename "${dev_file}" \
  --test_filename "${test_file}" \
  --output_dir "${output_dir}" \
  --max_source_length "${source_length}" \
  --locator_batch_size "${batch_size}" \
  --learning_rate "${lr}" \
  --num_train_epochs "${epochs}" \
  --delete_weight "${delete_weight}" \
  --replace_weight "${replace_weight}" \
  --insert_weight "${insert_weight}" \
  --block_split_weight "${block_split_weight}" \
  --select_method "${select_method}" \
  --label_num "${label_num}"
# ----------------------------------------------------------------------
# Estimator: train + eval + test with CodeT5-base on the fine-grained
# dataset. Checkpoints, predictions, and the log all land under
# ${MODEL_DIR}/${LANG}.
# ----------------------------------------------------------------------
LANG=all                 # dataset split name (shadows the locale var LANG if exported — kept unexported)
MODEL_TAG=Salesforce/codet5-base
# NOTE(review): the original set GPU=0 but then hard-coded
# CUDA_VISIBLE_DEVICES=1; unified on device 1, the one actually requested.
GPU=1
DATA_NUM=-1              # -1 = use the full dataset
BS=4                     # batch size (train/eval/estimator)
LR=5e-5
SRC_LEN=512              # max source length in tokens
TRG_LEN=128              # max target length in tokens
PATIENCE=5               # early-stopping patience
EPOCH=1
WARMUP=100               # warmup steps
paper_name=new
label_num=6
MODEL_DIR=./model_${label_num}_${paper_name}
OUTPUT_DIR=${MODEL_DIR}/${LANG}
SUMMARY_DIR=.
DATA_DIR=dataset_fine_grain/${LANG}
RES_DIR=${OUTPUT_DIR}/prediction
LOG=${OUTPUT_DIR}/train.log
SELECT_METHOD=bm25
beam_size=10

mkdir -p "${OUTPUT_DIR}"
mkdir -p "${RES_DIR}"

# BUG FIX: a bare `CUDA_VISIBLE_DEVICES=1` on its own line only sets a
# shell variable — it never reaches the python child process. It must be
# exported (or written as a prefix on the command itself).
export CUDA_VISIBLE_DEVICES=${GPU}

python run.py \
  --do_train --do_eval --do_test \
  --model_type codet5 \
  --data_num "${DATA_NUM}" \
  --num_train_epochs "${EPOCH}" \
  --warmup_steps "${WARMUP}" \
  --learning_rate "${LR}" \
  --patience "${PATIENCE}" \
  --tokenizer_name "${MODEL_TAG}" \
  --model_name_or_path "${MODEL_TAG}" \
  --data_dir "${DATA_DIR}" \
  --output_dir "${OUTPUT_DIR}" \
  --summary_dir "${SUMMARY_DIR}" \
  --save_last_checkpoints --always_save_model \
  --res_dir "${RES_DIR}" \
  --estimator_batch_size "${BS}" \
  --train_batch_size "${BS}" \
  --eval_batch_size "${BS}" \
  --max_source_length "${SRC_LEN}" \
  --max_target_length "${TRG_LEN}" \
  --select_method "${SELECT_METHOD}" \
  --label_num "${label_num}" \
  --beam_size "${beam_size}" \
  --paper_name "${paper_name}" \
  2>&1 | tee "${LOG}"    # LOG was defined but never used — capture output to it
# ----------------------------------------------------------------------
# Locator baseline with CodeBERT (RoBERTa architecture): hyperparameters
# and paths for the run.py invocation that follows.
# Reads dataset/{train,valid,test}.json; writes the model under ./model.
# ----------------------------------------------------------------------
lr=5e-5
batch_size=16
source_length=512
data_dir=dataset
output_dir=model
train_file=$data_dir/train.json
dev_file=$data_dir/valid.json
test_file=$data_dir/test.json
epochs=2
# NOTE(review): debug_size is not used in the visible part of the command
# below — confirm whether the (truncated) invocation consumes it.
debug_size=10
model_type=roberta
pretrained_model=microsoft/codebert-base
# NOTE(review): presumably a checkpoint to resume/evaluate from; not
# passed in the visible part of the command below — verify against run.py.
load_locator_model_path=model/checkpoint-last/pytorch_model.bin
python run.py \
--do_test --do_train --do_eval \
--model_type $model_type --model_name_or_path $pretrained_model \
--train_filename $train_file --dev_filename $dev_file --test_filename $test_file \
--output_dir $output_dir --dataset_dir $data_dir \
--max_source_length $source_length \
--batch_size $batch_size \
--learning_rate $lr --num_train_epochs $epochs \