#!/bin/csh
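# -- train.csh: training launcher for E2E_SCSI.
# -- Usage:
# --   ./train.csh               start training
# --   ./train.csh -clean        kill stray python processes and remove old
# --                             outputs before training
# --   ./train.csh -clean_only   clean up, then exit without training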
set arg_count = $#argv
if ( $arg_count >= 1 ) then
if ( "$argv[1]" == "-clean" || "$argv[1]" == "-clean_only" ) then
echo "[INFO] Killing alll other GPU processes to free up resources."
sh -c 'ps | grep python | sed "s/ pts.\+$//g" > .tmp.csh'
chmod +x .tmp.csh
sed -i "s/^/kill -9 /g" .tmp.csh
source .tmp.csh
rm -rf .tmp.csh
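        # -- Note: a bare `ps` only lists processes attached to the current
        # -- terminal. A broader sweep could use `pkill -9 -f python` instead
        # -- (assuming procps pkill is installed); kept as-is here to preserve
        # -- the original, narrower behavior.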
        # -- Remove stale per-rank debug dumps and the old checkpoint directory.
        rm -rf debug_rank_*
        rm -rf dynamicstereo_sf_dr
    endif
if ( "$argv[1]" == "-clean_only" ) then
exit 0
endif
endif
# -- Conservative allocator settings to limit fragmentation on a tight GPU.
setenv PYTORCH_CUDA_ALLOC_CONF "max_split_size_mb:32,garbage_collection_threshold:0.5,expandable_segments:False"
# -- Synchronous kernel launches, so CUDA errors surface at the failing call.
setenv CUDA_LAUNCH_BLOCKING 1
# -- Disable PyTorch's caching allocator (a debugging aid for memory issues).
setenv PYTORCH_NO_CUDA_MEMORY_CACHING 1
# -- Fixed cuBLAS workspace size, required for deterministic behavior.
setenv CUBLAS_WORKSPACE_CONFIG ":16:8"
# -- Run on GPU 3 only.
setenv CUDA_VISIBLE_DEVICES 3
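# -- Note: CUDA_LAUNCH_BLOCKING and PYTORCH_NO_CUDA_MEMORY_CACHING are
# -- debugging aids that slow training considerably; they can be unset once
# -- the OOM issue below is resolved.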
# -- GPU OOM error when training with sample_len=8 on kilby.
python train.py --batch_size 1 \
--image_size 480 640 --saturation_range 0 1.4 --num_steps 200000 \
--ckpt_path dynamicstereo_sf_dr \
--sample_len 8 --lr 0.0003 --train_iters 8 --valid_iters 8 \
--num_workers 28 --save_freq 100 --update_block_3d --different_update_blocks \
--attention_type self_stereo_temporal_update_time_update_space --train_datasets dynamic_replica
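# -- If OOM persists, lowering --sample_len (e.g. to 4) or shrinking
# -- --image_size are the usual levers, at the cost of temporal context
# -- and resolution.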