head	1.5;
access;
symbols
	charm6_1:1.5
	charm_6_0_1:1.5
	charm6_0_1:1.5
	charm6_0:1.3
	ChaNGa_1-0:1.3
	charm5_9:1.3;
locks; strict;
comment	@# @;


1.5
date	2009.02.09.21.58.13;	author gioachin;	state Exp;
branches;
next	1.4;
commitid	NCnEka49RcYC9OBt;

1.4
date	2009.02.09.16.26.37;	author gzheng;	state Exp;
branches;
next	1.3;
commitid	do5ularGpRNOjMBt;

1.3
date	2005.04.12.02.14.15;	author gioachin;	state Exp;
branches;
next	1.2;

1.2
date	2005.04.06.08.22.44;	author gioachin;	state Exp;
branches;
next	1.1;

1.1
date	2004.06.02.19.58.06;	author skumar2;	state Exp;
branches;
next	;


desc
@@


1.5
log
@added check to make sure executable exists before trying to run the program
@
text
@#!/bin/sh
#
# Conv-host for IBM SP:
#  Translates +pN-style charmrun options into 
# POE (Parallel Operating Environment) options.

args=""  # program name plus pass-through options, each followed by one space
pes=1  # number of PEs; replaced by "+p N" or "+pN"

while [ $# -gt 0 ]
  do
  case $1 in
      +p)  # "+p N": PE count given as a separate word
	  pes=$2
	  shift
	  ;;
      +ppn)  # "+ppn N": forwarded to the program and remembered for the task arithmetic
	  args=$args"$1 $2 "
	  ppnused="$2"
	  shift
	  ;;
      +p[0-9]*)  # "+pN": PE count glued to the flag, taken from the third character on
          pes=`expr substr $1 3 10`
          ;;
      +ln)  # "+ln N": later used as the tasks-per-node value
	  logical=$2
	  shift
	  ;;
      +ns)  # "+ns N": node size, later divided by the +ppn value
	  nodesize=$2
	  shift
	  ;;
      +ll)  # "+ll FILE": load leveler template file
	  llfile=$2
	  shift
	  ;;
      *)  # anything else is passed through unchanged
	  args=$args"$1 "
	  ;;
  esac
  shift
done

if [ -n "$args" ]
    then
    args1=`expr substr "$args" 1 1`  # first character of the collected command line
fi

if [ -z "$args" -o "$args1" = "-" -o "$args1" = "+" ]  # nothing to run, or an option where the program name should be
    then
    # print usage help
    echo "help"
    exit
fi

executable=`echo $args | awk '{print $1}'`  # first word of args: the program to launch

# check if the node size has been set, or a file called .nodesize is present
#if [ -z "$nodesize" ]
#    then
#    if [ -f ".nodesize" ]
#	then
#	nodesize=`cat .nodesize`
#    elif [ -f "$HOME/.nodesize" ]
#	then
#	nodesize=`cat $HOME/.nodesize`
#    fi
#fi

# check that nodesize is a real number
#if [ -n "$nodesize" -a "$nodesize" -gt 0 ]
#    then
#    echo
#    else
#    echo
#    echo "No node size specified. Either use flag \"+ns <size>\" or create a file called"
#    echo "\".nodesize\" in the current directory or in your home directory."
#    echo
#    exit
#fi

# Use the template given with +ll; otherwise look for charmrun.ll in the current directory, then in $HOME.
if [ -z "$llfile" ]
    then
    if [ -f "charmrun.ll" ]
	then
	llfile="charmrun.ll"
    elif [ -f "$HOME/charmrun.ll" ]
	then
	llfile="$HOME/charmrun.ll"
    fi
fi

if [ -z "$llfile" ]  # no template found anywhere: explain and stop
    then
    echo "No load leveler template specified. Either use the flag \"+ll <llfile>\" or create"
    echo "a file called \"charmrun.ll\" in the current directory or in your home directory."
    echo
    exit  # NOTE(review): bare exit after echo reports status 0 on this error path
fi

filename="charmrun_script.$$.ll"  # generated job file; $$ (the shell's PID) is part of the name
interactive=`grep queue $llfile|wc -l`  # number of template lines containing "queue"; 0 selects the direct poe run

if [ $interactive -gt 1 ]  # only prints the message; the script keeps going afterwards
    then
    echo "Multiple queueing not supported!"
fi

if [ $interactive -eq 0 ]  # no queue line in the template
    then
    node_usage="shared"
else
    node_usage="not_shared"
fi

if [ -n "$ppnused" ]  # +ppn given: tasks = PEs / ppn, plus an optional tasks-per-node line
    then
    total_tasks=`expr $pes / $ppnused`
    if [ $pes -ne `expr $total_tasks \* $ppnused` ]  # integer division lost a remainder
	then
	echo
	echo "number of processors must be a multiple of number of processors per node!"
	echo
	exit 1
    fi
    tasks_per_node=1  # default when neither +ln nor +ns is given
    if [ -n "$logical" ]
	then
	tasks_per_node=$logical
    fi
    if [ -n "$nodesize" ]  # +ns takes precedence over +ln
	then
	tasks_per_node=`expr $nodesize / $ppnused`
    fi
    if [ -n "$logical" -a -n "$nodesize" ]  # both given: they have to agree
	then if [ $logical -ne $tasks_per_node ]
	    then
	    echo
	    echo "Both +ln and +ns used, but they do not meet the rule <+ln> = <+ns> / <+ppn>"
	    echo
	    exit 1
	fi
    fi
    tasks_per_node="#@@ tasks_per_node = "$tasks_per_node
else
    total_tasks=$pes
fi

cat > $filename <<EOF  # start the job file with the values computed by this script
# System settings
#@@ node_usage = $node_usage
#@@ network.LAPI = csss,,US
#@@ total_tasks = $total_tasks
$tasks_per_node

# User settings
EOF

grep -E -v "node|network|tasks|queue" $llfile | grep -E "#@@"  >> $filename  # copy template directive lines, except those mentioning node, network, tasks or queue

cat >> $filename <<EOF

# Default Settings
EOF
present=`grep job_type $llfile|wc -l`  # each default below is written only if the template never mentions the keyword
if [ $present -eq 0 ]
    then
    cat >> $filename <<EOF
#@@ job_type = parallel
EOF
fi

present=`grep job_name $llfile|wc -l`
if [ $present -eq 0 ]
    then
    cat >> $filename <<EOF
#@@ job_name = charmrun_$executable.$$
EOF
fi

present=`grep wall_clock_limit $llfile|wc -l`
if [ $present -eq 0 ]
    then
    cat >> $filename <<EOF
#@@ wall_clock_limit = 0:10:00
EOF
fi

present=`grep notification $llfile|wc -l`
if [ $present -eq 0 ]
    then
    cat >> $filename <<EOF
#@@ notification = never
EOF
fi

if [ $interactive -ne 0 ]  # template has a queue line: also default the output and error file names
    then
    present=`grep output $llfile|wc -l`
    if [ $present -eq 0 ]
	then
	cat >> $filename <<EOF
#@@ output = output_$executable.$$
EOF
    fi

    present=`grep error $llfile|wc -l`
    if [ $present -eq 0 ]
    then
	cat >> $filename <<EOF
#@@ error = error_$executable.$$
EOF
    fi
fi
 
cat >> $filename <<EOF  # the single queue statement, followed by fixed environment settings
#@@ queue

# System defined exports
MP_MSG_API=lapi
LAPI_USE_SHM=yes
MP_INTRDELAY=100
MP_EAGER_LIMIT=65536
MP_SHARED_MEMORY=yes
MP_USE_BULK_XFER=yes
MEMORY_AFFINITY=MCM
MP_TASK_AFFINITY=MCM
MP_EUILIB=us

# User defined exports
EOF

# user exports: template lines that are not directives and contain "export"
grep -E -v "#@@" $llfile | grep "export" >> $filename

if [ ! -x "$PWD/$executable" ]  # the program is looked up relative to the current directory
    then
    echo "Executable not found: $PWD/$executable"
    rm -f "$filename"; exit 1  # the job file was already written: remove it and report failure
fi

if [ $interactive -eq 0 ]  # template has no queue line: run poe directly
    then
    # run poe directly, retrying for as long as it returns status 255
    echo "Running interactively> poe $PWD/$args -llfile $filename"
    poe $PWD/$args -llfile $filename
    status=$?
    if [ $status -eq 255 ];
      then
      llstatus=1
    else
      llstatus=0
    fi
    until (exit $llstatus);  # llstatus is 1 while the last poe status was 255
      do
      echo "Retrying in two minutes..."
      sleep 120
      poe $PWD/$args -llfile $filename;
      status=$?
      if [ $status -eq 255 ];
        then
        llstatus=1
      else
        llstatus=0
      fi
    done
    if [ $status -ne 0 ];then rm -f "$filename";exit $status;fi  # remove the job file before passing on the failure status
    
else
    # append the command at the end of the file and llsubmit it
    echo "Submitting batch> poe $PWD/$args"
    cat >> $filename <<EOF

poe $PWD/$args
EOF
    llsubmit $filename
fi

rm $filename
@


1.4
log
@+ll never worked because of incorrect shell syntax
@
text
@d237 6
@


1.3
log
@smarter version, able to iterate over LoadLeveler errors (255) but signalling
user errors (not 255)
@
text
@d34 1
a34 1
	  $llfile=$2
@


1.2
log
@Completely new version of LAPI. Most of the files has been completely rewritten,
so a diff with the previous is useless. Code cleanup and optimization have to be
performed. It runs megatest correctly, both in normal and SMP version.
@
text
@a6 3
# COMMENT BY FILIPPO:
# THIS FILE IS USELESS AT THE MOMENT, IT NEEDS TO USE LOAD LEVELER!

d239 1
a239 1
    # submit the job as poe
d242 22
d266 1
a266 1
    echo "Running batch> poe $args"
d269 1
a269 1
poe $args
@


1.1
log
@
New lapi version of charm. IT IS BROKEN. One of me or Filippo will fix it. But most of the code should be there.
@
text
@d7 3
d14 31
a44 18
do
	case $1 in
	+p)
		pes=$2
		shift
		;;
	+ppn)
		args=$args"$1 $2"
		shift
		;;
	+p*)
		pes=`echo $1 | awk '{print substr($1,3)}'`
		;;
	*) 
		args=$args"$1 "
		;;
	esac
	shift
d47 192
a238 12
# Try to guess the number of nodes and tasks per node to use
nodes=$pes
tp=1
for i in 4 2 3
do
  if [ `expr $pes / $i '*' $i` -eq $pes ]
  then
	nodes=`expr $pes / $i`
	tp=$i
	break
  fi
done
d240 9
a248 1
extra="-nodes $nodes -tasks_per_node $tp -rmpool 1"
d250 4
a253 2
# Prepend path to executable
args=`pwd`/"$args"
d255 1
a255 2
echo "Running> poe $args $extra"
poe $args $extra
@

