From 561860161683f6d4ae450a4b294550529ed74023 Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Mon, 20 Oct 2025 13:36:07 -0400 Subject: [PATCH 01/10] better jvm memory opts, cleanup --- bin/run-clara | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bin/run-clara b/bin/run-clara index 5ad061ba34..086b4f3161 100755 --- a/bin/run-clara +++ b/bin/run-clara @@ -34,7 +34,6 @@ do c) CLARA_HOME=$OPTARG ;; t) threads=$OPTARG && echo $threads | grep -q -E '^[0-9]+$' || error "-t must be an integer, threads" ;; n) nevents="-e $OPTARG" && echo "$nevents" | grep -q -E '^-e [0-9]+$' || error "-n must be an integer, events" ;; - g) memory_gb="-e $OPTARG" && echo "$memory_gb" | grep -q -E '^-e [0-9]+$' || error "-g must be an integer, GB of memory" ;; m) merge=1 ;; q) quiet=1 ;; h) echo -e "\n$usage" && echo -e $info && exit 0 ;; @@ -43,8 +42,9 @@ done shift $((OPTIND-1)) inputs=$@ -# Configure JVM -Xmx memory setting: -[ -z ${memory_gb+x} ] && memory_gb=$((threads+2)) +# Configure JVM memory settings (overridedable via $JAVA_OPTS): +gb=$((threads<3?x+2:x+1)) +java_opts=-Xmx${gb}g # Check configuration: [ $# -lt 1 ] && error "Input data files are required" @@ -76,11 +76,14 @@ done [ $(cat $CLARA_USER_DATA/filelist.txt | wc -l) -gt 0 ] || error "Found no input files" # Set some JVM options: -export JAVA_OPTS="$JAVA_OPTS -Xmx${memory_gb}g -XX:+IgnoreUnrecognizedVMOptions" -export JAVA_OPTS="$JAVA_OPTS -Djava.io.tmpdir=$CLARA_USER_DATA -Dorg.sqlite.tmpdir=$CLARA_USER_DATA" +JAVA_OPTS="$java_opts $JAVA_OPTS -XX:+IgnoreUnrecognizedVMOptions" +JAVA_OPTS="$JAVA_OPTS -Djava.io.tmpdir=$CLARA_USER_DATA -Dorg.sqlite.tmpdir=$CLARA_USER_DATA" + # Set verbosity: [ -z ${quiet+x} ] && stub=fine || stub=info -export JAVA_OPTS="$JAVA_OPTS -Djava.util.logging.config.file=$CLAS12DIR/etc/logging/$stub.properties" +JAVA_OPTS="$JAVA_OPTS -Djava.util.logging.config.file=$CLAS12DIR/etc/logging/$stub.properties" + +export JAVA_OPTS function get_host_ip() { if command -v ip >/dev/null 2>&1 From 82294556275e4a0ae41d40870b15cd8cfcaabbcc Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Mon, 20 Oct 2025 13:43:10 -0400 Subject: [PATCH 02/10] better memory setting --- bin/run-clara | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/run-clara b/bin/run-clara index 086b4f3161..59e4156d20 100755 --- a/bin/run-clara +++ b/bin/run-clara @@ -43,7 +43,7 @@ shift $((OPTIND-1)) inputs=$@ # Configure JVM memory settings (overridedable via $JAVA_OPTS): -gb=$((threads<3?x+2:x+1)) +gb=$((threads<3?threads+2:2+threads-threads/4)) java_opts=-Xmx${gb}g # Check configuration: From 488bc8782822485cd204aceddf9e8dd245edb6e1 Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Mon, 20 Oct 2025 13:56:08 -0400 Subject: [PATCH 03/10] more memory options --- bin/run-clara | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/bin/run-clara b/bin/run-clara index 59e4156d20..30ef4f77c0 100755 --- a/bin/run-clara +++ b/bin/run-clara @@ -25,7 +25,7 @@ function error() { threads=2 prefix=rec_ CLARA_USER_DATA=. -while getopts y:o:p:c:t:n:qmh opt +while getopts y:o:p:c:t:n:lqmh opt do case $opt in y) yaml=$OPTARG ;; @@ -34,6 +34,7 @@ do c) CLARA_HOME=$OPTARG ;; t) threads=$OPTARG && echo $threads | grep -q -E '^[0-9]+$' || error "-t must be an integer, threads" ;; n) nevents="-e $OPTARG" && echo "$nevents" | grep -q -E '^-e [0-9]+$' || error "-n must be an integer, events" ;; + l) large=1 ;; m) merge=1 ;; q) quiet=1 ;; h) echo -e "\n$usage" && echo -e $info && exit 0 ;; @@ -43,8 +44,15 @@ shift $((OPTIND-1)) inputs=$@ # Configure JVM memory settings (overridedable via $JAVA_OPTS): -gb=$((threads<3?threads+2:2+threads-threads/4)) -java_opts=-Xmx${gb}g +if [ -z ${large+x} ] +then + gb_max=$((threads<3?threads+2:threads+2-threads/4)) + gb_init=$((threads<3?threads:threads-threads/3)) +else + gb_max=$((threads<3?threads+2:threads+2-threads/8)) + gb_init=$((threads<3?threads:threads-threads/6)) +fi +java_opts="-Xms${gb_init}g -Xmx${gb_max}g" # Check configuration: [ $# -lt 1 ] && error "Input data files are required" From f78528581caabe616f43d0fa4495aea8756fc369 Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Mon, 20 Oct 2025 14:12:18 -0400 Subject: [PATCH 04/10] add missing help printout --- bin/run-clara | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/run-clara b/bin/run-clara index 30ef4f77c0..152d218326 100755 --- a/bin/run-clara +++ b/bin/run-clara @@ -14,6 +14,7 @@ Options:\n \t-t number of threads (default=2)\n \t-n number of events (default=-1)\n \t-m merge output files (see dependencies below)\n +\t-l larger JVM memory requests\n \t-h print this help and exit\n\n Merging outputs (-m) requires hipo-utils and yq (https://github.com/mikefarah/yq).' From 75aabc38445f3c223c4c7f25972f6b54b14a502d Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Thu, 23 Oct 2025 18:01:40 -0400 Subject: [PATCH 05/10] add scaling test --- libexec/scaling | 102 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100755 libexec/scaling diff --git a/libexec/scaling b/libexec/scaling new file mode 100755 index 0000000000..2b2ff9300b --- /dev/null +++ b/libexec/scaling @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +def cli(): + import os,argparse + cli = argparse.ArgumentParser(description='CLARA scaling test') + cli.add_argument('-y',help='YAML file',required=True) + cli.add_argument('-c',help='CLARA_HOME path',default=os.getenv('CLARA_HOME',None)) + cli.add_argument('-t',help='threads',default=[2,8,16,32],type=int,action='append') + cli.add_argument('-e',help='events per thread',default=100,type=int) + cli.add_argument('input',help='input data file') + cfg = cli.parse_args() + import sys + if cfg.c is None: sys.exit('-c or $CLARA_HOME is required') + return cfg + +def run(cmd): + import subprocess + print('scaling >>> '+' '.join(cmd)) + p = subprocess.Popen(cmd,stdout=subprocess.PIPE,stderr=subprocess.STDOUT,universal_newlines=True,encoding='latin-1') + for line in iter(p.stdout.readline, ''): + line = line.strip() + if len(line) > 0: + yield line + p.wait() + if p.returncode != 0: + pass + +def benchmark(cfg, threads): + import collections + exiting,benchmarks = False,collections.OrderedDict() + cmd = ['run-clara', + '-c',cfg.c, + '-n',str(cfg.e*threads), + '-t',str(threads), + '-y',cfg.y, + '-o',f'tmp-scaling-{threads}', + cfg.input] + for line in run(cmd): + cols = line.split() + print(line) + try: + if line.find('Benchmark results:') >= 0: + exiting = True + elif line.find('Processing is complete') >= 0: + exiting = False + elif len(cols) > 20: + if line.find('Processed') >= 0: + benchmarks['event'] = float(cols[12]) + elif exiting: + # catch-all for services: + if len(cols) > 14: + if 'services' not in benchmarks: + benchmarks['services'] = collections.OrderedDict() + benchmarks['services'][cols[2]] = float(cols[14]) + # FIXME: what are these, why don't they add up? + elif line.find('Average processing time') >= 0: + benchmarks['avg'] = float(cols[6]) + elif line.find('Total processing time') >= 0: + benchmarks['total'] = float(cols[6]) + elif line.find('Total orchestrator time') >= 0: + benchmarks['orch'] = float(cols[6]) + except ValueError: + pass + return benchmarks + +def table(benchmarks): + table = [] + header = [ 'threads' ] + b = benchmarks[0][1] + header.extend([x for x in b if x != 'services']) + if 'services' in b: + header.extend(b['services'].keys()) + table.append(header) + for b in benchmarks: + threads,benchmark = b[0],b[1] + row = [threads] + for k in ['event','avg','total','orch','services']: + if k in benchmark: + if k == 'services': + row.extend(benchmark[k].values()) + else: + row.append(benchmark[k]) + table.append(row) + return table + +def show(benchmarks): + for row in table(benchmarks): + print(' '.join([str(x) for x in row])) + +def save(benchmarks): + with open('scaling.txt','w') as f: + for row in table(benchmarks): + f.write(' '.join([str(x) for x in row])) + +if __name__ == '__main__': + cfg = cli() + benchmarks = [] + for threads in cfg.t: + benchmarks.append([threads, benchmark(cfg, threads)]) + show(benchmarks) + save(benchmarks) + From 5143691f8a1a55ffdd57bd7651ed9b3ab2e2f9ea Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Tue, 28 Oct 2025 17:46:37 -0400 Subject: [PATCH 06/10] better default --- libexec/scaling | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libexec/scaling b/libexec/scaling index 2b2ff9300b..909b85631f 100755 --- a/libexec/scaling +++ b/libexec/scaling @@ -5,7 +5,7 @@ def cli(): cli = argparse.ArgumentParser(description='CLARA scaling test') cli.add_argument('-y',help='YAML file',required=True) cli.add_argument('-c',help='CLARA_HOME path',default=os.getenv('CLARA_HOME',None)) - cli.add_argument('-t',help='threads',default=[2,8,16,32],type=int,action='append') + cli.add_argument('-t',help='threads',default=[2,4],type=int,action='append') cli.add_argument('-e',help='events per thread',default=100,type=int) cli.add_argument('input',help='input data file') cfg = cli.parse_args() From 5d773f653dfe2d04a9c73622f5263084c9509b0d Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Tue, 28 Oct 2025 17:47:44 -0400 Subject: [PATCH 07/10] test --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ef5a94ca11..257506c5cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,7 +199,7 @@ jobs: - name: run test run: | ls -lhtr - ./bin/run-clara -y ./etc/services/rgd-clarode.yml -t 4 -n 500 -c ./clara -o ./tmp ./clas_018779.evio.00001 + ./libexec/scaling -y ./etc/services/data-cv-uber.yml -c ./clara -o ./clas_018779.evio.00001 ls -lhtr test_coatjava: From 433325fdc8bb23c2dd68367358fc64f63190c92b Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Tue, 28 Oct 2025 17:54:50 -0400 Subject: [PATCH 08/10] bugfix --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 257506c5cd..04e0fb549f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,7 +199,7 @@ jobs: - name: run test run: | ls -lhtr - ./libexec/scaling -y ./etc/services/data-cv-uber.yml -c ./clara -o ./clas_018779.evio.00001 + ./libexec/scaling -y ./etc/services/data-cv-uber.yml -c ./clara ./clas_018779.evio.00001 ls -lhtr test_coatjava: From 6be52f24138537922cc31809a421ed9325016202 Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Tue, 28 Oct 2025 18:30:10 -0400 Subject: [PATCH 09/10] bugfix --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 04e0fb549f..99f886193d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,6 +199,7 @@ jobs: - name: run test run: | ls -lhtr + export PATH=./bin:$PATH ./libexec/scaling -y ./etc/services/data-cv-uber.yml -c ./clara ./clas_018779.evio.00001 ls -lhtr From e8fac233337c5449e57563f4bafddbef499ea714 Mon Sep 17 00:00:00 2001 From: Nathan Baltzell Date: Tue, 28 Oct 2025 19:23:55 -0400 Subject: [PATCH 10/10] restore --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 99f886193d..c0d262be0a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -199,8 +199,7 @@ jobs: - name: run test run: | ls -lhtr - export PATH=./bin:$PATH - ./libexec/scaling -y ./etc/services/data-cv-uber.yml -c ./clara ./clas_018779.evio.00001 + ./bin/run-clara -y ./etc/services/rgd-clarode.yml -t 4 -n 500 -c ./clara -o ./tmp ./clas_018779.evio.00001 ls -lhtr test_coatjava: