diff --git a/alltoallv_validation/.gitignore b/alltoallv_validation/.gitignore new file mode 100644 index 0000000..80feba8 --- /dev/null +++ b/alltoallv_validation/.gitignore @@ -0,0 +1,16 @@ +Makefile +aclocal.m4 +autom4te.cache +config +config.log +config.status +configure +src/Makefile +src/stamp-h1 +src/test_config.h +**.in +**~ +**.o +src/alltoallv_ddt +src/sanity +src/.deps diff --git a/alltoallv_validation/Makefile.am b/alltoallv_validation/Makefile.am new file mode 100644 index 0000000..cb8ce98 --- /dev/null +++ b/alltoallv_validation/Makefile.am @@ -0,0 +1,12 @@ +# -*- makefile -*- +# +# Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# $HEADER$ +# + +ACLOCAL_AMFLAGS = -I config + +# The reporting subdir must be built before all others + +SUBDIRS = src diff --git a/alltoallv_validation/README.md b/alltoallv_validation/README.md new file mode 100644 index 0000000..2071e85 --- /dev/null +++ b/alltoallv_validation/README.md @@ -0,0 +1,169 @@ +# Alltoallv Validation of complex datatypes + +This test creates a variety of configurations for testing data validation of +the alltoallv collective using non-standard datatypes. + +The approach is the following sequence: + - Create some datatype + - Determine the packed size, and allocate both packed and unpacked buffers to + hold the send data. + - Fill the packed buffer with a test pattern, then sendrecv it to the unpacked + send buffer by sending from an MPI_BYTE buffer into the test datatype. + - Perform the alltoallv collective + - Transfer the received data back into a packed format. + - Verify the contents of the packed format using knowledge of what data was + being sent. + - Verify that no buffer under-runs or over-runs occurred in the buffers by + checking some guard bytes. + +Validation is the only purpose of this test. It should not be used for +performance timing, as many extra memory copies and assignments are performed. +No timing is printed. + +The code is written in C++ only to access a predictable random number generator. +All MPI calls are made via the C interface. + +## Test Overview + +Tests are broken into complexity levels. + +### Level 1 + +Level 1 types are composed of basic MPI types like `MPI_CHAR`, `MPI_REAL`, +`MPI_INT64_T` and so forth. The data types are not exhaustive; only 9 are used. +Executing only the Level 1 tests performs just 9 tests: each sends and +receives the same datatype. + +### Level 2 + +Level 2 types are collections of Level 1 types. There are 7 Level 2 types in +various configurations including: + + - increasing the count, using the same type + - contiguous and non-contiguous vectors + - contiguous and non-contiguous vectors with negative stride + +Level 2 tests all exchange compatible types, therefore all combinations of the +above are used as send and receive types. With 7 types, Level 2 executes 49 +tests. + +All Level 2 tests are performed with the same basic datatype (MPI_INT). + +Note that each "one" of these types is a vector, so setting `--item-count` to 10 +really means you are sending 10 vectors, each with some number (it happens to be 12) +of basic types. + +### Level 3 + +Level 3 tests collections of two different Level 1 types. We test MPI_INT and +MPI_CHAR together. 
These tests create the type using MPI_Type_create_struct in +various orders and configurations including: + - contiguous and non-contiguous in-order elements + - contiguous and non-contiguous reverse-order elements + - Negative lower bounds + - Padding in extents + +There are 6 Level 3 tests, and like Level 2 tests they are all compatible types, +so 36 total tests are executed. + +### Level 4 + +There are two hand-made Level 4 tests. These are composed of several layers of +Level 2 and Level 3 types in combination with each other to make collections of +different kinds of types in vectors with various paddings and spacings. It is best to +read the code for these. They are not cross-compatible, so only 2 tests are +executed. + +Again note that these constructed types are somewhat large themselves (hundreds +of bytes), so setting a high `--item-count` could result in longer runtimes. + +### Total + +As of the initial version of this program, there were 96 tests. The +configuration where all ranks send and receive 1 count for only 1 iteration +results in each rank sending and receiving approximately 2.7 KBytes of data +during the full test battery. + +However, there is not so much data that the execution time is unreasonable. Test +execution of 32 ranks on a single host using all default options takes less than +5 seconds, and most ranks send about 630 KBytes. + +# Compile +``` +$ ./autogen.sh && ./configure && make + +$ mpirun -n 13 src/alltoallv_ddt +Rank 0 sent 254104 bytes, and received 265152 bytes. +[OK] All tests passed. Executed 96 tests with seed 0 with 13 total ranks. + +``` + +# Usage +``` +Test alltoallv using various ddt's and validate results. +This test uses pseudo-random sequences from C++'s mt19937 generator. +The test (but not necessarily the implementation) is deterministic +when the options and number of ranks remain the same. +Options: + [-s|--seed ] Change the seed to shuffle which datapoints are exchanged + [-c|--item-count ] Each rank will create to consider for exchange (default=10). + [-i|--prob-item ] Probability that rank r will send item k to rank q. (0.50) + [-r|--prob-rank ] Probability that rank r will send anything to rank q. (0.90) + [-w|--prob-world ] Probability that rank r will do anything at all. (0.95) + [-t|--iters ] The number of iterations to test each dtype. + [-o|--only ] Only execute a specific test signified by the pair high,low. + [-v|--verbose=level ] Set verbosity during execution (0=quiet (default). 1,2,3: loud). + [-h|--help] Print this help and exit. + [-z|--verbose-rank] Only the provided rank will print. Default=0. ALL = -1. +``` + +Some recommended test cases: +``` +# no ranks exchange any data +alltoallv_ddt -w 0 + +# same as alltoall: all ranks exchange same amount of data +alltoallv_ddt -w 1 -r 1 -i 1 + +# perform a different test each time you run, or repeat the same test: +alltoallv_ddt -s $RANDOM +alltoallv_ddt -s 1234 +``` + +Note that since alltoallv is a hefty collective, and we go to the trouble of +validating every single message, use caution when exercising large +numbers of ranks, counts, or iterations. + +# Debugging + +In the case of a data validation failure, re-run the test harness on only the +failing test (using `--only`) and increase the verbosity up to 3. You may also +need to set the verbosity of a particular rank with `-z`. + +For example, at verbosity 0 we only know that validation failed on rank 1, but +not which test. 
+ +``` +mpirun -n 2 src/alltoallv_ddt -w 1 +Rank 1 failed to validate data! +ERROR: Validation failed on rank 1! +``` + +Setting the rank-specific verbosity to that rank (or to all ranks) and raising the +verbosity to 2 or higher reveals additional details, including which test failed and +which part of the buffer: + +``` +$ mpirun -n 2 src/alltoallv_ddt -z 1 -v 3 -w 1 +--- Starting test 2,1. Crossing 0 x 0 +Rank 1 failed to validate data! +0010: 42-42 99-43 44-44 45-45 46-46 47-47 48-48 49-49 50-50 51-51 -- CORRUPT +0020: 52-52 53-53 54-54 55-55 56-56 57-57 58-58 59-59 60-60 61-61 -- VALID +ERROR: Validation failed on rank 1! +``` + +Buffer addresses are provided. These are base-10 offsets relative to the +packed representation of the datatype. The first number is what was received, +the second number is what was expected. To avoid excessive output, +subsequent CORRUPT lines are skipped and only the next valid line is printed, so +output will always appear to alternate between CORRUPT and VALID. diff --git a/alltoallv_validation/autogen.sh b/alltoallv_validation/autogen.sh new file mode 100755 index 0000000..1e99d71 --- /dev/null +++ b/alltoallv_validation/autogen.sh @@ -0,0 +1 @@ +autoreconf -ivf diff --git a/alltoallv_validation/configure.ac b/alltoallv_validation/configure.ac new file mode 100644 index 0000000..d9397c8 --- /dev/null +++ b/alltoallv_validation/configure.ac @@ -0,0 +1,152 @@ +# -*- shell-script -*- +# +# Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# $HEADER$ +# +# modified from ompi-tests/cxx-test-suite's autoconf by Luke Robison 2024. + +# +# Init autoconf +# + +AC_PREREQ([2.63]) +AC_INIT([alltoallv_validation], [1.0], [devel@open-mpi.org], [openmpi-cxx-test-suite]) +AC_CONFIG_AUX_DIR([config]) +AC_CONFIG_MACRO_DIR([config]) + +# +# Get the version of ompitest that we are configuring +# + +echo "Configuring Open MPI C++ test suite" + +AM_INIT_AUTOMAKE([1.10 foreign dist-bzip2 no-define]) + +# If Automake supports silent rules, enable them. +m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) + +# Setup the reporting/ompitest_config.h file + +AH_TOP([/* -*- c -*- + * + * ompitest configuration header file. + * + * Function: - OS, CPU and compiler dependent configuration + */ + +#ifndef OMPITEST_CONFIG_H +#define OMPITEST_CONFIG_H +]) +AH_BOTTOM([#endif /* OMPITEST_CONFIG_H */]) + +# +# This is useful later +# + +AC_CANONICAL_HOST +AC_DEFINE_UNQUOTED(OMPITEST_ARCH, "$host", + [Architecture that we are compiled for]) + +# +# We always want debugging flags +# +CXXFLAGS="$CXXFLAGS -g" +CFLAGS="$CFLAGS -g" + +# +# Get various programs +# C compiler - bias towards mpicc +# + +if test "$CC" != ""; then + BASE="`basename $CC`" +else + BASE= +fi +if test "$BASE" = "" -o "$BASE" = "." -o "$BASE" = "cc" -o \ + "$BASE" = "gcc" -o "$BASE" = "xlc" -o \ + "$BASE" = "icc" -o "$BASE" = "pgcc"; then + AC_CHECK_PROG(HAVE_MPICC, mpicc, yes, no) + if test "$HAVE_MPICC" = "yes"; then + CC=mpicc + export CC + fi +fi + +CFLAGS_save="$CFLAGS" +AC_PROG_CC +CFLAGS="$CFLAGS_save" + +# +# Get various programs +# C++ compiler - bias towards mpic++, with fallback to mpiCC +# + +if test "$CXX" != ""; then + BASE="`basename $CXX`" +else + BASE= +fi +if test "$BASE" = "" -o "$BASE" = "." 
-o "$BASE" = "CC" -o \ + "$BASE" = "g++" -o "$BASE" = "c++" -o "$BASE" = "xlC" -o \ + "$BASE" = "icpc" -o "$BASE" = "pgCC"; then + AC_CHECK_PROG(HAVE_MPICPP, mpic++, yes, no) + if test "$HAVE_MPICPP" = "yes"; then + CXX=mpic++ + export CXX + else + AC_CHECK_PROG(HAVE_MPICXX, mpiCC, yes, no) + if test "$HAVE_MPICXX" = "yes"; then + CXX=mpiCC + export CXX + fi + fi +fi + +CXXFLAGS_save="$CXXFLAGS" +AC_PROG_CXX +CXXFLAGS="$CXXFLAGS_save" + +# +# Find ranlib +# + +AC_PROG_RANLIB + +# +# Ensure that we can compile and link an MPI program +# + +# See if we can find +AC_CHECK_HEADER([mpi.h], [], + [AC_MSG_WARN([Cannot find mpi.h]) + AC_MSG_ERROR([Cannot continue]) + ]) + +# +# See if we can find the symbol MPI_Init. Be a little smart and use +# AC CHECK_FUNC if we're using mpicc, or AC CHECK_LIB otherwise. +# Aborts if MPI_Init is not found. +# +base=`basename $CC` +bad=0 +AS_IF([test "$base" = "mpicc"], + [AC_CHECK_FUNC([MPI_Init], [], [bad=1])], + [AC_CHECK_LIB([mpi], [MPI_Init], [], [bad=1])]) + +AS_IF([test "$bad" = "1"], + [AC_MSG_WARN([Cannot link against MPI_Init]) + AC_MSG_ERROR([Cannot continue]) + ]) + +# +# Party on +# + +AC_CONFIG_HEADERS([src/test_config.h]) +AC_CONFIG_FILES([ + Makefile + src/Makefile +]) +AC_OUTPUT diff --git a/alltoallv_validation/src/Makefile.am b/alltoallv_validation/src/Makefile.am new file mode 100644 index 0000000..35be06b --- /dev/null +++ b/alltoallv_validation/src/Makefile.am @@ -0,0 +1,16 @@ +# -*- makefile -*- +# + +bin_PROGRAMS = \ + alltoallv_ddt \ + sanity + +alltoallv_ddt_SOURCES = \ + $(common_sources) \ + alltoallv_ddt.cpp + +sanity_SOURCES = \ + $(common_sources) \ + sanity.cpp + +common_sources = typemap.c diff --git a/alltoallv_validation/src/alltoallv_ddt.cpp b/alltoallv_validation/src/alltoallv_ddt.cpp new file mode 100644 index 0000000..8cb380a --- /dev/null +++ b/alltoallv_validation/src/alltoallv_ddt.cpp @@ -0,0 +1,1005 @@ +/* + * Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Additional copyrights may follow + * + */ + + +#include +#include +#include + +#include +#include +#include +#include + +extern "C" void printMapDatatype(MPI_Datatype datatype); + + +#define NUM_LEVEL1_TESTS 9 +#define NUM_LEVEL2_TESTS 7 +#define NUM_LEVEL3_TESTS 6 + + +#define ERROR_CHECK( err, errlab ) if(err) { printf("ERROR: An error (%d) in an MPI call was detected at %s:%d!\n", err, __FILE__, __LINE__); goto errlab; } +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +int execute_test(struct run_config *run); + +#define VERBOSE_LEVEL_QUIET (user.verbose == 0) +#define VERBOSE_LEVEL_DEFAULT (user.verbose >= 1) +#define VERBOSE_LEVEL_LOUD (user.verbose >= 2) +#define VERBOSE_LEVEL_VERY_LOUD (user.verbose >= 3) + +struct user_config +{ + int seed = 0; + int item_count = 10; + int iters = 2; + /* probability that rank r will send item k to rank q: */ + double prob_item = 0.50; + /* probability that rank r will send anything to rank q: */ + double prob_rank = 0.85; + /* probability that rank r will send or receive anything at all: */ + double prob_world = 0.9; + + /* verbose: 0 is very quiet. 1 is default. 2 is loud 3 is very loud. 
*/ + int verbose = 0; + int only_high = 0; + int only_low = 0; +}; + +static struct user_config user; + +size_t tot_bytes_sent = 0; +size_t tot_bytes_recv = 0; +size_t tot_tests_exec = 0; + +void *my_malloc(size_t size) { + if (size==0) size++; + return malloc(size); +} +#define malloc my_malloc + +void dump_user_config(struct user_config *conf) { + std::cout << "seed: " << conf->seed << "\n"; + std::cout << "items-count: " << conf->item_count << "\n"; + std::cout << "prob-item: " << conf->prob_item << "\n"; + std::cout << "prob-rank: " << conf->prob_rank << "\n"; + std::cout << "prob-world: " << conf->prob_world << "\n"; + std::cout << "verbose: " << conf->verbose << "\n"; +} + +void dump_type_info(MPI_Datatype dtype, const char *label) { + MPI_Aint lb, extent, true_lb, true_extent; + int size; + + MPI_Type_get_extent(dtype, &lb, &extent); + MPI_Type_get_true_extent(dtype, &true_lb, &true_extent); + MPI_Type_size(dtype, &size); + printf("%s: Extent %ld (true: %ld). LB %ld (true: %ld). Size %d\n",label, + extent, true_extent, lb, true_lb, size); + printMapDatatype(dtype); +} + +struct run_config +{ + struct user_config *user; + + uint8_t *send_mat; + uint8_t *recv_mat; + + int *sendcounts; + int *recvcounts; + int *sdispls; + int *rdispls; + int *remote_sdispls; + size_t sum_send_count; + size_t sum_recv_count; + + MPI_Datatype sdtype; + MPI_Datatype rdtype; + int sdcount_mult; + int rdcount_mult; +}; + +void print_help() +{ + printf("Test alltoallv using various ddt's and validate results.\n"); + printf("This test uses pseudo-random sequences from C++'s mt19937 generator.\n"); + printf("The test (but not necessarily the implementation) is deterministic\n"); + printf("when the options and number of ranks remain the same.\n"); + printf("Options:\n"); + printf("\t [-s|--seed ] Change the seed to shuffle which datapoints are exchanged\n"); + printf("\t [-c|--item-count ] Each rank will create to consider for exchange (default=10).\n"); + printf("\t [-i|--prob-item ] Probability that rank r will send item k to rank q. (0.50)\n"); + printf("\t [-r|--prob-rank ] Probability that rank r will send anything to rank q. (0.90)\n"); + printf("\t [-w|--prob-world ] Probability that rank r will do anything at all. (0.95)\n"); + printf("\t [-t|--iters ] The number of iterations to test each dtype.\n"); + printf("\t [-o|--only ] Only execute a specific test signified by the pair high,low.\n"); + printf("\t low=0 means run all tests in that high level\n"); + printf("\t [-v|--verbose=level ] Set verbosity during execution (0=quiet (default). 1,2,3: loud).\n"); + printf("\t [-h|--help] Print this help and exit.\n"); + printf("\t [-z|--verbose-rank] Only the provided rank will print. Default=0. ALL = -1.\n"); + + printf("\n"); +} + +int level1_types( int jtest, MPI_Datatype *dtype) { + switch (jtest) { + case 0: *dtype = MPI_CHAR; break; + case 1: *dtype = MPI_REAL; break; + case 2: *dtype = MPI_INT; break; + case 3: *dtype = MPI_INT8_T; break; + case 4: *dtype = MPI_INT16_T; break; + case 5: *dtype = MPI_INT32_T; break; + case 6: *dtype = MPI_INT64_T; break; + case 7: *dtype = MPI_REAL4; break; + case 8: *dtype = MPI_REAL8; break; + case NUM_LEVEL1_TESTS: + default: + dtype = NULL; + return 1; + } + return 0; +} + + +bool is_predefined_type(MPI_Datatype dtype) { + MPI_Datatype dt_test; + for (int j=0; jsdcount_mult = 1; + run->rdcount_mult = 1; + err = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + ERROR_CHECK(err, on_error); + + low_counter = 0; + /* + Level1: + + These test various basic types on their own. 
We cannot send one type and + receive another, so this is a single loop + */ + for (int jd=0; jduser->only_high && run->user->only_high != 1) break; + if (run->user->only_low && run->user->only_low != low_counter) continue; + if (VERBOSE_LEVEL_DEFAULT) printf("--- Starting test 1,%d\n",low_counter); + + err = level1_types(jd, &run->sdtype); + ERROR_CHECK(err, on_error); + err = level1_types(jd, &run->rdtype); + ERROR_CHECK(err, on_error); + err = execute_test(run); + ERROR_CHECK(err, on_error); + + + MPI_Barrier(MPI_COMM_WORLD); + } + + run->sdcount_mult = 1; + run->rdcount_mult = 1; + low_counter = 0; + /* + Level2: + + These test multiples of a single type in various forms including + non-contiguous. As long as the base type is the same (and we just re-use + MPI_INT) then these types are compatible with each other, so the double loop + makes sure they can all inter-operate. + */ + for (int js=0; jsuser->only_high && run->user->only_high != 2) break; + for (int jr=0; jruser->only_low && run->user->only_low != low_counter) continue; + if (VERBOSE_LEVEL_DEFAULT) printf("--- Starting test 2,%d. Crossing %d x %d\n",low_counter, js, jr); + + err = level2_types( js, 12, MPI_INT, &run->sdtype, &run->sdcount_mult); + err |= level2_types( jr, 12, MPI_INT, &run->rdtype, &run->rdcount_mult); + ERROR_CHECK(err, on_error); + + err = execute_test(run); + ERROR_CHECK(err, on_error); + if (!is_predefined_type(run->sdtype)) { + MPI_Type_free( &run->sdtype ); + } + if (!is_predefined_type(run->rdtype)) { + MPI_Type_free( &run->rdtype ); + } + MPI_Barrier(MPI_COMM_WORLD); + } + } + + run->rdcount_mult = 1; + run->sdcount_mult = 1; + low_counter = 0; + for (int js=0; jsuser->only_high && run->user->only_high != 3) break; + if (run->user->only_low && run->user->only_low != low_counter) continue; + if (VERBOSE_LEVEL_DEFAULT) printf("--- Starting test 3,%d. 
Crossing %d x %d\n",low_counter, js, jr); + err = level3_types( js, MPI_INT, MPI_CHAR, &run->sdtype); + err |= level3_types( jr, MPI_INT, MPI_CHAR, &run->rdtype); + ERROR_CHECK(err, on_error); + err = execute_test(run); + ERROR_CHECK(err, on_error); + if (!is_predefined_type(run->sdtype)) { + err = MPI_Type_free( &run->sdtype ); + ERROR_CHECK(err, on_error); + } + if (!is_predefined_type(run->rdtype)) { + err = MPI_Type_free( &run->rdtype ); + ERROR_CHECK(err, on_error); + } + err = MPI_Barrier(MPI_COMM_WORLD); + ERROR_CHECK(err, on_error); + } + } + + low_counter = 0; + do { + low_counter++; + if (run->user->only_high && run->user->only_high != 4) break; + if (run->user->only_low && run->user->only_low != low_counter) continue; + if (VERBOSE_LEVEL_DEFAULT) printf("--- Starting test 4,%d\n",low_counter); + if (low_counter == 1) { + int blk_lens[2]; + MPI_Aint blk_displ[2]; + MPI_Datatype blk_types[2]; + MPI_Datatype dtype; + blk_lens[0] = 1; + blk_lens[1] = 1; + blk_displ[0] = -4; + blk_displ[1] = 4; + blk_types[0] = MPI_CHAR; + blk_types[1] = MPI_CHAR; + + err = MPI_Type_create_struct(2, blk_lens, blk_displ, blk_types, &dtype); + ERROR_CHECK(err, on_error); + err = MPI_Type_commit(&dtype); + ERROR_CHECK(err, on_error); + if (rank != 0) { + run->rdcount_mult = 1; + run->rdtype = dtype; + run->sdcount_mult = 1; + run->sdtype = dtype; + } else { + run->rdcount_mult = 2; + run->rdtype = MPI_CHAR; + run->sdcount_mult = 2; + run->sdtype = MPI_CHAR; + } + + err = execute_test(run); + ERROR_CHECK(err, on_error); + + + } else if (low_counter == 2) { + + MPI_Datatype send_pile[4]; + MPI_Datatype recv_pile[9]; + int ignored; + + /** + * In case you have to debug this, it should look something like this: + * Sender + * Vector 48 long of: ((real,char),(char,int64)) + * + * Reciever + * ( + * ( + * Vector 36 long of: ((real,char),(char,int64)) + * (real,char),(char,int64)) + * ), + * ( + * Vector 10 long of: ((real,char),(char,int64)), + * (real,char),(char,int64)) + * ) + * ) + * And of course, the layouts of each of those things is all messy. + */ + + // level3_types( 5, MPI_INT, MPI_CHAR, &send_pile[0]); + // level3_types( 6, MPI_INT, MPI_CHAR, &recv_pile[0]); + + level3_types( 4, MPI_CHAR, MPI_CHAR, &send_pile[0]); + level3_types( 3, MPI_CHAR, MPI_CHAR, &recv_pile[0]); + + level3_types( 2, MPI_CHAR, MPI_CHAR, &send_pile[1]); + level3_types( 1, MPI_CHAR, MPI_CHAR, &recv_pile[1]); + + level3_types( 2, send_pile[0], send_pile[1], &send_pile[2]); + level3_types( 2, recv_pile[0], recv_pile[1], &recv_pile[2]); + + /* create our vector: note that level1 tests other than 0 don't use the mult so we ignore it. 
*/ + level2_types( 5, 48, send_pile[2], &send_pile[3], &ignored); + + // /* create two vectors, totaling 46, then two extra items to add up to 48.*/ + level2_types( 3, 36, recv_pile[2], &recv_pile[3], &ignored); + level2_types( 1, 10, recv_pile[2], &recv_pile[4], &ignored); + level3_types( 5, recv_pile[3], recv_pile[2], &recv_pile[6]); + level3_types( 5, recv_pile[4], recv_pile[2], &recv_pile[7]); + level3_types( 5, recv_pile[6], recv_pile[7], &recv_pile[8]); + + run->rdcount_mult = 1; + run->sdcount_mult = 1; + run->sdtype = send_pile[3]; + // run->rdtype = recv_pile[8]; + run->rdtype = send_pile[3]; + // run->sdtype = send_pile[2]; + // run->rdtype = recv_pile[2]; + MPI_Type_commit(&run->sdtype); + MPI_Type_commit(&run->rdtype); + + err = execute_test(run); + ERROR_CHECK(err, on_error); + } + + } while (low_counter+1 <= 2); + + return 0; + on_error: + return -1; +} + + +/* simple pattern so we can check it later. We only care about byte position. + Note: reserve 0 and 1, so we can memset those as "holes" in the send and recv + message buffers respectively. */ +void fill_pattern_buf(uint8_t *buf, size_t nbytes, int rank, int iter) { + uint8_t last_val = iter; + for (int jbyte = 0; jbyterdtype); + ERROR_CHECK(err, on_error) + err = MPI_Type_commit(&run->sdtype); + ERROR_CHECK(err, on_error) + + MPI_Aint lbr, lbs, lbr_true, lbs_true; + MPI_Aint lbs_shift, lbr_shift; + MPI_Aint sdtype_extent, rdtype_extent; + MPI_Aint sdtype_true_extent, rdtype_true_extent; + MPI_Request req; + MPI_Status status; + + MPI_Type_size(run->sdtype, &sdtype_size); + MPI_Type_size(run->rdtype, &rdtype_size); + if (sdtype_size * run->sdcount_mult != rdtype_size * run->rdcount_mult) { + printf("Error in types or in test harness. Attempting to send/recv types of differing sizes: %d*%d != %d*%d!\n", + sdtype_size, run->sdcount_mult, rdtype_size, run->rdcount_mult); + return 1; + } + + MPI_Type_get_extent(run->sdtype, &lbs, &sdtype_extent); + MPI_Type_get_extent(run->rdtype, &lbr, &rdtype_extent); + MPI_Type_get_true_extent(run->rdtype, &lbr_true, &rdtype_true_extent); + MPI_Type_get_true_extent(run->sdtype, &lbs_true, &sdtype_true_extent); + if (VERBOSE_LEVEL_VERY_LOUD) { + printf("Datatype (send,recv) extents (%ld,%ld), size (%d,%d), and lb (%ld,%ld)\n", + sdtype_extent, rdtype_extent, sdtype_size, rdtype_size, lbs, lbr); + printf("Datatype TRUE (send,recv) extents (%ld,%ld), size (%d,%d), and lb (%ld,%ld)\n", + sdtype_true_extent, rdtype_true_extent, sdtype_size, rdtype_size, lbs_true, lbr_true); + } + + lbs_shift = -lbs_true; + lbr_shift = -lbr_true; + + sum_send_count = run->sum_send_count * run->sdcount_mult; + sum_recv_count = run->sum_recv_count * run->rdcount_mult; + + size_t send_buf_len, recv_buf_len; + send_buf_len = (sum_send_count>0?1:0)*sdtype_true_extent + MAX(0,sum_send_count-1)*sdtype_extent; + recv_buf_len = (sum_recv_count>0?1:0)*rdtype_true_extent + MAX(0,sum_recv_count-1)*rdtype_extent; + // printf("SEND_BUF_LEN: Allocating (%ld)+(%ld) + 2*(%d) bytes\n",sdtype_true_extent,MAX(0,sum_send_count-1)*sdtype_extent, guard_len); + + svalidation_buf = (uint8_t*)malloc( sdtype_size * sum_send_count + 2*guard_len) + guard_len; + rvalidation_buf = (uint8_t*)malloc( rdtype_size * sum_recv_count + 2*guard_len) + guard_len; + valb_guards[0] = svalidation_buf -guard_len; + valb_guards[1] = rvalidation_buf -guard_len; + valb_guards[2] = svalidation_buf + sdtype_size * sum_send_count; + valb_guards[3] = rvalidation_buf + rdtype_size * sum_recv_count; + + smsg_buf = (uint8_t*)malloc( send_buf_len + 2*guard_len) + 
guard_len; + rmsg_buf = (uint8_t*)malloc( recv_buf_len + 2*guard_len) + guard_len; + msg_guards[0] = smsg_buf - guard_len; + msg_guards[1] = rmsg_buf - guard_len; + msg_guards[2] = smsg_buf + send_buf_len; + msg_guards[3] = rmsg_buf + recv_buf_len; + + set_guard_bytes(msg_guards, guard_len, 127); + err = check_guard_bytes( msg_guards, guard_len, 127, "SANITY1" ); + set_guard_bytes(valb_guards, guard_len, 128); + err |= check_guard_bytes( valb_guards, guard_len, 128, "SANITY2" ); + err |= check_guard_bytes( msg_guards, guard_len, 127, "SANITY3" ); + ERROR_CHECK(err, on_error); + + sendcounts = (int*)malloc( sizeof(int) * world_size); + recvcounts = (int*)malloc( sizeof(int) * world_size); + sdispls = (int*)malloc( sizeof(int) * world_size); + rdispls = (int*)malloc( sizeof(int) * world_size); + for (int jrank = 0; jrank < world_size; jrank++ ) { + sendcounts[jrank] = run->sendcounts[jrank] * run->sdcount_mult; + recvcounts[jrank] = run->recvcounts[jrank] * run->rdcount_mult; + sdispls[jrank] = run->sdispls[jrank] * run->sdcount_mult; + rdispls[jrank] = run->rdispls[jrank] * run->rdcount_mult; + } + + for (int jiter = 0; jiter < run->user->iters; jiter++) { + + fill_pattern_buf( svalidation_buf, sdtype_size * sum_send_count, rank, jiter); + memset( smsg_buf, 0, send_buf_len); + memset( rmsg_buf, 1, recv_buf_len); + + err = check_guard_bytes( msg_guards, guard_len, 127, "message buffer1" ); + ERROR_CHECK(err, on_error); + + err = check_guard_bytes( valb_guards, guard_len, 128, "validation buffer" ); + ERROR_CHECK(err, on_error); + + + /* move data from pattern buf to message buf using send-to-self calls: */ + err = MPI_Irecv(smsg_buf+lbs_shift, sum_send_count, run->sdtype, 0, 0, MPI_COMM_SELF, &req); + ERROR_CHECK(err, on_error); + err = MPI_Send(svalidation_buf, sdtype_size * sum_send_count, MPI_BYTE, 0, 0, MPI_COMM_SELF); + ERROR_CHECK(err, on_error); + err = MPI_Wait(&req, &status); + ERROR_CHECK(err==MPI_ERR_IN_STATUS && status.MPI_ERROR, on_error); + err = check_guard_bytes( msg_guards, guard_len, 127, "message buffer after filling send-buf with pattern" ); + err |= check_guard_bytes( valb_guards, guard_len, 128, "validation buffer after filling send-buf with pattern" ); + ERROR_CHECK(err, on_error); + + /* exchange data */ + err = MPI_Alltoallv( + smsg_buf+lbs_shift, sendcounts, sdispls, run->sdtype, + rmsg_buf+lbr_shift, recvcounts, rdispls, run->rdtype, + MPI_COMM_WORLD + ); + ERROR_CHECK(err, on_error); + err = check_guard_bytes( msg_guards, guard_len, 127, "message buffer3" ); + err |= check_guard_bytes( valb_guards, guard_len, 128, "validation buffer" ); + ERROR_CHECK(err, on_error); + + MPI_Barrier(MPI_COMM_WORLD); + + /* move data from rmsg_buf to receive validation buf using send-to-self calls: */ + err = MPI_Irecv(rvalidation_buf, rdtype_size * sum_recv_count, MPI_BYTE, 0, 0, MPI_COMM_SELF, &req); + ERROR_CHECK(err, on_error); + err = MPI_Send(rmsg_buf+lbr_shift, sum_recv_count, run->rdtype, 0, 0, MPI_COMM_SELF); + ERROR_CHECK(err, on_error); + err = MPI_Wait(&req, &status); + ERROR_CHECK(err==MPI_ERR_IN_STATUS && status.MPI_ERROR, on_error); + ERROR_CHECK(err, on_error); + err = check_guard_bytes( msg_guards, guard_len, 127, "message buffer4" ); + err |= check_guard_bytes( valb_guards, guard_len, 128, "validation buffer" ); + ERROR_CHECK(err, on_error); + + /* + * because: + * rdtype_size * rdcount_mult <=> sdtype_size * sdcount_mult <=> remote's sdtype_size * sdcount_mult + * run->remote_sdispls is in units of the remote's [sdtype_size*sdcount_mult] Bytes + * + * So let the 
check function scale everything by type_size*mult, and + * provide the run-> varaibles for recvcounts and remote_sdispls + */ + err = check_pattern_buf( rvalidation_buf, rdtype_size*run->rdcount_mult, world_size, run->recvcounts, run->remote_sdispls, jiter); + if (err) { + printf("ERROR: Validation failed on rank %d!\n",rank); + goto on_error; + } + tot_bytes_sent += sdtype_size * sum_send_count; + tot_bytes_recv += rdtype_size * sum_recv_count; + } + + free(smsg_buf-guard_len); + free(rmsg_buf-guard_len); + free(svalidation_buf-guard_len); + free(rvalidation_buf-guard_len); + free(sendcounts); + free(recvcounts); + free(rdispls); + free(sdispls); + tot_tests_exec++; + + return 0; + + on_error: + MPI_Abort(MPI_COMM_WORLD, 1); + return 1; +} + +int main(int argc, char *argv[]) { + typedef std::chrono::high_resolution_clock myclock; + myclock::time_point beginning = myclock::now(); + + int err; + + + MPI_Init(&argc, &argv); + int rank, world_size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + struct run_config run; + run.user = &user; + + + static struct option long_options[] = { + { "seed", required_argument, 0, 's' }, + { "item-count", required_argument, 0, 'c' }, + { "prob-item", required_argument, 0, 'i' }, + { "prob-rank", required_argument, 0, 'r' }, + { "prob-world", required_argument, 0, 'w' }, + { "iters", required_argument, 0, 't' }, + { "only", required_argument, 0, 'o' }, + { "verbose", required_argument, 0, 'v' }, + { "verbose-rank", required_argument, 0, 'z' }, + { "help", no_argument, 0, 'h' } + }; + + int opt; + int option_index; + int verbose_rank = 0; + user.verbose = 0; + + while (1) + { + char *s1, *s2; + opt = getopt_long(argc, argv, "s:c:i:r:w:t:v:hz:", long_options, &option_index); + if (opt == -1) break; + switch(opt) { + case 's': + user.seed = atoi(optarg); + break; + case 'c': + user.item_count = atoi(optarg); + break; + case 'i': + user.prob_item = atof(optarg); + break; + case 'r': + user.prob_rank = atof(optarg); + break; + case 'w': + user.prob_world = atof(optarg); + break; + case 't': + user.iters = atoi(optarg); + break; + case 'o': + s1 = strtok(optarg, ","); + s2 = strtok(NULL, ","); + if (s1==NULL || s2==NULL) { + print_help(); + printf("Option to --only should be like \"0,3\"."); + } + user.only_high = atoi(s1); + user.only_low = atoi(s2); + break; + case 'v': + user.verbose = atoi(optarg); + break; + case 'h': + if (rank==0) { + print_help(); + } + MPI_Finalize(); + return EXIT_SUCCESS; + case 'z': + verbose_rank = atoi(optarg); + break; + default: + if (rank==0) { + print_help(); + printf("Unexpected option: %c\n",opt); + } + MPI_Finalize(); + return EXIT_FAILURE; + } + } + if (verbose_rank != -1 && verbose_rank != rank) { + user.verbose = 0; + } + + if (VERBOSE_LEVEL_DEFAULT && (user.only_high || user.only_low)) { + printf("Requested only test %d,%d\n",user.only_high,user.only_low); + } + + if (VERBOSE_LEVEL_LOUD) { + dump_user_config(&user); + printf("-----------\n"); + } + MPI_Barrier(MPI_COMM_WORLD); + // mt19937 is a standard mersenne_twister_engine, seeded with our rank. 
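+ // Each rank seeds its generator with (seed + rank): a fixed seed and rank count
+ // reproduce the same exchange pattern, while each rank still draws an independent sequence.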
+ std::mt19937 rngseq(user.seed+rank); + std::uniform_real_distribution uniform_double(0.0, 1.0); + std::uniform_int_distribution uniform_uint32(0, UINT32_MAX); + // printf("On rank %d: Rank+seed seed produced: %u then %f\n",rank, uniform_uint32(rngseq), uniform_double(rngseq)); + + uint8_t *send_mat = (uint8_t*)malloc(sizeof(*send_mat) * world_size * user.item_count); + uint8_t *recv_mat = (uint8_t*)malloc(sizeof(*send_mat) * world_size * user.item_count); + uint8_t *rank_on = (uint8_t*)malloc(sizeof(*rank_on) * world_size); + + uint8_t this_rank_is_on = uniform_double(rngseq) < user.prob_world; + + err = MPI_Allgather( &this_rank_is_on, 1, MPI_UINT8_T, rank_on, 1, MPI_UINT8_T, MPI_COMM_WORLD); + ERROR_CHECK( err, on_error ); + + for (int jrank=0; jrank +#include +#include + +#include +#include +#include +#include + +#define ERROR_CHECK( err, errlab ) if(err) { printf("ERROR: An error (%d) in an MPI call was detected at %s:%d!\n", err, __FILE__, __LINE__); goto errlab; } + +int datatype_check() { + int err, size; + int block_count = 3, per_block = 3; + MPI_Datatype dtype; + MPI_Aint lb, extent, lb_true, extent_true; + + /* a contiguous block going backwards */ + err = MPI_Type_vector(block_count, per_block, -per_block, MPI_UINT8_T, &dtype); + ERROR_CHECK( err, on_error ); + MPI_Type_commit(&dtype); + + err = MPI_Type_get_extent(dtype, &lb, &extent); + ERROR_CHECK( err, on_error); + err = MPI_Type_get_true_extent(dtype, &lb_true, &extent_true); + ERROR_CHECK( err, on_error); + err = MPI_Type_size(dtype, &size); + ERROR_CHECK( err, on_error); + printf("Datatype (normal,true) extents (%ld,%ld), size (%d,--), and lb (%ld,%ld)\n", + extent, extent_true, size, lb, lb_true); + + if (0) { +on_error: + MPI_Finalize(); + return 1; + } + return 0; +} +int check_bounds() { + + int err, size; + const int nbytes = 25; + uint8_t buf_bytes[nbytes]; + uint8_t buf_msg[nbytes]; + MPI_Aint lb, extent; + + for (int j=0; j +#include +#include + +void printMapDatatype(MPI_Datatype datatype); + +MPI_Aint printdatatype( MPI_Datatype datatype, MPI_Aint prevExtentTot ) { + int *array_of_ints; + MPI_Aint *array_of_adds; + MPI_Datatype *array_of_dtypes; + int num_ints, num_adds, num_dtypes, combiner; + int i, j; + + + MPI_Type_get_envelope( datatype, &num_ints, &num_adds, &num_dtypes, &combiner ); + + array_of_ints = (int *) malloc( num_ints * sizeof(int) ); + array_of_adds = (MPI_Aint *) malloc( num_adds * sizeof(MPI_Aint) ); + array_of_dtypes = (MPI_Datatype *) malloc( num_dtypes * sizeof(MPI_Datatype) ); + + MPI_Aint extent, subExtent, LB; + MPI_Type_get_extent(datatype, &LB, &extent); + + switch (combiner) { + case MPI_COMBINER_NAMED: + // To print the specific type, we can match against the predefined forms. 
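+ // Predefined (named) types cannot be decoded further with MPI_Type_get_contents,
+ // so just print the matching predefined name along with the running byte offset.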
+ + if (datatype == MPI_BYTE) printf( "(MPI_BYTE, %ld)", prevExtentTot); + else if (datatype == MPI_PACKED) printf( "(MPI_PACKED, %ld)", prevExtentTot); + else if (datatype == MPI_CHAR) printf( "(MPI_CHAR, %ld)", prevExtentTot); + else if (datatype == MPI_DOUBLE) printf( "(MPI_DOUBLE, %ld)", prevExtentTot); + else if (datatype == MPI_FLOAT) printf( "(MPI_FLOAT, %ld)", prevExtentTot); + else if (datatype == MPI_REAL) printf( "(MPI_REAL, %ld)", prevExtentTot ); + else if (datatype == MPI_INT) printf( "(MPI_INT, %ld)", prevExtentTot ); + else if (datatype == MPI_INT64_T) printf( "(MPI_INT64_T, %ld)", prevExtentTot ); + else if (datatype == MPI_LONG) printf( "(MPI_LONG, %ld)", prevExtentTot); + else if (datatype == MPI_LONG_DOUBLE) printf( "(MPI_LONG_DOUBLE, %ld)", prevExtentTot); + else if (datatype == MPI_LONG_LONG) printf( "(MPI_LONG_LONG, %ld)", prevExtentTot); + else if (datatype == MPI_LONG_LONG_INT) printf( "(MPI_LONG_LONG_INT, %ld)", prevExtentTot); + else if (datatype == MPI_SHORT) printf( "(MPI_SHORT, %ld)", prevExtentTot); + else if (datatype == MPI_SIGNED_CHAR) printf( "(MPI_SIGNED_CHAR, %ld)", prevExtentTot); + else if (datatype == MPI_UNSIGNED) printf( "(MPI_UNSIGNED, %ld)", prevExtentTot); + else if (datatype == MPI_UNSIGNED_CHAR) printf( "(MPI_UNSIGNED_CHAR, %ld)", prevExtentTot); + else if (datatype == MPI_UNSIGNED_LONG) printf( "(MPI_UNSIGNED_LONG, %ld)", prevExtentTot); + else if (datatype == MPI_UNSIGNED_LONG_LONG)printf( "(MPI_UNSIGNED_LONG_LONG, %ld)", prevExtentTot); + else if (datatype == MPI_UNSIGNED_SHORT) printf( "(MPI_UNSIGNED_SHORT, %ld)", prevExtentTot); + else if (datatype == MPI_WCHAR) printf( "(MPI_WCHAR, %ld)", prevExtentTot); + + free( array_of_ints ); + free( array_of_adds ); + free( array_of_dtypes ); + + return prevExtentTot; + break; + case MPI_COMBINER_DUP: + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + printdatatype( array_of_dtypes[0], prevExtentTot); + + printf(", \n"); + + break; + case MPI_COMBINER_CONTIGUOUS: + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + MPI_Type_get_extent(array_of_dtypes[0], &LB, &subExtent); // no need to do in loop because same type + + for (i=0; i < array_of_ints[0]; i++) { + prevExtentTot = printdatatype( array_of_dtypes[0], prevExtentTot); + prevExtentTot += subExtent; + printf(", "); + } + + break; + case MPI_COMBINER_VECTOR: + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + MPI_Type_get_extent(array_of_dtypes[0], &LB, &subExtent); // no need to do in loop because same type + + printf("["); + for (i = 0; i < array_of_ints[0]; i++) { //count + printf( "BL : %d - ", array_of_ints[1]); + for (j = 0; j < array_of_ints[2]; j++) { // stride + if (j < array_of_ints[1]) { // if in blocklength + prevExtentTot = printdatatype( array_of_dtypes[0], prevExtentTot); + printf(", "); + } + prevExtentTot += subExtent; + + } + } + printf("], "); + + break; + case MPI_COMBINER_HVECTOR:{ + MPI_Aint backupPrevExtent = prevExtentTot; + + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + MPI_Type_get_extent(array_of_dtypes[0], &LB, &subExtent); // no need to do in loop because same type + + printf("["); + for (i = 0; i < array_of_ints[0]; i++) { //count + printf( "BL : %d - ", array_of_ints[1]); + for (j = 0; j < array_of_ints[1]; j++) { // blocklength + 
prevExtentTot = printdatatype( array_of_dtypes[0], prevExtentTot); + printf(", "); + prevExtentTot += subExtent; + } + prevExtentTot = backupPrevExtent + array_of_adds[0]; // + stride un byte + } + printf("], "); + + break; + } + case MPI_COMBINER_INDEXED:{ + MPI_Aint tmpPrevExtent; + int count, blocklength; + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + MPI_Type_get_extent(array_of_dtypes[0], &LB, &subExtent); // no need to do in loop because same type + + printf("<"); + count = array_of_ints[0]; + for (i = 0; i < count; i++) { // count + blocklength = array_of_ints[i + 1]; // array of blocklength + tmpPrevExtent = prevExtentTot; + tmpPrevExtent += array_of_ints[count + 1 + i] * subExtent; // + displacement * size of block + printf( "BL : %d - ", blocklength); + for (j = 0; j < blocklength; j++) { // blocklength + tmpPrevExtent = printdatatype( array_of_dtypes[0], tmpPrevExtent); + printf(", "); + tmpPrevExtent += subExtent; + } + } + printf(">, "); + + prevExtentTot = tmpPrevExtent; + + break; + } + case MPI_COMBINER_HINDEXED:{ + MPI_Aint tmpPrevExtent; + int count, blocklength; + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + MPI_Type_get_extent(array_of_dtypes[0], &LB, &subExtent); // no need to do in loop because same type + + printf("<"); + count = array_of_ints[0]; + for (i = 0; i < count; i++) { // count + blocklength = array_of_ints[i + 1]; // array of blocklength + tmpPrevExtent = prevExtentTot; + tmpPrevExtent += array_of_adds[i]; // + displacement in byte + printf( "BL : %d - ", blocklength); + for (j = 0; j < blocklength; j++) { + tmpPrevExtent = printdatatype( array_of_dtypes[0], tmpPrevExtent); + printf(", "); + tmpPrevExtent += subExtent; + } + } + printf(">, "); + + prevExtentTot = tmpPrevExtent; + + break; + } + case MPI_COMBINER_INDEXED_BLOCK:{ + MPI_Aint tmpPrevExtent; + int count; + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + MPI_Type_get_extent(array_of_dtypes[0], &LB, &subExtent); // no need to do in loop because same type + + printf("<"); + count = array_of_ints[0]; + for (i = 0; i < count; i++) { // count + tmpPrevExtent = prevExtentTot; + tmpPrevExtent += array_of_ints[i + 2] * subExtent; // + displacement * size of block + printf( "BL : %d - ", array_of_ints[i + 1]); + for (j = 0; j < array_of_ints[1]; j++) { // blocklength + tmpPrevExtent = printdatatype( array_of_dtypes[0], tmpPrevExtent); + printf(", "); + tmpPrevExtent += subExtent; + } + } + printf(">, "); + + prevExtentTot = tmpPrevExtent; + + break; + } + case MPI_COMBINER_STRUCT:{ + MPI_Aint tmpPrevExtent; + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + printf( "{"); + for (i = 0; i < array_of_ints[0]; i++) { // count + tmpPrevExtent = prevExtentTot + array_of_adds[i]; // origin + displacement + printf( "BL : %d - ", array_of_ints[i + 1]); + tmpPrevExtent = printdatatype( array_of_dtypes[i], tmpPrevExtent); + tmpPrevExtent += subExtent; + printf(", "); + } + printf("}, "); + + prevExtentTot = tmpPrevExtent; + + break; + } + case MPI_COMBINER_SUBARRAY: + // I don't know what is interresting to display here... + printf("... subarray not handled ..."); + break; + case MPI_COMBINER_DARRAY: + // Same + printf("... 
darray not handled ..."); + break; + case MPI_COMBINER_RESIZED: + MPI_Type_get_contents( datatype, num_ints, num_adds, num_dtypes, array_of_ints, array_of_adds, array_of_dtypes ); + + prevExtentTot = printdatatype( array_of_dtypes[0], prevExtentTot); + + printf(", \n"); + + break; + default: + printf( "Unrecognized combiner type\n" ); + } + + free( array_of_ints ); + free( array_of_adds ); + free( array_of_dtypes ); + + return prevExtentTot; +} + +void printMapDatatype(MPI_Datatype datatype) { + MPI_Aint lb, extent; + MPI_Type_get_extent(datatype, &lb, &extent); + + printf("\"(LB, %ld), ", lb); + printdatatype(datatype, 0); + printf("(UB, %ld)\"\n", lb+extent); +} \ No newline at end of file