Brian Barrett wrote:
> It doesn't look like we added any debugging information as to *why* a
> component didn't select itself.
OK, I've added some (hacked version of ssi_coll_shmem.c attached).
> My initial guess is that you are running out of system V semaphores
> or shared memory. But then again, it sounds like you bumped those
> limits up high enough that something else might be going on.
Something else is going on. It bails out based on the results of the
call to:
lam_ssi_base_param_lookup_int(param_shmem)
Here's my mpirun command:
/usr/local/v9b/ultra3cu/lam-7.1.2/bin/mpirun -ssi boot rsh -ssi rpi
usysv -ssi coll shmem,lam_basic -ssi coll_verbose level:10000 -nsigs
-pty -w -wd /sciclone/home00/tom/renderer/tests/PGL.sciclone -sa -v
-nger /tmp/pbslam.app_schema.19312
Here's the output from the rank 0 process:
----------
n0<19330> ssi:coll:open: Opening
n0<19330> ssi:coll:open:crossover: 4 processes
n0<19330> ssi:coll:open:associative: 0
n0<19330> ssi:coll:open: opening coll module lam_basic
n0<19330> ssi:coll:open: opened coll module lam_basic
n0<19330> ssi:coll:open: opening coll module shmem
n0<19330> ssi:coll:open: opened coll module shmem
n0<19330> ssi:coll:open: skipping non-selected module smp
n0<19330> ssi:coll:query: querying coll module shmem
n0<19330> ssi:coll:query: coll module shmem available
n0<19330> ssi:coll:query: querying coll module lam_basic
n0<19330> ssi:coll:query: coll module lam_basic available
n0<19330> ssi:coll:init_comm: new communicator: MPI_COMM_SELF (cid 1)
n0<19330> ssi:coll:shmem: not enabled for communicator MPI_COMM_SELF
(lam_ssi_base_param_lookup_int)
n0<19330> ssi:coll:init_comm: module not available: shmem, priority: -1
n0<19330> ssi:coll:init_comm: module available: lam_basic, priority: 100
n0<19330> ssi:coll:lam_basic: init communicator MPI_COMM_SELF
n0<19330> ssi:coll:init_comm: Selected coll module lam_basic
n0<19330> ssi:coll:init_comm: new communicator: MPI_COMM_WORLD (cid 0)
n0<19330> ssi:coll:shmem: not enabled for communicator MPI_COMM_WORLD
(lam_ssi_base_param_lookup_int)
n0<19330> ssi:coll:init_comm: module not available: shmem, priority: -1
n0<19330> ssi:coll:init_comm: module available: lam_basic, priority: 0
n0<19330> ssi:coll:lam_basic: init communicator MPI_COMM_WORLD
n0<19330> ssi:coll:init_comm: Selected coll module lam_basic
n0<19330> ssi:coll:init_comm: new communicator: <no name> (cid 2)
n0<19330> ssi:coll:shmem: not enabled for communicator
(lam_ssi_base_param_lookup_int)
n0<19330> ssi:coll:init_comm: module not available: shmem, priority: -1
n0<19330> ssi:coll:init_comm: module available: lam_basic, priority: 0
n0<19330> ssi:coll:lam_basic: init communicator
n0<19330> ssi:coll:init_comm: Selected coll module lam_basic
n0<19330> ssi:coll:finalize_comm: communicator: MPI_COMM_SELF (cid 1)
n0<19330> ssi:coll:lam_basic: finalize communicator MPI_COMM_SELF
n0<19330> ssi:coll:finalize_comm: communicator: MPI_COMM_WORLD (cid 0)
n0<19330> ssi:coll:lam_basic: finalize communicator MPI_COMM_WORLD
n0<19330> ssi:coll:finalize_comm: communicator: <no name> (cid 2)
n0<19330> ssi:coll:lam_basic: finalize communicator
n0<19330> ssi:coll:close: Closing
----------
In this particular example, I'm running with two processes on a single node.
Question: Does the "crossover" value have any bearing on the selection
of the shmem module? As I interpret the manual, that parameter is
independent of the choice of coll modules, but what do I know?
-Tom
--
Tom Crockett
College of William and Mary email: tom_at_[hidden]
Computational Science Cluster phone: (757) 221-2762
Savage House fax: (757) 221-2023
P.O. Box 8795
Williamsburg, VA 23187-8795
/*
* Copyright (c) 2001-2004 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 1998-2001 University of Notre Dame.
* All rights reserved.
* Copyright (c) 1994-1998 The Ohio State University.
* All rights reserved.
*
* This file is part of the LAM/MPI software package. For license
* information, see the LICENSE file in the top level directory of the
* LAM/MPI source distribution.
*
* $HEADER$
*
* $Id: ssi_coll_shmem.c,v 1.4 2004/03/09 16:53:08 adharurk Exp $
*
* Function: - SHMEM collective routines
*/
#include <string.h>
#include <lam_config.h>
#if LAM_WANT_PROFILE
#define LAM_PROFILELIB 1
#endif
#include <lam-ssi-coll-shmem-config.h>
#include <lam-ssi-rpi.h>
#include <lam-ssi-coll.h>
#include <lam-ssi-coll-shmem.h>
#include <mpisys.h>
#include <blktype.h>
/*
* Extern Functions
*
* Entry points of the lam_basic coll module.  The shmem query function
* in this file calls lam_ssi_coll_lam_basic_query() to obtain the
* lam_basic action table for a communicator and then immediately calls
* lam_ssi_coll_lam_basic_end_query() on the same communicator.
*/
extern const lam_ssi_coll_actions_1_1_0_t *
lam_ssi_coll_lam_basic_query(MPI_Comm comm, int *priority);
extern int lam_ssi_coll_lam_basic_end_query(MPI_Comm comm);
/*
* Local functions
*
* free_lcd_mem() releases a per-communicator lam_ssi_coll_data_t
* together with the three pointer arrays that hang off it.
*/
static void free_lcd_mem(lam_ssi_coll_data_t*);
/*
* Local variables
*
* param_shmem     - handle returned by lam_ssi_base_param_register_int()
*                   in the open function; looked up again in the query
*                   function to decide whether the module is enabled.
* local_coll_data - per-communicator data built by the most recent
*                   successful query; picked up by the init function
*                   (and by end_query when the communicator carries no
*                   data of its own).
* lb_functions    - copy of the lam_basic action table made during
*                   query, with the init/finalize slots replaced by the
*                   shmem versions.
*
* NOTE(review): local_coll_data and lb_functions are single file-scope
* slots, so they only describe the last communicator queried -- confirm
* that the framework never interleaves query/init for two communicators.
*/
static int param_shmem;
static lam_ssi_coll_data_t *local_coll_data;
static lam_ssi_coll_actions_1_1_0_t lb_functions;
/*
* Action table installed on a communicator by lam_ssi_coll_shmem_init().
* The initializer is positional, so entry order must match the field
* order of lam_ssi_coll_actions_1_1_0_t.  The "use lam_basic" comments
* below mark the collectives that are left as NULL here.
*/
static lam_ssi_coll_actions_1_1_0_t shmem_functions = {
/* Per-communicator initialization and finalization functions */
lam_ssi_coll_shmem_init,
lam_ssi_coll_shmem_finalize,
/* Checkpoint / restart functions */
lam_ssi_coll_base_empty_checkpoint,
lam_ssi_coll_base_empty_continue,
lam_ssi_coll_base_empty_restart,
lam_ssi_coll_base_empty_interrupt,
/* Collective function pointers */
/* Allgather */
lam_ssi_coll_shmem_allgather,
/* Allgatherv -- use lam_basic */
NULL,
/* Allreduce -- use lam_basic */
NULL,
/* Alltoall */
lam_ssi_coll_shmem_alltoall,
/* Alltoallv -- use lam_basic */
NULL,
/* Alltoallw -- use lam_basic */
NULL,
/* Barrier */
lam_ssi_coll_shmem_barrier,
/* Bcast */
/* NOTE(review): the integer ahead of the bcast pointer is presumably a
   flag field of the action struct -- confirm against its declaration */
1,
lam_ssi_coll_shmem_bcast,
/* Exscan -- use lam_basic */
NULL,
/* Gather */
lam_ssi_coll_shmem_gather,
/* Gatherv -- use lam_basic */
NULL,
/* Reduce */
/* NOTE(review): as for bcast, the integer ahead of the reduce pointer
   is presumably a flag field -- confirm against the struct declaration */
1,
lam_ssi_coll_shmem_reduce,
/* Reduce scatter: use lam_basic */
NULL,
/* Scan: use lam_basic */
NULL,
/* Scatter */
lam_ssi_coll_shmem_scatter,
/* Scatterv -- use lam_basic */
NULL,
};
/*
 * Module open hook.
 *
 * Registers the integer SSI parameter identified by ("coll", "shmem")
 * and remembers the handle in param_shmem; the query function later
 * looks that handle up and only proceeds when the value is 1.
 *
 * NOTE(review): the trailing 0 passed to the registration call is
 * presumably the parameter's default value, which would leave this
 * module disabled unless the user sets it -- confirm against the
 * lam_ssi_base_param_register_int() documentation.
 *
 * The "ad" argument is not used.  Always returns 1.
 */
int
lam_ssi_coll_shmem_open(OPT *ad)
{
  const int handle =
    lam_ssi_base_param_register_int("coll", "shmem", NULL, NULL, 0);

  param_shmem = handle;

  return 1;
}
/*
 * Thread-support query, invoked during MPI_INIT.
 *
 * Reports the range of thread levels this module claims to support:
 * MPI_THREAD_SINGLE is stored through thread_min, then
 * MPI_THREAD_MULTIPLE through thread_max.  Always returns 0.
 */
int
lam_ssi_coll_shmem_thread_query(int *thread_min, int *thread_max)
{
  const int lowest = MPI_THREAD_SINGLE;
  const int highest = MPI_THREAD_MULTIPLE;

  /* Keep this store order: min first, max second */
  *thread_min = lowest;
  *thread_max = highest;

  return 0;
}
/*
 * Invoked when a new communicator has been created.  Decides whether
 * the shmem module can serve this communicator.
 *
 * The module declines (returns NULL) when:
 *   - the "coll shmem" SSI parameter does not look up as 1,
 *   - the communicator is an inter-communicator,
 *   - the processes are not all on the same node,
 *   - the communicator has a single process,
 *   - any rank fails to allocate its local data or to create/attach
 *     the shared memory area.
 *
 * Otherwise *priority is set to 50, the per-communicator data is cached
 * in local_coll_data for lam_ssi_coll_shmem_init(), and a pointer to
 * lb_functions is returned: the lam_basic action table with its
 * init/finalize slots replaced by the shmem versions.
 *
 * Every rank that gets past the local checks takes part in the same
 * message sequence (rank 0 sends the shared ids to each other rank,
 * then a gather and a bcast of the success flags), so a local failure
 * is recorded in "success" rather than returned immediately; the only
 * early exits are the ones rank 0 announces by sending invalid ids.
 *
 * NOTE(review): when the area was created/attached but the overall
 * query fails, only the local memory is released here; whether the
 * shared area itself is released elsewhere is not visible in this
 * file -- confirm.
 */
const lam_ssi_coll_actions_1_1_0_t *
lam_ssi_coll_shmem_query(MPI_Comm comm, int *priority)
{
  int i;
  int size, rank;
  int shmem_size = 0, control_size = 0;
  const int invalid_shmemid = -1;
  const int invalid_semid = -1;
  int shared_ids[2];
  int bogus_priority;
  int success = 1;
  int all_success = 0;
  int *successes = NULL;
  lam_ssi_coll_data_t *lcd = NULL;

  /* See if shmem module is enabled - proceed only if enabled */
  if (lam_ssi_base_param_lookup_int(param_shmem) != 1) {
    if (lam_ssi_coll_verbose >= 100) {
      lam_debug(lam_ssi_coll_did,
                "shmem: not enabled for communicator %s (lam_ssi_base_param_lookup_int)",
                comm->c_name);
    }
    return NULL;
  }

  if (LAM_IS_INTER(comm)) {
    if (lam_ssi_coll_verbose >= 100) {
      lam_debug(lam_ssi_coll_did,
                "shmem: not enabled for communicator %s (LAM_IS_INTER)",
                comm->c_name);
    }
    return NULL;
  }

  /* Find out whether all the processes in the comm are on the same node */
  MPI_Comm_size(comm, &size);
  for (i = 0; i < size - 1; ++i) {
    if ((comm->c_group->g_procs[i]->p_gps.gps_node) !=
        (comm->c_group->g_procs[i+1]->p_gps.gps_node)) {
      if (lam_ssi_coll_verbose >= 100) {
        lam_debug(lam_ssi_coll_did,
                  "shmem: processes not on same node for communicator %s",
                  comm->c_name);
      }
      return NULL;
    }
  }

  if (size == 1) {
    /* For MPI_COMM_SELF goto lam_basic only */
    if (lam_ssi_coll_verbose >= 100) {
      lam_debug(lam_ssi_coll_did,
                "shmem: only one process in communicator %s", comm->c_name);
    }
    return NULL;
  }

  /* Ok now we satisfy the condition, check if enough shared memory can be
     allocated for this communicator.  First allocate space for the local
     per communicator structure.  Lowest ranked process will create the
     area and send the shmid to rest of the processes.  All other
     processes will see if they can attach to it. */
  lcd = (lam_ssi_coll_data_t *) malloc(sizeof(lam_ssi_coll_data_t));
  if (lcd == NULL) {
    if (lam_ssi_coll_verbose >= 100) {
      lam_debug(lam_ssi_coll_did,
                "shmem: malloc failed for local struct in communicator %s",
                comm->c_name);
    }
    /* Do not touch lcd below; just remember the failure */
    success = 0;
  } else {
    lcd->lcd_message_pool_size =
      lam_ssi_coll_base_get_param(comm,
                                  LAM_MPI_SSI_COLL_SHMEM_MESSAGE_POOL_SIZE);
    lcd->lcd_num_segments =
      lam_ssi_coll_base_get_param(comm, LAM_MPI_SSI_COLL_SHMEM_NUM_SEGMENTS);

    /* Initialize shared pointers to NULL and then allocate memory for
       them, so free_lcd_mem() is safe whatever happens next */
    lcd->lcd_barrier = NULL;
    lcd->lcd_collective_track = NULL;
    lcd->lcd_message_segment = NULL;

    lcd->lcd_barrier = (volatile int**)
      malloc(sizeof(int*) * (lcd->lcd_num_segments + 1));
    lcd->lcd_collective_track = (volatile int**)
      malloc(sizeof(int*) * (lcd->lcd_num_segments + 1));
    lcd->lcd_message_segment = (char**)
      malloc(sizeof(char*) * lcd->lcd_num_segments);

    if (lcd->lcd_barrier == NULL || lcd->lcd_collective_track == NULL
        || lcd->lcd_message_segment == NULL) {
      if (lam_ssi_coll_verbose >= 100) {
        lam_debug(lam_ssi_coll_did,
                  "shmem: malloc failed for pointer arrays in communicator %s",
                  comm->c_name);
      }
      success = 0;
    }

    control_size = size * (2 * lcd->lcd_num_segments + 2) * CACHELINESIZE;
    shmem_size = control_size + lcd->lcd_message_pool_size;
  }

  MPI_Comm_rank(comm, &rank);

  if (rank == 0) {
    int created = 0;

    /* Only create the shared area when the local allocations worked.
       The result array is allocated first so that a failed malloc can
       never leave a freshly created shared area behind. */
    if (lcd != NULL && success) {
      successes = (int*) malloc(size * sizeof(int));
      if (successes != NULL
          && lam_ssi_coll_shmem_create_area(shmem_size,
                                            &(lcd->lcd_shared_segment_base),
                                            &(lcd->lcd_shmemid),
                                            &(lcd->lcd_semid), size)
             != LAMERROR) {
        created = 1;
      }
    }

    if (!created) {
      free(successes);
      free_lcd_mem(lcd);

      /* Tell everybody else to give up as well */
      shared_ids[0] = invalid_shmemid;
      shared_ids[1] = invalid_semid;
      for (i = 1; i < size; ++i) {
        MPI_Send(shared_ids, 2, MPI_INT, i, 1, comm);
      }
      if (lam_ssi_coll_verbose >= 100) {
        lam_debug(lam_ssi_coll_did,
                  "shmem: rank %d process unable to allocate shared memory for communicator %s",
                  rank, comm->c_name);
      }
      return NULL;
    }

    /* Send Information to other ranks */
    shared_ids[0] = lcd->lcd_shmemid;
    shared_ids[1] = lcd->lcd_semid;
    for (i = 1; i < size; ++i) {
      MPI_Send(shared_ids, 2, MPI_INT, i, 1, comm);
    }
  } else {
    /* Receive the shmemid from rank 0 and see if I can attach to the
       shared memory segment.  The receive must happen even if the local
       allocation failed, because rank 0 sends to every rank. */
    MPI_Recv(shared_ids, 2, MPI_INT, 0, 1, comm, MPI_STATUS_IGNORE);

    if ((shared_ids[0] == invalid_shmemid)
        || (shared_ids[1] == invalid_semid)) {
      if (lam_ssi_coll_verbose >= 100) {
        lam_debug(lam_ssi_coll_did,
                  "shmem: rank %d received invalid shared mem id from rank 0 for communicator %s",
                  rank, comm->c_name);
      }
      /* Rank 0 has bailed out too, so nobody enters the gather below */
      free_lcd_mem(lcd);
      return NULL;
    }

    /* Attach the shared memory segment (skipped when this rank has
       already failed; the gather below reports that to rank 0) */
    if (success) {
      lcd->lcd_shmemid = shared_ids[0];
      lcd->lcd_semid = shared_ids[1];

      if ((lam_ssi_coll_shmem_attach_area(lcd->lcd_shmemid, lcd->lcd_semid,
                                          &(lcd->lcd_shared_segment_base)))
          == LAMERROR) {
        if (lam_ssi_coll_verbose >= 100) {
          lam_debug(lam_ssi_coll_did,
                    "shmem: rank %d process unable to attach to shared memory segment in communicator %s",
                    rank, comm->c_name);
        }
        /* lcd is released once, on the common failure path below */
        success = 0;
      }
    }
  }

  *priority = 50;

  /* Get lam_basic function pointers so that we can use them.
     NOTE(review): the returned pointer is dereferenced unchecked, as the
     gather/bcast below cannot run without it -- confirm lam_basic never
     declines an intra-communicator. */
  lb_functions = *(lam_ssi_coll_lam_basic_query(comm, &bogus_priority));

  /* End the query */
  if (lam_ssi_coll_lam_basic_end_query(comm) == LAMERROR) {
    if (lam_ssi_coll_verbose >= 100) {
      lam_debug(lam_ssi_coll_did,
                "shmem: failed to end lam_basic query for communicator %s (lam_ssi_coll_lam_basic_end_query)",
                comm->c_name);
    }
    /* lcd is released once, on the common failure path below */
    success = 0;
  }

  lb_functions.lsca_init = lam_ssi_coll_shmem_init;
  lb_functions.lsca_finalize = lam_ssi_coll_shmem_finalize;

  /* Gather every rank's success flag on rank 0 and broadcast the
     combined verdict, so that all ranks agree on the outcome.  The
     receive buffer is only allocated on rank 0. */
  lb_functions.lsca_gather(&success, 1, MPI_INT, successes, 1, MPI_INT,
                           0, comm);
  if (rank == 0) {
    all_success = 1;
    for (i = 0; i < size; ++i) {
      if (!successes[i]) {
        all_success = 0;
        break;
      }
    }
    free(successes);
  }
  lb_functions.lsca_bcast(&all_success, 1, MPI_INT, 0, comm);

  if (all_success == 1) {
    /* all_success implies this rank's own success, hence lcd != NULL */
    lcd->lcd_shmem_size = shmem_size;
    lcd->lcd_control_segment_size = control_size;
    lcd->lcd_num_procs = size;
    local_coll_data = lcd;
    return &lb_functions;
  }

  if (lam_ssi_coll_verbose >= 100) {
    lam_debug(lam_ssi_coll_did,
              "shmem: one or more processes failed query for communicator %s",
              comm->c_name);
  }
  free_lcd_mem(lcd);
  return NULL;
}
/*
 * Release the shmem resources associated with a communicator.
 *
 * Uses the data attached to the communicator when there is some;
 * otherwise (the module was queried but not selected) falls back to the
 * data cached by the last successful query.  Frees the shared area via
 * lam_ssi_coll_shmem_free_area() and then the local bookkeeping memory.
 *
 * Returns 0 on success, or when there is nothing to release, and
 * LAMERROR when freeing the shared area fails.  In the failure case the
 * local data is left untouched, as before.
 */
int
lam_ssi_coll_shmem_end_query(MPI_Comm comm)
{
  lam_ssi_coll_data_t *lcd;

  if (comm->c_ssi_coll_data == NULL) {
    /* The module was not selected so we need to free the shared memory.
       Get the details from the locally cached pointer */
    lcd = local_coll_data;
  } else {
    lcd = comm->c_ssi_coll_data;
  }

  /* Neither the communicator nor the cache holds anything */
  if (lcd == NULL) {
    return 0;
  }

  if (lam_ssi_coll_shmem_free_area(lcd->lcd_shmemid, lcd->lcd_semid,
                                   lcd->lcd_shared_segment_base)
      == LAMERROR) {
    return LAMERROR;
  }

  /* Drop every reference to the block before freeing it, so a later
     call cannot free it a second time */
  if (comm->c_ssi_coll_data == lcd) {
    comm->c_ssi_coll_data = NULL;
  }
  if (local_coll_data == lcd) {
    local_coll_data = NULL;
  }
  free_lcd_mem(lcd);

  return 0;
}
/*
 * When this module has been selected, it's all about creating and
 * initializing the module-specific data that hangs off the
 * communicator.
 *
 * Attaches the data cached by the query function (local_coll_data) to
 * the communicator, carves the shared segment into its control area
 * and message pool, sets up the per-segment barrier /
 * collective-tracking / message pointers, and hands the shmem action
 * table back through new_actions.
 *
 * Returns 0 on success, LAMERROR if no query data is available.
 */
int
lam_ssi_coll_shmem_init(MPI_Comm comm,
                        const lam_ssi_coll_actions_1_1_0_t **new_actions)
{
  int i;
  lam_ssi_coll_data_t *lcd = local_coll_data;

  /* Without data from a successful query there is nothing to set up */
  if (lcd == NULL) {
    return LAMERROR;
  }

  /* Initialize all the pointers that we need for implementing shared
     memory collectives */
  comm->c_ssi_coll_data = lcd;
  lcd->lcd_segment_in_use = 0;

  /* Determine the number of cpus */
  lcd->lcd_num_cpus = lam_ssi_coll_shmem_get_num_cpu();

  /* Set pointers to the control and message area: the control area
     sits at the start of the segment, the message pool right after it */
  lcd->lcd_shared_control_base = lcd->lcd_shared_segment_base;
  lcd->lcd_shared_message_pool_base = lcd->lcd_shared_segment_base
    + lcd->lcd_control_segment_size;

  /* Initialize rest of the pointers that access shared memory.  The
     control area is laid out as alternating barrier and
     collective-track arrays, each lcd_num_procs * CACHELINESIZE bytes
     long, for indices 0 .. lcd_num_segments. */
  lcd->lcd_barrier[0] = (int*) (lcd->lcd_shared_control_base);
  lcd->lcd_collective_track[0] = (int*)
    ((char*)lcd->lcd_barrier[0] + lcd->lcd_num_procs* CACHELINESIZE);

  for (i = 1; i < lcd->lcd_num_segments + 1; ++i) {
    lcd->lcd_barrier[i] = (int*)
      ((char*)lcd->lcd_collective_track[i - 1] +
       lcd->lcd_num_procs* CACHELINESIZE);
    lcd->lcd_collective_track[i] = (int*)
      ((char*)lcd->lcd_barrier[i] + lcd->lcd_num_procs* CACHELINESIZE);
  }

  /* Split the message pool evenly across the segments */
  for (i = 0; i < lcd->lcd_num_segments; ++i) {
    lcd->lcd_message_segment[i] = lcd->lcd_shared_message_pool_base +
      i * (lcd->lcd_message_pool_size / lcd->lcd_num_segments);
  }

  /* Now that that's all setup, assign the shmem functions to this
     communicator */
  *new_actions = &shmem_functions;

  /* Announce; both messages are gated on the verbosity level like
     every other debug message in this file */
  if (lam_ssi_coll_verbose >= 10) {
    lam_debug(lam_ssi_coll_did, "shmem: init communicator %s", comm->c_name);
    lam_debug(lam_ssi_coll_did, "shmem: done initializing %s", comm->c_name);
  }

  /* All done */
  return 0;
}
/*
 * Per-communicator finalization.
 *
 * Announces the finalize when the coll verbosity level is at least 10,
 * then releases the communicator's shmem resources through
 * lam_ssi_coll_shmem_end_query().  Returns LAMERROR if that release
 * fails, 0 otherwise.
 */
int
lam_ssi_coll_shmem_finalize(MPI_Comm comm)
{
  /* Announce */
  if (lam_ssi_coll_verbose >= 10) {
    lam_debug(lam_ssi_coll_did, "shmem: finalize communicator %s",
              comm->c_name);
  }

  return (lam_ssi_coll_shmem_end_query(comm) == LAMERROR) ? LAMERROR : 0;
}
/*
 * Free a per-communicator data block and the three pointer arrays it
 * owns (barrier, collective-track and message-segment tables).
 *
 * Passing NULL is allowed and does nothing.  The array members must be
 * either NULL or heap pointers; the query function sets them to NULL
 * before allocating them.
 *
 * The pointer is passed by value, so the caller's copy is NOT cleared:
 * callers must not use or free it again after this returns.
 */
static void
free_lcd_mem(lam_ssi_coll_data_t *lcd)
{
  if (lcd == NULL) {
    return;
  }

  /* free(NULL) is a no-op, so the members need no individual guards */
  free(lcd->lcd_barrier);
  free(lcd->lcd_collective_track);
  free(lcd->lcd_message_segment);
  free(lcd);
}
|