[kitten] 2 new revisions pushed by BrianK...@gmail.com on 2013-08-07 19:55 GMT

2 views
Skip to first unread message

kit...@googlecode.com

unread,
Aug 7, 2013, 3:56:18 PM8/7/13
to kitten-...@googlegroups.com
2 new revisions:

Revision: 37d2f1db5164
Branch: default
Author: Brian Kocoloski <bko...@sandia.gov>
Date: Wed Aug 7 10:26:49 2013
Log: Another minor e1000 fix. Interrupts were still enabled before the
card...
http://code.google.com/p/kitten/source/detail?r=37d2f1db5164

Revision: 507764cb0219
Branch: default
Author: Brian Kocoloski <bko...@sandia.gov>
Date: Wed Aug 7 12:54:32 2013
Log: Kitten userspace modifications. The PCT can now be configured to
recei...
http://code.google.com/p/kitten/source/detail?r=507764cb0219

==============================================================================
Revision: 37d2f1db5164
Branch: default
Author: Brian Kocoloski <bko...@sandia.gov>
Date: Wed Aug 7 10:26:49 2013
Log: Another minor e1000 fix. Interrupts were still enabled before the
card was fully
functional
http://code.google.com/p/kitten/source/detail?r=37d2f1db5164

Modified:
/drivers/net/e1000/e1000.c

=======================================
--- /drivers/net/e1000/e1000.c Thu Jul 18 09:24:28 2013
+++ /drivers/net/e1000/e1000.c Wed Aug 7 10:26:49 2013
@@ -598,15 +598,10 @@
uint16_t cmd = pci_read(dev->pci_dev, PCIR_COMMAND, 2);
cmd |= PCIM_CMD_BUSMASTEREN;
pci_write(dev->pci_dev, PCIR_COMMAND, 2, cmd);
-
- // enable all interrupts (and clear existing pending ones)
- mmio_write32(E1000_REG_IMS, 0x1F6DC);
- mmio_read32(E1000_REG_ICR);
-
+
// Initialize the E1000 transmit and receive state
e1000_rx_init(netif);
e1000_tx_init(netif);
-
e1000_rx_enable(netif);

// Register our interrupt handler
@@ -617,6 +612,10 @@
}
printk(KERN_INFO "E1000 IDT vector: %d\n", vector);
irq_request(vector, &e1000_interrupt_handler, 0, "e1000", netif);
+
+ // enable all interrupts (and clear existing pending ones)
+ mmio_write32(E1000_REG_IMS, 0x1F6DC);
+ mmio_read32(E1000_REG_ICR);

return 0;
}

==============================================================================
Revision: 507764cb0219
Branch: default
Author: Brian Kocoloski <bko...@sandia.gov>
Date: Wed Aug 7 12:54:32 2013
Log: Kitten userspace modifications. The PCT can now be configured to
receive
application information from an off node Linux job server. To enable this
mode, use environment variable "WAIT_LAUNCH=1". Alternatively, the PCT
can revert to its original format by not setting this variable.

Multi-node PMI support is added. PMI commits and barriers generated by
applications are sent to the server. Then, when an application issues a
get to its local key-val store, if the request misses, it is forwarded to
the server.

The runtime2/ codebase has been restructured to accommodate these changes.
The Linux job server is located in runtime2/pmi-server. A sample test
file is given in nidpid.txt.

Finally, the get.pl external script now generates 2 Portals source trees. It
configures 1 for Kitten and 1 for Linux.
http://code.google.com/p/kitten/source/detail?r=507764cb0219

Added:
/user/runtime2/pmi-common/common.c
/user/runtime2/pmi-common/common.h
/user/runtime2/pmi-common/hydra.h
/user/runtime2/pmi-common/pmi_server.c
/user/runtime2/pmi-common/pmi_server.h
/user/runtime2/pmi-common/pmip.h
/user/runtime2/pmi-common/pmip_pmi.h
/user/runtime2/pmi-common/pmip_pmi_v1.c
/user/runtime2/pmi-common/simple_pmiutil.c
/user/runtime2/pmi-common/simple_pmiutil.h
/user/runtime2/pmi-common/string.c
/user/runtime2/pmi-server/Makefile
/user/runtime2/pmi-server/nidpid.txt
/user/runtime2/pmi-server/server.c
Deleted:
/user/runtime2/libpmi/simple_pmiutil.c
/user/runtime2/libpmi/simple_pmiutil.h
/user/runtime2/pct/pct_internal.h
/user/runtime2/pct/pmi/common.c
/user/runtime2/pct/pmi/common.h
/user/runtime2/pct/pmi/hydra.h
/user/runtime2/pct/pmi/pmi_server.c
/user/runtime2/pct/pmi/pmip.h
/user/runtime2/pct/pmi/pmip_pmi.h
/user/runtime2/pct/pmi/pmip_pmi_v1.c
/user/runtime2/pct/pmi/string.c
Modified:
/user/Makefile.footer
/user/Makefile.header
/user/external/get.pl
/user/runtime2/libpmi/Makefile
/user/runtime2/libpmi/pmi.c
/user/runtime2/libpmi/test_pmi_hello.c
/user/runtime2/pct/Makefile
/user/runtime2/pct/pct.c
/user/runtime2/pct/pct.h
/user/runtime2/portals_util/Makefile

=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/common.c Wed Aug 7 12:54:32 2013
@@ -0,0 +1,331 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "hydra.h"
+#include "common.h"
+
+char *HYD_dbg_prefix = (char *) "unknown";
+
+/*
+ * Put a PMI command header into its "nothing set yet" state: the command
+ * becomes INVALID_CMD and every integer field becomes -1.
+ */
+void HYD_pmcd_init_header(struct HYD_pmcd_hdr *hdr)
+{
+    /* Command selector */
+    hdr->cmd = INVALID_CMD;
+
+    /* Generic and PMI_CMD fields */
+    hdr->buflen = hdr->pid = hdr->pmi_version = -1;
+
+    /* STDOUT/STDERR and SIGNAL fields */
+    hdr->pgid = hdr->proxy_id = hdr->rank = hdr->signum = -1;
+}
+
+/*
+ * Split a raw PMI command string into its command token and argument list.
+ *
+ * obuf        - NUL-terminated command text; it is copied, never modified.
+ * pmi_version - 1 selects PMI-1 parsing (space-delimited when the text starts
+ *               with "cmd=", newline-delimited otherwise); any other value
+ *               selects PMI-2 parsing (';'-delimited).
+ * pmi_cmd     - receives the second string produced by
+ *               HYDU_strsplit(<first token>, ..., '=').
+ * args        - receives heap-allocated argument strings followed by a NULL
+ *               terminator on success.  NOTE(review): assumed to have room for
+ *               HYD_NUM_TMP_STRINGS entries, as the visible caller provides.
+ *
+ * Returns HYD_SUCCESS, or an error status when the input contains no command
+ * token, has more arguments than fit, or a helper fails.  On failure the
+ * contents of args are unspecified.  Aborts if obuf cannot be copied.
+ */
+HYD_status HYD_pmcd_pmi_parse_pmi_cmd(char *obuf, int pmi_version, char **pmi_cmd,
+                                      char *args[])
+{
+    char *tbuf = NULL, *seg, *str1 = NULL, *cmd;
+    char *buf;
+    char *tmp[HYD_NUM_TMP_STRINGS], *targs[HYD_NUM_TMP_STRINGS];
+    const char *delim;
+    size_t len;
+    int i, j, k;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    /* Make a copy of the original buffer */
+    if ((buf = HYDU_strdup(obuf)) == NULL) {
+        fprintf(stderr, "HYDU_strdup(obuf) failed\n");
+        abort();
+    }
+
+    /* Strip one trailing newline; an empty buffer has nothing to strip */
+    len = strlen(buf);
+    if (len > 0 && buf[len - 1] == '\n')
+        buf[len - 1] = '\0';
+
+    /* strtok() writes into its input, so tokenize a scratch copy */
+    tbuf = HYDU_strdup(buf);
+    HYDU_ERR_CHKANDJUMP(status, tbuf == NULL, HYD_NO_MEM,
+                        "unable to duplicate PMI command\n");
+
+    if (pmi_version == 1) {
+        if (!strncmp(buf, "cmd=", strlen("cmd=")))
+            delim = " ";
+        else
+            delim = "\n";
+
+        /* Here we only get PMI-1 commands or backward compatible
+         * PMI-2 commands, so we always explicitly use the PMI-1
+         * delimiter. This allows us to get backward-compatible PMI-2
+         * commands interleaved with regular PMI-2 commands. */
+        cmd = strtok(tbuf, delim);
+
+        /* Keep the last slot free so targs is always NULL-terminated */
+        for (i = 0; i < HYD_NUM_TMP_STRINGS - 1; i++) {
+            targs[i] = strtok(NULL, delim);
+            if (targs[i] == NULL)
+                break;
+        }
+        targs[i] = NULL;
+
+        /* Make a pass through targs and merge space separated
+         * arguments which are actually part of the same key */
+        k = 0;
+        for (i = 0; targs[i]; i++) {
+            /* Leave room for the terminating NULL stored after the loop */
+            HYDU_ERR_CHKANDJUMP(status, k >= HYD_NUM_TMP_STRINGS - 1, HYD_INTERNAL_ERROR,
+                                "too many PMI arguments\n");
+
+            if (!strrchr(targs[i], ' ')) {
+                /* no spaces */
+                args[k++] = HYDU_strdup(targs[i]);
+            }
+            else {
+                /* space in the argument; each segment is either a new
+                 * key, or a space-separated part of the previous
+                 * key */
+                j = 0;
+                seg = strtok(targs[i], " ");
+                while (1) {
+                    if (!seg || strrchr(seg, '=')) {
+                        /* segment has an '='; it's a start of a new key */
+                        if (j) {
+                            tmp[j++] = NULL;
+                            if (k >= HYD_NUM_TMP_STRINGS - 1) {
+                                HYDU_free_strlist(tmp);
+                                HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                                                    "too many PMI arguments\n");
+                            }
+                            status = HYDU_str_alloc_and_join(tmp, &args[k++]);
+                            HYDU_ERR_POP(status, "error while joining strings\n");
+                            HYDU_free_strlist(tmp);
+                        }
+                        j = 0;
+
+                        if (!seg)
+                            break;
+                    }
+                    else {
+                        /* no '='; part of the previous key.  Two strings are
+                         * added below and a NULL terminator later, so make
+                         * sure all three fit in tmp. */
+                        if (j + 3 > HYD_NUM_TMP_STRINGS) {
+                            tmp[j] = NULL;
+                            HYDU_free_strlist(tmp);
+                            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                                                "PMI argument has too many segments\n");
+                        }
+                        tmp[j++] = HYDU_strdup(" ");
+                    }
+                    tmp[j++] = HYDU_strdup(seg);
+
+                    seg = strtok(NULL, " ");
+                }
+            }
+        }
+        args[k++] = NULL;
+    }
+    else {      /* PMI-v2 */
+        delim = ";";
+
+        cmd = strtok(tbuf, delim);
+
+        /* Keep the last slot free so args is always NULL-terminated */
+        for (i = 0; i < HYD_NUM_TMP_STRINGS - 1; i++) {
+            args[i] = strtok(NULL, delim);
+            if (args[i] == NULL)
+                break;
+            args[i] = HYDU_strdup(args[i]);
+        }
+        args[i] = NULL;
+    }
+
+    /* strtok() yields NULL when the text is empty or only delimiters */
+    HYDU_ERR_CHKANDJUMP(status, cmd == NULL, HYD_INTERNAL_ERROR,
+                        "empty PMI command\n");
+
+    /* Search for the PMI command in our table */
+    status = HYDU_strsplit(cmd, &str1, pmi_cmd, '=');
+    HYDU_ERR_POP(status, "string split returned error\n");
+
+  fn_exit:
+    HYDU_FREE(buf);
+    HYDU_FREE(tbuf);
+    HYDU_FREE(str1);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Convert a NULL-terminated list of "key=value" strings into a token array.
+ *
+ * Each argument is duplicated; the copy is split in place at its first '='
+ * so that key points at the start of the copy and val points just past the
+ * '=' (val is NULL when the argument has no '=').  Only key owns memory.
+ *
+ * args   - NULL-terminated argument list; not modified.
+ * tokens - receives the allocated array, or NULL on failure.
+ * count  - receives the number of entries in args.
+ *
+ * Returns HYD_SUCCESS, or an error status if allocation fails.  An empty
+ * args list is reported as an error because HYDU_MALLOC rejects a zero size.
+ * Release the result with HYD_pmcd_pmi_free_tokens().
+ */
+HYD_status HYD_pmcd_pmi_args_to_tokens(char *args[], struct HYD_pmcd_token **tokens,
+                                       int *count)
+{
+    int i, j, filled = 0;
+    char *arg;
+    HYD_status status = HYD_SUCCESS;
+
+    *tokens = NULL;
+
+    for (i = 0; args[i]; i++);
+    *count = i;
+    HYDU_MALLOC(*tokens, struct HYD_pmcd_token *, *count * sizeof(struct HYD_pmcd_token),
+                status);
+
+    for (i = 0; args[i]; i++) {
+        arg = HYDU_strdup(args[i]);
+        HYDU_ERR_CHKANDJUMP(status, arg == NULL, HYD_NO_MEM,
+                            "unable to duplicate PMI argument\n");
+
+        (*tokens)[i].key = arg;
+        for (j = 0; arg[j] && arg[j] != '='; j++);
+        if (!arg[j]) {
+            (*tokens)[i].val = NULL;
+        }
+        else {
+            arg[j] = 0;
+            (*tokens)[i].val = &arg[++j];
+        }
+        filled = i + 1;
+    }
+
+  fn_exit:
+    return status;
+
+  fn_fail:
+    /* Do not hand a half-built array back to the caller */
+    if (*tokens) {
+        for (i = 0; i < filled; i++)
+            HYDU_FREE((*tokens)[i].key);
+        HYDU_FREE(*tokens);
+        *tokens = NULL;
+    }
+    goto fn_exit;
+}
+
+/*
+ * Free a token array: the key string of each of the first token_count
+ * entries, then the array itself.  val is not freed separately; as built by
+ * HYD_pmcd_pmi_args_to_tokens() it points into key's buffer.
+ */
+void HYD_pmcd_pmi_free_tokens(struct HYD_pmcd_token *tokens, int token_count)
+{
+    int idx = 0;
+
+    while (idx < token_count) {
+        HYDU_FREE(tokens[idx].key);
+        idx++;
+    }
+
+    HYDU_FREE(tokens);
+}
+
+/*
+ * Look up key among the first count tokens.  Returns the val of the first
+ * token whose key compares equal (which may itself be NULL), or NULL when no
+ * token matches.
+ */
+char *HYD_pmcd_pmi_find_token_keyval(struct HYD_pmcd_token *tokens, int count, const char *key)
+{
+    int idx = 0;
+
+    while (idx < count) {
+        if (strcmp(tokens[idx].key, key) == 0)
+            return tokens[idx].val;
+        idx++;
+    }
+
+    return NULL;
+}
+
+/*
+ * Allocate an empty key-value store.  The store is named
+ * "kvs_<process id>_<pgid>" and starts with no key/value pairs.
+ *
+ * Returns HYD_SUCCESS, or the status set by HYDU_MALLOC on failure.
+ */
+HYD_status HYD_pmcd_pmi_allocate_kvs(struct HYD_pmcd_pmi_kvs ** kvs, int pgid)
+{
+    struct HYD_pmcd_pmi_kvs *store;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    HYDU_MALLOC(*kvs, struct HYD_pmcd_pmi_kvs *, sizeof(struct HYD_pmcd_pmi_kvs), status);
+
+    store = *kvs;
+    store->key_pair = NULL;
+    HYDU_snprintf(store->kvs_name, PMI_MAXKVSLEN, "kvs_%d_%d", (int) getpid(), pgid);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Free a key-value store: every pair on its list, then the store itself.
+ * kvs_list must not be NULL.
+ */
+void HYD_pmcd_free_pmi_kvs_list(struct HYD_pmcd_pmi_kvs *kvs_list)
+{
+    struct HYD_pmcd_pmi_kvs_pair *node, *following;
+
+    HYDU_FUNC_ENTER();
+
+    /* Save the successor before releasing each node */
+    for (node = kvs_list->key_pair; node != NULL; node = following) {
+        following = node->next;
+        HYDU_FREE(node);
+    }
+    HYDU_FREE(kvs_list);
+
+    HYDU_FUNC_EXIT();
+}
+
+/*
+ * Append a key/value pair to the end of a key-value store.
+ *
+ * key, val - copied into the new pair; truncated to PMI_MAXKEYLEN - 1 and
+ *            PMI_MAXVALLEN - 1 characters respectively.
+ * kvs      - store to extend.
+ * ret      - set to 0 when the pair was added, or -1 when the (possibly
+ *            truncated) key is already present; in that case the store is
+ *            left untouched.
+ *
+ * Returns HYD_SUCCESS (also for a duplicate key), or the status set by
+ * HYDU_MALLOC if the pair cannot be allocated.
+ */
+HYD_status HYD_pmcd_pmi_add_kvs(const char *key, char *val, struct HYD_pmcd_pmi_kvs *kvs,
+                                int *ret)
+{
+    struct HYD_pmcd_pmi_kvs_pair *key_pair, *run, *tail = NULL;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    HYDU_MALLOC(key_pair, struct HYD_pmcd_pmi_kvs_pair *, sizeof(struct HYD_pmcd_pmi_kvs_pair),
+                status);
+    HYDU_snprintf(key_pair->key, PMI_MAXKEYLEN, "%s", key);
+    HYDU_snprintf(key_pair->val, PMI_MAXVALLEN, "%s", val);
+    key_pair->next = NULL;
+
+    *ret = 0;
+
+    /* Walk the whole list, including the last node, remembering the tail */
+    for (run = kvs->key_pair; run; run = run->next) {
+        if (!strcmp(run->key, key_pair->key)) {
+            /* duplicate key found: drop the new pair instead of linking it
+             * in, which would cut off every pair that follows the match */
+            *ret = -1;
+            HYDU_FREE(key_pair);
+            goto fn_exit;
+        }
+        tail = run;
+    }
+
+    if (tail)
+        tail->next = key_pair;
+    else
+        kvs->key_pair = key_pair;
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Serialize a key-value store into one newly allocated string of the form
+ * "key1;val1,key2;val2,": each pair is written as key, ';', value, ','.
+ * An empty store yields an empty string.
+ *
+ * kvs      - store to serialize; not modified.
+ * kvs_list - receives the allocated string; the caller frees it.
+ *
+ * Returns HYD_SUCCESS, or the status set by HYDU_MALLOC on failure.
+ */
+HYD_status HYD_pmcd_pmi_get_kvs_string(struct HYD_pmcd_pmi_kvs * kvs, char ** kvs_list)
+{
+    struct HYD_pmcd_pmi_kvs_pair *run;
+    size_t length, key_len, val_len;
+    char *out;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    /* One byte for the terminating NUL, plus key + ';' + val + ',' per pair */
+    length = 1;
+    for (run = kvs->key_pair; run; run = run->next) {
+        length += strlen(run->key) + strlen(run->val) + 2;
+    }
+
+    HYDU_MALLOC(*kvs_list, char *, length, status);
+
+    /* Write each pair straight into the output; no temporary copies */
+    out = *kvs_list;
+    for (run = kvs->key_pair; run; run = run->next) {
+        key_len = strlen(run->key);
+        val_len = strlen(run->val);
+
+        memcpy(out, run->key, key_len);
+        out += key_len;
+        *out++ = ';';
+        memcpy(out, run->val, val_len);
+        out += val_len;
+        *out++ = ',';
+    }
+    *out = '\0';
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Rebuild a key-value store from a string of "key;val," pairs.
+ *
+ * kvs_list - text to parse.  It is tokenized in place with strtok() on the
+ *            delimiters ';', ',' and '\n', so the buffer is modified.
+ *            Because strtok() skips runs of delimiters, an empty key or
+ *            value cannot be represented and shifts the pairs that follow.
+ * kvs      - receives the new store (with an empty name), or NULL on failure.
+ *
+ * The duplicate-key indicator reported by HYD_pmcd_pmi_add_kvs() is ignored.
+ *
+ * Returns HYD_SUCCESS; HYD_FAILURE if a key has no value; or the status of a
+ * failed allocation.
+ */
+HYD_status HYD_pmcd_pmi_get_kvs_struct(char * kvs_list, struct HYD_pmcd_pmi_kvs ** kvs)
+{
+    HYD_status status = HYD_SUCCESS;
+    char *key, *val;
+    int ret;
+
+    HYDU_FUNC_ENTER();
+
+    *kvs = NULL;
+    HYDU_MALLOC(*kvs, struct HYD_pmcd_pmi_kvs *, sizeof(struct HYD_pmcd_pmi_kvs), status);
+    (*kvs)->kvs_name[0] = '\0';
+    (*kvs)->key_pair = NULL;
+
+    for (key = HYDU_strtok(kvs_list, ";,\n"); key; key = HYDU_strtok(NULL, ";,\n")) {
+        val = HYDU_strtok(NULL, ";,\n");
+        HYDU_ERR_CHKANDJUMP(status, val == NULL, HYD_FAILURE,
+                            "key \"%s\" has no value\n", key);
+
+        status = HYD_pmcd_pmi_add_kvs(key, val, *kvs, &ret);
+        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
+    }
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    /* Release whatever was built so the caller never sees a partial store */
+    if (*kvs) {
+        HYD_pmcd_free_pmi_kvs_list(*kvs);
+        *kvs = NULL;
+    }
+    goto fn_exit;
+}
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/common.h Wed Aug 7 12:54:32 2013
@@ -0,0 +1,85 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef COMMON_H_INCLUDED
+#define COMMON_H_INCLUDED
+
+#include "hydra.h"
+
+/* Generic definitions */
+#define PMI_MAXKEYLEN (64) /* max length of key in keyval space */
+#define PMI_MAXVALLEN (1024) /* max length of value in keyval space */
+#define PMI_MAXKVSLEN (256) /* max length of various names */
+
+/* One key/value entry of a KVS.  Entries form a singly linked list through
+ * next; key and val are fixed-size, NUL-terminated buffers. */
+struct HYD_pmcd_pmi_kvs_pair {
+ char key[PMI_MAXKEYLEN];
+ char val[PMI_MAXVALLEN];
+ struct HYD_pmcd_pmi_kvs_pair *next;
+};
+
+/* A named key-value store: kvs_name plus the head of its pair list
+ * (key_pair is NULL while the store is empty). */
+struct HYD_pmcd_pmi_kvs {
+ char kvs_name[PMI_MAXKVSLEN]; /* Name of this kvs */
+ struct HYD_pmcd_pmi_kvs_pair *key_pair;
+};
+
+/* Command header.  cmd selects the command; the remaining fields are grouped
+ * below by the command(s) they are labelled for.  HYD_pmcd_init_header()
+ * resets cmd to INVALID_CMD and every int field to -1. */
+struct HYD_pmcd_hdr {
+ /* The set of commands supported */
+ enum HYD_pmcd_cmd {
+ INVALID_CMD = 0, /* for sanity testing */
+
+ /* UI to proxy commands */
+ PROC_INFO,
+ CKPOINT,
+ PMI_RESPONSE,
+ SIGNAL,
+ STDIN,
+
+ /* Proxy to UI commands */
+ PID_LIST,
+ EXIT_STATUS,
+ PMI_CMD,
+ STDOUT,
+ STDERR,
+ PROCESS_TERMINATED
+ } cmd;
+
+ /* Generic */
+ int buflen;
+
+ /* PMI_CMD */
+ int pid; /* ID of the requesting process */
+ int pmi_version; /* PMI version */
+
+ /* STDOUT/STDERR */
+ int pgid;
+ int proxy_id;
+ int rank;
+
+ /* SIGNAL */
+ int signum;
+};
+
+/* One parsed "key=value" argument.  As built by
+ * HYD_pmcd_pmi_args_to_tokens(), key owns the allocation and val is either
+ * NULL (no '=' present) or a pointer into key's buffer just past the '='. */
+struct HYD_pmcd_token {
+ char *key;
+ char *val;
+};
+
+void HYD_pmcd_init_header(struct HYD_pmcd_hdr *hdr);
+HYD_status HYD_pmcd_pmi_parse_pmi_cmd(char *buf, int pmi_version, char
**pmi_cmd,
+ char *args[]);
+HYD_status HYD_pmcd_pmi_args_to_tokens(char *args[], struct HYD_pmcd_token
**tokens,
+ int *count);
+void HYD_pmcd_pmi_free_tokens(struct HYD_pmcd_token *tokens, int
token_count);
+char *HYD_pmcd_pmi_find_token_keyval(struct HYD_pmcd_token *tokens, int
count,
+ const char *key);
+HYD_status HYD_pmcd_pmi_allocate_kvs(struct HYD_pmcd_pmi_kvs **kvs, int
pgid);
+void HYD_pmcd_free_pmi_kvs_list(struct HYD_pmcd_pmi_kvs *kvs_list);
+HYD_status HYD_pmcd_pmi_add_kvs(const char *key, char *val, struct
HYD_pmcd_pmi_kvs *kvs,
+ int *ret);
+HYD_status HYD_pmcd_pmi_get_kvs_string(struct HYD_pmcd_pmi_kvs *kvs, char
** kvs_list);
+HYD_status HYD_pmcd_pmi_get_kvs_struct(char * kvs_list, struct
HYD_pmcd_pmi_kvs **kvs);
+
+#endif /* COMMON_H_INCLUDED */
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/hydra.h Wed Aug 7 12:54:32 2013
@@ -0,0 +1,211 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef HYDRA_H_INCLUDED
+#define HYDRA_H_INCLUDED
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <portals4.h>
+#include <portals4_util.h>
+
+extern char *HYD_dbg_prefix;
+
+#define HYD_TMPBUF_SIZE (64 * 1024)
+#define HYD_TMP_STRLEN 1024
+#define HYD_NUM_TMP_STRINGS 1000
+
+/* Status information */
+typedef enum {
+ HYD_SUCCESS = 0,
+ HYD_FAILURE, /* general failure */
+
+ /* Silent errors */
+ HYD_GRACEFUL_ABORT,
+ HYD_TIMED_OUT,
+
+ /* Regular errors */
+ HYD_NO_MEM,
+ HYD_SOCK_ERROR,
+ HYD_INVALID_PARAM,
+ HYD_INTERNAL_ERROR
+} HYD_status;
+
+typedef unsigned short HYD_event_t;
+
+/* Argument matching functions */
+struct HYD_arg_match_table {
+ const char *arg;
+ HYD_status(*handler_fn) (char *arg, char ***argv_p);
+ void (*help_fn) (void);
+};
+
+struct HYD_env_global {
+ struct HYD_env *system;
+ struct HYD_env *user;
+ struct HYD_env *inherited;
+ char *prop;
+};
+
+/* Executable information */
+struct HYD_exec {
+ char *exec[HYD_NUM_TMP_STRINGS];
+ char *wdir;
+
+ int proc_count;
+ struct HYD_env *user_env;
+ char *env_prop;
+
+ int appnum;
+
+ struct HYD_exec *next;
+};
+
+/* Global user parameters */
+struct HYD_user_global {
+ /* RMK */
+ char *rmk;
+
+ /* Launcher */
+ char *launcher;
+ char *launcher_exec;
+
+ /* Processor topology */
+ char *binding;
+ char *topolib;
+
+ /* Checkpoint restart */
+ char *ckpointlib;
+ char *ckpoint_prefix;
+ int ckpoint_num;
+
+ /* Demux engine */
+ char *demux;
+
+ /* Network interface */
+ char *iface;
+
+ /* Other random parameters */
+ int enablex;
+ int debug;
+
+ int auto_cleanup;
+
+ struct HYD_env_global global_env;
+};
+
+/* Disable for now; we might add something here in the future */
+#define HYDU_FUNC_ENTER() do {} while (0)
+#define HYDU_FUNC_EXIT() do {} while (0)
+
+#define HYDU_dump_prefix(fp) \
+ { \
+ fprintf(fp, "[%s] ", HYD_dbg_prefix); \
+ fflush(fp); \
+ }
+
+#define HYDU_dump_noprefix(fp, ...) \
+ { \
+ fprintf(fp, __VA_ARGS__); \
+ fflush(fp); \
+ }
+
+#define HYDU_dump(fp, ...) \
+ { \
+ HYDU_dump_prefix(fp); \
+ HYDU_dump_noprefix(fp, __VA_ARGS__); \
+ }
+
+#define HYDU_error_printf(...) \
+ { \
+ HYDU_dump_prefix(stderr); \
+ HYDU_dump_noprefix(stderr, "%s (%d): ", __FILE__, __LINE__); \
+ HYDU_dump_noprefix(stderr, __VA_ARGS__); \
+ }
+
+#define HYD_SILENT_ERROR(status) \
+ (((status) == HYD_GRACEFUL_ABORT) || ((status) == HYD_TIMED_OUT))
+
+#define HYDU_ASSERT(x, status) \
+ { \
+ if (!(x)) { \
+ HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, \
+ "assert (%s) failed\n", #x); \
+ } \
+ }
+
+#define HYDU_IGNORE_TIMEOUT(status) \
+ { \
+ if ((status) == HYD_TIMED_OUT) \
+ (status) = HYD_SUCCESS; \
+ }
+
+#define HYDU_ERR_POP(status, ...) \
+ { \
+ if (status && !HYD_SILENT_ERROR(status)) { \
+ HYDU_error_printf(__VA_ARGS__); \
+ goto fn_fail; \
+ } \
+ else if (HYD_SILENT_ERROR(status)) { \
+ goto fn_exit; \
+ } \
+ }
+
+#define HYDU_ERR_SETANDJUMP(status, error, ...) \
+ { \
+ status = error; \
+ HYDU_ERR_POP(status, __VA_ARGS__); \
+ }
+
+#define HYDU_ERR_CHKANDJUMP(status, chk, error, ...) \
+ { \
+ if ((chk)) \
+ HYDU_ERR_SETANDJUMP(status, error, __VA_ARGS__); \
+ }
+
+#define HYDU_MALLOC(p, type, size, status) \
+ { \
+ HYDU_ASSERT(size, status); \
+ (p) = (type) HYDU_malloc((size)); \
+ if ((p) == NULL) \
+ HYDU_ERR_SETANDJUMP((status), HYD_NO_MEM, \
+ "failed to allocate %d bytes\n", \
+ (int) (size)); \
+ }
+
+#define HYDU_FREE(p) \
+ { \
+ HYDU_free((void *) p); \
+ }
+
+#define HYDU_mem_init()
+#define HYDU_strdup strdup
+#define HYDU_strtok strtok
+#define HYDU_strcat strcat
+#define HYDU_strncpy strncpy
+#define HYDU_malloc malloc
+#define HYDU_free free
+#define HYDU_snprintf snprintf
+
+HYD_status HYDU_list_append_strlist(char **exec, char **client_arg);
+HYD_status HYDU_print_strlist(char **args);
+void HYDU_free_strlist(char **args);
+HYD_status HYDU_str_alloc_and_join(char **strlist, char **strjoin);
+HYD_status HYDU_strsplit(char *str, char **str1, char **str2, char sep);
+HYD_status HYDU_strdup_list(char *src[], char **dest[]);
+char *HYDU_size_t_to_str(size_t x);
+char *HYDU_int_to_str(int x);
+char *HYDU_int_to_str_pad(int x, int maxlen);
+int HYDU_strlist_lastidx(char **strlist);
+char **HYDU_str_to_strlist(char *str);
+
+#endif
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/pmi_server.c Wed Aug 7 12:54:32 2013
@@ -0,0 +1,96 @@
+#include "hydra.h"
+#include "pmip.h"
+#include "pmip_pmi.h"
+#include "pmi_server.h"
+#include <pct.h>
+
+
+#define CLIENT_RXQ_NUM_BLOCKS 2
+#define CLIENT_RXQ_BLOCK_SIZE (PCT_MAX_PMI_MSG * 64)
+
+
+/* Structure that holds generic PMI state */
+struct HYD_pmcd_pmip HYD_pmcd_pmip;
+
+/**
+ * Initializes the PCT's PMI state.
+ * This gets us setup to receive PMI requests from clients and send responses.
+ *
+ * Sets up, in order: the client receive event queue and Portals queue on
+ * pt_index, the transmit event queue, a transmit buffer of PCT_MAX_PMI_MSG
+ * bytes bound to a memory descriptor, and the node-level key-value store
+ * kept in the global HYD_pmcd_pmip.local.kvs (allocated with pgid 0).
+ * The state->server fields are not touched here.
+ *
+ * Returns 0.  A KVS allocation failure aborts the process.
+ * NOTE(review): PTL_CHECK and MALLOC are defined outside this file;
+ * presumably they abort on failure -- confirm, since the MALLOC result is
+ * used without a check.
+ */
+int
+pmi_init(pct_t *pct, app_t *app, ptl_pt_index_t pt_index, ptl_process_t
match_id)
+{
+ pmi_state_t *state = &app->pmi_state;
+ int status;
+
+ /* Initialize the Portals RX queue for incoming client requests */
+ PTL_CHECK(PtlEQAlloc(pct->ni_h, 1024, &state->client.rx_eq_h));
+
+ /* The queue accepts any user ID (PTL_UID_ANY); the commented-out line
+ * below is an earlier variant that restricted it to app->user_id. */
+ PTL_CHECK(
+ //ptl_queue_init(pct->ni_h, app->user_id, match_id, pt_index,
+ ptl_queue_init(pct->ni_h, PTL_UID_ANY, match_id, pt_index,
+ state->client.rx_eq_h,
+ CLIENT_RXQ_NUM_BLOCKS, CLIENT_RXQ_BLOCK_SIZE,
+ PCT_MAX_PMI_MSG, &state->client.rx_q)
+ );
+
+ state->client.pt_index = pt_index;
+
+ /* Initialize the Portals state needed to send replies to clients */
+ PTL_CHECK(PtlEQAlloc(pct->ni_h, 4, &state->client.tx_eq_h));
+
+ state->client.tx_buf_size = PCT_MAX_PMI_MSG;
+ state->client.tx_buf = MALLOC(state->client.tx_buf_size);
+
+ /* Describe the transmit buffer so it can be bound once and reused */
+ state->client.tx_md.start = state->client.tx_buf;
+ state->client.tx_md.length = state->client.tx_buf_size;
+ state->client.tx_md.options = 0;
+ state->client.tx_md.eq_handle = state->client.tx_eq_h;
+ state->client.tx_md.ct_handle = PTL_CT_NONE;
+
+ PTL_CHECK(PtlMDBind(pct->ni_h, &state->client.tx_md,
&state->client.tx_md_h));
+
+ /* Initialize the PMI key value store */
+ status = HYD_pmcd_pmi_allocate_kvs(&HYD_pmcd_pmip.local.kvs, 0);
+ if (status) {
+ fprintf(stderr, "unable to allocate kvs space\n");
+ abort();
+ }
+
+ return 0;
+}
+
+/**
+ * Handles one incoming PMI request.
+ *
+ * The event payload (ev->start) is printed, parsed as a PMI-1 command, and
+ * passed to the first entry of the HYD_pmcd_pmip_pmi_v1 table whose name
+ * matches.  NOTE(review): the payload is treated as a NUL-terminated string;
+ * confirm that senders always include the terminator.
+ *
+ * Returns HYD_SUCCESS, the status of the parser or handler on failure, or
+ * HYD_INTERNAL_ERROR when the message carries no command name.  A command
+ * with no matching handler is reported on stderr and otherwise ignored.
+ */
+int
+pmi_process_event(pct_t *pct, app_t *app, const ptl_event_t *ev)
+{
+    int status = HYD_SUCCESS;
+    char *cmd = NULL;
+    char *args[HYD_NUM_TMP_STRINGS] = { 0 };
+    struct HYD_pmcd_pmip_pmi_handle *h;
+
+    HYDU_FUNC_ENTER();
+
+    printf("ev = %s\n", (char *)ev->start);
+
+    /* Parse out the command and trailing keyval strings */
+    status = HYD_pmcd_pmi_parse_pmi_cmd((char *)ev->start, 1, &cmd, args);
+    HYDU_ERR_POP(status, "unable to parse PMI command\n");
+
+    /* The parser's string split is defined elsewhere; guard against it
+     * leaving the command name unset before handing it to strcmp() */
+    HYDU_ERR_CHKANDJUMP(status, cmd == NULL, HYD_INTERNAL_ERROR,
+                        "PMI message has no command name\n");
+
+    /* Call the handler for the command */
+    for (h = HYD_pmcd_pmip_pmi_v1; h->handler; h++) {
+        if (!strcmp(cmd, h->cmd)) {
+            status = h->handler(pct, app, ev, args);
+            HYDU_ERR_POP(status, "PMI handler returned error\n");
+            goto fn_exit;
+        }
+    }
+
+    /* Fell off the end of the table: make the dropped request visible */
+    HYDU_error_printf("no handler for PMI command: %s\n", cmd);
+
+  fn_exit:
+    HYDU_FREE(cmd);
+    HYDU_free_strlist(args);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/pmi_server.h Wed Aug 7 12:54:32 2013
@@ -0,0 +1,102 @@
+#ifndef PMI_SERVER_H
+#define PMI_SERVER_H
+
+#include <sched.h>
+#include <lwk/liblwk.h>
+#include <portals4.h>
+
+
+#define PPE_INFO_STRING_SIZE 512
+
+
+// Aborts the program if x evaluates to a non-zero status code, after
+// printing the failing expression, its value, and the source location.
+// x is evaluated exactly once.  The temporary has a macro-private name so
+// that CHECK(status) works when the caller has its own variable "status".
+#define CHECK(x) \
+    do { \
+        int check_status_ = (x); \
+        if (check_status_ != 0) { \
+            fprintf(stderr, "'%s' failed, status=%d\n", #x, check_status_); \
+            fprintf(stderr, "In %s, %s() line %u\n", \
+                    __FILE__, __FUNCTION__, (unsigned int)__LINE__); \
+            abort(); \
+        } \
+    } while (0)
+
+
+typedef struct pmi_state {
+ struct client {
+ // Portals state for handling client requests
+ ptl_queue_t rx_q;
+ ptl_handle_eq_t rx_eq_h;
+
+ // Portals state for sending responses to clients
+ ptl_handle_eq_t tx_eq_h;
+ ptl_md_t tx_md;
+ ptl_handle_md_t tx_md_h;
+ size_t tx_buf_size;
+ char * tx_buf;
+
+ // Client's pt index
+ ptl_pt_index_t pt_index;
+ } client;
+
+ struct {
+ // Server's pt index
+ ptl_pt_index_t pt_index;
+
+ // Server's NID/PID
+ ptl_process_t ptl_id;
+ } server;
+} pmi_state_t;
+
+typedef struct process {
+ id_t local_index; // Local index of the process
+
+ // Misc IDs, many of these get copied to the start_state struct below
+ id_t task_id;
+ id_t aspace_id;
+ id_t cpu_id;
+
+ // This structure tells Kitten how to start a new task executing
+ start_state_t start_state;
+
+ // This string contains information about the Portals Progress Engine
(PPE)
+ char ppe_info[PPE_INFO_STRING_SIZE];
+
+ // Portals address of the process
+ ptl_process_t ptl_id;
+} process_t;
+
+
+typedef struct app {
+ int world_size; // For MPI, size of MPI_COMM_WORLD
+ int universe_size; // For MPI, world_size + spawn capability
+
+ int local_size; // Number of processes running locally
+ process_t * procs; // Array of descriptors, one per local
process
+
+ id_t user_id;
+ id_t group_id;
+
+ cpu_set_t avail_cpus; // Bitmap of CPUs that app procs can run
on
+
+ pmi_state_t pmi_state; // State needed for app process <-> PCT
comm
+
+ int base_rank; // Rank that our app processes start from
+} app_t;
+
+
+typedef struct pct {
+ id_t aspace_id; // The PCT's address space ID
+ app_t app; // Metadata for app PCT is managing
+
+ ptl_handle_ni_t ni_h; // The network interface handle the PCT
+ // uses for Portals communication.
+ ptl_process_t ptl_id; // The PCT's Portals ID
+} pct_t;
+
+// PMI server prototypes
+int pmi_init(pct_t *pct, app_t *app, ptl_pt_index_t pt_index,
ptl_process_t match_id);
+int pmi_process_event(pct_t *pct, app_t *app, const ptl_event_t *ev);
+
+
+#endif
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/pmip.h Wed Aug 7 12:54:32 2013
@@ -0,0 +1,91 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef PMIP_H_INCLUDED
+#define PMIP_H_INCLUDED
+
+#include "hydra.h"
+#include "common.h"
+
+struct HYD_pmcd_pmip_map {
+ int left;
+ int current;
+ int right;
+ int total;
+};
+
+struct HYD_pmcd_pmip {
+ struct HYD_user_global user_global;
+
+ struct {
+ struct HYD_pmcd_pmip_map global_core_map;
+ struct HYD_pmcd_pmip_map filler_process_map;
+
+ int global_process_count;
+ char *jobid;
+
+ /* PMI */
+ char *pmi_fd;
+ char *pmi_port;
+ int pmi_rank; /* If this is -1, we auto-generate it */
+ char *pmi_process_mapping;
+ } system_global; /* Global system parameters */
+
+ struct {
+ /* Upstream server contact information */
+ char *server_name;
+ int server_port;
+ int control;
+ } upstream;
+
+ /* Currently our downstream only consists of actual MPI
+ * processes */
+ struct {
+ int *out;
+ int *err;
+ int in;
+
+ int *pid;
+ int *exit_status;
+
+ int *pmi_rank;
+ int *pmi_fd;
+ int *pmi_fd_active;
+
+ int forced_cleanup;
+ } downstream;
+
+ /* Proxy details */
+ struct {
+ int id;
+ int pgid;
+ char *iface_ip_env_name;
+ char *hostname;
+ char *local_binding;
+
+ int proxy_core_count;
+ int proxy_process_count;
+
+ char *spawner_kvs_name;
+ struct HYD_pmcd_pmi_kvs *kvs; /* Node-level KVS space for node
attributes */
+
+ char **ckpoint_prefix_list;
+
+ int retries;
+ } local;
+
+ /* Process segmentation information for this proxy */
+ struct HYD_exec *exec_list;
+};
+
+extern struct HYD_pmcd_pmip HYD_pmcd_pmip;
+extern struct HYD_arg_match_table HYD_pmcd_pmip_match_table[];
+
+HYD_status HYD_pmcd_pmip_get_params(char **t_argv);
+void HYD_pmcd_pmip_kill_localprocs(void);
+HYD_status HYD_pmcd_pmip_control_cmd_cb(int fd, HYD_event_t events, void
*userp);
+
+#endif /* PMIP_H_INCLUDED */
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/pmip_pmi.h Wed Aug 7 12:54:32 2013
@@ -0,0 +1,27 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef PMIP_PMI_H_INCLUDED
+#define PMIP_PMI_H_INCLUDED
+
+#include "hydra.h"
+#include "common.h"
+#include "pmi_server.h"
+
+/* PMI-1 specific definitions */
+extern struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_v1;
+
+/* PMI-2 specific definitions */
+extern struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_v2;
+
+/* One entry of a PMI command dispatch table: the command name and the
+ * function that services it.  pmi_process_event() scans a table of these
+ * until it reaches an entry whose handler is NULL. */
+struct HYD_pmcd_pmip_pmi_handle {
+ const char *cmd;
+ HYD_status(*handler) (pct_t *pct, app_t *app, const ptl_event_t *ev,
char *args[]);
+};
+
+extern struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_handle;
+
+#endif /* PMIP_PMI_H_INCLUDED */
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/pmip_pmi_v1.c Wed Aug 7 12:54:32 2013
@@ -0,0 +1,893 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "pmip_pmi.h"
+#include "pmip.h"
+#include "pct.h"
+
+/*
+ * Send a PMI command string to a client process.
+ *
+ * The text is copied into the shared transmit buffer (silently truncated to
+ * tx_buf_size - 1 characters), put to target_id on the client portal table
+ * index, and the call then blocks until an event arrives on the transmit
+ * event queue.
+ *
+ * Always returns HYD_SUCCESS.
+ * NOTE(review): PTL_CHECK / PTL_ASSERT are defined elsewhere; presumably
+ * they abort on failure -- confirm.
+ */
+static HYD_status
+send_cmd_downstream(pmi_state_t *state,
+ ptl_process_t target_id,
+ const char *cmd)
+{
+ ptl_event_t tx_ev;
+
+ /* Copy the command to the pre-registered transmit buffer */
+ strncpy(state->client.tx_buf, cmd, state->client.tx_buf_size);
+ /* strncpy does not terminate on truncation, so force a NUL */
+ state->client.tx_buf[state->client.tx_buf_size - 1] = '\0';
+
+ /* Send the response to the client (string plus its terminating NUL) */
+ PTL_CHECK(
+ PtlPut(state->client.tx_md_h, 0, strlen(state->client.tx_buf) + 1,
+ PTL_NO_ACK_REQ, target_id, state->client.pt_index,
+ 0, 0, NULL, 0)
+ );
+
+ /* Wait for the send event before the buffer may be reused */
+ PTL_CHECK(PtlEQWait(state->client.tx_eq_h, &tx_ev));
+ PTL_ASSERT(tx_ev.type == PTL_EVENT_SEND);
+
+ return HYD_SUCCESS;
+}
+
+/*
+ * Send a PMI command string to the job server.
+ *
+ * Identical to the client-bound send except that the message is put to the
+ * server's portal table index (state->server.pt_index).  The client transmit
+ * buffer, memory descriptor and event queue are reused, and the text is
+ * silently truncated to tx_buf_size - 1 characters.
+ *
+ * Always returns HYD_SUCCESS.
+ * NOTE(review): PTL_CHECK / PTL_ASSERT are defined elsewhere; presumably
+ * they abort on failure -- confirm.
+ */
+static HYD_status
+send_cmd_to_server(pmi_state_t *state,
+ ptl_process_t target_id,
+ const char *cmd)
+{
+ ptl_event_t tx_ev;
+
+ /* Copy the command to the pre-registered transmit buffer */
+ strncpy(state->client.tx_buf, cmd, state->client.tx_buf_size);
+ /* strncpy does not terminate on truncation, so force a NUL */
+ state->client.tx_buf[state->client.tx_buf_size - 1] = '\0';
+
+ /* Send the command to the server (string plus its terminating NUL) */
+ PTL_CHECK(
+ PtlPut(state->client.tx_md_h, 0, strlen(state->client.tx_buf) + 1,
+ PTL_NO_ACK_REQ, target_id, state->server.pt_index,
+ 0, 0, NULL, 0)
+ );
+
+ /* Wait for the send event before the buffer may be reused */
+ PTL_CHECK(PtlEQWait(state->client.tx_eq_h, &tx_ev));
+ PTL_ASSERT(tx_ev.type == PTL_EVENT_SEND);
+
+ return HYD_SUCCESS;
+}
+
+/*
+ * Read the integer that follows the first '=' of a "key=value" argument.
+ * Returns 0 and stores the number in *out, or -1 when arg is NULL, has no
+ * '=', or has nothing after it.
+ */
+static int fn_init_arg_value(const char *arg, int *out)
+{
+    const char *eq;
+
+    if (arg == NULL)
+        return -1;
+
+    eq = strchr(arg, '=');
+    if (eq == NULL || eq[1] == '\0')
+        return -1;
+
+    *out = atoi(eq + 1);
+    return 0;
+}
+
+/*
+ * Handler for the PMI "init" command.
+ *
+ * Expects three "key=value" arguments, in order: PMI version, PMI
+ * subversion, and the sender's task ID.  Accepts version 1 with subversion
+ * <= 1, or version 2 with subversion 0.  Records the sender's Portals
+ * NID/PID in the matching app->procs entry (if any) and replies with
+ * "response_to_init".
+ *
+ * Returns HYD_SUCCESS.  A malformed command, a version mismatch, or a failed
+ * send aborts the process (see fn_fail).
+ */
+static HYD_status fn_init(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    int pmi_version, pmi_subversion, task_id, i;
+    const char *tmp = NULL;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    /* args is NULL-terminated; the short-circuit || stops at the first
+     * missing or malformed entry, so no slot past the terminator is read */
+    if (fn_init_arg_value(args[0], &pmi_version) ||
+        fn_init_arg_value(args[1], &pmi_subversion) ||
+        fn_init_arg_value(args[2], &task_id)) {
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "malformed PMI init command\n");
+    }
+
+    /* The replies are constant strings, so no copy needs to be made */
+    if (pmi_version == 1 && pmi_subversion <= 1)
+        tmp = "cmd=response_to_init pmi_version=1 pmi_subversion=1 rc=0\n";
+    else if (pmi_version == 2 && pmi_subversion == 0)
+        tmp = "cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0\n";
+    else        /* PMI version mismatch */
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "PMI version mismatch; %d.%d\n", pmi_version, pmi_subversion);
+
+    /* Remember the Portals NID/PID for the process */
+    for (i = 0; i < app->local_size; i++) {
+        if (task_id == app->procs[i].task_id) {
+            app->procs[i].ptl_id.phys.nid = ev->initiator.phys.nid;
+            app->procs[i].ptl_id.phys.pid = ev->initiator.phys.pid;
+            break;
+        }
+    }
+
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, tmp);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * "app_init": record the job layout announced by the server.
+ *
+ * Reads base_rank, local_size, world_size and universe_size from the
+ * command tokens and stores them in app.  The sender's Portals nid/pid and
+ * PCT_PMI_SERVER_PT_INDEX are remembered as the server's address.
+ *
+ * Returns HYD_SUCCESS; a missing token or a tokenizer failure ends in
+ * abort().
+ */
+static HYD_status fn_app_init(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    const char *base_rank, *local_size, *world_size, *universe_size;
+    struct HYD_pmcd_token *tokens;
+    int token_count;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
+    HYDU_ERR_POP(status, "unable to convert args to tokens\n");
+
+    base_rank = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "base_rank");
+    HYDU_ERR_CHKANDJUMP(status, base_rank == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: base_rank\n");
+
+    local_size = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "local_size");
+    HYDU_ERR_CHKANDJUMP(status, local_size == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: local_size\n");
+
+    world_size = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "world_size");
+    HYDU_ERR_CHKANDJUMP(status, world_size == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: world_size\n");
+
+    universe_size = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "universe_size");
+    HYDU_ERR_CHKANDJUMP(status, universe_size == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: universe_size\n");
+
+    /* Remember the Portals NID/PID for the server */
+    app->pmi_state.server.ptl_id.phys.nid = ev->initiator.phys.nid;
+    app->pmi_state.server.ptl_id.phys.pid = ev->initiator.phys.pid;
+
+    /* Remember the Portals pt index of the server */
+    app->pmi_state.server.pt_index = PCT_PMI_SERVER_PT_INDEX;
+
+    /* Set our local app's information */
+    app->base_rank = atoi(base_rank);
+    app->local_size = atoi(local_size);
+    app->world_size = atoi(world_size);
+    app->universe_size = atoi(universe_size);
+
+  fn_exit:
+    /* The values were converted above, so the tokens can be released here,
+     * as the other handlers in this file do */
+    HYD_pmcd_pmi_free_tokens(tokens, token_count);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * PMI "get_maxes": reply to the requesting process with PMI_MAXKVSLEN,
+ * PMI_MAXKEYLEN and PMI_MAXVALLEN as kvsname_max, keylen_max and vallen_max.
+ */
+static HYD_status fn_get_maxes(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    char *parts[HYD_NUM_TMP_STRINGS];
+    char *reply;
+    int n = 0;
+
+    HYDU_FUNC_ENTER();
+
+    /* Pieces of the reply line, NULL-terminated for the join below */
+    parts[n++] = HYDU_strdup("cmd=maxes kvsname_max=");
+    parts[n++] = HYDU_int_to_str(PMI_MAXKVSLEN);
+    parts[n++] = HYDU_strdup(" keylen_max=");
+    parts[n++] = HYDU_int_to_str(PMI_MAXKEYLEN);
+    parts[n++] = HYDU_strdup(" vallen_max=");
+    parts[n++] = HYDU_int_to_str(PMI_MAXVALLEN);
+    parts[n++] = HYDU_strdup("\n");
+    parts[n] = NULL;
+
+    status = HYDU_str_alloc_and_join(parts, &reply);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(parts);
+
+    /* Answer the process that asked */
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, reply);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(reply);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * PMI "get_appnum": reply to the requesting process with its application
+ * number.
+ *
+ * The requester must be one of the processes recorded in app->procs[]
+ * (matched by Portals nid/pid).  Only one executable is supported for now,
+ * so the reported appnum is always 0.
+ */
+static HYD_status fn_get_appnum(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    int i, idx;
+    char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    /* Make sure the request comes from a process we know about */
+    for (idx = 0; idx < app->local_size; idx++)
+        if ((ev->initiator.phys.nid == app->procs[idx].ptl_id.phys.nid) &&
+            (ev->initiator.phys.pid == app->procs[idx].ptl_id.phys.pid))
+            break;
+    HYDU_ASSERT(idx < app->local_size, status);
+
+    /* TODO: once more than one exec is supported, map idx onto the exec
+     * list and report that exec's appnum instead of the hard-coded 0 */
+    i = 0;
+    tmp[i++] = HYDU_strdup("cmd=appnum appnum=");
+    tmp[i++] = HYDU_int_to_str(0);
+    tmp[i++] = HYDU_strdup("\n");
+    tmp[i++] = NULL;
+
+    status = HYDU_str_alloc_and_join(tmp, &cmd);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(tmp);
+
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(cmd);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * PMI "get_my_kvsname": reply to the requesting process with the name of
+ * the local key-value space (HYD_pmcd_pmip.local.kvs->kvs_name).
+ */
+static HYD_status fn_get_my_kvsname(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    char *parts[HYD_NUM_TMP_STRINGS];
+    char *reply;
+    int n = 0;
+
+    HYDU_FUNC_ENTER();
+
+    parts[n++] = HYDU_strdup("cmd=my_kvsname kvsname=");
+    parts[n++] = HYDU_strdup(HYD_pmcd_pmip.local.kvs->kvs_name);
+    parts[n++] = HYDU_strdup("\n");
+    parts[n] = NULL;
+
+    status = HYDU_str_alloc_and_join(parts, &reply);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(parts);
+
+    /* Answer the process that asked */
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, reply);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(reply);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * PMI "get_universe_size": reply to the requesting process with
+ * app->universe_size.
+ */
+static HYD_status fn_get_usize(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    char *parts[HYD_NUM_TMP_STRINGS];
+    char *reply;
+    int n = 0;
+
+    HYDU_FUNC_ENTER();
+
+    parts[n++] = HYDU_strdup("cmd=universe_size size=");
+    parts[n++] = HYDU_int_to_str(app->universe_size);
+    parts[n++] = HYDU_strdup("\n");
+    parts[n] = NULL;
+
+    status = HYDU_str_alloc_and_join(parts, &reply);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(parts);
+
+    /* Answer the process that asked */
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, reply);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(reply);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * PMI "put": add a key/value pair to the local key-value space and report
+ * the outcome to the requesting process as "cmd=put_result".
+ *
+ * The kvsname token must match the local KVS name.  A missing value token
+ * is treated as an empty string.  The rc in the reply is whatever
+ * HYD_pmcd_pmi_add_kvs() reported: 0 yields "msg=success", anything else
+ * yields "msg=duplicate_key" immediately followed by the key.
+ */
+static HYD_status fn_put(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    struct HYD_pmcd_token *tokens;
+    char *parts[HYD_NUM_TMP_STRINGS], *reply;
+    char *kvsname, *key, *val;
+    int token_count, rc, n;
+
+    HYDU_FUNC_ENTER();
+
+    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
+    HYDU_ERR_POP(status, "unable to convert args to tokens\n");
+
+    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
+    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: kvsname\n");
+
+    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
+    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: key\n");
+
+    val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
+    if (!val) {
+        /* No value token: the user sent an empty string.
+         * NOTE(review): this copy is not freed in this function; confirm
+         * whether HYD_pmcd_pmi_add_kvs() takes ownership of it. */
+        val = HYDU_strdup("");
+    }
+
+    if (strcmp(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname) != 0)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "kvsname (%s) does not match this group's kvs space (%s)\n",
+                            kvsname, HYD_pmcd_pmip.local.kvs->kvs_name);
+
+    status = HYD_pmcd_pmi_add_kvs(key, val, HYD_pmcd_pmip.local.kvs, &rc);
+    HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
+
+    /* Build the result line */
+    n = 0;
+    parts[n++] = HYDU_strdup("cmd=put_result rc=");
+    parts[n++] = HYDU_int_to_str(rc);
+    if (rc != 0) {
+        parts[n++] = HYDU_strdup(" msg=duplicate_key");
+        parts[n++] = HYDU_strdup(key);
+    }
+    else {
+        parts[n++] = HYDU_strdup(" msg=success");
+    }
+    parts[n++] = HYDU_strdup("\n");
+    parts[n] = NULL;
+
+    status = HYDU_str_alloc_and_join(parts, &reply);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(parts);
+
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, reply);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(reply);
+
+  fn_exit:
+    HYD_pmcd_pmi_free_tokens(tokens, token_count);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+
+/*
+ * PMI "get": look up a key on behalf of the requesting process.
+ *
+ * If the key is present in the local key-value space, its value is sent
+ * straight back as "cmd=get_result".  Otherwise the lookup is forwarded to
+ * the server as "cmd=get_server", tagged with the requester's nid/pid.
+ */
+static HYD_status fn_get(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    struct HYD_pmcd_token *tokens;
+    struct HYD_pmcd_pmi_kvs_pair *pair;
+    char *parts[HYD_NUM_TMP_STRINGS], *msg;
+    char *kvsname, *key, *val;
+    int token_count, n;
+
+    HYDU_FUNC_ENTER();
+
+    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
+    HYDU_ERR_POP(status, "unable to convert args to tokens\n");
+
+    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
+    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: kvsname\n");
+
+    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
+    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: key\n");
+
+    val = NULL;
+    if (strcmp(key, "PMI_dead_processes") == 0) {
+        /* FIXME: return actual number of dead processes.  For now val stays
+         * NULL, so this key skips the kvsname check and the local lookup
+         * and is forwarded to the server like any key missing locally. */
+        val = NULL;
+        goto found_val;
+    }
+
+    /* Make sure the key value store name is what we expect */
+    if (strcmp(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname) != 0)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "kvsname (%s) does not match this group's kvs space (%s)\n",
+                            kvsname, HYD_pmcd_pmip.local.kvs->kvs_name);
+
+    /* Walk the local pairs looking for the key */
+    for (pair = HYD_pmcd_pmip.local.kvs->key_pair; pair != NULL; pair = pair->next) {
+        if (strcmp(pair->key, key) == 0) {
+            val = pair->val;
+            break;
+        }
+    }
+
+found_val:
+    n = 0;
+    if (val == NULL) {
+        /* Not known locally: ask the server, telling it who wants the answer */
+        parts[n++] = HYDU_strdup("cmd=get_server kvsname=");
+        parts[n++] = HYDU_strdup(kvsname);
+        parts[n++] = HYDU_strdup(" key=");
+        parts[n++] = HYDU_strdup(key);
+        parts[n++] = HYDU_strdup(" nid=");
+        parts[n++] = HYDU_int_to_str(ev->initiator.phys.nid);
+        parts[n++] = HYDU_strdup(" pid=");
+        parts[n++] = HYDU_int_to_str(ev->initiator.phys.pid);
+        parts[n++] = HYDU_strdup("\n");
+        parts[n] = NULL;
+
+        status = HYDU_str_alloc_and_join(parts, &msg);
+        HYDU_ERR_POP(status, "unable to join strings\n");
+        HYDU_free_strlist(parts);
+
+        status = send_cmd_to_server(&app->pmi_state, app->pmi_state.server.ptl_id, msg);
+        HYDU_ERR_POP(status, "error sending PMI response\n");
+        HYDU_FREE(msg);
+    }
+    else {
+        /* Known locally: answer the requester directly */
+        parts[n++] = HYDU_strdup("cmd=get_result rc=");
+        parts[n++] = HYDU_strdup("0 msg=success value=");
+        parts[n++] = HYDU_strdup(val);
+        parts[n++] = HYDU_strdup("\n");
+        parts[n] = NULL;
+
+        status = HYDU_str_alloc_and_join(parts, &msg);
+        HYDU_ERR_POP(status, "unable to join strings\n");
+        HYDU_free_strlist(parts);
+
+        status = send_cmd_downstream(&app->pmi_state, ev->initiator, msg);
+        HYDU_ERR_POP(status, "error sending PMI response\n");
+        HYDU_FREE(msg);
+    }
+
+  fn_exit:
+    HYD_pmcd_pmi_free_tokens(tokens, token_count);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * "get_server": the master has received a get request forwarded by a PCT.
+ *
+ * Looks the key up in the local key-value space and answers the sender
+ * with "cmd=get_server_result".  rc is 0 with the key and value when found,
+ * or -1 with "value=unknown" when not.  The nid/pid tokens from the request
+ * are echoed back unchanged.
+ */
+static HYD_status fn_get_server(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    struct HYD_pmcd_token *tokens;
+    struct HYD_pmcd_pmi_kvs_pair *pair;
+    char *parts[HYD_NUM_TMP_STRINGS], *reply;
+    char *kvsname, *key, *val, *nid, *pid;
+    int token_count, n;
+
+    HYDU_FUNC_ENTER();
+
+    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
+    HYDU_ERR_POP(status, "unable to convert args to tokens\n");
+
+    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
+    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: kvsname\n");
+
+    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
+    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: key\n");
+
+    nid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "nid");
+    HYDU_ERR_CHKANDJUMP(status, nid == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: nid\n");
+
+    pid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "pid");
+    HYDU_ERR_CHKANDJUMP(status, pid == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: pid\n");
+
+    /* Make sure the key value store name is what we expect */
+    if (strcmp(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname) != 0)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "kvsname (%s) does not match this group's kvs space (%s)\n",
+                            kvsname, HYD_pmcd_pmip.local.kvs->kvs_name);
+
+    val = NULL;
+    if (strcmp(key, "PMI_dead_processes") == 0) {
+        /* FIXME: return actual number of dead processes.  For now val stays
+         * NULL, so this key is reported as not found. */
+        val = NULL;
+        goto found_val;
+    }
+
+    /* Walk the local pairs looking for the key */
+    for (pair = HYD_pmcd_pmip.local.kvs->key_pair; pair != NULL; pair = pair->next) {
+        if (strcmp(pair->key, key) == 0) {
+            val = pair->val;
+            break;
+        }
+    }
+
+found_val:
+    n = 0;
+    parts[n++] = HYDU_strdup("cmd=get_server_result rc=");
+    if (val == NULL) {
+        parts[n++] = HYDU_strdup("-1 msg=key_");
+        parts[n++] = HYDU_strdup(key);
+        parts[n++] = HYDU_strdup("_not_found value=unknown");
+    }
+    else {
+        parts[n++] = HYDU_strdup("0 msg=success key=");
+        parts[n++] = HYDU_strdup(key);
+        parts[n++] = HYDU_strdup(" value=");
+        parts[n++] = HYDU_strdup(val);
+    }
+    /* Echo the original requester's address back to the PCT */
+    parts[n++] = HYDU_strdup(" nid=");
+    parts[n++] = HYDU_strdup(nid);
+    parts[n++] = HYDU_strdup(" pid=");
+    parts[n++] = HYDU_strdup(pid);
+    parts[n++] = HYDU_strdup("\n");
+    parts[n] = NULL;
+
+    status = HYDU_str_alloc_and_join(parts, &reply);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(parts);
+
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, reply);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(reply);
+
+  fn_exit:
+    HYD_pmcd_pmi_free_tokens(tokens, token_count);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * "get_server_result": the PCT has received the answer to a get_server
+ * request.
+ *
+ * The result is relayed as "cmd=get_result" to the process identified by
+ * the nid/pid tokens.  A value other than "unknown" counts as found: rc is
+ * then 0 and the pair is also stored in the local key-value space.
+ * Otherwise rc is -1.
+ */
+static HYD_status fn_get_server_result(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    struct HYD_pmcd_token *tokens;
+    char *parts[HYD_NUM_TMP_STRINGS], *reply;
+    char *msg, *key, *value, *nid, *pid;
+    ptl_process_t requester;
+    int token_count, n, have_value, rc;
+
+    HYDU_FUNC_ENTER();
+
+    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
+    HYDU_ERR_POP(status, "unable to convert args to tokens\n");
+
+    msg = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "msg");
+    HYDU_ERR_CHKANDJUMP(status, msg == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: msg\n");
+
+    key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
+    HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: key\n");
+
+    value = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
+    HYDU_ERR_CHKANDJUMP(status, value == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: value\n");
+
+    nid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "nid");
+    HYDU_ERR_CHKANDJUMP(status, nid == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: nid\n");
+
+    pid = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "pid");
+    HYDU_ERR_CHKANDJUMP(status, pid == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: pid\n");
+
+    /* The server reports a miss as value=unknown */
+    have_value = (strcmp(value, "unknown") != 0);
+
+    /* Address of the process the answer is relayed to */
+    requester.phys.nid = atoi(nid);
+    requester.phys.pid = atoi(pid);
+
+    n = 0;
+    parts[n++] = HYDU_strdup("cmd=get_result rc=");
+    parts[n++] = HYDU_strdup(have_value ? "0 msg=" : "-1 msg=");
+    parts[n++] = HYDU_strdup(msg);
+    parts[n++] = HYDU_strdup(" value=");
+    parts[n++] = HYDU_strdup(value);
+    parts[n++] = HYDU_strdup("\n");
+    parts[n] = NULL;
+
+    status = HYDU_str_alloc_and_join(parts, &reply);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(parts);
+
+    status = send_cmd_downstream(&app->pmi_state, requester, reply);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(reply);
+
+    /* Store value locally, if we found it */
+    if (have_value) {
+        status = HYD_pmcd_pmi_add_kvs(key, value, HYD_pmcd_pmip.local.kvs, &rc);
+        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
+    }
+
+  fn_exit:
+    HYD_pmcd_pmi_free_tokens(tokens, token_count);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+
+/*
+ * "barrier_out": send "cmd=barrier_out" to each of the app->local_size
+ * processes recorded in app->procs[].  ev and args are not used.
+ */
+static HYD_status fn_barrier_out(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    char *release;
+    int proc;
+
+    HYDU_FUNC_ENTER();
+
+    release = HYDU_strdup("cmd=barrier_out\n");
+
+    proc = 0;
+    while (proc < app->local_size) {
+        status = send_cmd_downstream(&app->pmi_state, app->procs[proc].ptl_id, release);
+        HYDU_ERR_POP(status, "error sending PMI response\n");
+        proc++;
+    }
+
+    HYDU_FREE(release);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * PMI "barrier_in": count barrier arrivals and, once app->local_size of
+ * them have been seen, reset the count and send "cmd=barrier_server" to
+ * the server.
+ *
+ * The arrival counter is a function-level static, so it is shared by every
+ * call to this handler.
+ */
+static HYD_status fn_barrier_in(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    static int arrived = 0;
+    HYD_status status = HYD_SUCCESS;
+    char *parts[HYD_NUM_TMP_STRINGS], *msg;
+    int n;
+
+    HYDU_FUNC_ENTER();
+
+    /* Still waiting for other local processes */
+    if (++arrived < app->local_size)
+        goto fn_exit;
+
+    /* All local apps are here - tell server */
+    arrived = 0;
+
+    n = 0;
+    parts[n++] = HYDU_strdup("cmd=barrier_server\n");
+    parts[n] = NULL;
+
+    status = HYDU_str_alloc_and_join(parts, &msg);
+    HYDU_ERR_POP(status, "unable to join strings\n");
+    HYDU_free_strlist(parts);
+
+    status = send_cmd_to_server(&app->pmi_state, app->pmi_state.server.ptl_id, msg);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(msg);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * "barrier_server": count barrier_server messages and, once
+ * app->world_size of them have been seen, reset the count and release the
+ * barrier through fn_barrier_out().
+ *
+ * Returns the status reported by fn_barrier_out() when the barrier is
+ * released, HYD_SUCCESS otherwise.  The counter is a function-level static
+ * shared by every call to this handler.
+ */
+static HYD_status fn_barrier_server(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    static int barrier_count = 0;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    barrier_count++;
+    if (barrier_count == app->world_size) {
+        barrier_count = 0;
+
+        /* Release everyone from the barrier; fn_barrier_out() ignores its
+         * ev and args parameters, so NULL is fine here */
+        status = fn_barrier_out(pct, app, NULL, NULL);
+    }
+
+    HYDU_FUNC_EXIT();
+    return status;
+}
+
+/*
+ * PMI "commit": acknowledge a commit from a local process.
+ *
+ * Every call answers the requester with "cmd=commit_result".  On the call
+ * that brings the commit count up to app->local_size, the count is reset
+ * and the whole local key-value space is first sent to the server as one
+ * "cmd=commit_server" message.
+ *
+ * The commit counter is a function-level static shared by every call.
+ */
+static HYD_status fn_commit(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    static int commits_seen = 0;
+    HYD_status status = HYD_SUCCESS;
+    struct HYD_pmcd_token *tokens;
+    char *parts[HYD_NUM_TMP_STRINGS];
+    char *kvsname, *ack, *kvs_dump, *server_msg;
+    int token_count;
+
+    HYDU_FUNC_ENTER();
+
+    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
+    HYDU_ERR_POP(status, "unable to convert args to tokens\n");
+
+    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
+    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: kvsname\n");
+
+    /* Make sure the key value store name is what we expect */
+    if (strcmp(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname) != 0)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "kvsname (%s) does not match this group's kvs space (%s)\n",
+                            kvsname, HYD_pmcd_pmip.local.kvs->kvs_name);
+
+    if (++commits_seen >= app->local_size) {
+        /* Everyone locally has committed - send whole commit to server */
+        commits_seen = 0;
+
+        /* Serialize all of our key-val pairs into one string */
+        status = HYD_pmcd_pmi_get_kvs_string(HYD_pmcd_pmip.local.kvs, &kvs_dump);
+        if (status)
+            goto fn_fail;
+
+        parts[0] = HYDU_strdup("cmd=commit_server kvsname=");
+        parts[1] = HYDU_strdup(kvsname);
+        parts[2] = HYDU_strdup(" kvs=");
+        parts[3] = HYDU_strdup(kvs_dump);
+        parts[4] = HYDU_strdup("\n");
+        parts[5] = NULL;
+        HYDU_FREE(kvs_dump);
+
+        status = HYDU_str_alloc_and_join(parts, &server_msg);
+        HYDU_ERR_POP(status, "unable to join strings\n");
+        HYDU_free_strlist(parts);
+
+        status = send_cmd_to_server(&app->pmi_state, app->pmi_state.server.ptl_id, server_msg);
+        HYDU_ERR_POP(status, "error sending PMI response\n");
+        HYDU_FREE(server_msg);
+    }
+
+    /* Send result to app */
+    ack = HYDU_strdup("cmd=commit_result\n");
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, ack);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(ack);
+
+  fn_exit:
+    HYD_pmcd_pmi_free_tokens(tokens, token_count);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+/*
+ * "commit_server": merge the key-value pairs committed by a PCT into the
+ * local key-value space.
+ *
+ * The "kvs" token is parsed into a temporary kvs structure whose pairs are
+ * added one by one.  The first commit ever received sets the local KVS
+ * name; every later commit must carry the same name.
+ *
+ * Returns HYD_SUCCESS; any failure ends in abort().
+ */
+static HYD_status fn_commit_server(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    struct HYD_pmcd_token *tokens;
+    struct HYD_pmcd_pmi_kvs *kvs;
+    struct HYD_pmcd_pmi_kvs_pair *run;
+    int token_count, ret;
+    static int commit_count_server = 0;
+    char *kvsname, *kvs_list;
+
+    HYDU_FUNC_ENTER();
+
+    status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
+    HYDU_ERR_POP(status, "unable to convert args to tokens\n");
+
+    kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
+    HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: kvsname\n");
+
+    kvs_list = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvs");
+    HYDU_ERR_CHKANDJUMP(status, kvs_list == NULL, HYD_INTERNAL_ERROR,
+                        "unable to find token: kvs\n");
+
+    /* Allocate a temporary kvs structure for the client information; kvs
+     * is dereferenced below, so a failure here must not be ignored */
+    status = HYD_pmcd_pmi_get_kvs_struct(kvs_list, &kvs);
+    HYDU_ERR_POP(status, "unable to parse kvs list\n");
+
+    /* If this is the first commit, save the name - otherwise, make sure the
+     * name matches what has already been committed */
+    if (commit_count_server == 0) {
+        HYDU_strncpy(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname, PMI_MAXKVSLEN);
+    }
+    else {
+        HYDU_ERR_CHKANDJUMP(status,
+                            strcmp(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname) != 0,
+                            HYD_INTERNAL_ERROR,
+                            "received invalid kvsname in client commit\n");
+    }
+    commit_count_server++;
+
+    for (run = kvs->key_pair; run; run = run->next) {
+        status = HYD_pmcd_pmi_add_kvs(run->key, run->val, HYD_pmcd_pmip.local.kvs, &ret);
+        HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
+    }
+    /* NOTE(review): only the kvs header is freed here; confirm whether the
+     * key_pair list needs to be released separately. */
+    HYDU_FREE(kvs);
+
+  fn_exit:
+    /* Release the tokens, as the other handlers in this file do */
+    HYD_pmcd_pmi_free_tokens(tokens, token_count);
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+
+/*
+ * PMI "finalize": send "cmd=finalize_server" to the server, then answer
+ * the requesting process with "cmd=finalize_ack".
+ */
+static HYD_status fn_finalize(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    HYD_status status = HYD_SUCCESS;
+    char *msg;
+
+    HYDU_FUNC_ENTER();
+
+    /* Server first: tell it we're done */
+    msg = HYDU_strdup("cmd=finalize_server\n");
+    status = send_cmd_to_server(&app->pmi_state, app->pmi_state.server.ptl_id, msg);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(msg);
+
+    /* Then acknowledge to the app */
+    msg = HYDU_strdup("cmd=finalize_ack\n");
+    status = send_cmd_downstream(&app->pmi_state, ev->initiator, msg);
+    HYDU_ERR_POP(status, "error sending PMI response\n");
+    HYDU_FREE(msg);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    abort();
+}
+
+
+/*
+ * "finalize_server": count finalize messages and, once app->world_size of
+ * them have been seen, reset the count and zero the app's world_size,
+ * universe_size and local_size.
+ *
+ * The counter is a function-level static shared by every call.
+ */
+static HYD_status fn_finalize_server(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
+{
+    static int finalized = 0;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    if (++finalized == app->world_size) {
+        finalized = 0;
+        app->local_size = 0;
+        app->universe_size = 0;
+        app->world_size = 0;
+    }
+
+    HYDU_FUNC_EXIT();
+    return status;
+}
+
+/*
+ * PMI-1 dispatch table: one entry per command string handled in this file.
+ * The last entry (cmd "\0", NULL handler) terminates the table.
+ */
+static struct HYD_pmcd_pmip_pmi_handle pmi_v1_handle_fns_foo[] = {
+    { .cmd = "init",              .handler = fn_init },
+    { .cmd = "app_init",          .handler = fn_app_init },
+    { .cmd = "get_maxes",         .handler = fn_get_maxes },
+    { .cmd = "get_appnum",        .handler = fn_get_appnum },
+    { .cmd = "get_my_kvsname",    .handler = fn_get_my_kvsname },
+    { .cmd = "get_universe_size", .handler = fn_get_usize },
+    { .cmd = "put",               .handler = fn_put },
+    { .cmd = "get",               .handler = fn_get },
+    { .cmd = "get_server",        .handler = fn_get_server },
+    { .cmd = "get_server_result", .handler = fn_get_server_result },
+    { .cmd = "barrier_in",        .handler = fn_barrier_in },
+    { .cmd = "barrier_out",       .handler = fn_barrier_out },
+    { .cmd = "barrier_server",    .handler = fn_barrier_server },
+    { .cmd = "commit",            .handler = fn_commit },
+    { .cmd = "commit_server",     .handler = fn_commit_server },
+    { .cmd = "finalize",          .handler = fn_finalize },
+    { .cmd = "finalize_server",   .handler = fn_finalize_server },
+    { .cmd = "\0",                .handler = NULL }
+};
+
+/* Exported handle on the PMI-1 dispatch table above */
+struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_v1 = pmi_v1_handle_fns_foo;
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/simple_pmiutil.c Wed Aug 7 12:54:32 2013
@@ -0,0 +1,290 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+/* Allow fprintf to logfile */
+/* style: allow:fprintf:1 sig:0 */
+
+/* Utility functions associated with PMI implementation, but not part of
+ the PMI interface itself. Reading and writing on pipes, signals, and
parsing
+ key=value messages
+*/
+//#include "config.h"
+
+#include <stdio.h>
+//#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+//#endif
+#include <stdarg.h>
+//#ifdef HAVE_STRING_H
+#include <string.h>
+//#endif
+//#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+//#endif
+#include <errno.h>
+#include "simple_pmiutil.h"
+
+#define MAXVALLEN 1024
+#define MAXKEYLEN 32
+
+/* These are not the keyvals in the keyval space that is part of the
+ PMI specification.
+ They are just part of this implementation's internal utilities.
+*/
+/* One parsed key=value pair; both strings live in fixed-size buffers. */
+struct PMIU_keyval_pairs {
+ char key[MAXKEYLEN];
+ char value[MAXVALLEN];
+};
+/* Table filled by PMIU_parse_keyvals and queried by PMIU_getval,
+ PMIU_chgval and PMIU_dump_keyvals; it has room for 64 pairs. */
+static struct PMIU_keyval_pairs PMIU_keyval_tab[64] = { { {0}, {0} } };
+/* Number of entries of PMIU_keyval_tab currently in use */
+static int PMIU_keyval_tab_idx = 0;
+
+/* This is used to prepend printed output. Set the initial value to
+ "unset" */
+static char PMIU_print_id[PMIU_IDSIZE] = "unset";
+
+/* Set the print id to "cli_<rank>" for the given PMI rank. */
+void PMIU_Set_rank( int PMI_rank )
+{
+    /* PMIU_print_id is declared with PMIU_IDSIZE elements */
+    snprintf( PMIU_print_id, sizeof(PMIU_print_id), "cli_%d", PMI_rank );
+}
+/* Set the print id to "server". */
+void PMIU_SetServer( void )
+{
+    /* snprintf always NUL-terminates, whatever PMIU_IDSIZE is, and matches
+       how PMIU_Set_rank fills the same buffer */
+    snprintf( PMIU_print_id, PMIU_IDSIZE, "%s", "server" );
+}
+
+/* Note that vfprintf is part of C89 */
+
+/* The function below is compiled out by the #if 0. */
+#if 0
+/* style: allow:fprintf:1 sig:0 */
+/* style: allow:vfprintf:1 sig:0 */
+/* This should be combined with the message routines */
+/* Print a printf-style message, prefixed with "[<print id>]: ", to the log
+ file when print_flag is non-zero. The log file is chosen on the first
+ call: stderr unless PMI_USE_LOGFILE is set in the environment. */
+void PMIU_printf( int print_flag, const char *fmt, ... )
+{
+ va_list ap;
+ static FILE *logfile= 0;
+
+ /* In some cases when we are debugging, the handling of stdout or
+ stderr may be unreliable. In that case, we make it possible to
+ select an output file. */
+ if (!logfile) {
+ char *p;
+ p = getenv("PMI_USE_LOGFILE");
+ if (p) {
+ char filename[1024];
+ /* PMI_ID set: per-client file name; otherwise the server file name */
+ p = getenv("PMI_ID");
+ if (p) {
+ snprintf( filename, sizeof(filename),
+ "testclient-%s.out", p );
+ logfile = fopen( filename, "w" );
+ }
+ else {
+ logfile = fopen( "testserver.out", "w" );
+ }
+ }
+ else
+ logfile = stderr;
+ }
+
+ if ( print_flag ) {
+ /* MPIU_Error_printf( "[%s]: ", PMIU_print_id ); */
+ /* FIXME: Decide what role PMIU_printf should have (if any) and
+ select the appropriate MPIU routine */
+ fprintf( logfile, "[%s]: ", PMIU_print_id );
+ va_start( ap, fmt );
+ vfprintf( logfile, fmt, ap );
+ va_end( ap );
+ fflush( logfile );
+ }
+}
+#endif
+
+#define MAX_READLINE 1024
+/*
+ * Return the next newline-terminated string of maximum length maxlen.
+ * This is a buffered version, and reads from fd as necessary.
+ *
+ * At most maxlen-1 characters are stored in buf, followed by a NUL. The
+ * newline, when one is reached, is kept in buf. Data read from fd but not
+ * yet consumed stays in a static buffer for the next call.
+ *
+ * Returns the number of characters stored (not counting the NUL); 0 when
+ * end-of-file is hit before any character was stored; -1 when a read error
+ * occurs before any character was stored, or when the buffer still holds
+ * data from a different fd.
+ */
+int PMIU_readline( int fd, char *buf, int maxlen )
+{
+ static char readbuf[MAX_READLINE];
+ static char *nextChar = 0, *lastChar = 0; /* lastChar is really one past the last char */
+ static int lastErrno = 0;
+ static int lastfd = -1;
+ int curlen, n;
+ char *p, ch;
+
+ /* Note: On the client side, only one thread at a time should
+ be calling this, and there should only be a single fd.
+ Server side code should not use this routine (see the
+ replacement version in src/pm/util/pmiserv.c) */
+ if (nextChar != lastChar && fd != lastfd) {
+ fprintf(stderr, "Panic - buffer inconsistent\n" );
+ return -1;
+ }
+
+ p = buf;
+ curlen = 1; /* Make room for the null */
+ while (curlen < maxlen) {
+ /* Static buffer exhausted: refill it from fd */
+ if (nextChar == lastChar) {
+ lastfd = fd;
+ /* Retry reads interrupted by a signal */
+ do {
+ n = read( fd, readbuf, sizeof(readbuf)-1 );
+ } while (n == -1 && errno == EINTR);
+ if (n == 0) {
+ /* EOF */
+ break;
+ }
+ else if (n < 0) {
+ /* Error. Return a negative value if there is no
+ data. Save the errno in case we need to return it
+ later. */
+ lastErrno = errno;
+ if (curlen == 1) {
+ /* makes the final "curlen-1" evaluate to -1 */
+ curlen = 0;
+ }
+ break;
+ }
+ nextChar = readbuf;
+ lastChar = readbuf + n;
+ /* Add a null at the end just to make it easier to print
+ the read buffer */
+ readbuf[n] = 0;
+ /* FIXME: Make this an optional output */
+ /* printf( "Readline %s\n", readbuf ); */
+ }
+
+ /* Hand over one buffered character; stop after a newline */
+ ch = *nextChar++;
+ *p++ = ch;
+ curlen++;
+ if (ch == '\n') break;
+ }
+
+ /* We null terminate the string for convenience in printing */
+ *p = 0;
+
+ /* Return the number of characters, not counting the null */
+ return curlen-1;
+}
+
+/*
+ * Write the newline-terminated message buf to fd.
+ *
+ * A message longer than PMIU_MAXLINE is truncated in place and reported,
+ * but not written.  A message that is empty or does not end in a newline
+ * is reported and not written.  Both cases still return 0.
+ *
+ * Returns -1 if write() fails, 0 otherwise (a short write is only
+ * reported).
+ */
+int PMIU_writeline( int fd, char *buf )
+{
+    int size, n;
+
+    size = strlen( buf );
+    if ( size > PMIU_MAXLINE ) {
+        buf[PMIU_MAXLINE-1] = '\0';
+        PMIU_printf( 1, "write_line: message string too big: :%s:\n", buf );
+    }
+    /* size == 0 is checked first so that buf[size - 1] never reads buf[-1] */
+    else if ( size == 0 || buf[size - 1] != '\n' )  /* error: no newline at end */
+        PMIU_printf( 1, "write_line: message string doesn't end in newline: :%s:\n",
+                     buf );
+    else {
+        /* Retry writes interrupted by a signal */
+        do {
+            n = write( fd, buf, size );
+        } while (n == -1 && errno == EINTR);
+
+        if ( n < 0 ) {
+            PMIU_printf( 1, "write_line error; fd=%d buf=:%s:\n", fd, buf );
+            perror("system msg for write_line failure ");
+            return(-1);
+        }
+        if ( n < size)
+            PMIU_printf( 1, "write_line failed to write entire message\n" );
+    }
+    return 0;
+}
+
+/*
+ * Given an input string st, parse it into internal storage that can be
+ * queried by routines such as PMIU_getval.
+ *
+ * st is a blank-separated list of key=value items ending at a newline or
+ * the end of the string.  It is modified in place: the '=' after each key
+ * is overwritten with a NUL.  Keys longer than MAXKEYLEN-1 and values
+ * longer than MAXVALLEN-1 characters are truncated.
+ *
+ * Returns 0 on success.  Returns -1 if st is NULL, if an item is
+ * malformed, or if st holds more pairs than PMIU_keyval_tab has room for.
+ */
+int PMIU_parse_keyvals( char *st )
+{
+    const int tab_size = (int) (sizeof(PMIU_keyval_tab) / sizeof(PMIU_keyval_tab[0]));
+    char *p, *keystart, *valstart;
+    int offset;
+
+    if ( !st )
+        return( -1 );
+
+    PMIU_keyval_tab_idx = 0;
+    p = st;
+    while ( 1 ) {
+        while ( *p == ' ' )
+            p++;
+        /* got non-blank */
+        if ( *p == '=' ) {
+            PMIU_printf( 1, "PMIU_parse_keyvals: unexpected = at character %d in %s\n",
+                         (int) (p - st), st );
+            return( -1 );
+        }
+        if ( *p == '\n' || *p == '\0' )
+            return( 0 );   /* normal exit */
+        /* another pair follows; make sure there is a free slot for it */
+        if ( PMIU_keyval_tab_idx >= tab_size ) {
+            PMIU_printf( 1, "PMIU_parse_keyvals: more than %d key=value pairs\n",
+                         tab_size );
+            return( -1 );
+        }
+        /* got normal character */
+        keystart = p;      /* remember where key started */
+        while ( *p != ' ' && *p != '=' && *p != '\n' && *p != '\0' )
+            p++;
+        if ( *p == ' ' || *p == '\n' || *p == '\0' ) {
+            PMIU_printf( 1,
+                         "PMIU_parse_keyvals: unexpected key delimiter at character %d in %s\n",
+                         (int) (p - st), st );
+            return( -1 );
+        }
+        /* Null terminate the key */
+        *p = 0;
+        /* store key; strncpy does not terminate on truncation */
+        strncpy( PMIU_keyval_tab[PMIU_keyval_tab_idx].key, keystart,
+                 MAXKEYLEN - 1 );
+        PMIU_keyval_tab[PMIU_keyval_tab_idx].key[MAXKEYLEN - 1] = '\0';
+
+        valstart = ++p;    /* start of value */
+        while ( *p != ' ' && *p != '\n' && *p != '\0' )
+            p++;
+        /* When compiled with -fPIC, the pgcc compiler generates incorrect
+           code if "p - valstart" is used instead of using the
+           intermediate offset */
+        offset = p - valstart;
+        if ( offset > MAXVALLEN - 1 )
+            offset = MAXVALLEN - 1;    /* keep the terminator inside value[] */
+        /* store value */
+        memcpy( PMIU_keyval_tab[PMIU_keyval_tab_idx].value, valstart, offset );
+        PMIU_keyval_tab[PMIU_keyval_tab_idx].value[offset] = '\0';
+        PMIU_keyval_tab_idx++;
+        if ( *p == ' ' )
+            continue;
+        if ( *p == '\n' || *p == '\0' )
+            return( 0 );   /* value has been set to empty */
+    }
+}
+
+/* Print every pair currently held in PMIU_keyval_tab as " key=value". */
+void PMIU_dump_keyvals( void )
+{
+    const struct PMIU_keyval_pairs *kv = PMIU_keyval_tab;
+    const struct PMIU_keyval_pairs *end = PMIU_keyval_tab + PMIU_keyval_tab_idx;
+
+    while (kv < end) {
+        PMIU_printf(1, " %s=%s\n", kv->key, kv->value);
+        kv++;
+    }
+}
+
+/*
+ * Copy the value stored for keystr into valstr (a buffer of vallen bytes).
+ *
+ * Returns valstr if the key is found; the value is truncated to vallen-1
+ * characters and always NUL-terminated when vallen > 0.  If the key is not
+ * found, valstr is set to the empty string and NULL is returned.
+ */
+char *PMIU_getval( const char *keystr, char *valstr, int vallen )
+{
+    int i;
+
+    for (i = 0; i < PMIU_keyval_tab_idx; i++) {
+        if ( strcmp( keystr, PMIU_keyval_tab[i].key ) == 0 ) {
+            if ( vallen > 0 ) {
+                /* strncpy does not terminate when the value fills the
+                   whole buffer, so terminate explicitly */
+                strncpy( valstr, PMIU_keyval_tab[i].value, vallen - 1 );
+                valstr[vallen - 1] = '\0';
+            }
+            return valstr;
+        }
+    }
+    valstr[0] = '\0';
+    return NULL;
+}
+
+/*
+ * Replace the stored value of every table entry whose key equals keystr
+ * with valstr, truncated to MAXVALLEN-1 characters.
+ */
+void PMIU_chgval( const char *keystr, char *valstr )
+{
+    struct PMIU_keyval_pairs *kv;
+    int slot = 0;
+
+    while ( slot < PMIU_keyval_tab_idx ) {
+        kv = &PMIU_keyval_tab[slot++];
+        if ( strcmp( keystr, kv->key ) != 0 )
+            continue;
+        strncpy( kv->value, valstr, MAXVALLEN - 1 );
+        kv->value[MAXVALLEN - 1] = '\0';
+    }
+}
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/simple_pmiutil.h Wed Aug 7 12:54:32 2013
@@ -0,0 +1,35 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2001 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+/* maximum sizes for arrays */
+#define PMIU_MAXLINE 1024
+#define PMIU_IDSIZE 32
+
+/* we don't have access to MPIU_Assert and friends here in the PMI code */
+#if defined(HAVE_ASSERT_H)
+# include <assert.h>
+# define PMIU_Assert(expr) assert(expr)
+#else
+# define PMIU_Assert(expr)
+#endif
+
+
+/* prototypes for PMIU routines */
+void PMIU_Set_rank( int PMI_rank );
+void PMIU_SetServer( void );
+
+#if 1
+#include <stdio.h>
+#define PMIU_printf(a, fmt, args...) printf(fmt, ## args)
+#else
+void PMIU_printf( int print_flag, const char *fmt, ... );
+#endif
+int PMIU_readline( int fd, char *buf, int max );
+int PMIU_writeline( int fd, char *buf );
+int PMIU_parse_keyvals( char *st );
+void PMIU_dump_keyvals( void );
+char *PMIU_getval( const char *keystr, char *valstr, int vallen );
+void PMIU_chgval( const char *keystr, char *valstr );
=======================================
--- /dev/null
+++ /user/runtime2/pmi-common/string.c Wed Aug 7 12:54:32 2013
@@ -0,0 +1,274 @@
+/* -*- Mode: C; c-basic-offset:4 ; -*- */
+/*
+ * (C) 2008 by Argonne National Laboratory.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "hydra.h"
+
+/*
+ * Append a duplicate of every string in the NULL-terminated src_strlist to
+ * the end of the NULL-terminated dest_strlist, then re-terminate
+ * dest_strlist with NULL.  No capacity check is made on dest_strlist.
+ * Always returns HYD_SUCCESS.
+ */
+HYD_status HYDU_list_append_strlist(char **src_strlist, char **dest_strlist)
+{
+    char **in;
+    char **out;
+
+    HYDU_FUNC_ENTER();
+
+    /* Start writing at the current terminator of the destination list. */
+    out = dest_strlist + HYDU_strlist_lastidx(dest_strlist);
+    for (in = src_strlist; *in != NULL; in++) {
+        *out = HYDU_strdup(*in);
+        out++;
+    }
+    *out = NULL;
+
+    HYDU_FUNC_EXIT();
+    return HYD_SUCCESS;
+}
+
+
+/*
+ * Write each string of the NULL-terminated strlist to stdout followed by a
+ * space, then a final newline.  Always returns HYD_SUCCESS.
+ */
+HYD_status HYDU_print_strlist(char **strlist)
+{
+    char **cur = strlist;
+
+    HYDU_FUNC_ENTER();
+
+    while (*cur != NULL) {
+        HYDU_dump_noprefix(stdout, "%s ", *cur);
+        cur++;
+    }
+    HYDU_dump_noprefix(stdout, "\n");
+
+    HYDU_FUNC_EXIT();
+    return HYD_SUCCESS;
+}
+
+
+/*
+ * Release every string in the NULL-terminated strlist with HYDU_FREE.
+ * The array itself is not freed and its slots are not reset.
+ */
+void HYDU_free_strlist(char **strlist)
+{
+    int idx = 0;
+
+    HYDU_FUNC_ENTER();
+
+    while (strlist[idx]) {
+        HYDU_FREE(strlist[idx]);
+        idx++;
+    }
+
+    HYDU_FUNC_EXIT();
+}
+
+
+/*
+ * Concatenate all strings of the NULL-terminated strlist, with no
+ * separator, into a buffer allocated here and stored in *strjoin.
+ * Returns the status left by HYDU_MALLOC (HYD_SUCCESS on the normal path).
+ */
+HYD_status HYDU_str_alloc_and_join(char **strlist, char **strjoin)
+{
+    int total = 0, used = 0, idx;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    /* First pass: length of the concatenation, terminator excluded. */
+    idx = 0;
+    while (strlist[idx] != NULL) {
+        total += strlen(strlist[idx]);
+        idx++;
+    }
+
+    HYDU_MALLOC(*strjoin, char *, total + 1, status);
+    **strjoin = 0;
+
+    /* Second pass: write each piece right after the previous one. */
+    for (idx = 0; strlist[idx] != NULL; idx++) {
+        char *tail = *strjoin + used;
+
+        HYDU_snprintf(tail, total - used + 1, "%s", strlist[idx]);
+        used += strlen(strlist[idx]);
+    }
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+
+/*
+ * Split str at the first occurrence of sep.
+ *
+ * On success *str1 receives a newly allocated copy of the text before sep
+ * and *str2 a newly allocated copy of the text after it; when sep does not
+ * occur, *str1 is a copy of the whole string and *str2 is NULL.
+ *
+ * Returns HYD_SUCCESS, or HYD_INTERNAL_ERROR when str is NULL or a
+ * duplication fails (the HYDU_strdup results were previously dereferenced
+ * unchecked).
+ */
+HYD_status HYDU_strsplit(char *str, char **str1, char **str2, char sep)
+{
+    int i;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    if (str == NULL)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "strsplit");
+
+    *str1 = HYDU_strdup(str);
+    if (*str1 == NULL)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "strsplit: unable to duplicate string\n");
+
+    for (i = 0; (*str1)[i] && ((*str1)[i] != sep); i++);
+
+    if ((*str1)[i] == 0)        /* End of the string */
+        *str2 = NULL;
+    else {
+        /* Cut the head first; the tail bytes after index i stay intact. */
+        (*str1)[i] = 0;
+        *str2 = HYDU_strdup(&((*str1)[i + 1]));
+        if (*str2 == NULL)
+            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "strsplit: unable to duplicate string\n");
+    }
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+
+/*
+ * Deep-copy the NULL-terminated string list src: allocate a new array in
+ * *dest, duplicate every string into it and terminate it with NULL.
+ * Returns the status left by HYDU_MALLOC (HYD_SUCCESS on the normal path).
+ */
+HYD_status HYDU_strdup_list(char *src[], char **dest[])
+{
+    int n, idx = 0;
+    char **copy;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    n = HYDU_strlist_lastidx(src);
+    /* One extra slot for the NULL terminator. */
+    HYDU_MALLOC(*dest, char **, (n + 1) * sizeof(char *), status);
+    copy = *dest;
+
+    while (idx < n) {
+        copy[idx] = HYDU_strdup(src[idx]);
+        idx++;
+    }
+    copy[idx] = NULL;
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return status;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+
+/*
+ * Return a newly allocated decimal string for x; the caller owns it.
+ * str stays NULL if the allocation macros jump to fn_fail.
+ *
+ * The digit count is obtained by repeatedly dividing x by 10.  The earlier
+ * approach grew a power of ten until it exceeded x, which wraps around
+ * for values of x near the top of the size_t range.
+ */
+char *HYDU_size_t_to_str(size_t x)
+{
+    int len = 1;                /* every value has at least one digit */
+    size_t rest;
+    char *str = NULL;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    for (rest = x / 10; rest != 0; rest /= 10)
+        len++;
+    len++;                      /* room for the terminating NUL */
+
+    HYDU_MALLOC(str, char *, len, status);
+    HYDU_ERR_POP(status, "unable to allocate memory\n");
+
+    /* snprintf writes all digits and the terminator; no pre-fill needed. */
+    HYDU_snprintf(str, len, "%llu", (unsigned long long) x);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return str;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+
+/*
+ * Convert x to a newly allocated decimal string without zero padding;
+ * thin wrapper over HYDU_int_to_str_pad() with a pad width of 0.
+ */
+char *HYDU_int_to_str(int x)
+{
+    const int no_padding = 0;
+
+    return HYDU_int_to_str_pad(x, no_padding);
+}
+
+
+/*
+ * Return a newly allocated decimal string for x, left-padded with '0'
+ * characters to at least maxlen characters; the caller owns it.
+ * str stays NULL if the allocation macros jump to fn_fail.
+ *
+ * The width of x (sign included) is computed on its unsigned magnitude by
+ * repeated division, so neither INT_MIN nor values of 1e9 and above cause
+ * signed overflow while counting digits.
+ *
+ * NOTE(review): as before, the padding is written in front of the "%d"
+ * output, so a padded negative number has zeros before the '-' sign.
+ */
+char *HYDU_int_to_str_pad(int x, int maxlen)
+{
+    int len = 1;                /* every value has at least one digit */
+    unsigned int magnitude;
+    int actual_len, pad, i;
+    char *str = NULL;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    if (x < 0) {
+        len++;                  /* room for the '-' sign */
+        magnitude = 0U - (unsigned int) x;      /* defined even for INT_MIN */
+    }
+    else
+        magnitude = (unsigned int) x;
+
+    for (magnitude /= 10; magnitude != 0; magnitude /= 10)
+        len++;
+
+    /* Total buffer size: the wider of len and maxlen, plus the NUL. */
+    if (len > maxlen)
+        actual_len = len + 1;
+    else
+        actual_len = maxlen + 1;
+
+    HYDU_MALLOC(str, char *, actual_len, status);
+    HYDU_ERR_POP(status, "unable to allocate memory\n");
+
+    /* Zero-fill the padding area; the number itself goes at the tail. */
+    pad = actual_len - len - 1;
+    for (i = 0; i < pad; i++)
+        str[i] = '0';
+
+    HYDU_snprintf(str + pad, len + 1, "%d", x);
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return str;
+
+  fn_fail:
+    goto fn_exit;
+}
+
+/*
+ * Return the index of the NULL terminator of strlist, i.e. the number of
+ * strings stored before it.
+ */
+int HYDU_strlist_lastidx(char **strlist)
+{
+    int count = 0;
+
+    while (strlist[count] != NULL)
+        count++;
+
+    return count;
+}
+
+/*
+ * Split str on whitespace into a newly allocated, NULL-terminated array of
+ * newly allocated strings.  The array has HYD_NUM_TMP_STRINGS slots and
+ * each token buffer has HYD_TMP_STRLEN bytes, so at most
+ * HYD_NUM_TMP_STRINGS - 1 tokens of at most HYD_TMP_STRLEN - 1 characters
+ * are accepted.
+ *
+ * Returns the list, or NULL on any failure (allocation error, too many
+ * tokens, token too long); nothing is leaked in that case.  An empty or
+ * all-whitespace input yields a list whose first element is NULL.
+ */
+char **HYDU_str_to_strlist(char *str)
+{
+    int argc = 0, i;
+    char **strlist = NULL;
+    char *p;
+    HYD_status status = HYD_SUCCESS;
+
+    HYDU_FUNC_ENTER();
+
+    HYDU_MALLOC(strlist, char **, HYD_NUM_TMP_STRINGS * sizeof(char *), status);
+    if (!strlist)
+        HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
+                            "Unable to allocate mem for strlist\n");
+
+    /* Every slot starts out NULL so the list is terminated at all times. */
+    for (i = 0; i < HYD_NUM_TMP_STRINGS; i++)
+        strlist[i] = NULL;
+
+    p = str;
+    for (;;) {
+        /* <ctype.h> functions need an unsigned char value, not a plain char. */
+        while (isspace((unsigned char) *p))
+            p++;
+        if (*p == '\0')
+            break;
+
+        /* The last slot is reserved for the NULL terminator. */
+        if (argc >= HYD_NUM_TMP_STRINGS - 1)
+            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "too many arguments in line\n");
+
+        HYDU_MALLOC(strlist[argc], char *, HYD_TMP_STRLEN, status);
+
+        /* Copy till you hit a space, leaving room for the terminator */
+        i = 0;
+        while (*p && !isspace((unsigned char) *p)) {
+            if (i >= HYD_TMP_STRLEN - 1)
+                HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "argument too long in line\n");
+            strlist[argc][i] = *p;
+            i++;
+            p++;
+        }
+        strlist[argc][i] = 0;
+        argc++;
+    }
+
+  fn_exit:
+    HYDU_FUNC_EXIT();
+    return strlist;
+
+  fn_fail:
+    /* Never hand back a half-built list: release it and report NULL. */
+    if (strlist) {
+        for (i = 0; i < HYD_NUM_TMP_STRINGS; i++) {
+            if (strlist[i]) {
+                HYDU_FREE(strlist[i]);
+            }
+        }
+        HYDU_FREE(strlist);
+        strlist = NULL;
+    }
+    goto fn_exit;
+}
=======================================
--- /dev/null
+++ /user/runtime2/pmi-server/Makefile Wed Aug 7 12:54:32 2013
@@ -0,0 +1,23 @@
+BASE=../..
+include $(BASE)/Makefile.header
+
+PROGRAMS = server
+
+PMI_COMMON_BASE=../pmi-common
+server_SOURCES = \
+ server.c \
+ ${PMI_COMMON_BASE}/common.c \
+ ${PMI_COMMON_BASE}/string.c \
+ ${PMI_COMMON_BASE}/pmip_pmi_v1.c \
+ ${PMI_COMMON_BASE}/pmi_server.c
+
+server_LDADD = -lpmi -lportals_util -lportals -lxpmem -lrt -lpthread
+
+CFLAGS += -std=gnu99
+INCDIRS += -I../pct -I../pmi-common/
+LIBDIRS += -L.
+
+# We are building a linux executable
+TARGET=linux
+
+include $(BASE)/Makefile.footer
=======================================
--- /dev/null
+++ /user/runtime2/pmi-server/nidpid.txt Wed Aug 7 12:54:32 2013
@@ -0,0 +1,3 @@
+2 4
+192.168.1.2:1 2
+192.168.1.3:1 2
=======================================
--- /dev/null
+++ /user/runtime2/pmi-server/server.c Wed Aug 7 12:54:32 2013
@@ -0,0 +1,186 @@
+/*
+ * This is the extremely minimalistic PMI job server.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <portals4.h>
+#include <portals4_util.h>
+
+#include <pct.h>
+#include <pmi_server.h>
+#include <simple_pmiutil.h>
+
+#define DELIM ": \t\n"
+
+/*
+ * One record per node listed in the NID/PID file: the Portals address that
+ * send_to_app() targets, and the PMI state whose tx buffer, MD and EQ are
+ * used for that send.
+ */
+typedef struct pmi_client {
+ ptl_process_t ptl_id;
+ pmi_state_t pmi_state;
+} pmi_client_t;
+
+/*
+ * Initialize Portals for the server: PtlInit(), then a matching, physically
+ * addressed network interface (handle stored in pct->ni_h), then the
+ * interface's physical ID (stored in pct->ptl_id).
+ *
+ * Returns 0.  Each call is wrapped in PTL_CHECK, which is defined elsewhere
+ * and is presumably where failures are reported.  The function previously
+ * fell off its end without returning a value.
+ */
+static int init_portals(pct_t * pct) {
+    PTL_CHECK(PtlInit());
+    PTL_CHECK(PtlNIInit(PTL_IFACE_DEFAULT, PTL_NI_MATCHING | PTL_NI_PHYSICAL,
+                        getpid(), NULL, NULL, &pct->ni_h));
+    PTL_CHECK(PtlGetPhysId(pct->ni_h, &pct->ptl_id));
+
+    return 0;
+}
+
+/*
+ * Send the NUL-terminated string in the client's tx buffer to the client's
+ * Portals address, using the portal table index, MD and EQ stored in its
+ * PMI state.  Returns 0; the enqueue call is wrapped in PTL_CHECK.
+ */
+static int send_to_app(pmi_client_t * client) {
+    ptl_process_t target  = client->ptl_id;
+    pmi_state_t * state   = &client->pmi_state;
+
+    /* Send the message; the length includes the terminating NUL */
+    PTL_CHECK(
+        ptl_enqueue(target, state->client.pt_index, state->client.tx_md_h,
+                    0, strlen(state->client.tx_buf) + 1, state->client.tx_eq_h)
+    );
+
+    return 0;
+}
+
+/*
+ * Build the "cmd=app_init ..." message carrying the given base rank and
+ * sizes, copy it into the client's tx buffer and send it with
+ * send_to_app().  Returns -1 if formatting fails, otherwise the result of
+ * send_to_app().
+ */
+static int pmi_app_init(
+    int            client_base_rank,
+    int            local_size,
+    int            world_size,
+    int            universe_size,
+    pmi_client_t * client
+)
+{
+    char msg[PMIU_MAXLINE];
+    int  written;
+
+    written = snprintf(msg, sizeof(msg),
+        "cmd=app_init base_rank=%d local_size=%d world_size=%d universe_size=%d\n",
+        client_base_rank, local_size, world_size, universe_size);
+    if (written < 0)
+        return -1;
+
+    strcpy(client->pmi_state.client.tx_buf, msg);
+
+    return send_to_app(client);
+}
+
+/*
+ * Parse a dotted-quad IPv4 address ("a.b.c.d") into a 32-bit value with the
+ * first octet in the most significant byte (so "192.168.1.2" gives
+ * 0xC0A80102).
+ *
+ * Returns 0 when addr is NULL, does not start with four dot-separated
+ * numbers, or has an octet above 255.  Octets are parsed as unsigned and
+ * combined in a uint32_t, so no uninitialized value is read and no signed
+ * shift can overflow.
+ */
+static uint32_t parse_ipv4_addr(const char * addr) {
+    unsigned int octet[4];
+    uint32_t     result = 0;
+    int          i;
+
+    if (addr == NULL ||
+        sscanf(addr, "%u.%u.%u.%u", &octet[0], &octet[1], &octet[2], &octet[3]) != 4)
+        return 0;
+
+    for (i = 0; i < 4; i++) {
+        if (octet[i] > 255)
+            return 0;
+        result = (result << 8) | octet[i];
+    }
+
+    return result;
+}
+
+/*
+ * Minimal PMI job server.
+ *
+ * argv[1] names a text file.  Its first line holds "<num_nodes>
+ * <global_size>"; each of the next num_nodes lines holds
+ * "<ipv4 address>:<pid> <ranks on node>" (fields are split on DELIM).
+ * For every node the server records the Portals ID, copies the PMI state
+ * and sends a "cmd=app_init" message, then processes events from the rx
+ * event queue until fake_pct.app.local_size reaches 0 (presumably
+ * decremented inside pmi_process_event()).
+ *
+ * Returns EXIT_SUCCESS after the event loop ends, EXIT_FAILURE on bad
+ * usage, an unreadable or malformed file, or a failed app_init send.
+ */
+int main(int argc, char ** argv) {
+    FILE * fp;
+    char * line = NULL, * token, * token2, * token3;
+    size_t line_size = 0;
+    ssize_t nread;      /* getline() returns ssize_t; -1 signals EOF/error */
+    int num_nodes, global_size, i, num_ranks_on_node, rank_no;
+    pct_t fake_pct;
+    pmi_client_t * pmi_clients = NULL;
+    ptl_process_t match_id;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: %s: <NID/PID filename>\n", *argv);
+        return EXIT_FAILURE;
+    }
+
+    if ((fp = fopen(*(++argv), "r")) == NULL) {
+        fprintf(stderr, "Cannot open file %s: ", *argv);
+        perror("");
+        return EXIT_FAILURE;
+    }
+
+    /* Start from a fully defined PCT; only a few fields are set below. */
+    memset(&fake_pct, 0, sizeof(fake_pct));
+    fake_pct.app.procs = NULL;
+
+    /* Header line: node count and global rank count. */
+    nread  = getline(&line, &line_size, fp);
+    token  = (nread == -1) ? NULL : strtok(line, DELIM);
+    token2 = (token == NULL) ? NULL : strtok(NULL, DELIM);
+    if (token2 == NULL) {
+        fprintf(stderr, "Invalid header line in file %s\n", *argv);
+        goto fail;
+    }
+
+    num_nodes   = atoi(token);
+    global_size = atoi(token2);
+    if (num_nodes <= 0 || global_size <= 0) {
+        fprintf(stderr, "Invalid node/rank counts in file %s\n", *argv);
+        goto fail;
+    }
+    printf("Num nodes: %d, num ranks: %d\n", num_nodes, global_size);
+
+    // Setup a fake pct
+    fake_pct.app.user_id = PTL_UID_ANY;
+    fake_pct.app.local_size = fake_pct.app.world_size = num_nodes;
+
+    // Initialize portals
+    init_portals(&fake_pct);
+
+    // Allocate procs for the app, as if they were all running locally
+    if ((fake_pct.app.procs = malloc(sizeof(process_t) * (size_t)num_nodes)) == NULL) {
+        abort();
+    }
+
+    // Allocate the pmi clients
+    if ((pmi_clients = malloc(sizeof(pmi_client_t) * (size_t)num_nodes)) == NULL) {
+        abort();
+    }
+
+    match_id.phys.nid = PTL_NID_ANY; match_id.phys.pid = PTL_PID_ANY;
+    pmi_init(&fake_pct, &fake_pct.app, PCT_PMI_SERVER_PT_INDEX, match_id);
+
+    // OK, at this point, Portals is ready. We need to initialize each client's PMI state
+    // and send a Put to each client's PCT
+    for (i = 0, rank_no = 0; i < num_nodes; i++) {
+        nread  = getline(&line, &line_size, fp);
+        token  = (nread == -1) ? NULL : strtok(line, DELIM);
+        token2 = (token == NULL) ? NULL : strtok(NULL, DELIM);
+        token3 = (token2 == NULL) ? NULL : strtok(NULL, DELIM);
+        if (token3 == NULL) {
+            fprintf(stderr, "Invalid rank information in file...\n");
+            goto fail;
+        }
+
+        pmi_clients[i].ptl_id.phys.nid = parse_ipv4_addr(token);
+        pmi_clients[i].ptl_id.phys.pid = atoi(token2);
+        num_ranks_on_node = atoi(token3);
+
+        // Save the node's portals ID
+        fake_pct.app.procs[i].ptl_id = pmi_clients[i].ptl_id;
+
+        // Save the pmi state
+        pmi_clients[i].pmi_state = fake_pct.app.pmi_state;
+        pmi_clients[i].pmi_state.client.pt_index = PCT_PMI_PT_INDEX;
+
+        if (0 != pmi_app_init(rank_no, num_ranks_on_node, global_size, global_size, &pmi_clients[i])) {
+            fprintf(stderr, "Failed to initialize app %d\n", i);
+            goto fail;
+        }
+
+        rank_no += num_ranks_on_node;
+    }
+
+    fake_pct.app.pmi_state.client.pt_index = PCT_PMI_PT_INDEX;
+    for (;;) {
+        ptl_event_t ev;
+
+        //PTL_CHECK(PtlEQPoll(handles, num_nodes, PTL_TIME_FOREVER, &ev, &handle_no));
+        PTL_CHECK(PtlEQWait(fake_pct.app.pmi_state.client.rx_eq_h, &ev));
+
+        if (ev.type == PTL_EVENT_PUT) {
+            CHECK(pmi_process_event(&fake_pct, &fake_pct.app, &ev));
+        }
+
+        //ptl_queue_process_event(queues[handle_no], &ev);
+        ptl_queue_process_event(fake_pct.app.pmi_state.client.rx_q, &ev);
+
+        if (fake_pct.app.local_size == 0) {
+            break;
+        }
+    }
+
+    printf("Application exited on all ranks!\n");
+    sleep(3);
+
+    // TODO: Free queue allocations?
+    free(fake_pct.app.procs);
+    free(pmi_clients);
+    free(line);
+    fclose(fp);
+    return EXIT_SUCCESS;
+
+fail:
+    /* Common failure exit: release whatever was acquired so far. */
+    free(fake_pct.app.procs);
+    free(pmi_clients);
+    free(line);
+    fclose(fp);
+    return EXIT_FAILURE;
+}
=======================================
--- /user/runtime2/libpmi/simple_pmiutil.c Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,288 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2001 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-/* Allow fprintf to logfile */
-/* style: allow:fprintf:1 sig:0 */
-
-/* Utility functions associated with PMI implementation, but not part of
- the PMI interface itself. Reading and writing on pipes, signals, and
parsing
- key=value messages
-*/
-//#include "config.h"
-
-#include <stdio.h>
-//#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-//#endif
-#include <stdarg.h>
-//#ifdef HAVE_STRING_H
-#include <string.h>
-//#endif
-//#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-//#endif
-#include <errno.h>
-#include "simple_pmiutil.h"
-
-#define MAXVALLEN 1024
-#define MAXKEYLEN 32
-
-/* These are not the keyvals in the keyval space that is part of the
- PMI specification.
- They are just part of this implementation's internal utilities.
-*/
-struct PMIU_keyval_pairs {
- char key[MAXKEYLEN];
- char value[MAXVALLEN];
-};
-static struct PMIU_keyval_pairs PMIU_keyval_tab[64] = { { {0}, {0} } };
-static int PMIU_keyval_tab_idx = 0;
-
-/* This is used to prepend printed output. Set the initial value to
- "unset" */
-static char PMIU_print_id[PMIU_IDSIZE] = "unset";
-
-void PMIU_Set_rank( int PMI_rank )
-{
- snprintf( PMIU_print_id, PMIU_IDSIZE, "cli_%d", PMI_rank );
-}
-void PMIU_SetServer( void )
-{
- strncpy( PMIU_print_id, "server", PMIU_IDSIZE );
-}
-
-/* Note that vfprintf is part of C89 */
-
-/* style: allow:fprintf:1 sig:0 */
-/* style: allow:vfprintf:1 sig:0 */
-/* This should be combined with the message routines */
-void PMIU_printf( int print_flag, const char *fmt, ... )
-{
- va_list ap;
- static FILE *logfile= 0;
-
- /* In some cases when we are debugging, the handling of stdout or
- stderr may be unreliable. In that case, we make it possible to
- select an output file. */
- if (!logfile) {
- char *p;
- p = getenv("PMI_USE_LOGFILE");
- if (p) {
- char filename[1024];
- p = getenv("PMI_ID");
- if (p) {
- snprintf( filename, sizeof(filename),
- "testclient-%s.out", p );
- logfile = fopen( filename, "w" );
- }
- else {
- logfile = fopen( "testserver.out", "w" );
- }
- }
- else
- logfile = stderr;
- }
-
- if ( print_flag ) {
- /* MPIU_Error_printf( "[%s]: ", PMIU_print_id ); */
- /* FIXME: Decide what role PMIU_printf should have (if any) and
- select the appropriate MPIU routine */
- fprintf( logfile, "[%s]: ", PMIU_print_id );
- va_start( ap, fmt );
- vfprintf( logfile, fmt, ap );
- va_end( ap );
- fflush( logfile );
- }
-}
-
-#define MAX_READLINE 1024
-/*
- * Return the next newline-terminated string of maximum length maxlen.
- * This is a buffered version, and reads from fd as necessary. A
- */
-int PMIU_readline( int fd, char *buf, int maxlen )
-{
- static char readbuf[MAX_READLINE];
- static char *nextChar = 0, *lastChar = 0; /* lastChar is really one
past
- last char */
- static int lastErrno = 0;
- static int lastfd = -1;
- int curlen, n;
- char *p, ch;
-
- /* Note: On the client side, only one thread at a time should
- be calling this, and there should only be a single fd.
- Server side code should not use this routine (see the
- replacement version in src/pm/util/pmiserv.c) */
- if (nextChar != lastChar && fd != lastfd) {
- fprintf(stderr, "Panic - buffer inconsistent\n" );
- return -1;
- }
-
- p = buf;
- curlen = 1; /* Make room for the null */
- while (curlen < maxlen) {
- if (nextChar == lastChar) {
- lastfd = fd;
- do {
- n = read( fd, readbuf, sizeof(readbuf)-1 );
- } while (n == -1 && errno == EINTR);
- if (n == 0) {
- /* EOF */
- break;
- }
- else if (n < 0) {
- /* Error. Return a negative value if there is no
- data. Save the errno in case we need to return it
- later. */
- lastErrno = errno;
- if (curlen == 1) {
- curlen = 0;
- }
- break;
- }
- nextChar = readbuf;
- lastChar = readbuf + n;
- /* Add a null at the end just to make it easier to print
- the read buffer */
- readbuf[n] = 0;
- /* FIXME: Make this an optional output */
- /* printf( "Readline %s\n", readbuf ); */
- }
-
- ch = *nextChar++;
- *p++ = ch;
- curlen++;
- if (ch == '\n') break;
- }
-
- /* We null terminate the string for convenience in printing */
- *p = 0;
-
- /* Return the number of characters, not counting the null */
- return curlen-1;
-}
-
-int PMIU_writeline( int fd, char *buf )
-{
- int size, n;
-
- size = strlen( buf );
- if ( size > PMIU_MAXLINE ) {
- buf[PMIU_MAXLINE-1] = '\0';
- PMIU_printf( 1, "write_line: message string too big: :%s:\n", buf );
- }
- else if ( buf[strlen( buf ) - 1] != '\n' ) /* error: no newline at
end */
- PMIU_printf( 1, "write_line: message string doesn't end in
newline: :%s:\n",
- buf );
- else {
- do {
- n = write( fd, buf, size );
- } while (n == -1 && errno == EINTR);
-
- if ( n < 0 ) {
- PMIU_printf( 1, "write_line error; fd=%d buf=:%s:\n", fd, buf );
- perror("system msg for write_line failure ");
- return(-1);
- }
- if ( n < size)
- PMIU_printf( 1, "write_line failed to write entire message\n" );
- }
- return 0;
-}
-
-/*
- * Given an input string st, parse it into internal storage that can be
- * queried by routines such as PMIU_getval.
- */
-int PMIU_parse_keyvals( char *st )
-{
- char *p, *keystart, *valstart;
- int offset;
-
- if ( !st )
- return( -1 );
-
- PMIU_keyval_tab_idx = 0;
- p = st;
- while ( 1 ) {
- while ( *p == ' ' )
- p++;
- /* got non-blank */
- if ( *p == '=' ) {
- PMIU_printf( 1, "PMIU_parse_keyvals: unexpected = at character %d
in %s\n",
- p - st, st );
- return( -1 );
- }
- if ( *p == '\n' || *p == '\0' )
- return( 0 ); /* normal exit */
- /* got normal character */
- keystart = p; /* remember where key started */
- while ( *p != ' ' && *p != '=' && *p != '\n' && *p != '\0' )
- p++;
- if ( *p == ' ' || *p == '\n' || *p == '\0' ) {
- PMIU_printf( 1,
- "PMIU_parse_keyvals: unexpected key delimiter at character %d
in %s\n",
- p - st, st );
- return( -1 );
- }
- /* Null terminate the key */
- *p = 0;
- /* store key */
- strncpy( PMIU_keyval_tab[PMIU_keyval_tab_idx].key, keystart,
- MAXKEYLEN );
-
- valstart = ++p; /* start of value */
- while ( *p != ' ' && *p != '\n' && *p != '\0' )
- p++;
- /* store value */
- strncpy( PMIU_keyval_tab[PMIU_keyval_tab_idx].value, valstart,
- MAXVALLEN );
- offset = p - valstart;
- /* When compiled with -fPIC, the pgcc compiler generates incorrect
- code if "p - valstart" is used instead of using the
- intermediate offset */
- PMIU_keyval_tab[PMIU_keyval_tab_idx].value[offset] = '\0';
- PMIU_keyval_tab_idx++;
- if ( *p == ' ' )
- continue;
- if ( *p == '\n' || *p == '\0' )
- return( 0 ); /* value has been set to empty */
- }
-}
-
-void PMIU_dump_keyvals( void )
-{
- int i;
- for (i=0; i < PMIU_keyval_tab_idx; i++)
- PMIU_printf(1, " %s=%s\n",PMIU_keyval_tab[i].key,
PMIU_keyval_tab[i].value);
-}
-
-char *PMIU_getval( const char *keystr, char *valstr, int vallen )
-{
- int i;
-
- for (i = 0; i < PMIU_keyval_tab_idx; i++) {
- if ( strcmp( keystr, PMIU_keyval_tab[i].key ) == 0 ) {
- strncpy( valstr, PMIU_keyval_tab[i].value, vallen );
- return valstr;
- }
- }
- valstr[0] = '\0';
- return NULL;
-}
-
-void PMIU_chgval( const char *keystr, char *valstr )
-{
- int i;
-
- for ( i = 0; i < PMIU_keyval_tab_idx; i++ ) {
- if ( strcmp( keystr, PMIU_keyval_tab[i].key ) == 0 ) {
- strncpy( PMIU_keyval_tab[i].value, valstr, MAXVALLEN - 1 );
- PMIU_keyval_tab[i].value[MAXVALLEN - 1] = '\0';
- }
- }
-}
=======================================
--- /user/runtime2/libpmi/simple_pmiutil.h Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,29 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2001 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-/* maximum sizes for arrays */
-#define PMIU_MAXLINE 1024
-#define PMIU_IDSIZE 32
-
-/* we don't have access to MPIU_Assert and friends here in the PMI code */
-#if defined(HAVE_ASSERT_H)
-# include <assert.h>
-# define PMIU_Assert(expr) assert(expr)
-#else
-# define PMIU_Assert(expr)
-#endif
-
-
-/* prototypes for PMIU routines */
-void PMIU_Set_rank( int PMI_rank );
-void PMIU_SetServer( void );
-void PMIU_printf( int print_flag, const char *fmt, ... );
-int PMIU_readline( int fd, char *buf, int max );
-int PMIU_writeline( int fd, char *buf );
-int PMIU_parse_keyvals( char *st );
-void PMIU_dump_keyvals( void );
-char *PMIU_getval( const char *keystr, char *valstr, int vallen );
-void PMIU_chgval( const char *keystr, char *valstr );
=======================================
--- /user/runtime2/pct/pct_internal.h Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef PCT_INTERNAL_H
-#define PCT_INTERNAL_H
-
-#include <sched.h>
-#include <lwk/liblwk.h>
-#include <portals4.h>
-
-
-#define PPE_INFO_STRING_SIZE 512
-
-
-// Aborts program if x returns a non-zero error code
-#define CHECK(x) \
- do { \
- int status; \
- if ((status = x) != 0) { \
- fprintf(stderr, "'%s' failed, status=%d\n", #x, status); \
- fprintf(stderr, "In %s, %s() line %u\n", \
- __FILE__, __FUNCTION__, (unsigned int)__LINE__); \
- abort(); \
- } \
- } while (0)
-
-
-typedef struct pmi_state {
- ptl_pt_index_t client_pt_index;
-
- // Portals state for handling client requests
- ptl_queue_t client_rx_q;
- ptl_handle_eq_t client_rx_eq_h;
-
- // Portals state for sending responses to clients
- ptl_handle_eq_t client_tx_eq_h;
- ptl_md_t client_tx_md;
- ptl_handle_md_t client_tx_md_h;
- size_t client_tx_buf_size;
- char * client_tx_buf;
-} pmi_state_t;
-
-
-typedef struct process {
- id_t local_index; // Local index of the process
-
- // Misc IDs, many of these get copied to the start_state struct below
- id_t task_id;
- id_t aspace_id;
- id_t cpu_id;
-
- // This structure tells Kitten how to start a new task executing
- start_state_t start_state;
-
- // This string contains information about the Portals Progress Engine
(PPE)
- char ppe_info[PPE_INFO_STRING_SIZE];
-
- // Portals address of the process
- ptl_process_t ptl_id;
-} process_t;
-
-
-typedef struct app {
- int world_size; // For MPI, size of MPI_COMM_WORLD
- int universe_size; // For MPI, world_size + spawn capability
-
- int local_size; // Number of processes running locally
- process_t * procs; // Array of descriptors, one per local
process
-
- id_t user_id;
- id_t group_id;
-
- cpu_set_t avail_cpus; // Bitmap of CPUs that app procs can run
on
-
- pmi_state_t pmi_state; // State needed for app process <-> PCT
comm
-} app_t;
-
-
-typedef struct pct {
- id_t aspace_id; // The PCT's address space ID
- app_t app; // Metadata for app PCT is managing
-
- ptl_handle_ni_t ni_h; // The network interface handle the PCT
- // uses for Portals communication.
- ptl_process_t ptl_id; // The PCT's Portals ID
-} pct_t;
-
-
-// PMI server related prototypes
-int pmi_init(pct_t *pct, app_t *app, ptl_pt_index_t pt_index);
-int pmi_process_event(pct_t *pct, app_t *app, const ptl_event_t *ev);
-
-
-#endif
=======================================
--- /user/runtime2/pct/pmi/common.c Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,259 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include "hydra.h"
-#include "common.h"
-
-char *HYD_dbg_prefix = (char *) "unknown";
-
-void HYD_pmcd_init_header(struct HYD_pmcd_hdr *hdr)
-{
- hdr->cmd = INVALID_CMD;
- hdr->buflen = -1;
- hdr->pid = -1;
- hdr->pmi_version = -1;
- hdr->pgid = -1;
- hdr->proxy_id = -1;
- hdr->rank = -1;
- hdr->signum = -1;
-}
-
-HYD_status HYD_pmcd_pmi_parse_pmi_cmd(char *obuf, int pmi_version, char
**pmi_cmd,
- char *args[])
-{
- char *tbuf = NULL, *seg, *str1 = NULL, *cmd;
- char *buf;
- char *tmp[HYD_NUM_TMP_STRINGS], *targs[HYD_NUM_TMP_STRINGS];
- const char *delim;
- int i, j, k;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- /* Make a copy of the original buffer */
- if ((buf = HYDU_strdup(obuf)) == NULL) {
- fprintf(stderr, "HYDU_strdup(obuf) failed\n");
- abort();
- }
-
- if (buf[strlen(obuf) - 1] == '\n')
- buf[strlen(obuf) - 1] = '\0';
-
- if (pmi_version == 1) {
- if (!strncmp(buf, "cmd=", strlen("cmd=")))
- delim = " ";
- else
- delim = "\n";
-
- /* Here we only get PMI-1 commands or backward compatible
- * PMI-2 commands, so we always explicitly use the PMI-1
- * delimiter. This allows us to get backward-compatible PMI-2
- * commands interleaved with regular PMI-2 commands. */
- tbuf = HYDU_strdup(buf);
- cmd = strtok(tbuf, delim);
- for (i = 0; i < HYD_NUM_TMP_STRINGS; i++) {
- targs[i] = strtok(NULL, delim);
- if (targs[i] == NULL)
- break;
- }
-
- /* Make a pass through targs and merge space separated
- * arguments which are actually part of the same key */
- k = 0;
- for (i = 0; targs[i]; i++) {
- if (!strrchr(targs[i], ' ')) {
- /* no spaces */
- args[k++] = HYDU_strdup(targs[i]);
- }
- else {
- /* space in the argument; each segment is either a new
- * key, or a space-separated part of the previous
- * key */
- j = 0;
- seg = strtok(targs[i], " ");
- while (1) {
- if (!seg || strrchr(seg, '=')) {
- /* segment has an '='; it's a start of a new key */
- if (j) {
- tmp[j++] = NULL;
- status = HYDU_str_alloc_and_join(tmp,
&args[k++]);
- HYDU_ERR_POP(status, "error while joining
strings\n");
- HYDU_free_strlist(tmp);
- }
- j = 0;
-
- if (!seg)
- break;
- }
- else {
- /* no '='; part of the previous key */
- tmp[j++] = HYDU_strdup(" ");
- }
- tmp[j++] = HYDU_strdup(seg);
-
- seg = strtok(NULL, " ");
- }
- }
- }
- args[k++] = NULL;
- }
- else { /* PMI-v2 */
- delim = ";";
-
- tbuf = HYDU_strdup(buf);
- cmd = strtok(tbuf, delim);
- for (i = 0; i < HYD_NUM_TMP_STRINGS; i++) {
- args[i] = strtok(NULL, delim);
- if (args[i] == NULL)
- break;
- args[i] = HYDU_strdup(args[i]);
- }
- }
-
- /* Search for the PMI command in our table */
- status = HYDU_strsplit(cmd, &str1, pmi_cmd, '=');
- HYDU_ERR_POP(status, "string split returned error\n");
-
- fn_exit:
- HYDU_FREE(buf);
- if (tbuf)
- HYDU_FREE(tbuf);
- if (str1)
- HYDU_FREE(str1);
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-HYD_status HYD_pmcd_pmi_args_to_tokens(char *args[], struct HYD_pmcd_token
**tokens,
- int *count)
-{
- int i, j;
- char *arg;
- HYD_status status = HYD_SUCCESS;
-
- for (i = 0; args[i]; i++);
- *count = i;
- HYDU_MALLOC(*tokens, struct HYD_pmcd_token *, *count * sizeof(struct
HYD_pmcd_token),
- status);
-
- for (i = 0; args[i]; i++) {
- arg = HYDU_strdup(args[i]);
- (*tokens)[i].key = arg;
- for (j = 0; arg[j] && arg[j] != '='; j++);
- if (!arg[j]) {
- (*tokens)[i].val = NULL;
- }
- else {
- arg[j] = 0;
- (*tokens)[i].val = &arg[++j];
- }
- }
-
- fn_exit:
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-void HYD_pmcd_pmi_free_tokens(struct HYD_pmcd_token *tokens, int
token_count)
-{
- int i;
-
- for (i = 0; i < token_count; i++)
- HYDU_FREE(tokens[i].key);
- HYDU_FREE(tokens);
-}
-
-char *HYD_pmcd_pmi_find_token_keyval(struct HYD_pmcd_token *tokens, int
count, const char *key)
-{
- int i;
-
- for (i = 0; i < count; i++) {
- if (!strcmp(tokens[i].key, key))
- return tokens[i].val;
- }
-
- return NULL;
-}
-
-HYD_status HYD_pmcd_pmi_allocate_kvs(struct HYD_pmcd_pmi_kvs ** kvs, int
pgid)
-{
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- HYDU_MALLOC(*kvs, struct HYD_pmcd_pmi_kvs *, sizeof(struct
HYD_pmcd_pmi_kvs), status);
- HYDU_snprintf((*kvs)->kvs_name, PMI_MAXKVSLEN, "kvs_%d_%d", (int)
getpid(), pgid);
- (*kvs)->key_pair = NULL;
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-void HYD_pmcd_free_pmi_kvs_list(struct HYD_pmcd_pmi_kvs *kvs_list)
-{
- struct HYD_pmcd_pmi_kvs_pair *key_pair, *tmp;
-
- HYDU_FUNC_ENTER();
-
- key_pair = kvs_list->key_pair;
- while (key_pair) {
- tmp = key_pair->next;
- HYDU_FREE(key_pair);
- key_pair = tmp;
- }
- HYDU_FREE(kvs_list);
-
- HYDU_FUNC_EXIT();
-}
-
-HYD_status HYD_pmcd_pmi_add_kvs(const char *key, char *val, struct
HYD_pmcd_pmi_kvs *kvs,
- int *ret)
-{
- struct HYD_pmcd_pmi_kvs_pair *key_pair, *run;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- HYDU_MALLOC(key_pair, struct HYD_pmcd_pmi_kvs_pair *, sizeof(struct
HYD_pmcd_pmi_kvs_pair),
- status);
- HYDU_snprintf(key_pair->key, PMI_MAXKEYLEN, "%s", key);
- HYDU_snprintf(key_pair->val, PMI_MAXVALLEN, "%s", val);
- key_pair->next = NULL;
-
- *ret = 0;
-
- if (kvs->key_pair == NULL) {
- kvs->key_pair = key_pair;
- }
- else {
- run = kvs->key_pair;
- while (run->next) {
- if (!strcmp(run->key, key_pair->key)) {
- /* duplicate key found */
- *ret = -1;
- break;
- }
- run = run->next;
- }
- run->next = key_pair;
- }
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
=======================================
--- /user/runtime2/pct/pmi/common.h Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,83 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#ifndef COMMON_H_INCLUDED
-#define COMMON_H_INCLUDED
-
-#include "hydra.h"
-
-/* Generic definitions */
-#define PMI_MAXKEYLEN (64) /* max length of key in keyval space */
-#define PMI_MAXVALLEN (1024) /* max length of value in keyval space */
-#define PMI_MAXKVSLEN (256) /* max length of various names */
-
-struct HYD_pmcd_pmi_kvs_pair {
- char key[PMI_MAXKEYLEN];
- char val[PMI_MAXVALLEN];
- struct HYD_pmcd_pmi_kvs_pair *next;
-};
-
-struct HYD_pmcd_pmi_kvs {
- char kvs_name[PMI_MAXKVSLEN]; /* Name of this kvs */
- struct HYD_pmcd_pmi_kvs_pair *key_pair;
-};
-
-struct HYD_pmcd_hdr {
- /* The set of commands supported */
- enum HYD_pmcd_cmd {
- INVALID_CMD = 0, /* for sanity testing */
-
- /* UI to proxy commands */
- PROC_INFO,
- CKPOINT,
- PMI_RESPONSE,
- SIGNAL,
- STDIN,
-
- /* Proxy to UI commands */
- PID_LIST,
- EXIT_STATUS,
- PMI_CMD,
- STDOUT,
- STDERR,
- PROCESS_TERMINATED
- } cmd;
-
- /* Generic */
- int buflen;
-
- /* PMI_CMD */
- int pid; /* ID of the requesting process */
- int pmi_version; /* PMI version */
-
- /* STDOUT/STDERR */
- int pgid;
- int proxy_id;
- int rank;
-
- /* SIGNAL */
- int signum;
-};
-
-struct HYD_pmcd_token {
- char *key;
- char *val;
-};
-
-void HYD_pmcd_init_header(struct HYD_pmcd_hdr *hdr);
-HYD_status HYD_pmcd_pmi_parse_pmi_cmd(char *buf, int pmi_version, char
**pmi_cmd,
- char *args[]);
-HYD_status HYD_pmcd_pmi_args_to_tokens(char *args[], struct HYD_pmcd_token
**tokens,
- int *count);
-void HYD_pmcd_pmi_free_tokens(struct HYD_pmcd_token *tokens, int
token_count);
-char *HYD_pmcd_pmi_find_token_keyval(struct HYD_pmcd_token *tokens, int
count,
- const char *key);
-HYD_status HYD_pmcd_pmi_allocate_kvs(struct HYD_pmcd_pmi_kvs **kvs, int
pgid);
-void HYD_pmcd_free_pmi_kvs_list(struct HYD_pmcd_pmi_kvs *kvs_list);
-HYD_status HYD_pmcd_pmi_add_kvs(const char *key, char *val, struct
HYD_pmcd_pmi_kvs *kvs,
- int *ret);
-
-#endif /* COMMON_H_INCLUDED */
=======================================
--- /user/runtime2/pct/pmi/hydra.h Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,208 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#ifndef HYDRA_H_INCLUDED
-#define HYDRA_H_INCLUDED
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <ctype.h>
-#include <portals4.h>
-#include <portals4_util.h>
-
-extern char *HYD_dbg_prefix;
-
-#define HYD_TMPBUF_SIZE (64 * 1024)
-#define HYD_TMP_STRLEN 1024
-#define HYD_NUM_TMP_STRINGS 1000
-
-/* Status information */
-typedef enum {
- HYD_SUCCESS = 0,
- HYD_FAILURE, /* general failure */
-
- /* Silent errors */
- HYD_GRACEFUL_ABORT,
- HYD_TIMED_OUT,
-
- /* Regular errors */
- HYD_NO_MEM,
- HYD_SOCK_ERROR,
- HYD_INVALID_PARAM,
- HYD_INTERNAL_ERROR
-} HYD_status;
-
-typedef unsigned short HYD_event_t;
-
-/* Argument matching functions */
-struct HYD_arg_match_table {
- const char *arg;
- HYD_status(*handler_fn) (char *arg, char ***argv_p);
- void (*help_fn) (void);
-};
-
-struct HYD_env_global {
- struct HYD_env *system;
- struct HYD_env *user;
- struct HYD_env *inherited;
- char *prop;
-};
-
-/* Executable information */
-struct HYD_exec {
- char *exec[HYD_NUM_TMP_STRINGS];
- char *wdir;
-
- int proc_count;
- struct HYD_env *user_env;
- char *env_prop;
-
- int appnum;
-
- struct HYD_exec *next;
-};
-
-/* Global user parameters */
-struct HYD_user_global {
- /* RMK */
- char *rmk;
-
- /* Launcher */
- char *launcher;
- char *launcher_exec;
-
- /* Processor topology */
- char *binding;
- char *topolib;
-
- /* Checkpoint restart */
- char *ckpointlib;
- char *ckpoint_prefix;
- int ckpoint_num;
-
- /* Demux engine */
- char *demux;
-
- /* Network interface */
- char *iface;
-
- /* Other random parameters */
- int enablex;
- int debug;
-
- int auto_cleanup;
-
- struct HYD_env_global global_env;
-};
-
-/* Disable for now; we might add something here in the future */
-#define HYDU_FUNC_ENTER() do {} while (0)
-#define HYDU_FUNC_EXIT() do {} while (0)
-
-#define HYDU_dump_prefix(fp) \
- { \
- fprintf(fp, "[%s] ", HYD_dbg_prefix); \
- fflush(fp); \
- }
-
-#define HYDU_dump_noprefix(fp, ...) \
- { \
- fprintf(fp, __VA_ARGS__); \
- fflush(fp); \
- }
-
-#define HYDU_dump(fp, ...) \
- { \
- HYDU_dump_prefix(fp); \
- HYDU_dump_noprefix(fp, __VA_ARGS__); \
- }
-
-#define HYDU_error_printf(...) \
- { \
- HYDU_dump_prefix(stderr); \
- HYDU_dump_noprefix(stderr, "%s (%d): ", __FILE__, __LINE__); \
- HYDU_dump_noprefix(stderr, __VA_ARGS__); \
- }
-
-#define HYD_SILENT_ERROR(status) \
- (((status) == HYD_GRACEFUL_ABORT) || ((status) == HYD_TIMED_OUT))
-
-#define HYDU_ASSERT(x, status) \
- { \
- if (!(x)) { \
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, \
- "assert (%s) failed\n", #x); \
- } \
- }
-
-#define HYDU_IGNORE_TIMEOUT(status) \
- { \
- if ((status) == HYD_TIMED_OUT) \
- (status) = HYD_SUCCESS; \
- }
-
-#define HYDU_ERR_POP(status, ...) \
- { \
- if (status && !HYD_SILENT_ERROR(status)) { \
- HYDU_error_printf(__VA_ARGS__); \
- goto fn_fail; \
- } \
- else if (HYD_SILENT_ERROR(status)) { \
- goto fn_exit; \
- } \
- }
-
-#define HYDU_ERR_SETANDJUMP(status, error, ...) \
- { \
- status = error; \
- HYDU_ERR_POP(status, __VA_ARGS__); \
- }
-
-#define HYDU_ERR_CHKANDJUMP(status, chk, error, ...) \
- { \
- if ((chk)) \
- HYDU_ERR_SETANDJUMP(status, error, __VA_ARGS__); \
- }
-
-#define HYDU_MALLOC(p, type, size, status) \
- { \
- HYDU_ASSERT(size, status); \
- (p) = (type) HYDU_malloc((size)); \
- if ((p) == NULL) \
- HYDU_ERR_SETANDJUMP((status), HYD_NO_MEM, \
- "failed to allocate %d bytes\n", \
- (int) (size)); \
- }
-
-#define HYDU_FREE(p) \
- { \
- HYDU_free((void *) p); \
- }
-
-#define HYDU_mem_init()
-#define HYDU_strdup strdup
-#define HYDU_malloc malloc
-#define HYDU_free free
-#define HYDU_snprintf snprintf
-
-HYD_status HYDU_list_append_strlist(char **exec, char **client_arg);
-HYD_status HYDU_print_strlist(char **args);
-void HYDU_free_strlist(char **args);
-HYD_status HYDU_str_alloc_and_join(char **strlist, char **strjoin);
-HYD_status HYDU_strsplit(char *str, char **str1, char **str2, char sep);
-HYD_status HYDU_strdup_list(char *src[], char **dest[]);
-char *HYDU_size_t_to_str(size_t x);
-char *HYDU_int_to_str(int x);
-char *HYDU_int_to_str_pad(int x, int maxlen);
-int HYDU_strlist_lastidx(char **strlist);
-char **HYDU_str_to_strlist(char *str);
-
-#endif
=======================================
--- /user/runtime2/pct/pmi/pmi_server.c Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,99 +0,0 @@
-#include "hydra.h"
-#include "pmip.h"
-#include "pmip_pmi.h"
-#include "pct.h"
-#include "pct_internal.h"
-
-
-#define CLIENT_RXQ_NUM_BLOCKS 2
-#define CLIENT_RXQ_BLOCK_SIZE (PCT_MAX_PMI_MSG * 64)
-
-
-/* Structure that holds generic PMI state */
-struct HYD_pmcd_pmip HYD_pmcd_pmip;
-
-
-/**
- * Initializes the PCT's PMI state.
- * This gets us setup to receive PMI requests from clients and send responses.
- */
-int
-pmi_init(pct_t *pct, app_t *app, ptl_pt_index_t pt_index)
-{
- pmi_state_t *state = &app->pmi_state;
- ptl_process_t match_id;
- int status;
-
- state->client_pt_index = pt_index;
-
- /* Initialize the Portals RX queue for incoming client requests */
- PTL_CHECK(PtlEQAlloc(pct->ni_h, 1024, &state->client_rx_eq_h));
-
- match_id.phys.nid = pct->ptl_id.phys.nid;
- match_id.phys.pid = PTL_PID_ANY;
-
- PTL_CHECK(
- ptl_queue_init(pct->ni_h, app->user_id, match_id, pt_index,
- state->client_rx_eq_h,
- CLIENT_RXQ_NUM_BLOCKS, CLIENT_RXQ_BLOCK_SIZE,
- PCT_MAX_PMI_MSG, &state->client_rx_q)
- );
-
- /* Initialize the Portals state needed to send replies to clients */
- PTL_CHECK(PtlEQAlloc(pct->ni_h, 4, &state->client_tx_eq_h));
-
- state->client_tx_buf_size = PCT_MAX_PMI_MSG;
- state->client_tx_buf = MALLOC(state->client_tx_buf_size);
-
- state->client_tx_md.start = state->client_tx_buf;
- state->client_tx_md.length = state->client_tx_buf_size;
- state->client_tx_md.options = 0;
- state->client_tx_md.eq_handle = state->client_tx_eq_h;
- state->client_tx_md.ct_handle = PTL_CT_NONE;
-
- PTL_CHECK(PtlMDBind(pct->ni_h, &state->client_tx_md, &state->client_tx_md_h));
-
- /* Initialize the PMI key value store */
- status = HYD_pmcd_pmi_allocate_kvs(&HYD_pmcd_pmip.local.kvs, 0);
- if (status) {
- fprintf(stderr, "unable to allocate kvs space\n");
- abort();
- }
-
- return 0;
-}
-
-
-int
-pmi_process_event(pct_t *pct, app_t *app, const ptl_event_t *ev)
-{
- int status = HYD_SUCCESS;
- char *cmd = NULL;
- char *args[HYD_NUM_TMP_STRINGS] = { 0 };
- struct HYD_pmcd_pmip_pmi_handle *h;
-
- /* Parse out the command and trailing keyval strings */
- status = HYD_pmcd_pmi_parse_pmi_cmd((char *)ev->start, 1, &cmd, args);
- HYDU_ERR_POP(status, "unable to parse PMI command\n");
-
- /* Call the handler for the command */
- h = HYD_pmcd_pmip_pmi_v1;
- while (h->handler) {
- if (!strcmp(cmd, h->cmd)) {
- status = h->handler(pct, app, ev, args);
- HYDU_ERR_POP(status, "PMI handler returned error\n");
- goto fn_exit;
- }
- h++;
- }
-
- fn_exit:
- if (cmd)
- HYDU_FREE(cmd);
- HYDU_free_strlist(args);
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
=======================================
--- /user/runtime2/pct/pmi/pmip.h Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,91 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#ifndef PMIP_H_INCLUDED
-#define PMIP_H_INCLUDED
-
-#include "hydra.h"
-#include "common.h"
-
-struct HYD_pmcd_pmip_map {
- int left;
- int current;
- int right;
- int total;
-};
-
-struct HYD_pmcd_pmip {
- struct HYD_user_global user_global;
-
- struct {
- struct HYD_pmcd_pmip_map global_core_map;
- struct HYD_pmcd_pmip_map filler_process_map;
-
- int global_process_count;
- char *jobid;
-
- /* PMI */
- char *pmi_fd;
- char *pmi_port;
- int pmi_rank; /* If this is -1, we auto-generate it */
- char *pmi_process_mapping;
- } system_global; /* Global system parameters */
-
- struct {
- /* Upstream server contact information */
- char *server_name;
- int server_port;
- int control;
- } upstream;
-
- /* Currently our downstream only consists of actual MPI
- * processes */
- struct {
- int *out;
- int *err;
- int in;
-
- int *pid;
- int *exit_status;
-
- int *pmi_rank;
- int *pmi_fd;
- int *pmi_fd_active;
-
- int forced_cleanup;
- } downstream;
-
- /* Proxy details */
- struct {
- int id;
- int pgid;
- char *iface_ip_env_name;
- char *hostname;
- char *local_binding;
-
- int proxy_core_count;
- int proxy_process_count;
-
- char *spawner_kvs_name;
- struct HYD_pmcd_pmi_kvs *kvs; /* Node-level KVS space for node attributes */
-
- char **ckpoint_prefix_list;
-
- int retries;
- } local;
-
- /* Process segmentation information for this proxy */
- struct HYD_exec *exec_list;
-};
-
-extern struct HYD_pmcd_pmip HYD_pmcd_pmip;
-extern struct HYD_arg_match_table HYD_pmcd_pmip_match_table[];
-
-HYD_status HYD_pmcd_pmip_get_params(char **t_argv);
-void HYD_pmcd_pmip_kill_localprocs(void);
-HYD_status HYD_pmcd_pmip_control_cmd_cb(int fd, HYD_event_t events, void *userp);
-
-#endif /* PMIP_H_INCLUDED */
=======================================
--- /user/runtime2/pct/pmi/pmip_pmi.h Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,27 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#ifndef PMIP_PMI_H_INCLUDED
-#define PMIP_PMI_H_INCLUDED
-
-#include "hydra.h"
-#include "common.h"
-#include "pct_internal.h"
-
-/* PMI-1 specific definitions */
-extern struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_v1;
-
-/* PMI-2 specific definitions */
-extern struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_v2;
-
-struct HYD_pmcd_pmip_pmi_handle {
- const char *cmd;
- HYD_status(*handler) (pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[]);
-};
-
-extern struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_handle;
-
-#endif /* PMIP_PMI_H_INCLUDED */
=======================================
--- /user/runtime2/pct/pmi/pmip_pmi_v1.c Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,440 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include "pmip_pmi.h"
-#include "pmip.h"
-
-static HYD_status
-send_cmd_downstream(pmi_state_t *state,
- ptl_process_t target_id,
- const char *cmd)
-{
- ptl_event_t tx_ev;
-
- /* Copy the command to the pre-registered transmit buffer */
- strncpy(state->client_tx_buf, cmd, state->client_tx_buf_size);
- state->client_tx_buf[state->client_tx_buf_size - 1] = '\0';
-
- /* Send the response to the client */
- PTL_CHECK(
- PtlPut(state->client_tx_md_h, 0, strlen(state->client_tx_buf) + 1,
- PTL_NO_ACK_REQ, target_id, state->client_pt_index,
- 0, 0, NULL, 0)
- );
-
- /* Wait for the SEND_END */
- PTL_CHECK(PtlEQWait(state->client_tx_eq_h, &tx_ev));
- PTL_ASSERT(tx_ev.type == PTL_EVENT_SEND);
-
- return HYD_SUCCESS;
-}
-
-static HYD_status fn_init(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- int pmi_version, pmi_subversion, task_id, i;
- const char *tmp;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- strtok(args[0], "=");
- pmi_version = atoi(strtok(NULL, "="));
- strtok(args[1], "=");
- pmi_subversion = atoi(strtok(NULL, "="));
- strtok(args[2], "=");
- task_id = atoi(strtok(NULL, "="));
-
- if (pmi_version == 1 && pmi_subversion <= 1)
- tmp = HYDU_strdup("cmd=response_to_init pmi_version=1 pmi_subversion=1 rc=0\n");
- else if (pmi_version == 2 && pmi_subversion == 0)
- tmp = HYDU_strdup("cmd=response_to_init pmi_version=2 pmi_subversion=0 rc=0\n");
- else /* PMI version mismatch */
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
- "PMI version mismatch; %d.%d\n", pmi_version, pmi_subversion);
-
- /* Remember the Portals NID/PID for the process */
- for (i = 0; i < app->local_size; i++) {
- if (task_id == app->procs[i].task_id) {
- app->procs[i].ptl_id.phys.nid = ev->initiator.phys.nid;
- app->procs[i].ptl_id.phys.pid = ev->initiator.phys.pid;
- break;
- }
- }
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, tmp);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(tmp);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static HYD_status fn_get_maxes(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- int i;
- char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- i = 0;
- tmp[i++] = HYDU_strdup("cmd=maxes kvsname_max=");
- tmp[i++] = HYDU_int_to_str(PMI_MAXKVSLEN);
- tmp[i++] = HYDU_strdup(" keylen_max=");
- tmp[i++] = HYDU_int_to_str(PMI_MAXKEYLEN);
- tmp[i++] = HYDU_strdup(" vallen_max=");
- tmp[i++] = HYDU_int_to_str(PMI_MAXVALLEN);
- tmp[i++] = HYDU_strdup("\n");
- tmp[i++] = NULL;
-
- status = HYDU_str_alloc_and_join(tmp, &cmd);
- HYDU_ERR_POP(status, "unable to join strings\n");
- HYDU_free_strlist(tmp);
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(cmd);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static HYD_status fn_get_appnum(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- int i, idx;
- struct HYD_exec *exec;
- char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- /* Get the process index */
- for (i = 0; i < app->local_size; i++)
- if ((ev->initiator.phys.nid == app->procs[i].ptl_id.phys.nid) &&
- (ev->initiator.phys.pid == app->procs[i].ptl_id.phys.pid))
- break;
- idx = i;
- HYDU_ASSERT(idx < app->local_size, status);
-
- i = 0;
- for (exec = HYD_pmcd_pmip.exec_list; exec; exec = exec->next) {
- i += exec->proc_count;
- if (idx < i)
- break;
- }
-
- i = 0;
- tmp[i++] = HYDU_strdup("cmd=appnum appnum=");
- //tmp[i++] = HYDU_int_to_str(exec->appnum);
- tmp[i++] = HYDU_int_to_str(0); /* only support one exec for now , so hard code to 0 */
- tmp[i++] = HYDU_strdup("\n");
- tmp[i++] = NULL;
-
- status = HYDU_str_alloc_and_join(tmp, &cmd);
- HYDU_ERR_POP(status, "unable to join strings\n");
- HYDU_free_strlist(tmp);
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(cmd);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static HYD_status fn_get_my_kvsname(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
- int i;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- i = 0;
- tmp[i++] = HYDU_strdup("cmd=my_kvsname kvsname=");
- tmp[i++] = HYDU_strdup(HYD_pmcd_pmip.local.kvs->kvs_name);
- tmp[i++] = HYDU_strdup("\n");
- tmp[i++] = NULL;
-
- status = HYDU_str_alloc_and_join(tmp, &cmd);
- HYDU_ERR_POP(status, "unable to join strings\n");
- HYDU_free_strlist(tmp);
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(cmd);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static HYD_status fn_get_usize(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- int i;
- char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- i = 0;
- tmp[i++] = HYDU_strdup("cmd=universe_size size=");
- tmp[i++] = HYDU_int_to_str(app->universe_size);
- tmp[i++] = HYDU_strdup("\n");
- tmp[i++] = NULL;
-
- status = HYDU_str_alloc_and_join(tmp, &cmd);
- HYDU_ERR_POP(status, "unable to join strings\n");
- HYDU_free_strlist(tmp);
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(cmd);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static HYD_status fn_put(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- int i, ret;
- char *kvsname, *key, *val;
- char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
- struct HYD_pmcd_token *tokens;
- int token_count;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
- HYDU_ERR_POP(status, "unable to convert args to tokens\n");
-
- kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
- HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
- "unable to find token: kvsname\n");
-
- key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
- HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
- "unable to find token: key\n");
-
- val = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "value");
- if (val == NULL) {
- /* the user sent an empty string */
- val = HYDU_strdup("");
- }
-
- if (strcmp(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname))
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
- "kvsname (%s) does not match this group's kvs space (%s)\n",
- kvsname, HYD_pmcd_pmip.local.kvs->kvs_name);
-
- status = HYD_pmcd_pmi_add_kvs(key, val, HYD_pmcd_pmip.local.kvs, &ret);
- HYDU_ERR_POP(status, "unable to add keypair to kvs\n");
-
- i = 0;
- tmp[i++] = HYDU_strdup("cmd=put_result rc=");
- tmp[i++] = HYDU_int_to_str(ret);
- if (ret == 0) {
- tmp[i++] = HYDU_strdup(" msg=success");
- }
- else {
- tmp[i++] = HYDU_strdup(" msg=duplicate_key");
- tmp[i++] = HYDU_strdup(key);
- }
- tmp[i++] = HYDU_strdup("\n");
- tmp[i++] = NULL;
-
- status = HYDU_str_alloc_and_join(tmp, &cmd);
- HYDU_ERR_POP(status, "unable to join strings\n");
- HYDU_free_strlist(tmp);
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(cmd);
-
- fn_exit:
- HYD_pmcd_pmi_free_tokens(tokens, token_count);
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-
-static HYD_status fn_get(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- struct HYD_pmcd_pmi_kvs_pair *run;
- char *kvsname, *key, *val;
- char *tmp[HYD_NUM_TMP_STRINGS], *cmd;
- struct HYD_pmcd_token *tokens;
- int token_count, i;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- status = HYD_pmcd_pmi_args_to_tokens(args, &tokens, &token_count);
- HYDU_ERR_POP(status, "unable to convert args to tokens\n");
-
- kvsname = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "kvsname");
- HYDU_ERR_CHKANDJUMP(status, kvsname == NULL, HYD_INTERNAL_ERROR,
- "unable to find token: kvsname\n");
-
- key = HYD_pmcd_pmi_find_token_keyval(tokens, token_count, "key");
- HYDU_ERR_CHKANDJUMP(status, key == NULL, HYD_INTERNAL_ERROR,
- "unable to find token: key\n");
-
- val = NULL;
- if (!strcmp(key, "PMI_dead_processes")) {
- val = 0; /* FIXME: return actual number of dead processes */
- goto found_val;
- }
-
- /* Make sure the key value store name is what we expect */
- if (strcmp(HYD_pmcd_pmip.local.kvs->kvs_name, kvsname))
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
- "kvsname (%s) does not match this group's kvs space (%s)\n",
- kvsname, HYD_pmcd_pmip.local.kvs->kvs_name);
-
- /* Try to find the value associated with the key */
- for (run = HYD_pmcd_pmip.local.kvs->key_pair; run; run = run->next) {
- if (!strcmp(run->key, key)) {
- val = run->val;
- break;
- }
- }
-
-found_val:
- i = 0;
- tmp[i++] = HYDU_strdup("cmd=get_result rc=");
- if (val) {
- tmp[i++] = HYDU_strdup("0 msg=success value=");
- tmp[i++] = HYDU_strdup(val);
- }
- else {
- tmp[i++] = HYDU_strdup("-1 msg=key_");
- tmp[i++] = HYDU_strdup(key);
- tmp[i++] = HYDU_strdup("_not_found value=unknown");
- }
- tmp[i++] = HYDU_strdup("\n");
- tmp[i++] = NULL;
-
- status = HYDU_str_alloc_and_join(tmp, &cmd);
- HYDU_ERR_POP(status, "unable to join strings\n");
- HYDU_free_strlist(tmp);
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(cmd);
-
- fn_exit:
- HYD_pmcd_pmi_free_tokens(tokens, token_count);
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static HYD_status fn_barrier_out(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- const char *cmd;
- int i;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- cmd = HYDU_strdup("cmd=barrier_out\n");
-
- for (i = 0; i < app->local_size; i++) {
- status = send_cmd_downstream(&app->pmi_state, app->procs[i].ptl_id, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- }
-
- HYDU_FREE(cmd);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static HYD_status fn_barrier_in(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- static int barrier_count = 0;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- barrier_count++;
- if (barrier_count == app->local_size) {
- barrier_count = 0;
-
- /* Release everyone from the barrier */
- fn_barrier_out(pct, app, NULL, NULL);
- }
-
- HYDU_FUNC_EXIT();
- return status;
-}
-
-static HYD_status fn_finalize(pct_t *pct, app_t *app, const ptl_event_t *ev, char *args[])
-{
- const char *cmd;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- cmd = HYDU_strdup("cmd=finalize_ack\n");
-
- status = send_cmd_downstream(&app->pmi_state, ev->initiator, cmd);
- HYDU_ERR_POP(status, "error sending PMI response\n");
- HYDU_FREE(cmd);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- abort();
-}
-
-static struct HYD_pmcd_pmip_pmi_handle pmi_v1_handle_fns_foo[] = {
- {"init", fn_init},
- {"get_maxes", fn_get_maxes},
- {"get_appnum", fn_get_appnum},
- {"get_my_kvsname", fn_get_my_kvsname},
- {"get_universe_size", fn_get_usize},
- {"put", fn_put},
- {"get", fn_get},
- {"barrier_in", fn_barrier_in},
- {"barrier_out", fn_barrier_out},
- {"finalize", fn_finalize},
- {"\0", NULL}
-};
-
-struct HYD_pmcd_pmip_pmi_handle *HYD_pmcd_pmip_pmi_v1 = pmi_v1_handle_fns_foo;
=======================================
--- /user/runtime2/pct/pmi/string.c Wed Apr 3 13:43:34 2013
+++ /dev/null
@@ -1,274 +0,0 @@
-/* -*- Mode: C; c-basic-offset:4 ; -*- */
-/*
- * (C) 2008 by Argonne National Laboratory.
- * See COPYRIGHT in top-level directory.
- */
-
-#include "hydra.h"
-
-HYD_status HYDU_list_append_strlist(char **src_strlist, char **dest_strlist)
-{
- int i, j;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- i = HYDU_strlist_lastidx(dest_strlist);
- for (j = 0; src_strlist[j]; j++)
- dest_strlist[i++] = HYDU_strdup(src_strlist[j]);
- dest_strlist[i++] = NULL;
-
- HYDU_FUNC_EXIT();
- return status;
-}
-
-
-HYD_status HYDU_print_strlist(char **strlist)
-{
- int arg;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- for (arg = 0; strlist[arg]; arg++)
- HYDU_dump_noprefix(stdout, "%s ", strlist[arg]);
- HYDU_dump_noprefix(stdout, "\n");
-
- HYDU_FUNC_EXIT();
- return status;
-}
-
-
-void HYDU_free_strlist(char **strlist)
-{
- int arg;
-
- HYDU_FUNC_ENTER();
-
- for (arg = 0; strlist[arg]; arg++)
- HYDU_FREE(strlist[arg]);
-
- HYDU_FUNC_EXIT();
-}
-
-
-HYD_status HYDU_str_alloc_and_join(char **strlist, char **strjoin)
-{
- int len = 0, i, count;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- for (i = 0; strlist[i] != NULL; i++) {
- len += strlen(strlist[i]);
- }
-
- HYDU_MALLOC(*strjoin, char *, len + 1, status);
- count = 0;
- (*strjoin)[0] = 0;
-
- for (i = 0; strlist[i] != NULL; i++) {
- HYDU_snprintf(*strjoin + count, len - count + 1, "%s", strlist[i]);
- count += strlen(strlist[i]);
- }
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-
-HYD_status HYDU_strsplit(char *str, char **str1, char **str2, char sep)
-{
- int i;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- if (str == NULL)
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "strsplit");
-
- *str1 = HYDU_strdup(str);
- for (i = 0; (*str1)[i] && ((*str1)[i] != sep); i++);
-
- if ((*str1)[i] == 0) /* End of the string */
- *str2 = NULL;
- else {
- *str2 = HYDU_strdup(&((*str1)[i + 1]));
- (*str1)[i] = 0;
- }
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-
-HYD_status HYDU_strdup_list(char *src[], char **dest[])
-{
- int i, count;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- count = HYDU_strlist_lastidx(src);
- HYDU_MALLOC(*dest, char **, (count + 1) * sizeof(char *), status);
-
- for (i = 0; i < count; i++)
- (*dest)[i] = HYDU_strdup(src[i]);
- (*dest)[i] = NULL;
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return status;
-
- fn_fail:
- goto fn_exit;
-}
-
-
-char *HYDU_size_t_to_str(size_t x)
-{
- int len = 1, i;
- size_t max = 10;
- char *str = NULL;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- while (x >= max) {
- len++;
- max *= 10;
- }
- len++;
-
- HYDU_MALLOC(str, char *, len, status);
- HYDU_ERR_POP(status, "unable to allocate memory\n");
-
- for (i = 0; i < len; i++)
- str[i] = '0';
-
- HYDU_snprintf(str, len, "%llu", (unsigned long long) x);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return str;
-
- fn_fail:
- goto fn_exit;
-}
-
-
-char *HYDU_int_to_str(int x)
-{
- return HYDU_int_to_str_pad(x, 0);
-}
-
-
-char *HYDU_int_to_str_pad(int x, int maxlen)
-{
- int len = 1, max = 10, y;
- int actual_len, i;
- char *str = NULL;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- if (x < 0) {
- len++;
- y = -x;
- }
- else
- y = x;
-
- while (y >= max) {
- len++;
- max *= 10;
- }
-
- if (len > maxlen)
- actual_len = len + 1;
- else
- actual_len = maxlen + 1;
-
- HYDU_MALLOC(str, char *, actual_len, status);
- HYDU_ERR_POP(status, "unable to allocate memory\n");
-
- for (i = 0; i < actual_len; i++)
- str[i] = '0';
-
- HYDU_snprintf(str + actual_len - len - 1, len + 1, "%d", x);
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return str;
-
- fn_fail:
- goto fn_exit;
-}
-
-int HYDU_strlist_lastidx(char **strlist)
-{
- int i;
-
- for (i = 0; strlist[i]; i++);
-
- return i;
-}
-
-char **HYDU_str_to_strlist(char *str)
-{
- int argc = 0, i;
- char **strlist = NULL;
- char *p;
- HYD_status status = HYD_SUCCESS;
-
- HYDU_FUNC_ENTER();
-
- HYDU_MALLOC(strlist, char **, HYD_NUM_TMP_STRINGS * sizeof(char *), status);
- if (!strlist)
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
- "Unable to allocate mem for strlist\n");
-
- for (i = 0; i < HYD_NUM_TMP_STRINGS; i++)
- strlist[i] = NULL;
-
- p = str;
- while (*p) {
- while (isspace(*p))
- p++;
-
- if (argc >= HYD_NUM_TMP_STRINGS)
- HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "too many arguments in line\n");
-
- HYDU_MALLOC(strlist[argc], char *, HYD_TMP_STRLEN, status);
-
- /* Copy till you hit a space */
- i = 0;
- while (*p && !isspace(*p)) {
- strlist[argc][i] = *p;
- i++;
- p++;
- }
- if (i) {
- strlist[argc][i] = 0;
- argc++;
- }
- }
- if (strlist[argc])
- HYDU_FREE(strlist[argc]);
- strlist[argc] = NULL;
-
- fn_exit:
- HYDU_FUNC_EXIT();
- return strlist;
-
- fn_fail:
- goto fn_exit;
-}
=======================================
--- /user/Makefile.footer Fri Jul 26 14:40:19 2013
+++ /user/Makefile.footer Wed Aug 7 12:54:32 2013
@@ -5,19 +5,28 @@
OLIBRARIES = $(addprefix $O/,$(LIBRARIES))
OHEADERS = $(addprefix $O/,$(HEADERS))

-# Install directory
+# Install directories
OPREFIX=$(O)/$(BASE)/install
+OPREFIXLINUX=$(O)/$(BASE)/install-linux
+
+# Which install directory are we targeting?
+ifeq ($(TARGET), linux)
+ PREFIX = $(OPREFIXLINUX)
+else
+ PREFIX = $(OPREFIX)
+ CC_LDFLAGS += -static
+endif

# Tack on default include paths
INCDIRS += \
-I. \
- -I$(OPREFIX)/include \
+ -I$(PREFIX)/include \
-I$(BASE)/../include \
-I$O/$(BASE)/../include \
-I$O/$(BASE)/../include2 \

# Tack on default lib paths
-LIBDIRS += -L$(OPREFIX)/lib
+LIBDIRS += -L$(PREFIX)/lib

# The default target(s)
.PHONY: all
@@ -26,20 +35,35 @@
# The 'make install' target
.PHONY: install
install:: $(OPROGRAMS) $(OLIBRARIES)
- @test -d $(OPREFIX) || mkdir $(OPREFIX)
- @test -d $(OPREFIX)/bin || mkdir $(OPREFIX)/bin
- @test -d $(OPREFIX)/lib || mkdir $(OPREFIX)/lib
- @test -d $(OPREFIX)/include || mkdir $(OPREFIX)/include
- @test -d $(OPREFIX)/include/lwk || mkdir $(OPREFIX)/include/lwk
+ @test -d $(PREFIX) || mkdir $(PREFIX)
+ @test -d $(PREFIX)/bin || mkdir $(PREFIX)/bin
+ @test -d $(PREFIX)/lib || mkdir $(PREFIX)/lib
+ @test -d $(PREFIX)/include || mkdir $(PREFIX)/include
+ @test -d $(PREFIX)/include/lwk || mkdir $(PREFIX)/include/lwk
@for prog in $(OPROGRAMS); do \
- install -m 0755 $$prog $(OPREFIX)/bin; \
+ install -m 0755 $$prog $(PREFIX)/bin; \
+ done
+ @for lib in $(OLIBRARIES); do \
+ install -m 0644 $$lib $(PREFIX)/lib; \
+ done
+ @for hdr in $(OHEADERS); do \
+ install -m 0644 $$hdr $(PREFIX)/include/$(HEADERS_SUBDIR); \
done
+
+# Install headers and libraries in both directories when specified
+ifeq ($(TARGET),both)
+ @test -d $(OPREFIXLINUX) || mkdir $(OPREFIXLINUX)
+ @test -d $(OPREFIXLINUX)/bin || mkdir $(OPREFIXLINUX)/bin
+ @test -d $(OPREFIXLINUX)/lib || mkdir $(OPREFIXLINUX)/lib
+ @test -d $(OPREFIXLINUX)/include || mkdir $(OPREFIXLINUX)/include
+ @test -d $(OPREFIXLINUX)/include/lwk || mkdir $(OPREFIXLINUX)/include/lwk
@for lib in $(OLIBRARIES); do \
- install -m 0644 $$lib $(OPREFIX)/lib; \
+ install -m 0644 $$lib $(OPREFIXLINUX)/lib; \
done
@for hdr in $(OHEADERS); do \
- install -m 0644 $$hdr $(OPREFIX)/include/$(HEADERS_SUBDIR); \
+ install -m 0644 $$hdr $(OPREFIXLINUX)/include/$(HEADERS_SUBDIR); \
done
+endif

#
# Template used to generate the make rule for each program
@@ -91,7 +115,6 @@
CLEAN_FILES += $$O/$(1) $$($(1)_OOBJS)
endef

-
#
# Template used to generate the make rule for each library
#
@@ -157,7 +180,7 @@
buildprog = \
$(call build,LD $1, \
$(RM) $1; \
- $(CC) -static $(CC_LDFLAGS) $(LIBDIRS) -o $1 $2; \
+ $(CC) $(CC_LDFLAGS) $(LIBDIRS) -o $1 $2; \
)

buildrawobj = \
=======================================
--- /user/Makefile.header Mon Jan 21 16:24:27 2013
+++ /user/Makefile.header Wed Aug 7 12:54:32 2013
@@ -20,3 +20,5 @@
.PHONY:all
all::

+# By default, only install in lwk install directory
+TARGET=lwk
=======================================
--- /user/external/get.pl Mon Jul 1 10:18:52 2013
+++ /user/external/get.pl Wed Aug 7 12:54:32 2013
@@ -4,6 +4,8 @@

my $BASEDIR = `pwd`; chomp($BASEDIR);
my $INSTALLDIR = "$BASEDIR/../install";
+my $INSTALLDIR_LINUX = "$BASEDIR/../install-linux";
+my $XPMEM_DIR = "/path/to/xpmem/install/directory";

my @packages;

@@ -12,9 +14,17 @@
$portals4{directory} = "portals4";
$portals4{get_cmd} = "svn checkout http://portals4.googlecode.com/svn/trunk/ $portals4{directory}";
$portals4{preconfig_cmd} = "./autogen.sh";
-$portals4{config_cmd} = "./configure --enable-kitten --enable-ppe --disable-transport-ib --disable-transport-udp --disable-shared --with-xpmem=$INSTALLDIR --prefix=$INSTALLDIR";
+$portals4{config_cmd} = "./configure --enable-kitten --enable-ppe --disable-transport-ib --enable-transport-udp --disable-shared --with-xpmem=$INSTALLDIR --prefix=$INSTALLDIR";
push(@packages, \%portals4);

+my %portals4_linux;
+$portals4_linux{name} = "Portals4 SVN Trunk";
+$portals4_linux{directory} = "portals4_linux";
+$portals4_linux{get_cmd} = "cp -r $portals4{directory} $portals4_linux{directory}; cd $portals4_linux{directory}; rm Makefile; cd ..";
+$portals4_linux{preconfig_cmd} = "./autogen.sh";
+$portals4_linux{config_cmd} = "./configure --enable-ppe --disable-transport-ib --enable-transport-udp --disable-shared --with-xpmem=$XPMEM_DIR --prefix=$INSTALLDIR_LINUX";
+push(@packages, \%portals4_linux);
+
my %ompi;
$ompi{name} = "Open MPI SVN Trunk";
$ompi{directory} = "ompi";
=======================================
--- /user/runtime2/libpmi/Makefile Wed Apr 3 14:01:23 2013
+++ /user/runtime2/libpmi/Makefile Wed Aug 7 12:54:32 2013
@@ -6,13 +6,17 @@

PROGRAMS = test_pmi_hello

-libpmi_SOURCES = pmi.c simple_pmiutil.c
+PMI_COMMON_BASE=../pmi-common
+libpmi_SOURCES = pmi.c ${PMI_COMMON_BASE}/simple_pmiutil.c

test_pmi_hello_SOURCES = test_pmi_hello.c
test_pmi_hello_LDADD = -lpmi -lportals_util -lportals -lxpmem -lrt -lpthread

CFLAGS += -std=gnu99
-INCDIRS += -I../pct
+INCDIRS += -I../pct -I../pmi-common/
LIBDIRS += -L.

+# Install in both kitten and linux install directories
+TARGET=both
+
include $(BASE)/Makefile.footer
=======================================
--- /user/runtime2/libpmi/pmi.c Wed Apr 3 13:43:34 2013
+++ /user/runtime2/libpmi/pmi.c Wed Aug 7 12:54:32 2013
@@ -290,7 +290,7 @@
{
char *p;
int i;
-
+
PMI_initialized = PMI_UNINITIALIZED;

if ((p = getenv("PMI_DEBUG")) != NULL) {
@@ -487,7 +487,21 @@
int
PMI_KVS_Commit(const char *kvsname)
{
- /* no-op in this implementation */
+ /* Commits push our local kvs to the server */
+ char buf[PMIU_MAXLINE];
+ int status, rc;
+
+ status = snprintf(buf, PMIU_MAXLINE,
+ "cmd=commit kvsname=%s\n",
+ kvsname);
+
+ if (status < 0)
+ return PMI_FAIL;
+
+ status = GetResponse(buf, "commit_result", 0);
+ if (status != PMI_SUCCESS)
+ return status;
+
return PMI_SUCCESS;
}

=======================================
--- /user/runtime2/libpmi/test_pmi_hello.c Wed Apr 3 13:43:34 2013
+++ /user/runtime2/libpmi/test_pmi_hello.c Wed Aug 7 12:54:32 2013
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
+#include <unistd.h>
#include <pmi.h>

int
=======================================
--- /user/runtime2/pct/Makefile Mon Jul 29 10:48:11 2013
+++ /user/runtime2/pct/Makefile Wed Aug 7 12:54:32 2013
@@ -7,26 +7,30 @@
# Put pct.h in include/lwk dir
HEADERS_SUBDIR = lwk

+PMI_COMMON_BASE=../pmi-common
+
pct_SOURCES = \
pct.c \
malloc.c \
- pmi/common.c \
- pmi/string.c \
- pmi/pmip_pmi_v1.c \
- pmi/pmi_server.c \
+ ${PMI_COMMON_BASE}/common.c \
+ ${PMI_COMMON_BASE}/string.c \
+ ${PMI_COMMON_BASE}/pmip_pmi_v1.c \
+ ${PMI_COMMON_BASE}/pmi_server.c

CFLAGS += -std=gnu99 -D_GNU_SOURCE -DHAVE_KITTEN -Wall -pthread
-INCDIRS += -Ipmi
+INCDIRS += -I${PMI_COMMON_BASE}

pct_LDADD = -lportals -lportals_ppe -lportals_util -lxpmem -llwk -lrt
-ifeq ($(PIAPI),y)
+
+ifdef USE_PIAPI
CFLAGS += -DUSING_PIAPI
pct_LDADD += -lpiapi
endif
pct_LDADD += -lpthread

# Embed the app ELF executable in the PCT's ELF image
-pct_RAWDATA = ../libpmi/test_pmi_hello
+# pct_RAWDATA = ../libpmi/test_pmi_hello
+pct_RAWDATA = ../../../init_task
CC_LDFLAGS += -Wl,--section-start -Wl,.rawdata=0x1000000 -Wl,--allow-multiple-definition

include $(BASE)/Makefile.footer
=======================================
--- /user/runtime2/pct/pct.c Mon Jul 29 10:48:11 2013
+++ /user/runtime2/pct/pct.c Wed Aug 7 12:54:32 2013
@@ -16,7 +16,7 @@
#include <portals4_util.h>

#include "pct.h"
-#include "pct_internal.h"
+#include <pmi_server.h>

#ifdef USING_PIAPI
#include "piapi.h"
@@ -220,6 +220,7 @@
int world_size,
int universe_size,
int local_size,
+ int base_rank,
id_t user_id,
id_t group_id,
cpu_set_t avail_cpus,
@@ -227,24 +228,32 @@
)
{
app_t *app = &pct->app;
- int i, cpu, offset, src, dst, global_rank, global_size, rank, size;
+ int i, cpu, offset, src, dst, rank;
char env[1024];
char name[32];
- char * env_ptr;

- app->world_size = world_size;
- app->universe_size = universe_size;
- app->local_size = local_size;
+ if (world_size != -1)
+ app->world_size = world_size;
+
+ if (universe_size != -1)
+ app->universe_size = universe_size;
+
+ if (local_size != -1)
+ app->local_size = local_size;
+
+ if (base_rank != -1)
+ app->base_rank = base_rank;
+
app->user_id = user_id;
app->group_id = group_id;
app->avail_cpus = avail_cpus;

// Allocate a state structure for each application process
- app->procs = (process_t *)MALLOC(local_size * sizeof(process_t));
+ app->procs = (process_t *)MALLOC(app->local_size * sizeof(process_t));

// Pre-determine the IDs for each application process
i = 0;
- for (cpu = 0; (cpu <= CPU_SETSIZE) && (i < local_size); cpu++) {
+ for (cpu = 0; (cpu <= CPU_SETSIZE) && (i < app->local_size); cpu++) {
if (!CPU_ISSET(cpu, &app->avail_cpus))
continue;

@@ -255,20 +264,16 @@

++i;
}
-
- global_rank = ((env_ptr = getenv("PMI_RANK")) != NULL) ? atoi(env_ptr) : -1;
- global_size = ((env_ptr = getenv("PMI_SIZE")) != NULL) ? atoi(env_ptr) : -1;

// Portals early initialization.
// Must be done before a process's address space is created.
- for (i = 0; i < local_size; i++)
+ for (i = 0; i < app->local_size; i++)
portals_process_init_early(pct, &app->procs[i]);


/*************************************************************************/
printf("Creating address spaces...\n");
- for (i = 0; i < local_size; ++i) {
- rank = (global_rank > -1) ? global_rank : i;
- size = (global_size > -1) ? global_size : local_size;
+ for (i = 0; i < app->local_size; ++i) {
+ rank = app->base_rank + i;

app->procs[i].start_state.task_id = app->procs[i].task_id;
app->procs[i].start_state.cpu_id = app->procs[i].cpu_id;
@@ -276,18 +281,19 @@
app->procs[i].start_state.group_id = app->group_id;

sprintf(app->procs[i].start_state.task_name, "RANK%d", rank);
+ printf("Starting APP with PMI rank = %d\n", rank);

// Setup the process's environment.
// This includes info needed to contact PPE.
offset = 0;
offset += sprintf(env + offset, "PMI_RANK=%d, ", rank);
- offset += sprintf(env + offset, "PMI_SIZE=%d, ", size);
+ offset += sprintf(env + offset, "PMI_SIZE=%d, ", app->world_size);
offset += sprintf(env + offset, "%s", app->procs[i].ppe_info);

sprintf(name, "RANK-%d", rank);

CHECK(elf_load(elf_image, "app", app->procs[i].aspace_id, VM_PAGE_4KB,
- (1024 * 1024 * 16), // heap_size = 16 MB
+ (1024 * 1024 * 512), // heap_size = 512 MB
(1024 * 256), // stack_size = 256 KB
"", // argv_str
env, // envp_str
@@ -300,8 +306,8 @@


/*************************************************************************/
printf("Creating app<->app SMARTMAP mappings...\n");
- for (dst = 0; dst < local_size; dst++) {
- for (src = 0; src < local_size; src++) {
+ for (dst = 0; dst < app->local_size; dst++) {
+ for (src = 0; src < app->local_size; src++) {
// SMARTMAP slot 0 is reserved, so offset by one.
// Process with local index i goes in SMARTMAP slot i+1.
CHECK(aspace_smartmap(app->procs[src].start_state.aspace_id,
@@ -315,11 +321,24 @@

// Portals late initialization.
// Must be done after a process's address space is created.
- for (i = 0; i < local_size; i++)
+ for (i = 0; i < app->local_size; i++)
portals_process_init_late(pct, &app->procs[i]);

return 0;
}
+
+static int wait_for_launch(pct_t * pct) {
+ ptl_event_t ev;
+
+ PTL_CHECK(PtlEQWait(pct->app.pmi_state.client.rx_eq_h, &ev));
+
+ if (ev.type == PTL_EVENT_PUT)
+ CHECK(pmi_process_event(pct, &pct->app, &ev));
+
+ ptl_queue_process_event(pct->app.pmi_state.client.rx_q, &ev);
+
+ return 0;
+}

#ifdef USING_PIAPI
void
@@ -340,6 +359,7 @@
cpu_set_t cpuset;
int num_ranks;
int cpu, i;
+ char * do_wait;

// Figure out my address space ID
aspace_get_myid(&pct.aspace_id);
@@ -364,10 +384,26 @@
piapi_init( &cntx, PIAPI_MODE_PROXY, piapi_callback, PIAPI_AGNT_SADDR, PIAPI_AGNT_PORT );
#endif

- // Load the application into memory, but don't start it executing yet
- app_load(&pct, num_ranks, num_ranks, num_ranks, 1, 1, cpuset, (void *)elf_image);
+ ptl_process_t match_id = {.phys.nid = PTL_NID_ANY, .phys.pid = PTL_PID_ANY};
+ pmi_init(&pct, &pct.app, PCT_PMI_PT_INDEX, match_id);
+
+ do_wait = getenv("WAIT_LAUNCH");
+ if (do_wait && atoi(do_wait) == 1) {
+ // Don't create the tasks until you get the OK from the Linux server
+ printf("Waiting for job server to start task...");
+ fflush(stdout);
+ if (wait_for_launch(&pct)) {
+ printf("failed.\n");
+ return 0;
+ }
+ printf("OK\n");

- pmi_init(&pct, &pct.app, PCT_PMI_PT_INDEX);
+ // Load the application into memory using received values, but don't start it executing yet
+ app_load(&pct, -1, -1, -1, -1, 1, 1, cpuset, (void *)elf_image);
+ } else {
+ // Load the application into memory using default values, but don't start it executing yet
+ app_load(&pct, num_ranks, num_ranks, num_ranks, 0, 1, 1, cpuset, (void *)elf_image);
+ }


/*************************************************************************/
printf("Creating tasks...\n");
@@ -382,18 +418,17 @@
piapi_collect( cntx, PIAPI_PORT_CPU, 60, 1 );
#endif

-
/*************************************************************************/
printf("ENTERING PORTALS EVENT DISPATCH LOOP\n");
while (1) {
ptl_event_t ev;

// Handle PMI requests from local clients
- PTL_CHECK(PtlEQWait(pct.app.pmi_state.client_rx_eq_h, &ev));
+ PTL_CHECK(PtlEQWait(pct.app.pmi_state.client.rx_eq_h, &ev));

if (ev.type == PTL_EVENT_PUT)
CHECK(pmi_process_event(&pct, &pct.app, &ev));

- ptl_queue_process_event(pct.app.pmi_state.client_rx_q, &ev);
+ ptl_queue_process_event(pct.app.pmi_state.client.rx_q, &ev);
}

/*************************************************************************/

=======================================
--- /user/runtime2/pct/pct.h Mon Jul 29 10:48:11 2013
+++ /user/runtime2/pct/pct.h Wed Aug 7 12:54:32 2013
@@ -10,6 +10,10 @@
// The well-known portals table index for PMI messages
#define PCT_PMI_PT_INDEX 1

+// The well-known portals table index for PMI messages sent to
+// the PMI server node
+#define PCT_PMI_SERVER_PT_INDEX 2
+
// The well-known maximum size of PMI message, in bytes
#define PCT_MAX_PMI_MSG 1024

=======================================
--- /user/runtime2/portals_util/Makefile Tue Mar 26 10:01:29 2013
+++ /user/runtime2/portals_util/Makefile Wed Aug 7 12:54:32 2013
@@ -8,4 +8,7 @@

CFLAGS += -std=gnu99

+# Install in both install directories
+TARGET=both
+
include $(BASE)/Makefile.footer
Reply all
Reply to author
Forward
0 new messages