Thanks, Tom, that helped.
I attached the Java equivalent of the ./run_hdfs_job.py from the demo, based on the java client classes in the integration test you pointed me to.
I'm not sure whether that is deliberate, or a necessary addition on top of the existing type:yarn tag that is already explicitly included.
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.netflix.genie.client.JobClient;
import com.netflix.genie.client.configs.GenieNetworkConfiguration;
import com.netflix.genie.common.dto.ClusterCriteria;
import com.netflix.genie.common.dto.Job;
import com.netflix.genie.common.dto.JobExecution;
import com.netflix.genie.common.dto.JobMetadata;
import com.netflix.genie.common.dto.JobRequest;
import com.netflix.genie.common.dto.JobStatus;
import com.netflix.genie.common.exceptions.GenieTimeoutException;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.UUID;
/**
 * Java equivalent of the {@code run_hdfs_job.py} demo script: submits an
 * HDFS {@code dfs -ls input} job to Genie via the Java {@link JobClient}
 * and blocks until it completes.
 */
public class RunHdfsJob
{
    // Genie server endpoint. The original referenced BASE_URL without ever
    // declaring it (a compile error); this is the demo's default address —
    // TODO confirm against your demo environment.
    private static final String BASE_URL = "http://localhost:8080";
    // Hardcoding this for ease of use
    private static final String TARGET_SCHED = "test";
    // Set up by running the demo
    private static final String JOB_NAME = "Genie Demo HDFS Job";
    private static final String JOB_USER = "root";
    private static final String JOB_VERSION = "3.0.0";
    private static final String JOB_DESCRIPTION = "Genie 3 Test Job";

    /**
     * Builds and submits the job request, waits up to 10 minutes for
     * completion, then fetches the stored job, request, execution and
     * metadata records to exercise each client read call.
     *
     * @param args unused
     * @throws IOException           on client/server communication failure
     * @throws InterruptedException  if interrupted while waiting for completion
     * @throws GenieTimeoutException if the job does not finish within the timeout
     */
    public static void main(String[] args) throws IOException, InterruptedException, GenieTimeoutException
    {
        final GenieNetworkConfiguration genieNetworkConfiguration = new GenieNetworkConfiguration();
        // 20 s read timeout so slow demo responses don't abort the client.
        genieNetworkConfiguration.setReadTimeout(20000);
        final JobClient jobClient = new JobClient(BASE_URL, null, genieNetworkConfiguration);

        final String jobId = UUID.randomUUID().toString();
        // A cluster must carry BOTH tags to be selected; "sched:" + TARGET_SCHED
        // mirrors the tag applied by the demo setup.
        final List<ClusterCriteria> clusterCriteriaList
            = Lists.newArrayList(new ClusterCriteria(Sets.newHashSet("sched:" + TARGET_SCHED, "type:yarn")));
        final Set<String> commandCriteria = Sets.newHashSet("type:hdfs");
        final Set<String> configs = Sets.newHashSet();
        final Set<String> dependencies = Sets.newHashSet();
        final List<String> commandArgs = Lists.newArrayList(
            "dfs",
            "-ls",
            "input"
        );

        final JobRequest jobRequest = new JobRequest.Builder(
            JOB_NAME,
            JOB_USER,
            JOB_VERSION,
            clusterCriteriaList,
            commandCriteria
        )
            .withId(jobId)
            .withCommandArgs(commandArgs)
            .withDisableLogArchival(true)
            .withConfigs(configs)
            .withDependencies(dependencies)
            .withDescription(JOB_DESCRIPTION)
            .build();

        final String id = jobClient.submitJob(jobRequest);
        // 600000 ms = 10 minute completion timeout.
        final JobStatus jobStatus = jobClient.waitForCompletion(jobId, 600000);

        // Demo-style reads of every job resource; results are fetched to
        // verify the endpoints respond, not otherwise used.
        final Job job = jobClient.getJob(id);
        final JobRequest submittedRequest = jobClient.getJobRequest(jobId);
        final JobExecution jobExecution = jobClient.getJobExecution(jobId);
        final JobMetadata jobMetadata = jobClient.getJobMetadata(jobId);

        System.out.println(String.format("Job %s finished with status %s", id, jobStatus.toString()));
    }
}