> rhput("/home/admtcp1/ABP/data/teaching_item_for_analysis.csv", "/user/admtcp1/ABP/data/teaching_item_for_analysis.csv")
> rhexists("/user/admtcp1/ABP/data/teaching_item_for_analysis.csv")
[1] TRUE
> hdfsTI
hdfsConn connection
loc=/user/admtcp1/ABP/data/teachingItems/TI; type=text
> rhls("/user/admtcp1/ABP/data/teachingItems")
permission owner group size modtime file
1 drwxrwxrwx admtcp1 hdfs 0 2015-12-18 09:19 /user/admtcp1/ABP/data/teachingItems/TI
> f = drRead.csv("/user/admtcp1/ABP/data/teaching_item_for_analysis.csv",
+ output = hdfsTI, colClasses = "character", header = T)
Error in drRead.table(file = file, header = header, sep = sep, quote = quote, :
output must be a localDiskConn object with input text file on disk
Here’s my code as it stands now. I saw Jeremiah’s comments on https://github.com/tesseradata/datadr/issues/73 and believe I’m implementing things as suggested there — “believe” being the key word. Let me know if this helps at all.
Trenton
library(Rhipe)
rhinit()
library(matlab)
library(datadr)
library(trelliscope)
library(ggplot2)
library(reshape2)
library(mapproj)
## create the HDFS connection ##
rhmkdir("/user/admtcp1/ABP/data/teachingItems", 777)
rhchmod("/user/admtcp1/ABP/data/teachingItems", 777)
# input #
hdfsTIcsv = hdfsConn("/user/admtcp1/ABP/data/teachingItems/TIcsv", autoYes = T, type = "text")
## push teachingItems file to hdfs ##
rhput("/home/admtcp1/ABP/data/teaching_item_for_analysis.csv",
"/user/admtcp1/ABP/data/teachingItems/TIcsv/teaching_item_for_analysis.csv")
rhchmod("/user/admtcp1/ABP/data/teachingItems/TIcsv/teaching_item_for_analysis.csv", 777)
# output #
hdfsTI = hdfsConn("/user/admtcp1/ABP/data/teachingItems/TI", autoYes = T)
f = drRead.csv(hdfsTIcsv, output = hdfsConn("/user/admtcp1/ABP/data/teachingItems/TI", autoYes = T),
+ colClasses = "character", header = T)
* Attempting to create directory... success
* Saving connection attributes
* Directory is empty... move some data in here
* testing read on a subset... ok
* Reading in existing 'ddo' attributes
Saving 11 parameters to /tmp/rhipe-temp-params-e4001e6269dfbac9d18bd89681c9bf23 (use rhclean to delete all temp files)
[Fri Dec 18 12:32:07 2015] Name:2015-12-18 12:32:06 Job: job_1447490888639_0124 State: PREP Duration: 0.277
URL: http://l13187.ldschurch.org:8088/proxy/application_1447490888639_0124/
pct numtasks pending running complete killed failed_attempts killed_attempts
map 0 0 0 0 0 0 0 0
reduce 0 0 0 0 0 0 0 0
Waiting 5 seconds
Error in .jcall("RJavaTools", "Ljava/lang/Object;", "invokeMethod", cl, :
java.io.FileNotFoundException: File does not exist: /user/admtcp1/ABP/data/teachingItems/TI
In addition: Warning message:
In Rhipe:::rhwatch.runner(job = job, mon.sec = mon.sec, readback = readback, :
Job failure, deleting output: /user/admtcp1/ABP/data/teachingItems/TI:
>
> traceback()
18: stop(list(message = "java.io.FileNotFoundException: File does not exist: /user/admtcp1/ABP/data/teachingItems/TI",
call = .jcall("RJavaTools", "Ljava/lang/Object;", "invokeMethod",
cl, .jcast(if (inherits(o, "jobjRef") || inherits(o,
"jarrayRef")) o else cl, "java/lang/Object"), .jnew("java/lang/String",
method), j_p, j_pc, use.true.class = TRUE, evalString = simplify,
evalArray = FALSE), jobj = <S4 object of class "jobjRef">))
17: .Call(RJavaCheckExceptions, silent)
16: .jcheck(silent = FALSE)
15: .jcall("RJavaTools", "Ljava/lang/Object;", "invokeMethod", cl,
.jcast(if (inherits(o, "jobjRef") || inherits(o, "jarrayRef")) o else cl,
"java/lang/Object"), .jnew("java/lang/String", method),
j_p, j_pc, use.true.class = TRUE, evalString = simplify,
evalArray = FALSE)
14: .jrcall(x, name, ...)
13: rhoptions()$server$rhdel(folder)
12: rhdel(ofolder)
11: Rhipe:::rhwatch.runner(job = job, mon.sec = mon.sec, readback = readback,
...)
10: rhwatch(setup = setup, map = map, reduce = reduce, input = rhfmt(locs,
type = types[1]), output = outFile, mapred = control$mapred,
readback = FALSE, parameters = params)
9: mrExecInternal.kvHDFSList(data, setup = setup, map = map, reduce = reduce,
output = output, control = control, params = params)
8: mrExecInternal(data, setup = setup, map = map, reduce = reduce,
output = output, control = control, params = params)
7: mrExec(ddo(file), map = map, reduce = 0, control = control, output = output,
overwrite = overwrite, params = c(params, parList), packages = packages)
6: inherits(conn, "ddo")
5: ddf(mrExec(ddo(file), map = map, reduce = 0, control = control,
output = output, overwrite = overwrite, params = c(params,
parList), packages = packages))
4: readTable.hdfsConn(file, rowsPerBlock, skip, header, hd, hdText,
readTabParams, postTransFn, output, overwrite, params, packages,
control)
3: readTable(file, rowsPerBlock, skip, header, hd, hdText, readTabParams,
postTransFn, output, overwrite, params, packages, control)
2: drRead.table(file = file, header = header, sep = sep, quote = quote,
dec = dec, fill = fill, comment.char = comment.char, ...)
1: drRead.csv(hdfsTIcsv, output = hdfsConn("/user/admtcp1/ABP/data/teachingItems/TI",
autoYes = T), colClasses = "character", header = T)
>
> library(Rhipe)
Loading required package: codetools
Loading required package: rJava
------------------------------------------------
| Please call rhinit() else RHIPE will not run |
------------------------------------------------
> rhinit()
Rhipe: Using Rhipe.jar file
Initializing Rhipe v0.75.1.4
2015-12-18 14:51:18,678 WARN [main][NativeCodeLoader] Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2015-12-18 14:51:20,978 WARN [main][DomainSocketFactory] The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
Initializing mapfile caches
> library(datadr)
> sessionInfo()
R version 3.1.3 (2015-03-09)
Platform: x86_64-unknown-linux-gnu (64-bit)
Running under: Red Hat Enterprise Linux Server 7.1 (Maipo)
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=en_US.UTF-8
[9] LC_ADDRESS=en_US.UTF-8 LC_TELEPHONE=en_US.UTF-8
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] datadr_0.7.5.9 Rhipe_0.75.1.4 rJava_0.9-7 codetools_0.2-10
loaded via a namespace (and not attached):
[1] assertthat_0.1 chron_2.3-47 data.table_1.9.6 DBI_0.3.1
[5] digest_0.6.8 dplyr_0.4.3 grid_3.1.3 hexbin_1.27.1
[9] lattice_0.20-33 magrittr_1.5 parallel_3.1.3 R6_2.1.1
[13] Rcpp_0.12.1 tools_3.1.3
<testdump.rda>