Thanks,
Mark
NOTE: I sent a similar question to R-help just before your response
came in. That was more impatience on my part! - MWK
- Mark
My basic issue at this point is that MyZoo has an entry for every
possible date, while MyData has entries only for the dates where there
is data.
Good morning Ted,
I think merge could be part of the solution, but take a look at the
code below and tell me how I can possibly change it to get this
working the way I require.
X is the original stock trade data. (1 month - August only) This is
trade-by-trade, when ever they happen. Some days there are no trades,
other days there are multiple trades.
Y is a new data.frame created to merge against. It has only the date
range than I'm interested in looking at. (2 months by design to ensure
date coverage if August has a trade on every date.
Z is my attempt as using merge for the first time.
What works:
- merge works fine, in a sense. All trades in X matching dates in Y
are copied to Z.
What doesn't work yet:
- I need the cumsum of PL_Pos on a given date. For instance, Z has 3
trades on 2009-08-27. I need these values summed into a single value
and only that value in merged into Z so that I have a single even on
2009-08-27.
- I need the NAs converted to 0.
If I could figure out how to do those two things then I'd be able
to make the calendar-based plot that I need.
For my purposes - if it's easier - Z doesn't need to be a complete
merge. It only needs MyDate from Y and the cumsum(X$PL_Pos) for each
date.
Off looking for answers.
Thanks,
Mark
TStoDate = function (TSDate) {
X = strptime(TSDate + 19e6L, "%Y%m%d")
return(as.Date(X))
}
X = structure(list(Trade = 1951:1971, PosType = c(1, 1, -1, -1, -1,
1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, -1, -1, 1, 1), EnDate = c(1090803,
1090804, 1090805, 1090806, 1090806, 1090810, 1090811, 1090812,
1090813, 1090817, 1090819, 1090820, 1090820, 1090824, 1090825,
1090825, 1090826, 1090826, 1090827, 1090827, 1090827), EnTime = c(1004,
812, 641, 706, 1103, 1117, 633, 641, 641, 645, 641, 641, 958,
641, 919, 1037, 650, 853, 641, 932, 932), ExDate = c(1090803,
1090804, 1090805, 1090806, 1090806, 1090810, 1090811, 1090812,
1090813, 1090817, 1090819, 1090820, 1090820, 1090824, 1090825,
1090825, 1090826, 1090826, 1090827, 1090827, 1090827), ExTime = c(1259,
1058, 1258, 1258, 1259, 1311, 702, 1258, 1258, 1258, 1258, 1258,
1313, 1258, 1037, 1313, 1311, 1313, 1258, 1313, 1300), PL_Pos = c(174,
-26, 614, 344, -26, 414, -626, 544, -106, -146, 1004, 344, 224,
-716, -176, 44, 354, -346, -296, 564, 354)), .Names = c("Trade",
"PosType", "EnDate", "EnTime", "ExDate", "ExTime", "PL_Pos"), class =
"data.frame", row.names = c("733",
"734", "3631", "3641", "736", "2403", "2413", "3651", "3661",
"3671", "3681", "3691", "1303", "3701", "1304", "1305", "2432",
"1306", "3712", "1307", "4214"))
X$MyDate = TStoDate(X$EnDate)
X
days <- seq(as.Date("2009-07-01"), Sys.Date(), by = "day")
Y = data.frame(MyDate=days)
Y
Z = merge(X,Y, by.x="MyDate", by.y="MyDate", all.y=TRUE)
Z
dim(X)
dim(Y)
dim(Z)
X11(width=8, height=4)
par(mfrow=c(1,2))
plot(cumsum(X$PL_Pos), type="l")
plot(cumsum(Z$PL_Pos), type="l")
The TStoDate function is only required to translate this specific
date/time format into what R understand. Others might not need that
for their data.frames.
Cheers,
Mark
TStoDate = function (TSDate) {
X = strptime(TSDate + 19e6L, "%Y%m%d")
return(as.Date(X))
}
X = structure(list(Trade = 1951:1971, PosType = c(1, 1, -1, -1, -1,
1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, 1, -1, -1, 1, 1), EnDate = c(1090803,
1090804, 1090805, 1090806, 1090806, 1090810, 1090811, 1090812,
1090813, 1090817, 1090819, 1090820, 1090820, 1090824, 1090825,
1090825, 1090826, 1090826, 1090827, 1090827, 1090827), EnTime = c(1004,
812, 641, 706, 1103, 1117, 633, 641, 641, 645, 641, 641, 958,
641, 919, 1037, 650, 853, 641, 932, 932), ExDate = c(1090803,
1090804, 1090805, 1090806, 1090806, 1090810, 1090811, 1090812,
1090813, 1090817, 1090819, 1090820, 1090820, 1090824, 1090825,
1090825, 1090826, 1090826, 1090827, 1090827, 1090827), ExTime = c(1259,
1058, 1258, 1258, 1259, 1311, 702, 1258, 1258, 1258, 1258, 1258,
1313, 1258, 1037, 1313, 1311, 1313, 1258, 1313, 1300), PL_Pos = c(174,
-26, 614, 344, -26, 414, -626, 544, -106, -146, 1004, 344, 224,
-716, -176, 44, 354, -346, -296, 564, 354)), .Names = c("Trade",
"PosType", "EnDate", "EnTime", "ExDate", "ExTime", "PL_Pos"), class =
"data.frame", row.names = c("733",
"734", "3631", "3641", "736", "2403", "2413", "3651", "3661",
"3671", "3681", "3691", "1303", "3701", "1304", "1305", "2432",
"1306", "3712", "1307", "4214"))
X$MyDate = TStoDate(X$EnDate)
X1 = aggregate(X$PL_Pos, list(X$MyDate), sum)
colnames(X1)<-c("MyDate","PL_SUM")
days <- seq(as.Date("2009-07-01"), Sys.Date(), by = "day")
Y = data.frame(MyDate=days)
Z = merge(X1,Y, by.x="MyDate", by.y="MyDate", all.y=TRUE)
Z
Z$PL_SUM[is.na(Z$PL_SUM)] <- 0
dim(X)
dim(Y)
dim(Z)
X11(width=12, height=6)
par(mfrow=c(1,2))
plot(cumsum(X$PL_Pos), type="l")
plot(cumsum(Z$PL_SUM) ~ Z$MyDate, type="l")