7.2 Data preprocessing
Download stock data using R’s quantmod package
Convert data to returns
Generate some descriptive statistics
Some plots
Data
# Run the following to download and save the data. This only needs to be
# done once, and again whenever the time period is changed.
library(quantmod)
library(pander)
library(xts)
library(TTR)
# Download the stock prices (BHP.AX). auto.assign = FALSE makes getSymbols()
# return the data so it can be assigned to a name of our choosing.
BHP <- getSymbols(
  "BHP.AX",
  from = "2019-01-01", to = "2021-07-31", auto.assign = FALSE
)
# Download the index (^AXJO) over the same period.
ASX <- getSymbols(
  "^AXJO",
  from = "2019-01-01", to = "2021-07-31", auto.assign = FALSE
)
# Make sure the output folder exists; saveRDS() fails if it does not.
dir.create("data", showWarnings = FALSE, recursive = TRUE)
# Save both as rds files (to be used in the TA chapter).
saveRDS(BHP, file = "data/bhp_prices.rds")
saveRDS(ASX, file = "data/asx200.rds")
- Convert to returns
library(quantmod)
library(pander)
library(xts)
library(TTR)
# Load the data from the saved files (not required if the download chunk
# above has just been executed in the same session).
BHP <- readRDS("data/bhp_prices.rds")
ASX <- readRDS("data/asx200.rds")
# Use the close prices only.
bhp2 <- BHP$BHP.AX.Close
asx2 <- ASX$AXJO.Close
# Convert prices to daily log returns.
bhp_ret <- dailyReturn(bhp2, type = "log")
asx_ret <- dailyReturn(asx2, type = "log")

# Merge the two with an 'inner' join so both series share the same dates.
data_lm1 <- merge.xts(bhp_ret, asx_ret, join = "inner")
# Convert to a data frame: the date index plus the two return columns.
data_lm2 <- data.frame(
  index(data_lm1),
  data_lm1$daily.returns,
  data_lm1$daily.returns.1
)
# Give the columns readable names.
colnames(data_lm2) <- c("Date", "bhp", "asx")
head(data_lm2) # the row names (dates) can be removed if required
Date bhp asx
2019-01-02 2019-01-02 0.000000000 0.000000000
2019-01-03 2019-01-03 0.000000000 0.013510839
2019-01-04 2019-01-04 -0.008947241 -0.002488271
2019-01-07 2019-01-07 0.029808847 0.011289609
2019-01-08 2019-01-08 0.001162482 0.006873792
2019-01-09 2019-01-09 -0.003782952 0.009721207
library(pastecs)
# Descriptive statistics for the two return columns (bhp, asx);
# norm = TRUE requests the normality statistics as well.
desc_stat1 <- stat.desc(data_lm2[, 2:3], norm = TRUE)
# Render the statistics as a single captioned table (split.table = Inf).
pander(desc_stat1, caption = "Descriptive Statistics", split.table = Inf)
| | bhp | asx |
---|---|---|
nbr.val | 653 | 653 |
nbr.null | 9 | 1 |
nbr.na | 0 | 0 |
min | -0.1557 | -0.102 |
max | 0.1128 | 0.06766 |
range | 0.2685 | 0.1697 |
sum | 0.4948 | 0.2853 |
median | 0 | 0.001163 |
mean | 0.0007577 | 0.0004369 |
SE.mean | 0.0007444 | 0.0005075 |
CI.mean.0.95 | 0.001462 | 0.0009966 |
var | 0.0003618 | 0.0001682 |
std.dev | 0.01902 | 0.01297 |
coef.var | 25.1 | 29.69 |
skewness | -0.4883 | -1.443 |
skew.2SE | -2.553 | -7.546 |
kurtosis | 9.991 | 13.32 |
kurt.2SE | 26.16 | 34.86 |
normtest.W | 0.9198 | 0.8266 |
normtest.p | 3.885e-18 | 5.497e-26 |