-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchapter7_big_data_handle.R
61 lines (47 loc) · 1.11 KB
/
chapter7_big_data_handle.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Handling Big Data ----

# Load the ff package (provides read.table.ffdf) and ggplot2.
# NOTE(review): nothing in this script calls a ggplot2 function; the import is
# kept in case it is relied on interactively.
library(ff)
library(ggplot2)

# Directory that holds the input data. The full path is handed to the reader
# instead of calling setwd(), so running this script no longer changes the
# working directory of the R session as a side effect.
data_dir <- "/home/emu/R Projects/Data Science with R"
iris_csv <- file.path(data_dir, "Iris.csv")

# Read the CSV file as an ff data frame, using read.csv as the parser
irisff <- read.table.ffdf(
  file = iris_csv,
  FUN = "read.csv")

# Inspect the class
class(irisff)

# See the names of the columns
names(irisff)

# Access the first 5 rows
irisff[1:5, ]
# Load the biglm package, which supplies the biglm() model-fitting function
library(biglm)
# Fit a linear model with petal width as the response and petal length as the
# single predictor, taking both columns from the irisff data frame
model <- biglm(
formula = Petal.Width ~ Petal.Length,
data = irisff)
# Summarize the fitted model
summary(model)
# Pull the two petal columns out of the ff data frame once (the empty `[]`
# subscript returns the column's values as a regular in-memory vector) rather
# than re-reading the same column for every plotting call
petal_length <- irisff$Petal.Length[]
petal_width <- irisff$Petal.Width[]

# Create a scatterplot of petal width against petal length
plot(
  x = petal_length,
  y = petal_width,
  main = "Iris Petal Length vs. Width",
  xlab = "Petal Length (cm)",
  ylab = "Petal Width (cm)")

# Get the fitted coefficients through the coef() accessor instead of building
# the model summary twice and indexing its internal matrix by position.
# The formula has one predictor, so element 1 is the y-intercept and
# element 2 is the slope.
fit_coefs <- coef(model)
b <- fit_coefs[[1]]
m <- fit_coefs[[2]]

# Draw the regression line on the plot. A straight line is fully determined
# by its two end points, so it is drawn across the observed range of petal
# lengths instead of through every (unsorted) data point.
length_range <- range(petal_length, na.rm = TRUE)
lines(
  x = length_range,
  y = m * length_range + b,
  col = "red",
  lwd = 3)
# Predict petal width for three new petal lengths (2, 5 and 7 cm).
# The new data frame also carries a Petal.Width column filled with zeros as a
# placeholder for the response named in the model formula
# NOTE(review): presumably predict() on this model needs the response column
# to be present in newdata -- confirm against the biglm documentation.
predict(
  model,
  newdata = data.frame(
    Petal.Length = c(2, 5, 7),
    Petal.Width = numeric(3)))