티스토리 뷰
2017/09/28 LECTURE NOTE
Choi YT
Object : Understanding “REDUCE”
# --- Environment setup -------------------------------------------------------
# Attach the RHadoop packages with library() rather than require():
# library() stops with an error when a package is missing, while require()
# only returns FALSE with a warning, letting the script fail later instead.
library(rhdfs)
## Loading required package: rJava
##
## HADOOP_CMD=/home/stat/hadoop/hadoop-2.7.4/bin/hadoop
##
## Be sure to run hdfs.init()

# Initialise the HDFS connection; must be called after attaching rhdfs.
hdfs.init()

library(rmr2)
## Please review your hadoop settings. See help(hadoop.settings)
Hadoop Example 3
Generate 100 random samples from N(0, 1) and count the numbers greater than or less than 0.
# Example 3: draw 100 N(0, 1) samples and count how many fall below zero
# versus at or above zero, using one map-reduce job.
small.ints <- to.dfs(rnorm(100))

Freq <- mapreduce(
  input  = small.ints,
  # Map: label every value by its sign, each with count 1.
  map    = function(k, v) {
    keyval(ifelse(v < 0, "less_than_0", "greater_than_0"), 1)
  },
  # Reduce: sum the counts within each label.
  reduce = function(k, v) {
    keyval(k, sum(v))
  }
)

out <- from.dfs(Freq)
out
## $key
## [1] "less_than_0" "greater_than_0"
##
## $val
## [1] 51 49

# Convert the key/value pairs to a named vector and plot the counts.
x <- out$val
names(x) <- out$key
barplot(x)
Exercises
- Generate 1000 random samples from Binomial(5, 0.3) and count the elements.
- Generate 100 random samples from Geometric(0.8) and count the elements.
- Generate 1000 random samples from N(0, 1), classify them into the positives and negatives, and calculate the mean of each group.
Example 4
Generate 10 random samples from Binomial(5, 0.9) and find the minimum value. Repeat it 1000 times and count the number of the minimum values.
# Example 4: repeat 1000 times "draw 10 Binomial(5, 0.9) samples and take the
# minimum", then count how often each minimum value occurs.
s <- to.dfs(1:1000)

# First map: key = repetition id, value = the simulated minimum.
# vapply() replaces sapply(): it pins the result type to integer, whereas
# sapply() can silently change its return type on unusual input.
map1 <- function(k, v) {
  keyval(v, vapply(v, function(r) min(rbinom(10, 5, 0.9)), integer(1)))
}

# Second map: re-key each record by its simulated minimum with count 1.
map2 <- function(k, v) {
  keyval(v, 1)
}

# Reduce: total the counts for each distinct minimum value.
reduce2 <- function(k, v) {
  keyval(k, sum(v))
}

# Nest the two jobs: the inner job simulates, the outer job tabulates.
a <- mapreduce(input = mapreduce(input = s, map = map1),
               map = map2,
               reduce = reduce2)
from.dfs(a)
## $key
## [1] 1 2 3 4 5
##
## $val
## [1]   4  85 493 411   7
or
# Same computation as above, written as two separate jobs with inline
# anonymous map/reduce functions.
s <- to.dfs(1:1000)

# Job 1: simulate — value = min of 10 Binomial(5, 0.9) draws per repetition.
# vapply() replaces sapply() to pin the result type to integer.
a <- mapreduce(input = s,
               map = function(k, v) {
                 keyval(v, vapply(v, function(r) min(rbinom(10, 5, 0.9)),
                                  integer(1)))
               })

# Job 2: tabulate — re-key by the minimum and sum the counts.
b <- mapreduce(input = a,
               map = function(k, v) {
                 keyval(v, 1)
               },
               reduce = function(k, v) keyval(k, sum(v)))
from.dfs(b)
## $key
## [1] 1 2 3 4 5
##
## $val
## [1]   4  85 483 422   6
Exercises
Generate 10 random samples from Geometric(0.5) and find the maximum value. Repeat it 1000 times and count the number of the maximum values.
Generate 10 random samples from Cauchy(0, 1) and find the mean. Repeat it 1000 times, classify them into the positives and negatives, and calculate the mean of each group.
2017/09/28 SOLUTIONS
Choi YT
# --- Environment setup -------------------------------------------------------
# Attach the RHadoop packages with library() rather than require():
# library() stops with an error when a package is missing, while require()
# only returns FALSE with a warning, letting the script fail later instead.
library(rhdfs)
## Loading required package: rJava
##
## HADOOP_CMD=/home/stat/hadoop/hadoop-2.7.4/bin/hadoop
##
## Be sure to run hdfs.init()

# Initialise the HDFS connection; must be called after attaching rhdfs.
hdfs.init()

library(rmr2)
## Please review your hadoop settings. See help(hadoop.settings)
Exercise 3.1
# Exercise 3.1: tabulate 1000 draws from Binomial(5, 0.3) with map-reduce.
small.ints <- to.dfs(rbinom(1000, 5, 0.3))

Freq <- mapreduce(
  input  = small.ints,
  # Map: emit each observed value as a key with count 1.
  map    = function(k, v) keyval(v, 1),
  # Reduce: sum the counts for every distinct value.
  reduce = function(k, v) keyval(k, sum(v))
)

out <- from.dfs(Freq)
out
## $key
## [1] 0 1 2 3 4 5
##
## $val
## [1] 198 323 311 140  26   2

# Name the counts by their value and draw the frequency barplot.
x <- out$val
names(x) <- out$key
barplot(x)
Exercise 3.2
# Exercise 3.2: tabulate 100 draws from Geometric(0.8) with map-reduce.
small.ints <- to.dfs(rgeom(100, 0.8))

Freq <- mapreduce(
  input  = small.ints,
  # Map: emit each observed value as a key with count 1.
  map    = function(k, v) keyval(v, 1),
  # Reduce: sum the counts for every distinct value.
  reduce = function(k, v) keyval(k, sum(v))
)

out <- from.dfs(Freq)
out
## $key
## [1] 0 1 2 3
##
## $val
## [1] 73 23  3  1

# Name the counts by their value and draw the frequency barplot.
x <- out$val
names(x) <- out$key
barplot(x)
Exercise 3.3
# Exercise 3.3: draw 1000 N(0, 1) samples, split them by sign, and compute
# the mean of each sign group with map-reduce.
small.ints <- to.dfs(rnorm(1000))

Freq <- mapreduce(
  input  = small.ints,
  # Map: key = sign label, value = the observation itself.
  map    = function(k, v) {
    keyval(ifelse(v < 0, "less_than_0", "greater_than_0"), v)
  },
  # Reduce: average the observations within each sign group.
  reduce = function(k, v) {
    keyval(k, mean(v))
  }
)

out <- from.dfs(Freq)
out
## $key
## [1] "less_than_0" "greater_than_0"
##
## $val
## [1] -0.7531157  0.7885698

# Name the group means and display them as a barplot.
x <- out$val
names(x) <- out$key
barplot(x)
Exercise 4.1
# Exercise 4.1: repeat 1000 times "draw 10 Geometric(0.5) samples and take
# the maximum", then count how often each maximum value occurs.
s <- to.dfs(1:1000)

# First map: key = repetition id, value = the simulated maximum.
# vapply() replaces sapply() to pin the result type to integer.
map1 <- function(k, v) {
  keyval(v, vapply(v, function(r) max(rgeom(10, 0.5)), integer(1)))
}

# Second map: re-key each record by its simulated maximum with count 1.
map2 <- function(k, v) {
  keyval(v, 1)
}

# Reduce: total the counts for each distinct maximum value.
reduce2 <- function(k, v) {
  keyval(k, sum(v))
}

# Nest the two jobs: the inner job simulates, the outer job tabulates.
a <- mapreduce(input = mapreduce(input = s, map = map1),
               map = map2,
               reduce = reduce2)
from.dfs(a)
## $key
## [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13
##
## $val
## [1]   2  51 211 274 197 123  68  31  18  11   9   2   1   2
or
# Same computation as above, written as two separate jobs with inline
# anonymous map/reduce functions.
s <- to.dfs(1:1000)

# Job 1: simulate — value = max of 10 Geometric(0.5) draws per repetition.
# vapply() replaces sapply() to pin the result type to integer.
a <- mapreduce(input = s,
               map = function(k, v) {
                 keyval(v, vapply(v, function(r) max(rgeom(10, 0.5)),
                                  integer(1)))
               })

# Job 2: tabulate — re-key by the maximum and sum the counts.
b <- mapreduce(input = a,
               map = function(k, v) {
                 keyval(v, 1)
               },
               reduce = function(k, v) keyval(k, sum(v)))
from.dfs(b)
## $key
## [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13
##
## $val
## [1]   1  60 212 264 202 118  69  31  18  11   9   2   1   2
Exercise 4.2
# Exercise 4.2: repeat 1000 times "draw 10 Cauchy(0, 1) samples and take the
# mean", classify the means by sign, and average each sign group.
s <- to.dfs(1:1000)

# First map: key = repetition id, value = mean of 10 Cauchy(0, 1) draws.
# vapply() replaces sapply() so the result type is always numeric.
map1 <- function(k, v) {
  keyval(v, vapply(v, function(r) mean(rcauchy(10)), numeric(1)))
}

# Second map: re-key each simulated mean by its sign label.
map2 <- function(k, v) {
  keyval(ifelse(v < 0, "less_than_0", "greater_than_0"), v)
}

# Reduce: average the simulated means within each sign group.
reduce2 <- function(k, v) {
  keyval(k, mean(v))
}

# Nest the two jobs: the inner job simulates, the outer job aggregates.
a <- mapreduce(input = mapreduce(input = s, map = map1),
               map = map2,
               reduce = reduce2)
from.dfs(a)
## $key
## [1] "less_than_0" "greater_than_0"
##
## $val
## [1] -2.998590  6.106552
or
# Same computation as above, written as two separate jobs with inline
# anonymous map/reduce functions.
s <- to.dfs(1:1000)

# Job 1: simulate — value = mean of 10 Cauchy(0, 1) draws per repetition.
# vapply() replaces sapply() so the result type is always numeric.
a <- mapreduce(input = s,
               map = function(k, v) {
                 keyval(v, vapply(v, function(r) mean(rcauchy(10)),
                                  numeric(1)))
               })

# Job 2: classify each simulated mean by sign and average each sign group.
b <- mapreduce(input = a,
               map = function(k, v) {
                 keyval(ifelse(v < 0, "less_than_0", "greater_than_0"), v)
               },
               reduce = function(k, v) keyval(k, mean(v)))
from.dfs(b)
## $key
## [1] "less_than_0" "greater_than_0"
##
## $val
## [1] -2.935838  6.244269
'Rhadoop' 카테고리의 다른 글
RHADOOP - WORD COUNT & WORD CLOUD -1 (17/11/09 Lecture Note) (0) | 2017.11.28 |
---|---|
RHADOOP - K-means Clustering (17/11/02 Lecture Note) (0) | 2017.11.28 |
RHADOOP - Linear Regression (17/10/26 Lecture Note) (0) | 2017.11.28 |
RHADOOP - HADOOP STREAMING (17/10/12 Lecture Note) (0) | 2017.11.28 |
RHADOOP MAPREDUCE -1. MAP (17/09/21 Lecture Note) (0) | 2017.11.28 |