Bas Bossink
2-2-2016
R is a free software environment for statistical computing and graphics.
In “Stages in the Evolution of S”, John Chambers writes:
“[W]e wanted users to be able to begin in an interactive environment, where they did not consciously think of themselves as programming. Then as their needs became clearer and their sophistication increased, they should be able to slide gradually into programming, when the language and system aspects would become more important.”
# is the comment character
help() is the function for getting help
help("plot")
?plot
TRUE
[1] TRUE
FALSE
[1] FALSE
"Hello, world!"
[1] "Hello, world!"
13.37
[1] 13.4
37
[1] 37
c(), 'concatenate', creates a vector
c(1,2,3)
[1] 1 2 3
c(1, TRUE, "Flinstone")
[1] "1" "TRUE" "Flinstone"
c(1,2)[1]
[1] 1
c(3,4)[2]
[1] 4
c()[1]
NULL
list(1,TRUE)
[[1]]
[1] 1
[[2]]
[1] TRUE
str(list(1,TRUE))
List of 2
$ : num 1
$ : logi TRUE
list(1,TRUE)[[2]]
[1] TRUE
: is the range operator
1:4
[1] 1 2 3 4
6:3
[1] 6 5 4 3
seq(0, 1, 0.21)
[1] 0.00 0.21 0.42 0.63 0.84
fred <- 1:3
wilma <- 3:6
fred * wilma
[1] 3 8 15 6
a <- 1:4
dim(a)
NULL
dim(a) <- c(2,2)
a
[,1] [,2]
[1,] 1 3
[2,] 2 4
NA
[1] NA
v <- c(1, NA, 3)
sum(v)
[1] NA
The na.rm parameter removes missing values before computing
v <- c(1, NA, 3)
sum(v, na.rm = TRUE)
[1] 4
factor(c("Medium", "High"), c("Low", "Medium", "High"), ordered= TRUE)
[1] Medium High
Levels: Low < Medium < High
c <- data.frame(married=c(TRUE,NA), medication=c("a", "b"), satisfaction=c(1,2))
c
married medication satisfaction
1 TRUE a 1
2 NA b 2
c[,1]
[1] TRUE NA
c[, "married"]
[1] TRUE NA
The $ operator extracts a column by name
c$medication
[1] a b
Levels: a b
c[1, 2:3]
medication satisfaction
1 a 1
names(c)
[1] "married" "medication" "satisfaction"
names(c)[1] <- "marital.status"
names(c)
[1] "marital.status" "medication" "satisfaction"
git clone git://git.kernel.org/.../linux-stable.git
cd linux-stable
git checkout -b stable-v4.3.3 v4.3.3
cloc --csv --out=linux-4.3.3.csv .
freebsd <- read.csv("freebsd-10.2.csv")
linux <- read.csv("linux-4.3.3.csv")
openbsd <- read.csv("openbsd-5.8.csv")
minix <- read.csv("minix-3.3.csv")
str(linux)
'data.frame': 23 obs. of 5 variables:
$ files : int 21978 16944 1406 176 2052 45 162 39 8 8 ...
$ language: Factor w/ 23 levels "ASP.Net","Assembly",..: 6 8 2 21 13 17 5 18 23 10 ...
$ blank : int 2113094 410387 47343 3468 7791 4495 1629 1233 639 292 ...
$ comment : int 1978872 703038 110270 242 7590 3557 3054 1261 355 289 ...
$ code : int 10809997 2610820 242570 51490 32681 23636 8878 7316 4311 1815 ...
head(linux, 3)
files language blank comment code
1 21978 C 2113094 1978872 10809997
2 16944 C/C++ Header 410387 703038 2610820
3 1406 Assembly 47343 110270 242570
tail(linux, 3)
files language blank comment code
21 6 XSLT 13 27 71
22 1 vim script 3 12 27
23 1 Windows Module Definition 0 0 8
cbind(summary(linux$files))
[,1]
Min. 1.0
1st Qu. 1.5
Median 8.0
Mean 1870.0
3rd Qu. 104.0
Max. 22000.0
library(Hmisc)
desc <- data.frame(describe(linux$files)$counts)
names(desc) <- "Value"
desc
Value
n 23
missing 0
unique 16
Info 0.98
Mean 1865
.05 1.0
.10 1.0
.25 1.5
.50 8.0
.75 104.0
.90 1922.8
.95 15454.8
library(pastecs)
stats <- data.frame(stat.desc(linux$files, basic=FALSE))
names(stats) <- "Value"
stats
Value
median 8.00e+00
mean 1.87e+03
SE.mean 1.17e+03
CI.mean.0.95 2.43e+03
var 3.17e+07
std.dev 5.63e+03
coef.var 3.02e+00
stats <- data.frame(stat.desc(linux$files, basic=FALSE, desc=FALSE,norm=TRUE))
names(stats) <- "Value"
stats
Value
skewness 2.79e+00
skew.2SE 2.90e+00
kurtosis 6.35e+00
kurt.2SE 3.40e+00
normtest.W 3.77e-01
normtest.p 6.90e-09
colSums(linux[,c(1,3:5)])
files blank comment code
42905 2591394 2808985 13799561
lines.per.file <- rowSums(linux[,3:5])/linux[,1]
mean(lines.per.file)
[1] 297
total.lines <- rowSums(linux[,3:5])
linux$total.lines <- total.lines
head(linux[,3:6], 3)
blank comment code total.lines
1 2113094 1978872 10809997 14901963
2 410387 703038 2610820 3724245
3 47343 110270 242570 400183
head(linux[
order(linux$total.lines,
decreasing=TRUE),
c(2,1,6)])
language files total.lines
1 C 21978 14901963
2 C/C++ Header 16944 3724245
3 Assembly 1406 400183
4 XML 176 55200
5 make 2052 48062
6 Perl 45 31688
linux$project <- c("linux 4.3.3")
head(linux[,c(1,7)],4)
files project
1 21978 linux 4.3.3
2 16944 linux 4.3.3
3 1406 linux 4.3.3
4 176 linux 4.3.3
all <- rbind(freebsd, minix, linux, openbsd)
total.lines <- rowSums(all[,3:5])
all$total.lines <- total.lines
head(all[,5:7], 3)
code project total.lines
1 3436448 FreeBSD 10.2 4747900
2 1543770 FreeBSD 10.2 2111553
3 45515 FreeBSD 10.2 72528
totalsPerProject <- with(all, aggregate(total.lines, list(project), sum))
barplot(totalsPerProject$x, names=totalsPerProject$Group.1, las=1)
barplot(sort(table(all$language), decreasing = TRUE), las=2)
with(all[all$project=="linux 4.3.3",], barplot(total.lines, names=language, las=2, log ="y"))
with(all, boxplot(total.lines ~ project, log="y",col=rainbow(4)))
grouped <- aggregate(all[,c(3,4,5)], by=list(all$project), sum)
names(grouped)[1] <- "project"
bars <- data.frame(t(grouped[,2:4]))
names(bars) <- grouped$project
totals <- colSums(bars)
relative <- t(t(bars)/totals)
barplot(as.matrix(relative), col=rainbow(3), legend.text = row.names(relative))
barplot(sort(relative[2,], decreasing =TRUE))
set.seed(1234)
x <- seq(-10,10, 0.1)
y <- -2 + 3*rnorm(201)
plot(x,y)
abline(lm(y~x))
hist(y)
rug(y)
plot(density(y))
par(fig=c(0,0.8,0,0.8), new=TRUE)
plot(x,y)
par(fig=c(0.65,1,0,0.8),new=TRUE)
boxplot(y, axes=FALSE)
Packages can be installed using:
install.packages("RUnit")
Installed packages are loaded using:
library(RUnit)