Cynthia — Sep 17, 2013, 2:00 AM
# Read the diving scores (one row per judge's score, as the head() listing
# below shows). as.is = TRUE keeps the text columns as character vectors.
x <- read.csv(
  file = "http://www.stat.yale.edu/~jay/625/diving/Diving2000.csv",
  as.is = TRUE
)
# Sanity check: was the data read into R correctly?
dim(x)
[1] 10787 10
head(x)
Event Round Diver Country Rank DiveNo Difficulty JScore
1 M3mSB Final XIONG Ni CHN 1 1 3.1 8.0
2 M3mSB Final XIONG Ni CHN 1 1 3.1 9.0
3 M3mSB Final XIONG Ni CHN 1 1 3.1 8.5
4 M3mSB Final XIONG Ni CHN 1 1 3.1 8.5
5 M3mSB Final XIONG Ni CHN 1 1 3.1 8.5
6 M3mSB Final XIONG Ni CHN 1 1 3.1 8.5
Judge JCountry
1 RUIZ-PEDREGUERA Rolando CUB
2 GEAR Dennis NZL
3 BOYS Beverley CAN
4 JOHNSON Bente NOR
5 BOUSSARD Michel FRA
6 CALDERON Felix PUR
tail(x)
Event Round Diver Country Rank DiveNo Difficulty
10782 W10mPF Semi RODRIGUEZ Angelique PUR 18 4 2
10783 W10mPF Semi RODRIGUEZ Angelique PUR 18 4 2
10784 W10mPF Semi RODRIGUEZ Angelique PUR 18 4 2
10785 W10mPF Semi RODRIGUEZ Angelique PUR 18 4 2
10786 W10mPF Semi RODRIGUEZ Angelique PUR 18 4 2
10787 W10mPF Semi RODRIGUEZ Angelique PUR 18 4 2
JScore Judge JCountry
10782 6 SEAMAN Kathy CAN
10783 6 LINDBERG Mathz SWE
10784 6 ZAITSEV Oleg RUS
10785 6 GEISSBUHLER Michael SUI
10786 6 XU Yiming CHN
10787 6 GEAR Dennis NZL
# Summaries
library(YaleToolkit)
Loading required package: grid Loading required package: lattice Loading
required package: vcd Loading required package: MASS Loading required
package: colorspace Loading required package: barcode Loading required
package: gpairs
whatis(x)
variable.name type missing distinct.values precision
1 Event character 0 4 NA
2 Round character 0 3 NA
3 Diver character 0 156 NA
4 Country character 0 42 NA
5 Rank numeric 0 49 1.0
6 DiveNo numeric 0 6 1.0
7 Difficulty numeric 0 20 0.1
8 JScore numeric 0 21 0.1
9 Judge character 0 25 NA
10 JCountry character 0 21 NA
min max
1 M10mPF W3mSB
2 Final Semi
3 ABALLI Jesus-Iory ZHUPINA Olena
4 ARG ZIM
5 1 49
6 1 6
7 1.5 3.8
8 0 10
9 ALT Walter ZAITSEV Oleg
10 AUS ZIM
summary(x)
Event Round Diver
Length:10787 Length:10787 Length:10787
Class :character Class :character Class :character
Mode :character Mode :character Mode :character
Country Rank DiveNo Difficulty
Length:10787 Min. : 1.0 Min. :1.00 Min. :1.50
Class :character 1st Qu.: 7.0 1st Qu.:2.00 1st Qu.:2.60
Mode :character Median :13.0 Median :3.00 Median :3.00
Mean :16.8 Mean :3.17 Mean :2.76
3rd Qu.:26.0 3rd Qu.:4.00 3rd Qu.:3.10
Max. :49.0 Max. :6.00 Max. :3.80
JScore Judge JCountry
Min. : 0.00 Length:10787 Length:10787
1st Qu.: 6.00 Class :character Class :character
Median : 7.00 Mode :character Mode :character
Mean : 6.83
3rd Qu.: 8.00
Max. :10.00
# One variable at a time
table(x$Event)
M10mPF M3mSB W10mPF W3mSB
2709 3192 2317 2569
table(x$Round)
Final Prelim Semi
1848 6636 2303
table(x$Diver)
ABALLI Jesus-Iory AHRENS Stefan AKHMETBEKOV Damir
42 119 14
ALCALA Maria-Jose ALEKSEEVA Svetlana ALLY Tony
35 70 119
ALMAZAN Azul ALVAREZ Rafael AMELIA Shenny-Ratna
105 77 35
ARBOLES-SOUCHON Odile AVTANDILYAN Hovhannes BAHARI Mohd-Azheem
35 42 42
BARTA Nora BIMIS Thomas BULMER Eryn
35 42 35
CAGNOTTO Tania CHEN Han-Hung CHEN Ting
70 42 35
CHERECHES Gabriel CHIKINA Natalya CHO Dae-Don
84 98 42
CHOE Hyong-Gil CHOE Myong-Hwa CHOE Song-Hui
112 98 35
CHOI Hye-Jin CIOCAN Clara-Elena CRUZ Iohana
28 35 35
DANAUX Julie DAVISON Michelle DELALOYE Jean-Romain
35 105 42
DESPATIE Alexandre DOBROSKOK Alexandre DOMENIOS Zardo
112 77 42
DUMAIS Troy DURAN Cassius EMPTOZ-LACOTE Gilles
119 119 42
FEBVAY Claire FOURNARIS Erick FRECE Richard
35 42 42
FREEMAN Sally FU Mingxia FUENTES Alejandra
35 105 35
FUMADO Ramon-Antonio GIL Jose-Miguel GILMORE Rebecca
42 42 168
GUERRA Jose-Antonio GUO Jingjing GURMAN Alexey
70 105 42
HAJNAL Andras HARTLEY Blythe HELM Mathew
42 105 112
HEMPEL Jan HEYMANS Emilie HSIEH Pei-Hua
70 98 63
HU Jia ILINA Vera INDAH Eka-Purnama
112 105 35
INSAWANG Meerit ISHKOVA Svetlana JABRAYILOV Emil
42 35 42
KALEC Christopher KEIM Jenny KHAMULKIN Vyacheslav
70 105 42
KONSTANTATOU Maria KOTZIAN Ditte KOUTSOPETROU Sotiria
35 35 35
KWON Kyung-Min LENGYEL Imre LEONG Mun-Yee
42 119 70
LI Na LIBERTY Jeff LINDBERG Anna
98 42 105
LINDNER Doerte LOUKACHINE Igor LUNA Jashia
105 112 70
LYSENKO Dmytro MAKOHIN Jaroslav MALIEV-AVIOLAT Catherine
42 42 35
MARCONI Maria MARCONI Nicola MAZZUCCHI Massimiliano
35 42 70
MEYER Heiko MICHELL Chantelle MIRANDA Donald
112 105 77
MONTMINY Anne NASRULLAH Muhammad NEBIERIDZE Nana
98 42 35
NEWBERY Robert OLSHEVSKAYA Evgeniya OPRIEA Anisoara
189 63 35
ORTIZ Yolanda PAK Yong-Ryong PAKHALINA Ioulia
35 112 105
PAPPA Eftihia PEREZ Francisco PEREZ Sheila-Mae
35 42 35
PICHI Suchart PICHLER David PIEKKANEN Jukka
42 112 42
PIERRE Frederic PINEDA Diana PINTER Orsolya
42 35 35
PLATAS Fernando PONTHUS Sandra POPOVA Natalya
119 35 35
PUHAKKA Joona PULLAR Dean REIFF Marion
42 119 35
REILING Sara RI Ok-Rim RICHTER-LIBISELLER Anja
63 70 98
RODRIGUEZ Angelique RODRIGUEZ Joel RUEDA Eduardo
98 42 7
RUIZ Mark SAEZ-de-IBARRA Dolores SAFONOV Eduard
231 63 42
SALAZAR Yoendris SANCHEZ Abel SANG Xue
77 84 98
SANTOS Leire SANTOS Ruben SAOUTINE Dmitri
35 42 231
SCHMALFUSS Conny SEITOV Alisher SERBINA Svitlana
14 42 35
SHIPMAN Mark SIRANIDIS Nikolaos SKRYPNIK Oleksandr
42 42 42
SMITH Jane SMITH Karen SOROKINA Ganna
70 35 105
STEWART Evan TAYLOR Leon TERAUCHI Ken
42 70 231
TIAN Liang TIMOCHININA Svetlana TOURKY Loudy
112 98 35
TSAI Yi-San URAN Juan-Guillermo VELOSO Juliana
35 84 70
VILLARROEL Luis VOLODKOV Roman VYGUZOVA Irina
84 42 168
WARD Lesley WATERFIELD Peter WELS Andreas
35 42 119
WETZIG Ute WILKINSON Laura XIAO Hailiang
98 98 119
XIONG Ni YEFIMENKO Olga YEOH Ken-Nee
119 70 84
YOO Chang-Joon YU Yuet ZHUPINA Olena
42 42 98
table(x$Country)
ARG ARM AUS AUT AZE BLR BRA CAN CHN COL CUB CZE ESP FIN FRA GBR GEO GER
35 42 728 175 42 112 189 560 868 119 301 42 259 84 224 448 35 672
GRE HKG HUN INA ITA JPN KAZ KOR MAS MEX PER PHI PRK PUR ROM RUS SUI SWE
189 42 231 112 294 231 399 154 196 420 84 77 427 98 154 791 77 105
THA TPE UKR USA VEN ZIM
84 175 476 833 161 42
table(x$Judge)
ALT Walter BARNETT Madeleine BOOTHROYD Sydney
498 661 411
BOUSSARD Michel BOYS Beverley BURK Hans-Peter
702 411 159
CALDERON Felix CERMAKOVA Maria CRUZ Julia
717 141 486
GEAR Dennis GEISSBUHLER Michael HASSAN Mostafa
448 401 376
HOOD Robin HUBER Peter JOHNSON Bente
432 382 273
KELEMEN Ildiko LINDBERG Mathz McFARLAND Steve
216 406 657
MENA Jesus RUIZ-PEDREGUERA Rolando SEAMAN Kathy
856 481 281
STEWART Anthea WANG Facheng XU Yiming
159 357 281
ZAITSEV Oleg
595
table(x$JCountry)
AUS AUT CAN CHN CUB CZE EGY ESP FRA GBR GER HUN MEX NOR NZL PUR RUS SUI
661 382 692 638 481 141 376 486 702 411 657 216 856 273 880 717 595 401
SWE USA ZIM
406 657 159
# Histograms of the four numeric columns, laid out on one 2 x 2 page.
par(mfrow = c(2, 2))
for (column in c("Rank", "DiveNo", "Difficulty", "JScore")) {
  label <- paste0("x$", column)
  # Spell out the labels so they read exactly as hist(x$<column>) would.
  hist(x[[column]], main = paste("Histogram of", label), xlab = label)
}
# Two variables at a time
pairs(x[, c("Rank", "DiveNo", "Difficulty", "JScore")])
#
# Split the scores by whether the judge shares the diver's country.
# A logical mask is used instead of x[-which(cond), ]: when nothing matches,
# which() returns integer(0) and x[-integer(0), ] selects zero rows rather
# than all of them. `%in% TRUE` maps an NA comparison to FALSE, so such rows
# end up in z exactly as they did with which().
same_country <- (x$Country == x$JCountry) %in% TRUE
y <- x[same_country, ]   # judge and diver from the same country
z <- x[!same_country, ]  # judge and diver from different countries
# Rows of `scores` that were given by the named judge.
judge_rows <- function(scores, judge) {
  scores[scores$Judge == judge, ]
}

# Suffix 1: the judge's scores for divers from the judge's own country (y).
BMa1 <- judge_rows(y, "BARNETT Madeleine")
BS1  <- judge_rows(y, "BOOTHROYD Sydney")
BMi1 <- judge_rows(y, "BOUSSARD Michel")
BB1  <- judge_rows(y, "BOYS Beverley")
BH1  <- judge_rows(y, "BURK Hans-Peter")
CF1  <- judge_rows(y, "CALDERON Felix")
CJ1  <- judge_rows(y, "CRUZ Julia")
HR1  <- judge_rows(y, "HOOD Robin")
LM1  <- judge_rows(y, "LINDBERG Mathz")
MS1  <- judge_rows(y, "McFARLAND Steve")
MJ1  <- judge_rows(y, "MENA Jesus")
SA1  <- judge_rows(y, "STEWART Anthea")
WF1  <- judge_rows(y, "WANG Facheng")
ZO1  <- judge_rows(y, "ZAITSEV Oleg")

# Suffix 2: the same judge's scores for divers from other countries (z).
BMa2 <- judge_rows(z, "BARNETT Madeleine")
BS2  <- judge_rows(z, "BOOTHROYD Sydney")
BMi2 <- judge_rows(z, "BOUSSARD Michel")
BB2  <- judge_rows(z, "BOYS Beverley")
BH2  <- judge_rows(z, "BURK Hans-Peter")
CF2  <- judge_rows(z, "CALDERON Felix")
CJ2  <- judge_rows(z, "CRUZ Julia")
HR2  <- judge_rows(z, "HOOD Robin")
LM2  <- judge_rows(z, "LINDBERG Mathz")
MS2  <- judge_rows(z, "McFARLAND Steve")
MJ2  <- judge_rows(z, "MENA Jesus")
SA2  <- judge_rows(z, "STEWART Anthea")
WF2  <- judge_rows(z, "WANG Facheng")
ZO2  <- judge_rows(z, "ZAITSEV Oleg")
# Same-country (suffix 1) vs. other-country (suffix 2) score histograms, one
# judge per row of a 2 x 2 page.
# hist() stops with "invalid number of 'breaks'" when it is given no data,
# which happens for judges who never scored a diver from their own country
# (HR1, LM1 and SA1 have zero rows). hist_scores() draws a labelled empty
# panel in that case, so the script runs through and every pair stays side by
# side without changing the layout by hand.
hist_scores <- function(scores) {
  label <- deparse(substitute(scores))
  heading <- paste("Histogram of", label)
  if (length(scores) == 0) {
    plot.new()
    title(main = heading, sub = "no scores")
    return(invisible(NULL))
  }
  # main/xlab reproduce hist()'s default labels for the caller's expression.
  hist(scores, xlim = c(0, 10), main = heading, xlab = label)
}

par(mfrow = c(2, 2))
hist_scores(BMa1$JScore)
hist_scores(BMa2$JScore)
hist_scores(BS1$JScore)
hist_scores(BS2$JScore)
hist_scores(BMi1$JScore)
hist_scores(BMi2$JScore)
hist_scores(BB1$JScore)
hist_scores(BB2$JScore)
hist_scores(BH1$JScore)
hist_scores(BH2$JScore)
hist_scores(CF1$JScore)
hist_scores(CF2$JScore)
hist_scores(CJ1$JScore)
hist_scores(CJ2$JScore)
hist_scores(HR1$JScore)
hist_scores(HR2$JScore)
hist_scores(LM1$JScore)
hist_scores(LM2$JScore)
hist_scores(MS1$JScore)
hist_scores(MS2$JScore)
hist_scores(MJ1$JScore)
hist_scores(MJ2$JScore)
hist_scores(SA1$JScore)
hist_scores(SA2$JScore)
hist_scores(WF1$JScore)
hist_scores(WF2$JScore)
hist_scores(ZO1$JScore)
hist_scores(ZO2$JScore)
dim(SA1)
[1] 0 10
dim(LM1)
[1] 0 10
dim(HR1)
[1] 0 10
table(y$Country)
AUS AUT CAN CHN CUB ESP FRA GBR GER MEX PUR RUS SUI USA
38 8 29 40 11 11 10 16 35 28 5 38 3 42
# Boxplots of the scores given to each country's divers: first by judges from
# the same country (XN1), then by judges from other countries (XN2).
# Looping over the countries present in y replaces thirteen copy-pasted
# stanzas and also covers AUT, which appears in table(y$Country) but had been
# left out. Titles and a common 0-10 axis make the otherwise unlabelled
# panels identifiable and comparable.
for (country in sort(unique(y$Country))) {
  XN1 <- y[y$Country == country, ]
  XN2 <- z[z$Country == country, ]
  boxplot(XN1$JScore, ylim = c(0, 10),
          main = paste(country, "- same-country judges"))
  boxplot(XN2$JScore, ylim = c(0, 10),
          main = paste(country, "- other-country judges"))
}
colnames(MS1)
[1] "Event" "Round" "Diver" "Country" "Rank"
[6] "DiveNo" "Difficulty" "JScore" "Judge" "JCountry"
table (MS1$Diver)
DAVISON Michelle DUMAIS Troy KEIM Jenny PICHLER David
5 6 5 5
REILING Sara RUIZ Mark WILKINSON Laura
5 11 5
# Scores from other-country judges (z) for the divers that appear in MS1.
# The diver list is taken from MS1 itself rather than retyped, and %in%
# replaces the chain of seven `==` comparisons joined by `|`.
v <- z[z$Diver %in% unique(MS1$Diver), ]
dim(v)
[1] 791 10
dim(z)
[1] 10473 10
# Score distributions on 2 x 2 pages: McFarland's same-country scores (MS1),
# his other-country scores (MS2), and the other-country judges' scores for
# the MS1 divers (v).
par(mfrow = c(2, 2))
hist_inputs <- list(MS1$JScore, MS1$JScore, MS2$JScore, v$JScore)
hist_labels <- c("MS1$JScore", "MS1$JScore", "MS2$JScore", "v$JScore")
for (k in seq_along(hist_inputs)) {
  # Labels are spelled out so they match hist()'s defaults for each expression.
  hist(hist_inputs[[k]], xlim = c(0, 10),
       main = paste("Histogram of", hist_labels[k]), xlab = hist_labels[k])
}
for (scores in list(MS1$JScore, MS2$JScore, MS1$JScore, v$JScore)) {
  boxplot(scores, ylim = c(0, 10))
}
# Per-diver Welch t-tests: McFarland's scores for a diver (rows of MS1) against
# the scores the same diver received from judges of other countries (rows of
# v). The same call is repeated below for each of the seven divers in MS1.
# NOTE(review): each test has only 5-11 scores on the MS1 side, and seven tests
# are run with no multiple-comparison adjustment - read the p-values with care.
t.test(MS1[which(MS1$Diver=="DAVISON Michelle"), ]$JScore, v[which(v$Diver=="DAVISON Michelle"), ]$JScore)
Welch Two Sample t-test
data: MS1[which(MS1$Diver == "DAVISON Michelle"), ]$JScore and v[which(v$Diver == "DAVISON Michelle"), ]$JScore
t = 3.457, df = 6.301, p-value = 0.01251
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.2327 1.3173
sample estimates:
mean of x mean of y
7.800 7.025
t.test(MS1[which(MS1$Diver=="DUMAIS Troy"), ]$JScore, v[which(v$Diver=="DUMAIS Troy"), ]$JScore)
Welch Two Sample t-test
data: MS1[which(MS1$Diver == "DUMAIS Troy"), ]$JScore and v[which(v$Diver == "DUMAIS Troy"), ]$JScore
t = 2.959, df = 6.864, p-value = 0.02161
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.1336 1.2175
sample estimates:
mean of x mean of y
8.167 7.491
t.test(MS1[which(MS1$Diver=="KEIM Jenny"), ]$JScore, v[which(v$Diver=="KEIM Jenny"), ]$JScore)
Welch Two Sample t-test
data: MS1[which(MS1$Diver == "KEIM Jenny"), ]$JScore and v[which(v$Diver == "KEIM Jenny"), ]$JScore
t = 1.003, df = 4.486, p-value = 0.3669
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.5789 1.2789
sample estimates:
mean of x mean of y
7.70 7.35
t.test(MS1[which(MS1$Diver=="PICHLER David"), ]$JScore, v[which(v$Diver=="PICHLER David"), ]$JScore)
Welch Two Sample t-test
data: MS1[which(MS1$Diver == "PICHLER David"), ]$JScore and v[which(v$Diver == "PICHLER David"), ]$JScore
t = 1.895, df = 6.34, p-value = 0.1043
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.09235 0.76525
sample estimates:
mean of x mean of y
8.000 7.664
t.test(MS1[which(MS1$Diver=="REILING Sara"), ]$JScore, v[which(v$Diver=="REILING Sara"), ]$JScore)
Welch Two Sample t-test
data: MS1[which(MS1$Diver == "REILING Sara"), ]$JScore and v[which(v$Diver == "REILING Sara"), ]$JScore
t = 0.4678, df = 4.991, p-value = 0.6596
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.9305 1.3443
sample estimates:
mean of x mean of y
7.000 6.793
t.test(MS1[which(MS1$Diver=="RUIZ Mark"), ]$JScore, v[which(v$Diver=="RUIZ Mark"), ]$JScore)
Welch Two Sample t-test
data: MS1[which(MS1$Diver == "RUIZ Mark"), ]$JScore and v[which(v$Diver == "RUIZ Mark"), ]$JScore
t = 1.501, df = 12.12, p-value = 0.159
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.1197 0.6515
sample estimates:
mean of x mean of y
7.818 7.552
t.test(MS1[which(MS1$Diver=="WILKINSON Laura"), ]$JScore, v[which(v$Diver=="WILKINSON Laura"), ]$JScore)
Welch Two Sample t-test
data: MS1[which(MS1$Diver == "WILKINSON Laura"), ]$JScore and v[which(v$Diver == "WILKINSON Laura"), ]$JScore
t = 0.0946, df = 4.629, p-value = 0.9286
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.7213 0.7751
sample estimates:
mean of x mean of y
8.000 7.973
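### From the histograms, boxplots and t-tests shown above, the American judge,
### Steve McFarland, appears to give American divers somewhat higher scores than
### the other judges do, but not consistently: his mean score is higher for all
### seven divers, yet only two of the seven t-tests have p < 0.05.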