Diving EDA

Cynthia — Sep 17, 2013, 2:00 AM

# Load the diving scores; as.is = TRUE keeps text columns as character
# vectors rather than converting them to factors.
x <- read.csv(
  file = "http://www.stat.yale.edu/~jay/625/diving/Diving2000.csv",
  as.is = TRUE
)

# Sanity checks: confirm that the data was read into R correctly.
# dim() reports the number of rows and columns; head() shows the first rows.
dim(x)
[1] 10787    10
head(x)
  Event Round    Diver Country Rank DiveNo Difficulty JScore
1 M3mSB Final XIONG Ni     CHN    1      1        3.1    8.0
2 M3mSB Final XIONG Ni     CHN    1      1        3.1    9.0
3 M3mSB Final XIONG Ni     CHN    1      1        3.1    8.5
4 M3mSB Final XIONG Ni     CHN    1      1        3.1    8.5
5 M3mSB Final XIONG Ni     CHN    1      1        3.1    8.5
6 M3mSB Final XIONG Ni     CHN    1      1        3.1    8.5
                    Judge JCountry
1 RUIZ-PEDREGUERA Rolando      CUB
2             GEAR Dennis      NZL
3           BOYS Beverley      CAN
4           JOHNSON Bente      NOR
5         BOUSSARD Michel      FRA
6          CALDERON Felix      PUR
# Show the last rows of the data as well.
tail(x)
       Event Round               Diver Country Rank DiveNo Difficulty
10782 W10mPF  Semi RODRIGUEZ Angelique     PUR   18      4          2
10783 W10mPF  Semi RODRIGUEZ Angelique     PUR   18      4          2
10784 W10mPF  Semi RODRIGUEZ Angelique     PUR   18      4          2
10785 W10mPF  Semi RODRIGUEZ Angelique     PUR   18      4          2
10786 W10mPF  Semi RODRIGUEZ Angelique     PUR   18      4          2
10787 W10mPF  Semi RODRIGUEZ Angelique     PUR   18      4          2
      JScore               Judge JCountry
10782      6        SEAMAN Kathy      CAN
10783      6      LINDBERG Mathz      SWE
10784      6        ZAITSEV Oleg      RUS
10785      6 GEISSBUHLER Michael      SUI
10786      6           XU Yiming      CHN
10787      6         GEAR Dennis      NZL

# Summaries 
library(YaleToolkit)
Loading required package: grid Loading required package: lattice Loading
required package: vcd Loading required package: MASS Loading required
package: colorspace Loading required package: barcode Loading required
package: gpairs
# whatis() (loaded with YaleToolkit) lists, for every column, its type, the
# number of missing and distinct values, the precision, and the min/max.
whatis(x)
   variable.name      type missing distinct.values precision
1          Event character       0               4        NA
2          Round character       0               3        NA
3          Diver character       0             156        NA
4        Country character       0              42        NA
5           Rank   numeric       0              49       1.0
6         DiveNo   numeric       0               6       1.0
7     Difficulty   numeric       0              20       0.1
8         JScore   numeric       0              21       0.1
9          Judge character       0              25        NA
10      JCountry character       0              21        NA
                 min           max
1             M10mPF         W3mSB
2              Final          Semi
3  ABALLI Jesus-Iory ZHUPINA Olena
4                ARG           ZIM
5                  1            49
6                  1             6
7                1.5           3.8
8                  0            10
9         ALT Walter  ZAITSEV Oleg
10               AUS           ZIM
# Base R summary: quartiles and mean for the numeric columns, and
# length/class/mode for the character columns.
summary(x)
    Event              Round              Diver          
 Length:10787       Length:10787       Length:10787      
 Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character  



   Country               Rank          DiveNo       Difficulty  
 Length:10787       Min.   : 1.0   Min.   :1.00   Min.   :1.50  
 Class :character   1st Qu.: 7.0   1st Qu.:2.00   1st Qu.:2.60  
 Mode  :character   Median :13.0   Median :3.00   Median :3.00  
                    Mean   :16.8   Mean   :3.17   Mean   :2.76  
                    3rd Qu.:26.0   3rd Qu.:4.00   3rd Qu.:3.10  
                    Max.   :49.0   Max.   :6.00   Max.   :3.80  
     JScore         Judge             JCountry        
 Min.   : 0.00   Length:10787       Length:10787      
 1st Qu.: 6.00   Class :character   Class :character  
 Median : 7.00   Mode  :character   Mode  :character  
 Mean   : 6.83                                        
 3rd Qu.: 8.00                                        
 Max.   :10.00                                        


# One variable at a time

# Each row of x is one judge's score for one dive, so table() on a
# categorical column counts the scores recorded per level. Here: per event.
table(x$Event)

M10mPF  M3mSB W10mPF  W3mSB 
  2709   3192   2317   2569 
# Number of score rows per competition round.
table(x$Round)

 Final Prelim   Semi 
  1848   6636   2303 
# Number of score rows per diver.
table(x$Diver)

       ABALLI Jesus-Iory            AHRENS Stefan        AKHMETBEKOV Damir 
                      42                      119                       14 
       ALCALA Maria-Jose       ALEKSEEVA Svetlana                ALLY Tony 
                      35                       70                      119 
            ALMAZAN Azul           ALVAREZ Rafael      AMELIA Shenny-Ratna 
                     105                       77                       35 
   ARBOLES-SOUCHON Odile    AVTANDILYAN Hovhannes       BAHARI Mohd-Azheem 
                      35                       42                       42 
              BARTA Nora             BIMIS Thomas              BULMER Eryn 
                      35                       42                       35 
          CAGNOTTO Tania            CHEN Han-Hung                CHEN Ting 
                      70                       42                       35 
       CHERECHES Gabriel          CHIKINA Natalya              CHO Dae-Don 
                      84                       98                       42 
          CHOE Hyong-Gil           CHOE Myong-Hwa            CHOE Song-Hui 
                     112                       98                       35 
            CHOI Hye-Jin       CIOCAN Clara-Elena              CRUZ Iohana 
                      28                       35                       35 
            DANAUX Julie         DAVISON Michelle     DELALOYE Jean-Romain 
                      35                      105                       42 
      DESPATIE Alexandre      DOBROSKOK Alexandre           DOMENIOS Zardo 
                     112                       77                       42 
             DUMAIS Troy            DURAN Cassius     EMPTOZ-LACOTE Gilles 
                     119                      119                       42 
           FEBVAY Claire          FOURNARIS Erick            FRECE Richard 
                      35                       42                       42 
           FREEMAN Sally               FU Mingxia        FUENTES Alejandra 
                      35                      105                       35 
    FUMADO Ramon-Antonio          GIL Jose-Miguel          GILMORE Rebecca 
                      42                       42                      168 
     GUERRA Jose-Antonio             GUO Jingjing            GURMAN Alexey 
                      70                      105                       42 
           HAJNAL Andras           HARTLEY Blythe              HELM Mathew 
                      42                      105                      112 
              HEMPEL Jan           HEYMANS Emilie            HSIEH Pei-Hua 
                      70                       98                       63 
                  HU Jia               ILINA Vera        INDAH Eka-Purnama 
                     112                      105                       35 
         INSAWANG Meerit         ISHKOVA Svetlana          JABRAYILOV Emil 
                      42                       35                       42 
       KALEC Christopher               KEIM Jenny     KHAMULKIN Vyacheslav 
                      70                      105                       42 
      KONSTANTATOU Maria            KOTZIAN Ditte     KOUTSOPETROU Sotiria 
                      35                       35                       35 
          KWON Kyung-Min             LENGYEL Imre            LEONG Mun-Yee 
                      42                      119                       70 
                   LI Na             LIBERTY Jeff            LINDBERG Anna 
                      98                       42                      105 
          LINDNER Doerte          LOUKACHINE Igor              LUNA Jashia 
                     105                      112                       70 
          LYSENKO Dmytro         MAKOHIN Jaroslav MALIEV-AVIOLAT Catherine 
                      42                       42                       35 
           MARCONI Maria           MARCONI Nicola   MAZZUCCHI Massimiliano 
                      35                       42                       70 
             MEYER Heiko        MICHELL Chantelle           MIRANDA Donald 
                     112                      105                       77 
           MONTMINY Anne       NASRULLAH Muhammad          NEBIERIDZE Nana 
                      98                       42                       35 
          NEWBERY Robert     OLSHEVSKAYA Evgeniya          OPRIEA Anisoara 
                     189                       63                       35 
           ORTIZ Yolanda           PAK Yong-Ryong         PAKHALINA Ioulia 
                      35                      112                      105 
           PAPPA Eftihia          PEREZ Francisco         PEREZ Sheila-Mae 
                      35                       42                       35 
           PICHI Suchart            PICHLER David          PIEKKANEN Jukka 
                      42                      112                       42 
         PIERRE Frederic             PINEDA Diana           PINTER Orsolya 
                      42                       35                       35 
         PLATAS Fernando           PONTHUS Sandra           POPOVA Natalya 
                     119                       35                       35 
           PUHAKKA Joona              PULLAR Dean             REIFF Marion 
                      42                      119                       35 
            REILING Sara                RI Ok-Rim  RICHTER-LIBISELLER Anja 
                      63                       70                       98 
     RODRIGUEZ Angelique           RODRIGUEZ Joel            RUEDA Eduardo 
                      98                       42                        7 
               RUIZ Mark   SAEZ-de-IBARRA Dolores           SAFONOV Eduard 
                     231                       63                       42 
        SALAZAR Yoendris             SANCHEZ Abel                 SANG Xue 
                      77                       84                       98 
            SANTOS Leire             SANTOS Ruben          SAOUTINE Dmitri 
                      35                       42                      231 
        SCHMALFUSS Conny           SEITOV Alisher         SERBINA Svitlana 
                      14                       42                       35 
            SHIPMAN Mark       SIRANIDIS Nikolaos       SKRYPNIK Oleksandr 
                      42                       42                       42 
              SMITH Jane              SMITH Karen           SOROKINA Ganna 
                      70                       35                      105 
            STEWART Evan              TAYLOR Leon             TERAUCHI Ken 
                      42                       70                      231 
              TIAN Liang     TIMOCHININA Svetlana             TOURKY Loudy 
                     112                       98                       35 
             TSAI Yi-San      URAN Juan-Guillermo           VELOSO Juliana 
                      35                       84                       70 
         VILLARROEL Luis           VOLODKOV Roman           VYGUZOVA Irina 
                      84                       42                      168 
             WARD Lesley         WATERFIELD Peter             WELS Andreas 
                      35                       42                      119 
              WETZIG Ute          WILKINSON Laura            XIAO Hailiang 
                      98                       98                      119 
                XIONG Ni           YEFIMENKO Olga             YEOH Ken-Nee 
                     119                       70                       84 
          YOO Chang-Joon                  YU Yuet            ZHUPINA Olena 
                      42                       42                       98 
# Number of score rows per diver country.
table(x$Country)

ARG ARM AUS AUT AZE BLR BRA CAN CHN COL CUB CZE ESP FIN FRA GBR GEO GER 
 35  42 728 175  42 112 189 560 868 119 301  42 259  84 224 448  35 672 
GRE HKG HUN INA ITA JPN KAZ KOR MAS MEX PER PHI PRK PUR ROM RUS SUI SWE 
189  42 231 112 294 231 399 154 196 420  84  77 427  98 154 791  77 105 
THA TPE UKR USA VEN ZIM 
 84 175 476 833 161  42 
# Number of scores given by each judge.
table(x$Judge)

             ALT Walter       BARNETT Madeleine        BOOTHROYD Sydney 
                    498                     661                     411 
        BOUSSARD Michel           BOYS Beverley         BURK Hans-Peter 
                    702                     411                     159 
         CALDERON Felix         CERMAKOVA Maria              CRUZ Julia 
                    717                     141                     486 
            GEAR Dennis     GEISSBUHLER Michael          HASSAN Mostafa 
                    448                     401                     376 
             HOOD Robin             HUBER Peter           JOHNSON Bente 
                    432                     382                     273 
         KELEMEN Ildiko          LINDBERG Mathz         McFARLAND Steve 
                    216                     406                     657 
             MENA Jesus RUIZ-PEDREGUERA Rolando            SEAMAN Kathy 
                    856                     481                     281 
         STEWART Anthea            WANG Facheng               XU Yiming 
                    159                     357                     281 
           ZAITSEV Oleg 
                    595 
# Number of scores given by judges of each country.
table(x$JCountry)

AUS AUT CAN CHN CUB CZE EGY ESP FRA GBR GER HUN MEX NOR NZL PUR RUS SUI 
661 382 692 638 481 141 376 486 702 411 657 216 856 273 880 717 595 401 
SWE USA ZIM 
406 657 159 

# Histograms of the four numeric columns on one 2 x 2 page.
# hist() builds its title and x-axis label from the expression it is given,
# so both are passed explicitly to match a direct hist(x$<column>) call.
par(mfrow = c(2, 2))
invisible(lapply(
  c("Rank", "DiveNo", "Difficulty", "JScore"),
  function(column) {
    label <- paste0("x$", column)
    hist(x[[column]], main = paste("Histogram of", label), xlab = label)
  }
))

plot of chunk unnamed-chunk-1


# Two variables at a time
# Scatterplot matrix of the four numeric columns (columns 5 to 8 of x),
# selected by name so the call documents what is being plotted.
pairs(x[, c("Rank", "DiveNo", "Difficulty", "JScore")])

plot of chunk unnamed-chunk-1


# 
# Split the scores by whether the judge shares the diver's country.
#   y: judge and diver come from the same country
#   z: every other row
# A logical mask is used for the complement instead of -which(...): when no
# row matches, -which(...) is an empty index and z would silently end up with
# zero rows instead of all of them. `%in% TRUE` turns an NA comparison into
# FALSE, so such rows stay out of y and in z, as with which().
is_home_judge <- (x$Country == x$JCountry) %in% TRUE
y <- x[is_home_judge, ]
z <- x[!is_home_judge, ]

# Keep only the rows of `scores` that were awarded by the judge `judge_name`.
rows_for_judge <- function(scores, judge_name) {
  scores[scores$Judge == judge_name, ]
}

# Suffix 1: the judge's scores for divers from the judge's own country (y).
BMa1 <- rows_for_judge(y, "BARNETT Madeleine")
BS1 <- rows_for_judge(y, "BOOTHROYD Sydney")
BMi1 <- rows_for_judge(y, "BOUSSARD Michel")
BB1 <- rows_for_judge(y, "BOYS Beverley")
BH1 <- rows_for_judge(y, "BURK Hans-Peter")
CF1 <- rows_for_judge(y, "CALDERON Felix")
CJ1 <- rows_for_judge(y, "CRUZ Julia")
HR1 <- rows_for_judge(y, "HOOD Robin")
LM1 <- rows_for_judge(y, "LINDBERG Mathz")
MS1 <- rows_for_judge(y, "McFARLAND Steve")
MJ1 <- rows_for_judge(y, "MENA Jesus")
SA1 <- rows_for_judge(y, "STEWART Anthea")
WF1 <- rows_for_judge(y, "WANG Facheng")
ZO1 <- rows_for_judge(y, "ZAITSEV Oleg")


# Suffix 2: the same judge's scores for all other divers (z).
BMa2 <- rows_for_judge(z, "BARNETT Madeleine")
BS2 <- rows_for_judge(z, "BOOTHROYD Sydney")
BMi2 <- rows_for_judge(z, "BOUSSARD Michel")
BB2 <- rows_for_judge(z, "BOYS Beverley")
BH2 <- rows_for_judge(z, "BURK Hans-Peter")
CF2 <- rows_for_judge(z, "CALDERON Felix")
CJ2 <- rows_for_judge(z, "CRUZ Julia")
HR2 <- rows_for_judge(z, "HOOD Robin")
LM2 <- rows_for_judge(z, "LINDBERG Mathz")
MS2 <- rows_for_judge(z, "McFARLAND Steve")
MJ2 <- rows_for_judge(z, "MENA Jesus")
SA2 <- rows_for_judge(z, "STEWART Anthea")
WF2 <- rows_for_judge(z, "WANG Facheng")
ZO2 <- rows_for_judge(z, "ZAITSEV Oleg")


# Judge-by-judge comparison on a 2 x 2 page. For each judge, the first
# histogram shows the scores given to divers from the judge's own country
# (suffix 1) and the second the scores given to all other divers (suffix 2),
# both on a common 0-10 x-axis.
par(mfrow=c(2,2))
# BARNETT Madeleine
hist(BMa1$JScore,xlim=c(0,10))
hist(BMa2$JScore,xlim=c(0,10))

# BOOTHROYD Sydney
hist(BS1$JScore,xlim=c(0,10))
hist(BS2$JScore,xlim=c(0,10))

plot of chunk unnamed-chunk-1


# BOUSSARD Michel: own-country scores (BMi1), then all other scores (BMi2).
hist(BMi1$JScore,xlim=c(0,10))
hist(BMi2$JScore,xlim=c(0,10))

# BOYS Beverley: own-country scores (BB1), then all other scores (BB2).
hist(BB1$JScore,xlim=c(0,10))
hist(BB2$JScore,xlim=c(0,10))

plot of chunk unnamed-chunk-1


# BURK Hans-Peter: own-country scores (BH1), then all other scores (BH2).
hist(BH1$JScore,xlim=c(0,10))
hist(BH2$JScore,xlim=c(0,10))

# CALDERON Felix: own-country scores (CF1), then all other scores (CF2).
hist(CF1$JScore,xlim=c(0,10))
hist(CF2$JScore,xlim=c(0,10))

plot of chunk unnamed-chunk-1


# CRUZ Julia: own-country scores (CJ1), then all other scores (CJ2).
hist(CJ1$JScore,xlim=c(0,10))
hist(CJ2$JScore,xlim=c(0,10))

par(mfrow = c(1, 2))

# HOOD Robin and LINDBERG Mathz gave no scores to divers from their own
# country, so HR1 and LM1 have zero rows. hist() stops with
# "invalid number of 'breaks'" on an empty vector, which would abort a
# non-interactive run, so the own-country histogram is only drawn when there
# is something to plot.
if (nrow(HR1) > 0) {
  hist(HR1$JScore, xlim = c(0, 10))
} else {
  message("HR1 is empty (no own-country scores for HOOD Robin); histogram skipped")
}
hist(HR2$JScore, xlim = c(0, 10))

if (nrow(LM1) > 0) {
  hist(LM1$JScore, xlim = c(0, 10))
} else {
  message("LM1 is empty (no own-country scores for LINDBERG Mathz); histogram skipped")
}
hist(LM2$JScore, xlim = c(0, 10))

# Two panels per page: own-country scores on the left, all other scores on
# the right.
par(mfrow = c(1,2))
# McFARLAND Steve
hist(MS1$JScore,xlim=c(0,10))
hist(MS2$JScore,xlim=c(0,10))

# MENA Jesus
hist(MJ1$JScore,xlim=c(0,10))
hist(MJ2$JScore,xlim=c(0,10))

# Back to a single panel per page.
par(mfrow = c(1,1))

plot of chunk unnamed-chunk-1

# STEWART Anthea gave no scores to divers from the judge's own country, so
# SA1 has zero rows and hist() would stop with "invalid number of 'breaks'".
# Only draw the own-country histogram when there is something to plot.
if (nrow(SA1) > 0) {
  hist(SA1$JScore, xlim = c(0, 10))
} else {
  message("SA1 is empty (no own-country scores for STEWART Anthea); histogram skipped")
}
hist(SA2$JScore, xlim = c(0, 10))

plot of chunk unnamed-chunk-1


# 2 x 2 page again: one row per judge, own-country scores first.
par(mfrow = c(2,2))
# WANG Facheng
hist(WF1$JScore,xlim=c(0,10))
hist(WF2$JScore,xlim=c(0,10))

# ZAITSEV Oleg
hist(ZO1$JScore,xlim=c(0,10))
hist(ZO2$JScore,xlim=c(0,10))

plot of chunk unnamed-chunk-1


# The three own-country subsets whose histograms failed above have zero rows:
# these judges never scored a diver from their own country.
dim(SA1)
[1]  0 10
dim(LM1)
[1]  0 10
dim(HR1)
[1]  0 10


# Same-country scores (y) counted by diver country.
table(y$Country)

AUS AUT CAN CHN CUB ESP FRA GBR GER MEX PUR RUS SUI USA 
 38   8  29  40  11  11  10  16  35  28   5  38   3  42 
# For each country, draw the scores its divers received from judges of the
# same country (XN1, rows of y) and then from all other judges (XN2, rows of z).
for (country in c("CHN", "AUS")) {
  XN1 <- y[y$Country == country, ]
  XN2 <- z[z$Country == country, ]
  boxplot(XN1$JScore)
  boxplot(XN2$JScore)
}

plot of chunk unnamed-chunk-1


# For each country, draw the scores its divers received from judges of the
# same country (XN1, rows of y) and then from all other judges (XN2, rows of z).
for (country in c("CAN", "CUB")) {
  XN1 <- y[y$Country == country, ]
  XN2 <- z[z$Country == country, ]
  boxplot(XN1$JScore)
  boxplot(XN2$JScore)
}

plot of chunk unnamed-chunk-1


# For each country, draw the scores its divers received from judges of the
# same country (XN1, rows of y) and then from all other judges (XN2, rows of z).
for (country in c("ESP", "FRA")) {
  XN1 <- y[y$Country == country, ]
  XN2 <- z[z$Country == country, ]
  boxplot(XN1$JScore)
  boxplot(XN2$JScore)
}

plot of chunk unnamed-chunk-1


# For each country, draw the scores its divers received from judges of the
# same country (XN1, rows of y) and then from all other judges (XN2, rows of z).
for (country in c("GBR", "GER")) {
  XN1 <- y[y$Country == country, ]
  XN2 <- z[z$Country == country, ]
  boxplot(XN1$JScore)
  boxplot(XN2$JScore)
}

plot of chunk unnamed-chunk-1


# For each country, draw the scores its divers received from judges of the
# same country (XN1, rows of y) and then from all other judges (XN2, rows of z).
for (country in c("MEX", "PUR")) {
  XN1 <- y[y$Country == country, ]
  XN2 <- z[z$Country == country, ]
  boxplot(XN1$JScore)
  boxplot(XN2$JScore)
}

plot of chunk unnamed-chunk-1


# For each country, draw the scores its divers received from judges of the
# same country (XN1, rows of y) and then from all other judges (XN2, rows of z).
for (country in c("RUS", "SUI")) {
  XN1 <- y[y$Country == country, ]
  XN2 <- z[z$Country == country, ]
  boxplot(XN1$JScore)
  boxplot(XN2$JScore)
}

plot of chunk unnamed-chunk-1


# USA divers: scores from same-country judges (XN1, rows of y), then scores
# from all other judges (XN2, rows of z).
XN1 <- subset(y, Country == "USA")
XN2 <- subset(z, Country == "USA")
boxplot(XN1[["JScore"]])
boxplot(XN2[["JScore"]])


# MS1 holds McFARLAND Steve's scores for divers from the judge's own country;
# list its columns before looking at the divers involved.
colnames(MS1)
 [1] "Event"      "Round"      "Diver"      "Country"    "Rank"      
 [6] "DiveNo"     "Difficulty" "JScore"     "Judge"      "JCountry"  
# How many own-country scores McFARLAND Steve gave to each diver.
table(MS1$Diver)

DAVISON Michelle      DUMAIS Troy       KEIM Jenny    PICHLER David 
               5                6                5                5 
    REILING Sara        RUIZ Mark  WILKINSON Laura 
               5               11                5 
# Scores in z (judge and diver from different countries) for the seven
# divers that appear in MS1.
v <- z[z$Diver %in% c(
  "DAVISON Michelle", "DUMAIS Troy", "KEIM Jenny", "PICHLER David",
  "REILING Sara", "RUIZ Mark", "WILKINSON Laura"
), ]

# Size of the comparison set v relative to all different-country scores z.
dim(v)
[1] 791  10
dim(z)
[1] 10473    10
# 2 x 2 page for the plots that follow.
par(mfrow=c(2,2))

plot of chunk unnamed-chunk-1

# Panels fill the 2 x 2 page row by row, so each row is one comparison,
# in the same order as the boxplots that follow:
#   top row:    McFARLAND Steve's own-country scores (MS1) next to the same
#               judge's scores for all other divers (MS2)
#   bottom row: MS1 next to the scores the same divers received from judges
#               of other countries (v)
# MS1 was previously drawn twice in a row, which broke this pairing.
hist(MS1$JScore, xlim = c(0, 10))
hist(MS2$JScore, xlim = c(0, 10))
hist(MS1$JScore, xlim = c(0, 10))
hist(v$JScore, xlim = c(0, 10))

plot of chunk unnamed-chunk-1



# Same two comparisons as boxplots on a common 0-10 y-axis:
# MS1 against MS2, then MS1 against v.
boxplot(MS1$JScore,ylim=c(0,10))
boxplot(MS2$JScore,ylim=c(0,10))
boxplot(MS1$JScore,ylim=c(0,10))
boxplot(v$JScore,ylim=c(0,10))

plot of chunk unnamed-chunk-1



# Per-diver Welch two-sample t-tests: McFARLAND Steve's scores for the diver
# (from MS1) against the scores the same diver received from judges of other
# countries (from v). First diver: DAVISON Michelle.
t.test(MS1[which(MS1$Diver=="DAVISON Michelle"), ]$JScore, v[which(v$Diver=="DAVISON Michelle"), ]$JScore)

    Welch Two Sample t-test

data:  MS1[which(MS1$Diver == "DAVISON Michelle"), ]$JScore and v[which(v$Diver == "DAVISON Michelle"), ]$JScore
t = 3.457, df = 6.301, p-value = 0.01251
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.2327 1.3173
sample estimates:
mean of x mean of y 
    7.800     7.025 
# DUMAIS Troy: McFARLAND Steve's scores (MS1) vs. other countries' judges (v).
t.test(MS1[which(MS1$Diver=="DUMAIS Troy"), ]$JScore, v[which(v$Diver=="DUMAIS Troy"), ]$JScore)

    Welch Two Sample t-test

data:  MS1[which(MS1$Diver == "DUMAIS Troy"), ]$JScore and v[which(v$Diver == "DUMAIS Troy"), ]$JScore
t = 2.959, df = 6.864, p-value = 0.02161
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.1336 1.2175
sample estimates:
mean of x mean of y 
    8.167     7.491 
# KEIM Jenny: McFARLAND Steve's scores (MS1) vs. other countries' judges (v).
t.test(MS1[which(MS1$Diver=="KEIM Jenny"), ]$JScore, v[which(v$Diver=="KEIM Jenny"), ]$JScore)

    Welch Two Sample t-test

data:  MS1[which(MS1$Diver == "KEIM Jenny"), ]$JScore and v[which(v$Diver == "KEIM Jenny"), ]$JScore
t = 1.003, df = 4.486, p-value = 0.3669
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.5789  1.2789
sample estimates:
mean of x mean of y 
     7.70      7.35 
# PICHLER David: McFARLAND Steve's scores (MS1) vs. other countries' judges (v).
t.test(MS1[which(MS1$Diver=="PICHLER David"), ]$JScore, v[which(v$Diver=="PICHLER David"), ]$JScore)

    Welch Two Sample t-test

data:  MS1[which(MS1$Diver == "PICHLER David"), ]$JScore and v[which(v$Diver == "PICHLER David"), ]$JScore
t = 1.895, df = 6.34, p-value = 0.1043
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.09235  0.76525
sample estimates:
mean of x mean of y 
    8.000     7.664 
# REILING Sara: McFARLAND Steve's scores (MS1) vs. other countries' judges (v).
t.test(MS1[which(MS1$Diver=="REILING Sara"), ]$JScore, v[which(v$Diver=="REILING Sara"), ]$JScore)

    Welch Two Sample t-test

data:  MS1[which(MS1$Diver == "REILING Sara"), ]$JScore and v[which(v$Diver == "REILING Sara"), ]$JScore
t = 0.4678, df = 4.991, p-value = 0.6596
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.9305  1.3443
sample estimates:
mean of x mean of y 
    7.000     6.793 
# RUIZ Mark: McFARLAND Steve's scores (MS1) vs. other countries' judges (v).
t.test(MS1[which(MS1$Diver=="RUIZ Mark"), ]$JScore, v[which(v$Diver=="RUIZ Mark"), ]$JScore)

    Welch Two Sample t-test

data:  MS1[which(MS1$Diver == "RUIZ Mark"), ]$JScore and v[which(v$Diver == "RUIZ Mark"), ]$JScore
t = 1.501, df = 12.12, p-value = 0.159
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.1197  0.6515
sample estimates:
mean of x mean of y 
    7.818     7.552 
# WILKINSON Laura: McFARLAND Steve's scores (MS1) vs. other countries' judges (v).
t.test(MS1[which(MS1$Diver=="WILKINSON Laura"), ]$JScore, v[which(v$Diver=="WILKINSON Laura"), ]$JScore)

    Welch Two Sample t-test

data:  MS1[which(MS1$Diver == "WILKINSON Laura"), ]$JScore and v[which(v$Diver == "WILKINSON Laura"), ]$JScore
t = 0.0946, df = 4.629, p-value = 0.9286
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.7213  0.7751
sample estimates:
mean of x mean of y 
    8.000     7.973 

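### From the histograms, boxplots and t-tests shown above, the American judge,
### Steve McFarland, might sometimes give higher scores to American divers, but
### not always: his mean score is higher than the other judges' for all seven
### divers, yet the difference is significant at the 5% level for only two of
### them (DAVISON Michelle and DUMAIS Troy).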