Diving

First, I want to get an overview of the data: how many divers and how many judges took part in each event.

# Read the Diving2000 CSV; text columns stay character instead of becoming
# factors.
x <- read.csv(
  file = "http://www.stat.yale.edu/~jay/625/diving/Diving2000.csv",
  stringsAsFactors = FALSE
)
# Sorted vector of the distinct event codes found in the data.
Events <- sort(unique(x$Event))

# Overview table: one row per event with the number of distinct divers and
# the number of distinct judges recorded for that event in `x`.
S <- data.frame(
  "Event" = Events,
  "Number of Divers" = vapply(
    Events,
    function(ev) nlevels(factor(x$Diver[x$Event == ev])),
    integer(1),
    USE.NAMES = FALSE
  ),
  "Number of Judges" = vapply(
    Events,
    function(ev) nlevels(factor(x$Judge[x$Event == ev])),
    integer(1),
    USE.NAMES = FALSE
  ),
  check.names = FALSE,       # keep the column names with spaces as written
  stringsAsFactors = FALSE
)
S  ## Event | Number of Divers | Number of Judges

##    Event Number of Divers Number of Judges
## 1 M10mPF               42               14
## 2  M3mSB               49               14
## 3 W10mPF               40               14
## 4  W3mSB               43               14

Now I want to find out how well the American divers did:

# Best-placed USA diver in the final round of each event.
#
# Columns of W:
#   Event - event code (one row per entry of `Events`)
#   Diver - name of the USA diver with the lowest (best) Rank in the final
#   Rank  - that diver's Rank
#
# The diver is taken from the USA rows themselves, so a diver from another
# country who shares the same rank can never be reported by mistake. Events
# without any USA diver in the final keep NA in Diver and Rank.
W <- data.frame(
  Event = Events,
  Diver = rep(NA_character_, length(Events)),
  Rank = rep(NA_integer_, length(Events)),
  stringsAsFactors = FALSE
)

for (k in seq_along(Events)) {
  # which() drops NA comparisons instead of producing all-NA rows.
  usa_final <- x[which(x$Event == Events[k] &
                         x$Country == "USA" &
                         x$Round == "Final"), ]
  # Index of the first row holding the smallest rank; empty if there are no
  # USA rows (or no non-missing ranks) for this event.
  best <- which.min(usa_final$Rank)
  if (length(best) == 0) {
    next
  }
  W$Rank[k] <- usa_final$Rank[best]
  W$Diver[k] <- usa_final$Diver[best]
}
W  ## Event | best American Diver | Rank of the best American Diver

##    Event           Diver Rank
## 1 M10mPF       RUIZ Mark    6
## 2  M3mSB     DUMAIS Troy    6
## 3 W10mPF WILKINSON Laura    1
## 4  W3mSB      KEIM Jenny    8

Next, I would like to find out how the Rank is determined by the judges' scores and the Difficulty. My hypothesis is that the final score of each dive is the Difficulty of the dive multiplied by the mean of all judge scores, and that the final score of each diver is the sum of all of his or her dive scores. I will test this hypothesis by building a table that contains the rank calculated under this hypothesis (cRank) next to the real rank (rRank) found in the provided data set. Note that the code below sums Difficulty × JScore over all judges instead of taking the mean; this yields the same ordering as long as every dive is scored by the same number of judges. I will do this for the final round of the W3mSB competition.

#### Find out: How is the Rank calculated by the Difficulty and the JScore ####
#### Guess: Difficulty*JScore=Score
# Rows of the W3mSB final; columns 3, 5, 7 and 8 are selected by position.
# The code below uses them as Diver, Rank, Difficulty and JScore.
y <- x[x$Event == "W3mSB" & x$Round == "Final", c(3, 5, 7, 8)]

# Hypothetical total per diver: sum of Difficulty * JScore over all of the
# diver's rows. The result is named by diver, in sorted order.
totals <- tapply(y$Difficulty * y$JScore, factor(y$Diver), sum)
A <- data.frame(
  Diver = names(totals),
  Score = as.vector(totals),
  row.names = names(totals),
  stringsAsFactors = FALSE
)

# Highest hypothetical score first.
A <- A[order(-A$Score), ]
# Calculated rank (cRank): position in the ordering above.
A$cRank <- seq_len(nrow(A))
# Real rank (rRank): the Rank recorded on the diver's first row in `y`.
A$rRank <- y$Rank[match(A$Diver, y$Diver)]

A[, c("Score", "cRank", "rRank")]  ### Guess is NOT TRUE: cRank and rRank disagree for several divers (e.g. the 10th and 11th place are swapped)

##                   Score cRank rRank
## FU Mingxia        849.1     1     1
## GUO Jingjing      808.8     2     2
## LINDNER Doerte    794.0     3     3
## PAKHALINA Ioulia  779.5     4     4
## LINDBERG Anna     761.6     5     5
## MICHELL Chantelle 747.3     6     7
## ILINA Vera        742.5     7     6
## VYGUZOVA Irina    729.5     8     9
## KEIM Jenny        717.6     9     8
## SOROKINA Ganna    705.8    10    11
## HARTLEY Blythe    705.4    11    10
## DAVISON Michelle  680.0    12    12

One can see that the calculated rank and the real rank disagree for several divers: places 6/7 (MICHELL and ILINA), 8/9 (VYGUZOVA and KEIM) and 10/11 (SOROKINA and HARTLEY) are each swapped. So one can conclude that my hypothesis is not true.

Now I will try to find out whether the judges of any country favoured the divers from their own country:

DC <- levels(factor(x$Country))   # diver countries, sorted
JC <- levels(factor(x$JCountry))  # judge countries, sorted

# Mean JScore received by the divers of each country, in the same order as DC.
AVS <- tapply(x$JScore, factor(x$Country), mean)

# T holds one row per diver country:
#   DC    - diver country
#   AVS   - mean score received by that country's divers (rounded at the end)
#   <JC>  - one column per judge country, filled in below
# NOTE: the name `T` shadows the built-in shorthand for TRUE; it is kept
# because the plotting code further down refers to it.
T <- data.frame(DC = DC, AVS = as.vector(AVS), stringsAsFactors = FALSE)

for (jc in JC) {
  # Mean score the judges from `jc` gave to each diver country they scored.
  judged <- which(x$JCountry == jc)
  given <- tapply(x$JScore[judged], factor(x$Country[judged]), mean)

  # Deviation from the overall country mean; NA for countries these judges
  # never scored.
  deviation <- as.vector(given[match(DC, names(given))]) - as.vector(AVS)

  # SCALING: subtract this judge country's mean deviation so the columns are
  # comparable (dividing by the variance would hide the size of the effect),
  # then round to one decimal.
  T[[jc]] <- round(deviation - mean(deviation, na.rm = TRUE), 1)
}

# Round the AVS column itself. The column must be addressed by its name: the
# numeric vector AVS is not a valid column index.
T[, "AVS"] <- round(T[, "AVS"], 1)
## T now contains, for every judge country, the centred difference between the
## score its judges gave to each diver country and that country's average.

# a: for every judge country (X1), the centred deviation its judges gave to
# divers of their own country (X2); NA when that country has no divers.
own_country <- vapply(
  JC,
  function(jc) {
    hit <- match(jc, T$DC)
    if (is.na(hit)) NA_real_ else T[[jc]][hit]
  },
  numeric(1),
  USE.NAMES = FALSE
)
a <- data.frame(X1 = JC, X2 = own_country, stringsAsFactors = FALSE)

# Centred score deviations of the SUI judges, plotted against diver country.
plot(
  x = factor(T$DC),
  y = T[["SUI"]],
  main = "Scores of the SUI-Judges",
  xlab = "Diver Countries",
  ylab = "JScore-AVS",
  las = 2  # axis labels perpendicular to the axis
)  ## Judges from SUI gave SUI Divers many points
# Marker at (30, 0.8); presumably the position of SUI on the country axis
# (TODO confirm against levels(factor(T$DC))).
points(x = 30, y = 0.8)

plot of chunk unnamed-chunk-5


# Centred score deviations of the GBR judges, plotted against diver country.
# NOTE(review): the original remark on this plot said the GBR judges gave
# "SUI" divers many points; given the marker below this probably meant GBR
# divers. Confirm.
plot(
  x = factor(T$DC),
  y = T[["GBR"]],
  main = "Scores of the GBR-Judges",
  xlab = "Diver Countries",
  ylab = "JScore-AVS",
  las = 2  # axis labels perpendicular to the axis
)
# Marker at (16, 0.8); presumably the position of GBR on the country axis
# (TODO confirm against levels(factor(T$DC))).
points(x = 16, y = 0.8)

plot of chunk unnamed-chunk-5