First, I want to get an overview of the data: which events there are, and how many divers and judges took part in each of them.
# Read the diving data; as.is = TRUE keeps text columns as character vectors.
x <- read.csv("http://www.stat.yale.edu/~jay/625/diving/Diving2000.csv", as.is = TRUE)

# Sorted, distinct event codes.
Events <- levels(factor(x$Event))

# Number of distinct non-missing values in a vector.
count_levels <- function(values) length(levels(factor(values)))

# Overview table with one row per event.
S <- data.frame(
  Event = Events,
  Divers = vapply(
    Events,
    function(ev) count_levels(x$Diver[x$Event == ev]),
    integer(1),
    USE.NAMES = FALSE
  ),
  Judges = vapply(
    Events,
    function(ev) count_levels(x$Judge[x$Event == ev]),
    integer(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)
colnames(S) <- c("Event", "Number of Divers", "Number of Judges")

S  # Event | Number of Divers | Number of Judges
## Event Number of Divers Number of Judges
## 1 M10mPF 42 14
## 2 M3mSB 49 14
## 3 W10mPF 40 14
## 4 W3mSB 43 14
Now I want to find out how well the American divers did:
# Best-placed American diver in the final of each event.
# W has one row per event: Event | Diver | Rank.
#
# Name and rank are both taken from the same USA row. Looking the name up by
# rank alone could return a non-American diver whenever two finalists share
# a rank. Events without an American finalist keep NA in Diver and Rank.
us_final <- x[which(x$Round == "Final" & x$Country == "USA"), ]

W <- data.frame(
  Event = Events,
  Diver = rep(NA_character_, length(Events)),
  Rank = rep(NA_integer_, length(Events)),
  stringsAsFactors = FALSE
)

for (k in seq_along(Events)) {
  in_event <- us_final[us_final$Event == Events[k], ]
  # which.min() ignores NA ranks and returns integer(0) if nothing is left.
  best <- which.min(in_event$Rank)
  if (length(best) == 0) {
    next
  }
  W$Rank[k] <- in_event$Rank[best]
  W$Diver[k] <- in_event$Diver[best]
}

W  # Event | best American diver | rank of the best American diver
## Event Diver Rank
## 1 M10mPF RUIZ Mark 6
## 2 M3mSB DUMAIS Troy 6
## 3 W10mPF WILKINSON Laura 1
## 4 W3mSB KEIM Jenny 8
Next, I would like to find out how a diver's Rank follows from the judges' scores (JScore) and the Difficulty of each dive. My hypothesis is that the score of a dive is its Difficulty multiplied by the mean of all judge scores for that dive, and that a diver's final score is the sum of all of his or her dive scores. To check this hypothesis, I build a table that contains the rank calculated under this hypothesis (cRank) next to the real rank (rRank) found in the provided data set. I do this for the final round of the W3mSB competition. (The code below sums Difficulty × JScore over all judge scores instead of taking the mean; this yields the same ranking as long as every dive is scored by the same number of judges.)
#### Question: how does the Rank follow from Difficulty and JScore? ####
#### Hypothesis: a diver's Score is the sum of Difficulty * JScore

# Final round of the women's 3 m springboard; columns 3, 5, 7 and 8 are
# accessed below as Diver, Rank, Difficulty and JScore.
y <- x[x$Event == "W3mSB" & x$Round == "Final", c(3, 5, 7, 8)]

# Total of Difficulty * JScore per diver (tapply orders by factor level,
# so the totals line up with levels(diver_f)).
diver_f <- factor(y$Diver)
totals <- tapply(y$Difficulty * y$JScore, diver_f, sum)
A <- data.frame(Diver = levels(diver_f), Score = totals)

# Highest score first; the position in this ordering is the calculated rank.
A <- A[order(-A$Score), ]
A$cRank <- seq_len(nrow(A))

# Real rank: the Rank stored on the first row of each diver in y.
A$rRank <- as.numeric(y$Rank[match(A$Diver, y$Diver)])

# The hypothesis does NOT hold: cRank and rRank disagree for several divers
# (places 6/7, 8/9 and 10/11 are swapped).
A[, c("Score", "cRank", "rRank")]
## Score cRank rRank
## FU Mingxia 849.1 1 1
## GUO Jingjing 808.8 2 2
## LINDNER Doerte 794.0 3 3
## PAKHALINA Ioulia 779.5 4 4
## LINDBERG Anna 761.6 5 5
## MICHELL Chantelle 747.3 6 7
## ILINA Vera 742.5 7 6
## VYGUZOVA Irina 729.5 8 9
## KEIM Jenny 717.6 9 8
## SOROKINA Ganna 705.8 10 11
## HARTLEY Blythe 705.4 11 10
## DAVISON Michelle 680.0 12 12
One can see that the calculated rank and the real rank differ for several pairs of divers: MICHELL and ILINA (6th/7th), VYGUZOVA and KEIM (8th/9th), and SOROKINA and HARTLEY (10th/11th). So one can conclude that my hypothesis is not true.
Now I will try to find out whether the judges of any country favoured the divers from their own country:
# Do judges score divers from their own country more generously?
DC <- levels(factor(x$Country))   # diver countries, sorted
JC <- levels(factor(x$JCountry))  # judge countries, sorted

# Mean judge score received by each diver country over all judges.
# tapply() orders its result by factor level, i.e. in the same order as DC.
AVS <- tapply(x$JScore, factor(x$Country), mean)

# T has one row per diver country:
#   DC            diver country
#   AVS           overall mean score of that country (rounded to 1 digit)
#   <judge code>  one column per judge country, see below
# NOTE(review): the name `T` masks R's shorthand for TRUE. It is kept because
# code outside this section may refer to it; a rename would be cleaner.
T <- data.frame(DC = DC, AVS = as.vector(AVS), stringsAsFactors = FALSE)

for (jc in JC) {
  # Mean score the judges from `jc` gave to each diver country they scored.
  from_jc <- which(x$JCountry == jc)
  given <- tapply(x$JScore[from_jc], factor(x$Country[from_jc]), mean)

  # Deviation from the country's overall mean; NA where the judges from `jc`
  # never scored a diver of that country.
  deviation <- as.vector(given[match(DC, names(given))]) - T$AVS

  # SCALING: subtract the judge country's own mean deviation so that strict
  # and lenient judges become comparable. Dividing by the variance as well
  # would hide how big the effect really is, so only centring is done.
  T[[jc]] <- round(deviation - mean(deviation, na.rm = TRUE), 1)
}

# Round the overall means for display. The column must be addressed by its
# name "AVS"; indexing with the AVS vector would pick columns by score value.
T[, "AVS"] <- round(T[, "AVS"], 1)

# a: for every judge country (X1), the centred deviation its judges gave to
# divers from that same country (X2); NA if the country has no divers in T.
a <- data.frame(
  X1 = JC,
  X2 = rep(NA_real_, length(JC)),
  stringsAsFactors = FALSE
)
for (k in seq_along(JC)) {
  home <- match(JC[k], T$DC)
  if (!is.na(home)) {
    a$X2[k] <- T[home, JC[k]]
  }
}

# Deviations given by the SUI judges to every diver country.
# Original note: judges from SUI gave SUI divers many points.
plot(factor(T$DC), T$SUI, xlab = "Diver Countries", main = "Scores of the SUI-Judges",
     ylab = "JScore-AVS", las = 2)
# Hard-coded marker at x = 30, y = 0.8, presumably SUI's own position and
# value. TODO confirm against T, or derive both from T.
points(30, 0.8)

# Deviations given by the GBR judges to every diver country.
# NOTE(review): the original note here read "Judges from GBR gave SUI Divers
# many points"; the marker at x = 16 suggests GBR divers were meant. Confirm.
plot(factor(T$DC), T$GBR, xlab = "Diver Countries", main = "Scores of the GBR-Judges",
     ylab = "JScore-AVS", las = 2)
# Hard-coded marker at x = 16, y = 0.8, presumably GBR's own position and
# value. TODO confirm against T, or derive both from T.
points(16, 0.8)