source('normality.r')
# Load the Titanic passenger table. The rest of the script addresses its
# columns by read.table's default names (V2 ... V5).
d <- read.table(file = "titanic.data")
# Quick look at the first rows and a per-column summary.
head(x = d)
summary(object = d)
Q1) Is there a significant difference in the age distribution between those who survived and those who did not?
# Q1: compare the age distribution (column V3) of survivors and
# non-survivors, split on the survival flag in column V5.
S <- subset(d, V5 == 1)   # Survivors
NS <- subset(d, V5 == 0)  # Non-Survivors

# Go through as.character() first so the conversion is also correct if V3 was
# read as a factor; missing ages are dropped.
t1 <- na.omit(as.numeric(as.character(S$V3)))
t2 <- na.omit(as.numeric(as.character(NS$V3)))

# Shape of the survivor ages.
par(mfrow = c(1, 2))
hist(t1)
boxplot(t1)

# Shape of the non-survivor ages.
par(mfrow = c(1, 2))
hist(t2)
boxplot(t2)

# Normal Q-Q plots for both groups, side by side.
par(mfrow = c(1, 2))
qqnorm(t1)
qqnorm(t2)

# Empirical CDF of each group with the fitted normal CDF on top. curve() with
# add = TRUE spans the current plot's x-range, so the reference line is not
# cut off when ages fall outside a fixed 0..60 grid.
par(mfrow = c(1, 2))
plot(ecdf(t2))
curve(pnorm(x, mean(t2), sd(t2)), add = TRUE)
plot(ecdf(t1))
curve(pnorm(x, mean(t1), sd(t1)), add = TRUE)

# Formal normality checks; normtest() is presumably defined in the sourced
# normality.r -- verify there what it reports.
normtest(t1)
normtest(t2)
There is strong evidence against normality, so a non-parametric test is used.
# Two-sample Wilcoxon rank-sum test on the two age vectors.
wilcox.test(x = t1, y = t2)
Ans Q1) There is no significant difference in the age distribution between those who survived and those who did not.
Q2) Is there a significant difference in the age distribution between those who survived and those who did not, after controlling for gender?
Controlling for gender: male
# Q2 (male passengers only): compare the age distribution (column V3) of
# survivors and non-survivors. V5 is the survival flag, V4 the gender.
S <- subset(d, V5 == 1 & V4 == "male")   # Survivors
NS <- subset(d, V5 == 0 & V4 == "male")  # Non-Survivors

# Go through as.character() first so the conversion is also correct if V3 was
# read as a factor; missing ages are dropped.
t1 <- na.omit(as.numeric(as.character(S$V3)))
t2 <- na.omit(as.numeric(as.character(NS$V3)))

# Shape of the survivor ages.
par(mfrow = c(1, 2))
hist(t1)
boxplot(t1)

# Shape of the non-survivor ages.
par(mfrow = c(1, 2))
hist(t2)
boxplot(t2)

# Normal Q-Q plots for both groups, side by side.
par(mfrow = c(1, 2))
qqnorm(t1)
qqnorm(t2)

# Empirical CDF of each group with the fitted normal CDF on top. curve() with
# add = TRUE spans the current plot's x-range, so the reference line is not
# cut off when ages fall outside a fixed 0..60 grid.
par(mfrow = c(1, 2))
plot(ecdf(t2))
curve(pnorm(x, mean(t2), sd(t2)), add = TRUE)
plot(ecdf(t1))
curve(pnorm(x, mean(t1), sd(t1)), add = TRUE)

# Formal normality checks; normtest() is presumably defined in the sourced
# normality.r -- verify there what it reports.
normtest(t1)
normtest(t2)
With gender restricted to male, there is strong evidence against normality in the age distributions of both survivors and non-survivors, so we proceed with a non-parametric test.
# Two-sample Wilcoxon rank-sum test on the two age vectors.
wilcox.test(x = t1, y = t2)
There is strong evidence of a difference in the age distribution between those who survived and those who did not when gender is restricted to male.
Controlling for gender: female
# Q2 (female passengers only): compare the age distribution (column V3) of
# survivors and non-survivors. V5 is the survival flag, V4 the gender.
S <- subset(d, V5 == 1 & V4 == "female")   # Survivors
NS <- subset(d, V5 == 0 & V4 == "female")  # Non-Survivors

# Go through as.character() first so the conversion is also correct if V3 was
# read as a factor; missing ages are dropped.
t1 <- na.omit(as.numeric(as.character(S$V3)))
t2 <- na.omit(as.numeric(as.character(NS$V3)))

# Shape of the survivor ages.
par(mfrow = c(1, 2))
hist(t1)
boxplot(t1)

# Shape of the non-survivor ages.
par(mfrow = c(1, 2))
hist(t2)
boxplot(t2)

# Normal Q-Q plots for both groups, side by side.
par(mfrow = c(1, 2))
qqnorm(t1)
qqnorm(t2)

# Empirical CDF of each group with the fitted normal CDF on top. curve() with
# add = TRUE spans the current plot's x-range, so the reference line is not
# cut off when ages fall outside a fixed 0..60 grid.
par(mfrow = c(1, 2))
plot(ecdf(t2))
curve(pnorm(x, mean(t2), sd(t2)), add = TRUE)
plot(ecdf(t1))
curve(pnorm(x, mean(t1), sd(t1)), add = TRUE)

# Formal normality checks; normtest() is presumably defined in the sourced
# normality.r -- verify there what it reports.
normtest(t1)
normtest(t2)
With gender restricted to female, there is strong evidence against normality in the age distribution of survivors and no significant evidence against normality in the age distribution of non-survivors. Because at least one group is non-normal, a non-parametric test is used.
# Two-sample Wilcoxon rank-sum test on the two age vectors.
wilcox.test(x = t1, y = t2)
There is strong evidence of a difference in the age distribution between those who survived and those who did not when gender is restricted to female.
Ans Q2) There is strong evidence of a difference in the age distributions of those who survived and those who did not, even after controlling for the gender of the individual.
Q3) Is there a significant difference in the survival probabilities of the two genders?
# Q3: cross-tabulate gender (V4) against survival (V5).
# The first row of d is excluded -- presumably it holds the header text that
# was read in as data; TODO confirm against titanic.data.
a <- table(as.character(d$V4[-1]), as.character(d$V5[-1]))
a

# Chi-squared test of independence, first with the asymptotic p-value and
# then with a Monte Carlo simulated one.
chisq.test(a)
chisq.test(a, simulate.p.value = TRUE)

# Fisher's exact test: two-sided, then one-sided ("less"). Argument name and
# value are spelled out instead of relying on partial matching.
fisher.test(a)
fisher.test(a, alternative = "less")
Ans Q3) There is significant evidence of a difference in the survival probabilities of the two genders.
Q4) Is there a significant difference in survival probabilities for the two genders even after taking the effect of passenger class into account?
# Split the passengers by travel class (column V2), one data frame per class.
p1 <- subset(d, V2 == "1st")  # Passenger class 1
p2 <- subset(d, V2 == "2nd")  # Passenger class 2
p3 <- subset(d, V2 == "3rd")  # Passenger class 3

# Peek at the first-class subset.
head(x = p1)
Checking passenger class 1st
# Gender (V4) by survival (V5) table for first-class passengers.
t1 <- table(as.character(p1$V4), as.character(p1$V5))
t1

# Chi-squared test of independence, then Fisher's exact test two-sided and
# one-sided ("less"); the argument is spelled out rather than partially
# matched.
chisq.test(t1)
fisher.test(t1)
fisher.test(t1, alternative = "less")
Checking passenger class 2nd
# Gender (V4) by survival (V5) table for second-class passengers.
t2 <- table(as.character(p2$V4), as.character(p2$V5))
t2

# Chi-squared test of independence, then Fisher's exact test two-sided and
# one-sided ("less"); the argument is spelled out rather than partially
# matched.
chisq.test(t2)
fisher.test(t2)
fisher.test(t2, alternative = "less")
Checking passenger class 3rd
# Gender (V4) by survival (V5) table for third-class passengers.
t3 <- table(as.character(p3$V4), as.character(p3$V5))
t3

# Chi-squared test of independence, then Fisher's exact test two-sided and
# one-sided ("less"); the argument is spelled out rather than partially
# matched.
chisq.test(t3)
fisher.test(t3)
fisher.test(t3, alternative = "less")
Ans Q4) There is strong evidence of a significant difference in the survival probabilities of the two genders even after taking the effect of passenger class into account.