# ---------------------------------------------------------------------------
# Load the Rhode Island score sheet, clean the percentage columns, and fit the
# first round of linear models for Total.
# ---------------------------------------------------------------------------

# Convert values such as "85%" to the number 85; non-numeric text becomes NA.
strip_percent <- function(x) {
  as.numeric(gsub("%", "", x, fixed = TRUE))
}

# NOTE(review): absolute path on the original author's machine -- adjust
# before running elsewhere.
rhode <- read.csv(
  "C:\\Users\\scrawfo8\\Desktop\\rhodeisland.csv",
  header = TRUE,
  skip = 2
)

# Quick exploratory looks at the raw data.
plot(rhode)
plot(Total ~ Overall, data = rhode)

# Keep the first 334 rows only (presumably the rest is not student data --
# TODO confirm against the CSV).
rhode <- rhode[1:334, ]

# First pass with Literature still in its raw (unconverted) form.
fit <- lm(Total ~ Literature, data = rhode)
summary(fit)

# Strip the percent signs so the score columns are numeric.
# NOTE(review): the first conversion is redundant if Literature lies within
# columns 5:37; kept so the result does not depend on that.
rhode$Literature <- strip_percent(rhode$Literature)
rhode[5:37] <- lapply(rhode[5:37], strip_percent)

# Drop every row that has a missing value in any column.
rhode <- na.omit(rhode)

# Main-effects model.
fit <- lm(
  Total ~ School + Overall + Literature + Writing + Speaking.and.Listening +
    Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
    Number.and.Quantity + Algebra + Mathematical.Modeling + Geometry +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

# Standard lm diagnostics, then residuals against data columns 1 and 5:15
# (presumably the predictors in the model -- confirm against the CSV header).
par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}

# Maybe square Literature.
fit <- lm(
  Total ~ School + Overall + Literature + I(Literature^2) + Writing +
    Speaking.and.Listening +
    Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
    Number.and.Quantity + Algebra + Mathematical.Modeling + Geometry +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}

# Square Overall as well?
fit <- lm(
  Total ~ School + Overall + I(Overall^2) + Literature + I(Literature^2) +
    Writing + Speaking.and.Listening +
    Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
    Number.and.Quantity + Algebra + Mathematical.Modeling + Geometry +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}

# Let's fish for interactions: full four-way crossing of Mathematical.Modeling,
# Literature, Literature^2 and Number.and.Quantity.
fit <- lm(
  Total ~ School + Overall + I(Overall^2) + Literature + I(Literature^2) +
    Writing + Speaking.and.Listening +
    Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
    Algebra + Geometry +
    Mathematical.Modeling * Literature * I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)

par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
plot(fit$residuals ~ rhode[, 1], xlab = colnames(rhode)[1])
# Residuals of the current fit against data columns 5:15 (presumably the
# numeric predictors -- confirm against the CSV header).
for (col_idx in 5:15) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}
summary(fit)

# Meh - mediocre results. Let's try only the two-way interactions.
fit <- lm(
  Total ~ School + Overall + I(Overall^2) + Literature + I(Literature^2) +
    Writing + Speaking.and.Listening +
    Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
    Algebra + Geometry +
    Mathematical.Modeling * Literature +
    Mathematical.Modeling * I(Literature^2) +
    Mathematical.Modeling * Number.and.Quantity +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)

# Standard lm diagnostics, then residuals against columns 1 and 5:15.
par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}
summary(fit)

# Let's kill math*number because it's obnoxiously long.
# Two-way interaction model without Mathematical.Modeling * Number.and.Quantity.
fit <- lm(
  Total ~ School + Overall + I(Overall^2) + Literature + I(Literature^2) +
    Writing + Speaking.and.Listening +
    Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
    Algebra + Geometry +
    Mathematical.Modeling * Literature +
    Mathematical.Modeling * I(Literature^2) +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)

# Standard lm diagnostics, then residuals against data columns 1 and 5:15
# (presumably the candidate predictors -- confirm against the CSV header).
par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}
summary(fit)

# Let's kill Literacy in Science because it's obnoxiously long.
# Model without Literacy.in.Science.and.Techical.Subjects.
fit <- lm(
  Total ~ School + Overall + I(Overall^2) + Literature + I(Literature^2) +
    Writing + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Algebra + Geometry +
    Mathematical.Modeling * Literature +
    Mathematical.Modeling * I(Literature^2) +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)

# Standard lm diagnostics, then residuals against data columns 1 and 5:15
# (presumably the candidate predictors -- confirm against the CSV header).
par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}
summary(fit)

# Kill Lit^2 * Math.
fit <- lm(
  Total ~ School + Overall + I(Overall^2) + Literature + I(Literature^2) +
    Writing + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Algebra + Geometry +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)

par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}
summary(fit)

# Overall has such a big p-value, but we would kill Overall^2 first.
fit <- lm(
  Total ~ School + Overall + Literature + I(Literature^2) + Writing +
    Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Algebra + Geometry +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)

par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}
summary(fit)

# Drop Algebra -- and let's leave off residual plots until the end.
fit <- lm(
  Total ~ School + Overall + Literature + I(Literature^2) + Writing +
    Speaking.and.Listening +
    Literacy.in.Social.Studies + Geometry +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

# Drop Writing.
fit <- lm(
  Total ~ School + Overall + Literature + I(Literature^2) +
    Speaking.and.Listening +
    Literacy.in.Social.Studies + Geometry +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)
# Summary of the model fitted just before this point.
summary(fit)

# Drop Overall.
fit <- lm(
  Total ~ School + Literature + I(Literature^2) + Speaking.and.Listening +
    Literacy.in.Social.Studies + Geometry +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

# Drop Geometry (because we love statistics).
fit <- lm(
  Total ~ School + Literature + I(Literature^2) + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

# Let's just try dropping lit^2 * number real quick?
fit <- lm(
  Total ~ School + Literature + I(Literature^2) + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

# Stat is a little better? Drop lit * number too?
fit <- lm(
  Total ~ School + Literature + I(Literature^2) + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Mathematical.Modeling * Literature +
    Statistics.and.Probability,
  data = rhode
)
summary(fit)

# No, stats won't survive. We ought to have taken it out before
# the last two. Let's go back and do that.
# Model with Statistics.and.Probability removed but both Number.and.Quantity
# interactions still present.
fit <- lm(
  Total ~ School + Literature + I(Literature^2) + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity +
    I(Literature^2) * Number.and.Quantity,
  data = rhode
)
summary(fit)

# Now I can properly justify killing lit^2 * number:
# it's complicated and it's not significant.
fit <- lm(
  Total ~ School + Literature + I(Literature^2) + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Mathematical.Modeling * Literature +
    Literature * Number.and.Quantity,
  data = rhode
)
summary(fit)

# Drop lit * number next. Original note: "as complicated and (about) most
# significant" -- presumably meaning the largest p-value among the
# complicated terms; TODO confirm.
fit <- lm(
  Total ~ School + Literature + I(Literature^2) + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# Drop lit^2.
fit <- lm(
  Total ~ School + Literature + Speaking.and.Listening +
    Literacy.in.Social.Studies +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# Drop Literacy.in.Social.Studies.
fit <- lm(
  Total ~ School + Literature + Speaking.and.Listening +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# I'm going to leave Literature in because it's interacting
# with the math modeling.
fit <- lm(
  Total ~ School + Speaking.and.Listening +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# SIDE JOURNEY
# Explore the question of whether School interacts with
# these variables (as suggested by the client).
fit <- lm(
  Total ~ School * (
    Overall + Literature + Writing + Speaking.and.Listening +
      Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
      Number.and.Quantity + Algebra + Mathematical.Modeling + Geometry +
      Statistics.and.Probability
  ),
  data = rhode
)
summary(fit)

# Rocky is special, but only Rocky: build a two-level indicator
# ("Rocky" / "Not") and store it as a column of rhode. Assigning the column by
# name (rather than cbind) keeps this step safe to re-run.
isrocky <- rep("Not", length.out = nrow(rhode))
isrocky[rhode$School == "Rocky"] <- "Rocky"
rhode$isrocky <- isrocky

fit <- lm(
  Total ~ School + isrocky * (
    Overall + Literature + Writing + Speaking.and.Listening +
      Literacy.in.Social.Studies + Literacy.in.Science.and.Techical.Subjects +
      Number.and.Quantity + Algebra + Mathematical.Modeling + Geometry +
      Statistics.and.Probability
  ),
  data = rhode
)
summary(fit)

# OK, so Rocky IS important.
# Let's look at creating two models, one for all the schools
# and one for Rocky.
fit <- lm(
  Total ~ School + isrocky * (
    Speaking.and.Listening +
      Mathematical.Modeling * Literature
  ),
  data = rhode
)
summary(fit)

# Get rid of the annoying NA line by removing the isrocky main effect.
fit <- lm(
  Total ~ School + isrocky * (
    Literature + Speaking.and.Listening +
      Mathematical.Modeling * Literature
  ) - isrocky,
  data = rhode
)
summary(fit)

# Literature * Math is the same for Rocky.
fit <- lm(
  Total ~ School + isrocky * (
    Speaking.and.Listening +
      Mathematical.Modeling + Literature
  ) - isrocky +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# Literature is the same for Rocky.
fit <- lm(
  Total ~ School + isrocky * (
    Speaking.and.Listening +
      Mathematical.Modeling
  ) - isrocky +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# Math is the same for Rocky.
fit <- lm(
  Total ~ School + isrocky * (Speaking.and.Listening) - isrocky +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# Separating Rocky doesn't really help: return to the model without isrocky.
fit <- lm(
  Total ~ School + Speaking.and.Listening +
    Mathematical.Modeling * Literature,
  data = rhode
)
summary(fit)

# Standard lm diagnostics, then residuals against data columns 1 and 5:15
# (presumably the original candidate predictors -- confirm against the CSV
# header).
par(mfrow = c(2, 2))
plot(fit)
par(mfrow = c(1, 1))
for (col_idx in c(1, 5:15)) {
  plot(fit$residuals ~ rhode[, col_idx], xlab = colnames(rhode)[col_idx])
}

# Coefficient table, one row per term. (An lm object has no `names`
# component, so coef() supplies the term names as row names instead.)
cbind(Estimate = coef(fit))

# Large prediction equation
# Totalhat=9.30733788+EastGreenwich*24.36519439+
# SchoolHendricken*14.28301452+
# SchoolPilgrim*12.49321934+
# SchoolRocky*11.11910811+
# SchoolTollGate*7.49398218+
# SchoolWarwickVeterans*8.24224745+
# SchoolWestWarwick*14.48851894+
# Speaking.and.Listening*0.19498674+
# Mathematical.Modeling*7.69206619+
# Literature*0.28013729-
# Mathematical.Modeling:Literature*0.07529707

# Just look at Coventry with speaking at 0%: only the intercept, the
# Mathematical.Modeling and Literature slopes, and their interaction remain.
# The constants below are copied by hand from the equation above.
b_intercept <- 9.30733788
b_math <- 7.69206619
b_lit <- 0.28013729
b_math_lit <- -0.07529707

# Predicted Total for given Mathematical.Modeling and Literature scores.
# Either argument may be a vector; the other is recycled.
predict_total <- function(math, lit) {
  b_intercept + b_math * math + b_lit * lit + b_math_lit * math * lit
}

line_cols <- c("red", "blue", "green")
score_grid <- seq(0, 100, length.out = 1000)

# plot(-100,-100,xlim=c(0,100),ylim=c(0,100),xlab="Mathematical Modeling",ylab="Total")

# Total vs Mathematical.Modeling, with one fitted line per Literature level.
# The legend is generated from the same vector as the curves so the two cannot
# disagree (previously the green curve was drawn at 80 but labelled 90).
lit_levels <- c(10, 50, 80)
plot(Total ~ Mathematical.Modeling, data = rhode)
for (k in seq_along(lit_levels)) {
  lines(
    score_grid,
    predict_total(math = score_grid, lit = lit_levels[k]),
    col = line_cols[k]
  )
}
legend(
  "bottomright",
  col = line_cols,
  legend = paste0("Literature=", lit_levels),
  lty = 1
)

# Total vs Literature, with one fitted line per Mathematical.Modeling level.
math_levels <- c(20, 50, 90)
plot(Total ~ Literature, data = rhode)
for (k in seq_along(math_levels)) {
  lines(
    score_grid,
    predict_total(math = math_levels[k], lit = score_grid),
    col = line_cols[k]
  )
}
legend(
  "bottomleft",
  col = line_cols,
  legend = paste0("MathModeling=", math_levels),
  lty = 1
)