# Data from http://neuroph.sourceforge.net/tutorials/ForestFires/PredictingTheBurnedAreaOfForestFires.html
# Column-name abbreviations are explained at http://cwfis.cfs.nrcan.gc.ca/background/summary/fwi

# Clean up the session.
# NOTE(review): wiping the global environment from inside a script is usually
# discouraged; it is kept here so the script's side effects stay unchanged.
gc(reset = TRUE)
rm(list = ls())

# Load the required libraries.
library("dplyr")
library(corrplot)

# Read the input file (semicolon-separated, decimal comma).
path <- "D:/forestfire.csv"
dane <- read.csv(path, header = TRUE, sep = ";", dec = ",")

# Keep only the rows with 0 < area <= 40.
dane2 <- dane %>%
  filter(area > 0, area <= 40)

#' Find the positions of extreme outliers in a numeric vector
#'
#' A value counts as an extreme outlier when it lies more than `k`
#' interquartile ranges above the third quartile or below the first quartile.
#'
#' @param data A numeric vector. Missing values are ignored when the
#'   quartiles are computed and are never reported as outliers.
#' @param k Multiplier applied to the interquartile range (default 3).
#' @return An integer vector with the indices of the outlying elements of
#'   `data`; `integer(0)` when there are none.
FindOutliers <- function(data, k = 3) {
  # na.rm = TRUE: quantile() stops with an error on NA input otherwise.
  quartiles <- quantile(data, probs = c(0.25, 0.75), na.rm = TRUE,
                        names = FALSE)
  lower_q <- quartiles[1]
  upper_q <- quartiles[2]
  iqr <- upper_q - lower_q

  upper_threshold <- upper_q + k * iqr
  lower_threshold <- lower_q - k * iqr

  # which() skips NA comparisons, so only real outliers are returned.
  which(data > upper_threshold | data < lower_threshold)
}

#' Draw a scatter plot with enlarged axis labels in the margins
#'
#' @param x,y Numeric vectors plotted on the horizontal and vertical axis.
#' @param x_label,y_label Text written below / left of the plot.
#' @param title Main title of the plot.
#' @return `NULL`, invisibly; called for its plotting side effect.
plot_relationship <- function(x, y, x_label, y_label, title) {
  # The built-in axis labels are blanked and redrawn with mtext() so that
  # they can be larger than the default.
  plot(x, y,
       col = "black", pch = 21, bg = "red", cex = 2,
       ylab = "", xlab = "", axes = TRUE, main = title)
  mtext(x_label, side = 1, line = 2.5, cex = 1.5)
  mtext(y_label, side = 2, line = 2.5, cex = 1.5)
  invisible(NULL)
}

# Replace extreme FFMC outliers with NA.
# `temp` holds row indices here, not the temperature column; the name is kept
# in case later code refers to it.
temp <- FindOutliers(dane2$FFMC)
dane2$FFMC[temp] <- NA

# Histogram of the burned area.
hist(dane2$area,
     col = "green",                         # bar colour
     main = "histogram obszaru spalonego",  # title
     xlab = "area",                         # x-axis label
     ylab = "liczba wystapien")             # y-axis label

# Correlation matrix of the numeric variables.
# select_() is deprecated in dplyr, so select() with bare names is used.
dane_cov <- dane2 %>%
  select(area, FFMC, DMC, DC, ISI, temp, RH, wind, rain)
CORELATIONS <- cor(dane_cov, method = "pearson", use = "complete.obs")
CORELATIONS <- round(CORELATIONS, 3)
corrplot(CORELATIONS, type = "lower", tl.col = "black", tl.srt = 45)

# Scatter plots of the pairwise relationships.

# The trend line needs a fitted model; the original referenced an undefined
# `linear_model`. The plot has area on x and temp on y, hence temp ~ area.
linear_model <- lm(temp ~ area, data = dane2)
plot_relationship(dane2$area, dane2$temp, "area", "temp",
                  "Zaleznosc obszaru spalonego od temperatury")
abline(linear_model, lty = 5, lwd = 2, col = "blue")

plot_relationship(dane2$area, dane2$FFMC, "area", "FFMC",
                  "Zaleznosc obszaru spalonego od indexu ffmc")

# The y axis shows DMC (the original label said "RH" by mistake).
plot_relationship(dane2$area, dane2$DMC, "area", "DMC",
                  "Zaleznosc area od DMC")

plot_relationship(dane2$ISI, dane2$FFMC, "ISI", "FFMC",
                  "Zaleznosc isi od indexu ffmc")

plot_relationship(dane2$temp, dane2$RH, "temp", "RH",
                  "Zaleznosc temp od rh")

# Simple linear regression of area on temp.
# Using `data = dane2` gives the coefficient the clean name "temp" and lets
# predict() accept new data.
fit1 <- lm(area ~ temp, data = dane2)
summary(fit1)

# Observed vs. predicted area. The reference line is the identity y = x:
# the coefficients of fit1 describe a line over temp, not over area, so
# abline(fit1) would be meaningless on these axes.
plot(dane2$area, predict(fit1))
abline(a = 0, b = 1)

# Multiple regression