# Data source:
#   http://neuroph.sourceforge.net/tutorials/ForestFires/PredictingTheBurnedAreaOfForestFires.html
# Abbreviations used in the table/column names:
#   http://cwfis.cfs.nrcan.gc.ca/background/summary/fwi

# Start from a clean workspace. Objects are removed first so that the
# garbage collection that follows can actually reclaim their memory.
# NOTE(review): rm(list = ls()) wipes the caller's global environment when
# this script is sourced; it is kept only because the script has always
# done so -- consider dropping it.
rm(list = ls())
gc(reset = TRUE)

# Load the required libraries.
library("dplyr")

# Read the input file (semicolon-separated fields, decimal comma).
path <- "D:/forestfire.csv"
dane <- read.csv(path, header = TRUE, sep = ";", dec = ",")

# Keep only the rows whose burned area lies in the interval (0, 40].
dane2 <- dane %>%
  subset(area > 0 & area <= 40)
# Locate extreme outliers in a numeric vector.
#
# A value is flagged when it lies more than `k` interquartile ranges below
# the first quartile or above the third quartile.
#
# Arguments:
#   data  numeric vector; missing values are ignored when the quartiles are
#         computed and are never flagged themselves.
#   k     multiplier applied to the interquartile range to build the fences
#         (default 3).
#
# Returns:
#   Integer vector with the positions in `data` of the flagged values
#   (integer(0) when nothing is flagged).
FindOutliers <- function(data, k = 3) {
  # One quantile() call for both quartiles; na.rm = TRUE because quantile()
  # stops with an error on missing values otherwise.
  quartiles <- quantile(data, probs = c(0.25, 0.75), na.rm = TRUE,
                        names = FALSE)
  iqr <- quartiles[2] - quartiles[1]

  lower_fence <- quartiles[1] - k * iqr
  upper_fence <- quartiles[2] + k * iqr

  # which() drops NA comparisons, so missing inputs are never reported.
  which(data < lower_fence | data > upper_fence)
}
# Replace the extreme FFMC values with NA.
# `temp` holds the row positions returned by FindOutliers(); it is a
# separate object from the `temp` column of dane2.
temp <- FindOutliers(dane2$FFMC)
dane2$FFMC <- replace(dane2$FFMC, temp, NA)
# Histogram of the burned area.
hist(
  dane2$area,
  main = "histogram obszaru spalonego",  # plot title
  xlab = "area",                         # x-axis label
  ylab = "liczba wystapien",             # y-axis label
  col = "green"                          # bar colour
)
# Correlation between the burned area and the other numeric variables.
# select(all_of(...)) replaces the deprecated select_(); passing the names
# as strings also keeps the `temp` column from being confused with the
# global `temp` object.
dane_cov <- dane2 %>%
  select(all_of(c("area", "FFMC", "DMC", "DC", "ISI",
                  "temp", "RH", "wind", "rain")))

# Pearson correlations over the rows without missing values, rounded to
# three decimal places.
CORELATIONS <- cor(dane_cov, method = "pearson", use = "complete.obs")
CORELATIONS <- round(CORELATIONS, 3)

# Draw the lower triangle of the correlation matrix.
library(corrplot)
corrplot(CORELATIONS, type = "lower", tl.col = "black", tl.srt = 45)
# Scatter plots of pairwise relationships.

# Burned area (x axis) against temperature (y axis).
plot(dane2$area, dane2$temp,
     col = "black", pch = 21, bg = "red", cex = 2, ylab = "", xlab = "",
     axes = TRUE, main = "Zaleznosc obszaru spalonego od temperatury")
mtext("area", side = 1, line = 2.5, cex = 1.5)
mtext("temp", side = 2, line = 2.5, cex = 1.5)

# The trend line needs a fitted model; none exists at this point in the
# script, so fit it here on the same axes as the plot (temp as a function
# of area).
linear_model <- lm(temp ~ area, data = dane2)
abline(linear_model, lty = 5, lwd = 2, col = "blue")
# Burned area (x axis) against the FFMC index (y axis).
plot(
  x = dane2$area,
  y = dane2$FFMC,
  main = "Zaleznosc obszaru spalonego od indexu ffmc",
  xlab = "",
  ylab = "",
  axes = TRUE,
  pch = 21,
  col = "black",
  bg = "red",
  cex = 2
)
# The automatic axis titles are blanked above; draw them in the margins.
mtext("area", side = 1, line = 2.5, cex = 1.5)
mtext("FFMC", side = 2, line = 2.5, cex = 1.5)
# Burned area (x axis) against DMC (y axis).
plot(dane2$area, dane2$DMC,
     col = "black", pch = 21, bg = "red", cex = 2, ylab = "", xlab = "",
     axes = TRUE, main = "Zaleznosc area od DMC")
mtext("area", side = 1, line = 2.5, cex = 1.5)
# The y axis shows DMC, so label it as such (it used to read "RH").
mtext("DMC", side = 2, line = 2.5, cex = 1.5)
# ISI (x axis) against the FFMC index (y axis).
with(
  dane2,
  plot(ISI, FFMC,
       main = "Zaleznosc isi od indexu ffmc",
       xlab = "", ylab = "", axes = TRUE,
       pch = 21, col = "black", bg = "red", cex = 2)
)
# Axis titles go in the margins instead of the blanked default labels.
mtext(text = "ISI", side = 1, line = 2.5, cex = 1.5)
mtext(text = "FFMC", side = 2, line = 2.5, cex = 1.5)
# Temperature (x axis) against RH (y axis).
plot(
  dane2$temp, dane2$RH,
  pch = 21, cex = 2,
  col = "black", bg = "red",
  xlab = "", ylab = "",
  axes = TRUE,
  main = "Zaleznosc temp od rh"
)
# Margin text stands in for the suppressed axis titles.
mtext(side = 1, text = "temp", line = 2.5, cex = 1.5)
mtext(side = 2, text = "RH", line = 2.5, cex = 1.5)
# Simple linear regression: burned area explained by temperature.
# The formula uses column names with `data =` rather than dane2$...
# na.exclude pads the predictions with NA for any dropped rows, so they
# stay aligned with the rows of dane2.
fit1 <- lm(area ~ temp, data = dane2, na.action = na.exclude)
summary(fit1)

# Observed burned area against the model's predictions.
plot(dane2$area, predict(fit1))
# Reference line y = x (perfect prediction). The coefficients of fit1
# describe area as a function of temp, so they cannot be drawn on these
# axes.
abline(a = 0, b = 1)
- #regresja wielowymiarowa