Linear Regression

Simple linear Regression
# Simple linear regression of every station column against the first column.
# Reads `data` (column 1 is the regressor, columns 2..n are fitted one by one)
# and writes the results into the existing data frame `Overview`.
# NOTE(review): `Overview` is not created here; it presumably has one row per
# station column of `data` -- confirm against the code that builds it.

# 1) Empty data frame that collects one row of statistics per fitted column
Statframe <- data.frame(Station = 1, Konstante = 1, Trendwert = 1, Pvalue = 1)[-1, ]

# 2) Fit one model per station column.
#    seq_len(...)[-1] is empty when `data` has fewer than two columns, whereas
#    2:ncol(data) would count backwards (2, 1) in that case.
x <- data[, 1]
for (i in seq_len(ncol(data))[-1]) {
  y <- data[, i]
  mod <- lm(y ~ x)
  # Coefficient matrix of the fit; extracted once and reused below
  coefs <- summary(mod)[["coefficients"]]
  # "Station" holds the column index i, not the column name
  Statframe[i, ] <- c(
    i,
    coefs["(Intercept)", "Estimate"],
    coefs["x", "Estimate"],
    coefs["x", "Pr(>|t|)"]
  )
}

# 3) Rows are written at index i >= 2, so row 1 is an all-NA filler: drop it
Statframe <- Statframe[-1, ]
rownames(Statframe) <- NULL

# 4) Start and end value of the regressor, plus their numeric representation
Overview$Start <- data[1, 1]
Overview$Ende <- data[nrow(data), 1]
Overview$NStart <- as.numeric(Overview$Start)
Overview$NEnde <- as.numeric(Overview$Ende)

# 5) Copy the regression results into the overview table
Overview$Konstante <- Statframe$Konstante
Overview$Trendwert <- Statframe$Trendwert
Overview$Pvalue <- Statframe$Pvalue

# 6) Value of the fitted trend line at the start and at the end
Overview$Tline_Start <- Overview$Trendwert * Overview$NStart + Overview$Konstante
Overview$Tline_Ende <- Overview$Trendwert * Overview$NEnde + Overview$Konstante

# 7) Change from start to end of the trend line, in percent of the start value
Overview$Veränderung <- Overview$Tline_Ende / (Overview$Tline_Start / 100) - 100
 * 1) Create an empty data frame
 * 2) Loop over each station
 * 3) Remove the first row (if empty)
 * 4) Add the start and end dates and convert them to numeric values
 * 5) Export the results of the regression
 * 6) Calculate the trend-line values at the beginning and at the end
 * 7) Calculate the difference in percent

Linear Regression within a List of Dataframes
# Linear regression for every column of every data frame in a list.
# Reads `Temp`, whose columns "Year" and "Day" are used by name and whose
# remaining columns (everything after the first two) are processed one by one.
# Writes one "<column name>_Statistics.txt" file per processed column.
library(reshape)

# 1) One subset per value column: Year, Day and that column.
#    Map() names the list elements after the column names.
Temp_subs <- Map(function(nm) Temp[c("Year", "Day", nm)], names(Temp)[-(1:2)])

# 2) Reshape every subset to wide form: one row per Year, one column per Day
Temp_subs <- lapply(Temp_subs, function(x) {
  cast(x, Year ~ Day)
})

# 3) Empty data frame for the regression statistics
Statframe <- data.frame(Day = 1, Konstante = 1, Trendwert = 1, Pvalue = 1)[-1, ]

# 4) Fit one model per day column and station, then write one file per station
for (a in names(Temp_subs)) {
  # As in the original, the regressor and the column range are always taken
  # from the first list element.
  x <- Temp_subs[[1]]$Year
  for (i in seq_len(ncol(Temp_subs[[1]]))[-1]) {
    # Regression model
    y <- Temp_subs[[a]][, i]
    mod <- lm(y ~ x)
    # Coefficient matrix of the fit (inspect with str(summary(mod)))
    coefs <- summary(mod)[["coefficients"]]
    # "Day" holds the column index i; row 1 stays an all-NA filler row
    Statframe[i, ] <- c(
      i,
      coefs["(Intercept)", "Estimate"],
      coefs["x", "Estimate"],
      coefs["x", "Pr(>|t|)"]
    )
  }
  # Statframe is reused across stations: the inner loop overwrites the same
  # rows on every pass. Writing once per station gives the same final file as
  # rewriting it after every single fit.
  filename <- paste0(a, "_Statistics.txt")
  write.table(Statframe, filename, row.names = TRUE, col.names = TRUE)
}
 * 1) Create the subsets
 * 2) Transform the list of data frames
 * 3) Create an empty data frame for the regression results
 * 4) Loop over the list and write the output

Stepwise cross validation
# Stepwise linear regression of Cal_Temp$Tmax on the columns of Cal_Data.
# Reads Data_Periode1..3 and AUG_Temp_Periode1..3 and produces
# `Regression_Summary` (coefficient table plus R-squared columns).

# 1) Calibration set: stack periods 1 + 2 + 3
Cal_Data1 <- rbind(Data_Periode1, Data_Periode2)
Cal_Data <- rbind(Cal_Data1, Data_Periode3)
rownames(Cal_Data) <- NULL
Cal_Temp1 <- rbind(AUG_Temp_Periode1, AUG_Temp_Periode2)
Cal_Temp <- rbind(Cal_Temp1, AUG_Temp_Periode3)
rownames(Cal_Temp) <- NULL

# 2) Build the formula from every column of Cal_Data except the first.
#    colnames(Cal_Data)[-1] also works with a single predictor column, where
#    Cal_Data[, 2:ncol(Cal_Data)] would drop to a vector and lose its name.
stopifnot(ncol(Cal_Data) >= 2)
gridnames <- paste0("Cal_Data$", colnames(Cal_Data)[-1])
fmla <- as.formula(paste("Cal_Temp$Tmax ~ ", paste(gridnames, collapse = "+")))

# 3) Full model
Regression <- lm(fmla)
summary(Regression)

# 4) Stepwise term selection in both directions
StepRegression <- step(Regression, direction = "both")

# 5) Formula of the selected model, taken as text from its call
finalmod <- as.character(StepRegression$call)[2]

# 6) Refit using the selected formula
OptiRegression <- lm(finalmod)

# 7) Output: coefficient table, with R-squared and adjusted R-squared repeated
#    on every row. The terms keep their "Cal_Data$" prefix in the row names.
opti_summary <- summary(OptiRegression)
Regression_Summary <- as.data.frame(opti_summary$coefficients)
Regression_Summary$RSquared <- opti_summary$r.squared
Regression_Summary$AdjRSquared <- opti_summary$adj.r.squared
 * 1) Build the calibration set from periods 1 + 2 + 3
 * 2) Extract the selected stations and pass them into the formula
 * 3) Perform the linear regression
 * 4) Optimize the model
 * 5) Extract the stations of the optimized model
 * 6) Perform the linear regression for the optimized model
 * 7) Output

Modelling after cross validation
# Apply the coefficients in `Regression_Summary` to the columns of `Referenz`.

# Term names without the intercept row, keeping characters from position 10 on.
# This removes a 9-character prefix such as the "Cal_Data$" used when the
# formula was built.
gridnames <- str_sub(rownames(Regression_Summary)[-1], 10)

# Column 1 of the summary: row 1 is the intercept, the remaining rows are the
# per-term coefficients.
estimates <- Regression_Summary[, 1]
predictors <- as.matrix(Referenz[, gridnames])

# Linear predictor: predictors %*% coefficients + intercept, taken as a vector
Modell <- (predictors %*% estimates[-1] + estimates[1])[, 1]

# Single-column data frame (column "Modell") with default row names
Modell <- as.data.frame(Modell)
rownames(Modell) <- NULL