Cluster Analysis

Calculate Correlations
# Pairwise correlation matrix of every column of `ozon` except the first.
# With "pairwise.complete.obs" each coefficient is computed from the rows
# in which both columns involved are non-missing.
# NOTE(review): presumably column 1 is a date/time index and the remaining
# columns are per-station series -- confirm where `ozon` is built.
corloads <- cor(
  ozon[, 2:length(ozon)],
  use = "pairwise.complete.obs"
)

Calculating Distance
# Turn correlations into dissimilarities: a correlation of 1 maps to 0,
# a correlation of -1 maps to 2.
dissimilarity <- 1 - corloads

# Same values as a "dist" object (as.dist() reads the lower triangle).
distance <- as.dist(dissimilarity)

Find the optimal number of clusters (minimum of the KGS penalty function)
# Hierarchical clustering of the dissimilarities, followed by the penalty
# function evaluated for candidate cluster counts.
clus <- hclust(distance)

# `op_k` is used below as a named numeric vector whose names are the
# candidate numbers of clusters.
# NOTE(review): if `kgs` is maptree::kgs, its formal argument is `maxclust`;
# `maxclus` would then only work through partial matching -- confirm and
# spell it out.
op_k <- kgs(clus, distance, maxclus = 20)

# Shown for inspection: the smallest penalty and the entry that attains it.
min(op_k)
op_k[which.min(op_k)]

# Optimal number of clusters = name of the minimal entry.
# which.min() always yields a single index (the first one on ties, NAs are
# skipped), so AnzahlCluster is a scalar; the previous
# which(op_k == min(op_k)) could return several values or none.
AnzahlCluster <- as.numeric(names(which.min(op_k)))

Plot the penalty function against the number of clusters (optimum highlighted in red)
# Penalty value per candidate number of clusters; the selected number is
# drawn as a larger, filled red point, all others as black open circles.
# The highlight mask is computed once. %in% never recycles and never
# returns NA, so the mask stays well-defined even if AnzahlCluster is not
# a single value.
is_best_k <- names(op_k) %in% AnzahlCluster

plot(
  names(op_k), op_k,
  xlab = "Number of Clusters",
  ylab = "KGS Penalty Function",
  col = ifelse(is_best_k, "red", "black"),
  pch = ifelse(is_best_k, 19, 1),
  cex = ifelse(is_best_k, 1.3, 1)
)

# Reference grid aligned with the axis tick marks.
grid(NULL, NULL)

Run the cluster analysis with the optimal number of clusters
# Load the project-local script that is expected to define cbcla_func().
source("CLUSTER.R")

# Cluster the correlation matrix, passing the selected number of clusters
# for both `cl1` and `cl2`.
# NOTE(review): presumably cl1/cl2 are the lower and upper bound of the
# number of clusters to evaluate -- confirm in CLUSTER.R.
cluster_result <- cbcla_func(
  x = corloads,
  cl1 = AnzahlCluster,
  cl2 = AnzahlCluster
)

# `kcfreq` element of the result, kept as-is.
# NOTE(review): looks like the cluster size table -- confirm.
Verteilung <- cluster_result$kcfreq

# First column of the `kcmem` element, wrapped in a one-column data frame.
# NOTE(review): presumably the cluster membership per station -- confirm.
Zuordnung <- data.frame(cluster_result$kcmem[, 1])

Create Dataframe for Graphic Visualisation
# Put the station identifiers and their cluster assignment side by side
# and give the first two columns fixed names.
ClusterKarte <- cbind(ValidStations, Zuordnung)
colnames(ClusterKarte)[1:2] <- c("Code", "Cluster-Nummer")

# Attach the station metadata; stations without a matching "Code" in
# StationInfo are dropped by the inner join.
ClusterKarte <- inner_join(ClusterKarte, StationInfo, by = "Code")

# The start date is not needed in the map table.
ClusterKarte$Startdatum <- NULL

# Swap columns 2 and 3 so that the cluster number ends up in third
# position; the first seven columns are kept.
ClusterKarte <- ClusterKarte[, c(1, 3, 2, 4:7)]

Create Dataframe with Centroids for Reference Stations
# Station codes in the row order of ClusterKarte, kept as a list.
StationNames <- as.list(ClusterKarte$Code)

# Centroid table restricted to the first AnzahlCluster rows
# (one row per cluster).
Centroiden <- data.frame(cluster_result$kccentroids)[1:AnzahlCluster, ]

# Label the centroid columns with the station codes.
# NOTE(review): this assumes the centroid columns are in the same order as
# the rows of ClusterKarte and that the join dropped no station -- confirm.
colnames(Centroiden) <- StationNames

Add each station's distance to the centroid of its assigned cluster
# For every station (row of ClusterKarte) pick the centroid value found in
# the row of its cluster (column 3) and the column named after its station
# code (column 1).
#
# This is done with matrix indexing instead of apply(): apply() first turns
# the data frame into a character matrix, and when any column is
# non-numeric the numeric columns are run through format(), which pads them
# to a common width (" 3" next to "10"). With two-digit cluster numbers the
# padded strings no longer match the row names of Centroiden, so the lookup
# silently broke. Here the cluster number is used as a numeric row position
# and the station code is matched against the column names; a code without
# a matching column yields NA.
# NOTE(review): assumes column 3 holds integer cluster numbers that are row
# positions in Centroiden -- confirm against cbcla_func().
ClusterKarte$Distance <- as.matrix(Centroiden)[cbind(
  as.integer(ClusterKarte[[3]]),
  match(as.character(ClusterKarte[[1]]), colnames(Centroiden))
)]