Practices in Data Normalization using normtools() in R

Practices in Data Normalization using normtools() in R





[R package] Normalization Methods for Data Scaling (Feat. normtools)


In my previous post, I introduced normtools, an R package I developed to normalize data using several different methods. In this post, I’ll demonstrate how to use its normtools() function to normalize a real dataset.

1. Data upload

# Install readr on first use, then attach it for read_csv().
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# Raw CSV with the corn yield-component data, hosted on GitHub.
github <- "https://raw.githubusercontent.com/agronomy4future/raw_data_practice/refs/heads/main/corn_yield_components.csv"

# read_csv() downloads http(s) paths by itself, so no explicit url()
# connection is needed. data.frame() converts the tibble returned by
# read_csv() into a base data frame.
df <- data.frame(read_csv(github, show_col_types = FALSE))

# Preview the first five rows.
head(df, 5)
year season	  variety  population location  ear  AGW    KN	  GY
2022 Long Season  CV1	   High D     Site 1    1    297.5  3550  10439
2022 Long Season  CV1	   High D     Site 1    2    283.9  3408  9562
2022 Long Season  CV1	   High D     Site 1    3    270.8  3834  10264
2022 Long Season  CV1	   High D     Site 1    4    328.1  3408  11053
2022 Long Season  CV1	   High D     Site 1    5    298.1  3692  10878
.
.
.

This dataset contains kernel number (KN), average kernel weight (AGW), and grain yield (GY) for different corn varieties across years, seasons, planting populations, and locations.






2. Data normalization

# remotes is needed to install normtools from GitHub.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
# Install normtools only when it is not already available.
if (!requireNamespace("normtools", quietly = TRUE)) {
  remotes::install_github("agronomy4future/normtools")
}
library(remotes)
library(normtools)

# Normalize AGW, KN and GY with method 1 ("z_test"). The summary code further
# down reads the results from columns named Normalized_AGW, Normalized_KN and
# Normalized_GY.
# NOTE(review): the second argument presumably lists the columns that define
# the groups within which values are normalized -- confirm against the
# normtools documentation.
z_test <- normtools(
  df,
  c("year", "season", "location"),
  c("AGW", "KN", "GY"),
  method = 1 # 1 or "z_test"
)

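The code above installs and loads the normtools package, then calls normtools() with method = 1 (equivalently, "z_test") to normalize AGW, KN, and GY, passing year, season, and location as the grouping columns. The normalized values are stored in new columns named Normalized_AGW, Normalized_KN, and Normalized_GY. Next, I’ll summarize the normalized data.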

# Install dplyr on first use, then attach it for the pipe and verbs below.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)

# Summarize each normalized trait for every
# year x season x population x variety combination.
# Output columns are named <trait>_<stat> (e.g. Normalized_KN_Mean,
# Normalized_KN_se); the plotting code relies on exactly these names.
df1 <- z_test %>%
  group_by(year, season, population, variety) %>%
  dplyr::summarize(
    across(
      c(Normalized_AGW, Normalized_KN, Normalized_GY),
      .fns = list(
        Mean = ~ mean(.x, na.rm = TRUE),
        SD = ~ sd(.x, na.rm = TRUE),
        # Count only non-missing values so that n (and therefore se) matches
        # the observations actually used by mean() and sd(); length() would
        # also count NA values and understate the standard error.
        n = ~ sum(!is.na(.x)),
        se = ~ sd(.x, na.rm = TRUE) / sqrt(sum(!is.na(.x)))
      )
    ),
    # Return an ungrouped result and silence the regrouping message.
    .groups = "drop"
  ) %>%
  data.frame()





3. Creating graphs

Then, let’s create a graph.

# Install ggplot2 on first use. The check must test ggplot2 itself; testing
# readr here would skip the installation whenever readr is already present.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Mean normalized kernel number (x) against mean normalized grain yield (y),
# with +/- 1 standard error bars in both directions and one panel per season.
KN_GY <- ggplot(
  data = df1,
  aes(x = Normalized_KN_Mean, y = Normalized_GY_Mean)
) +
  # Horizontal error bars: standard error of kernel number.
  geom_errorbar(
    aes(
      xmin = Normalized_KN_Mean - Normalized_KN_se,
      xmax = Normalized_KN_Mean + Normalized_KN_se
    ),
    position = position_dodge(0.9), width = 0.5
  ) +
  # Vertical error bars: standard error of grain yield.
  geom_errorbar(
    aes(
      ymin = Normalized_GY_Mean - Normalized_GY_se,
      ymax = Normalized_GY_Mean + Normalized_GY_se
    ),
    position = position_dodge(0.9), width = 0.5
  ) +
  geom_point(aes(fill = season, shape = season), color = "black", size = 4.5) +
  scale_fill_manual(values = c("darkred", "orange")) +
  # Shape 21 is a filled circle, so the fill scale above sets the point color.
  scale_shape_manual(values = c(21, 21)) +
  scale_x_continuous(breaks = seq(-5, 5, 2.5), limits = c(-5, 5)) +
  scale_y_continuous(breaks = seq(-5, 5, 2.5), limits = c(-5, 5)) +
  # Dashed reference lines through the origin.
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  #geom_abline(slope=1, linetype="dashed", color="grey55", linewidth=0.5) +
  facet_wrap(~ season) +
  # Short black bar along the top edge of each panel (y = Inf).
  annotate(
    "segment",
    x = -2.5, xend = 2.5, y = Inf, yend = Inf,
    color = "black", linewidth = 1
  ) +
  labs(x = "Normalized kernel number", y = "Normalized grain yield") +
  theme_classic(base_size = 18, base_family = "serif") +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.key = element_rect(color = "white", fill = "white"),
    legend.text = element_text(
      family = "serif", face = "plain", size = 13, color = "black"
    ),
    legend.background = element_rect(fill = "white"),
    strip.background = element_rect(
      color = "white", linewidth = 0.5, linetype = "solid"
    ),
    axis.line = element_line(linewidth = 0.5, colour = "black")
  )

# Open a plotting window of the requested size, then draw the figure.
# windows() exists only on Windows; dev.new() is the portable fallback.
if (.Platform$OS.type == "windows") {
  windows(width = 5.5, height = 5)
} else {
  dev.new(width = 5.5, height = 5)
}
print(KN_GY)

# Folder for exported figures; change this to a directory on your machine.
output_dir <- "C:/Users/kimjk/Desktop/Coding_Output"
ggsave(
  file.path(output_dir, "KN_GY.jpg"),
  plot = KN_GY,
  width = 10 * 2.54, height = 7 * 2.54, units = "cm", dpi = 1000
)

This graph suggests that kernel number is correlated with grain yield.

Next, I’ll analyze the relationship between kernel weight and grain yield.

# Install ggplot2 on first use. The check must test ggplot2 itself; testing
# readr here would skip the installation whenever readr is already present.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Mean normalized kernel weight (x) against mean normalized grain yield (y),
# with +/- 1 standard error bars in both directions and one panel per season.
AGW_GY <- ggplot(
  data = df1,
  aes(x = Normalized_AGW_Mean, y = Normalized_GY_Mean)
) +
  # Horizontal error bars: standard error of kernel weight.
  geom_errorbar(
    aes(
      xmin = Normalized_AGW_Mean - Normalized_AGW_se,
      xmax = Normalized_AGW_Mean + Normalized_AGW_se
    ),
    position = position_dodge(0.9), width = 0.5
  ) +
  # Vertical error bars: standard error of grain yield.
  geom_errorbar(
    aes(
      ymin = Normalized_GY_Mean - Normalized_GY_se,
      ymax = Normalized_GY_Mean + Normalized_GY_se
    ),
    position = position_dodge(0.9), width = 0.5
  ) +
  geom_point(aes(fill = season, shape = season), color = "black", size = 4.5) +
  scale_fill_manual(values = c("darkred", "orange")) +
  # Shape 21 is a filled circle, so the fill scale above sets the point color.
  scale_shape_manual(values = c(21, 21)) +
  scale_x_continuous(breaks = seq(-5, 5, 2.5), limits = c(-5, 5)) +
  scale_y_continuous(breaks = seq(-5, 5, 2.5), limits = c(-5, 5)) +
  # Dashed reference lines through the origin.
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  #geom_abline(slope=1, linetype= "dashed", color="grey55", linewidth=0.5) +
  facet_wrap(~ season) +
  # Short black bar along the top edge of each panel (y = Inf).
  annotate(
    "segment",
    x = -2.5, xend = 2.5, y = Inf, yend = Inf,
    color = "black", linewidth = 1
  ) +
  labs(x = "Normalized kernel weight", y = "Normalized grain yield") +
  theme_classic(base_size = 18, base_family = "serif") +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.key = element_rect(color = "white", fill = "white"),
    legend.text = element_text(
      family = "serif", face = "plain", size = 13, color = "black"
    ),
    legend.background = element_rect(fill = "white"),
    strip.background = element_rect(
      color = "white", linewidth = 0.5, linetype = "solid"
    ),
    axis.line = element_line(linewidth = 0.5, colour = "black")
  )

# Open a plotting window of the requested size, then draw the figure.
# windows() exists only on Windows; dev.new() is the portable fallback.
if (.Platform$OS.type == "windows") {
  windows(width = 6, height = 5)
} else {
  dev.new(width = 6, height = 5)
}
print(AGW_GY)

# Folder for exported figures; change this to a directory on your machine.
output_dir <- "C:/Users/kimjk/Desktop/Coding_Output"
ggsave(
  file.path(output_dir, "AGW_GY.jpg"),
  plot = AGW_GY,
  width = 10 * 2.54, height = 7 * 2.54, units = "cm", dpi = 1000
)

Finally, I’ll analyze the relationship between kernel number and kernel weight.

# Install ggplot2 on first use. The check must test ggplot2 itself; testing
# readr here would skip the installation whenever readr is already present.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Mean normalized kernel number (x) against mean normalized kernel weight (y),
# with +/- 1 standard error bars in both directions and one panel per season.
KN_AGW <- ggplot(
  data = df1,
  aes(x = Normalized_KN_Mean, y = Normalized_AGW_Mean)
) +
  # Horizontal error bars: standard error of kernel number.
  geom_errorbar(
    aes(
      xmin = Normalized_KN_Mean - Normalized_KN_se,
      xmax = Normalized_KN_Mean + Normalized_KN_se
    ),
    position = position_dodge(0.9), width = 0.5
  ) +
  # Vertical error bars: standard error of kernel weight.
  geom_errorbar(
    aes(
      ymin = Normalized_AGW_Mean - Normalized_AGW_se,
      ymax = Normalized_AGW_Mean + Normalized_AGW_se
    ),
    position = position_dodge(0.9), width = 0.5
  ) +
  geom_point(aes(fill = season, shape = season), color = "black", size = 4.5) +
  scale_fill_manual(values = c("darkred", "orange")) +
  # Shape 21 is a filled circle, so the fill scale above sets the point color.
  scale_shape_manual(values = c(21, 21)) +
  scale_x_continuous(breaks = seq(-5, 5, 2.5), limits = c(-5, 5)) +
  scale_y_continuous(breaks = seq(-5, 5, 2.5), limits = c(-5, 5)) +
  # Dashed reference lines through the origin.
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  #geom_abline(slope=1, linetype= "dashed", color="grey55", linewidth=0.5) +
  facet_wrap(~ season) +
  # Short black bar along the top edge of each panel (y = Inf).
  annotate(
    "segment",
    x = -2.5, xend = 2.5, y = Inf, yend = Inf,
    color = "black", linewidth = 1
  ) +
  labs(x = "Normalized kernel number", y = "Normalized kernel weight") +
  theme_classic(base_size = 18, base_family = "serif") +
  theme(
    legend.position = "none",
    legend.title = element_blank(),
    legend.key = element_rect(color = "white", fill = "white"),
    legend.text = element_text(
      family = "serif", face = "plain", size = 13, color = "black"
    ),
    legend.background = element_rect(fill = "white"),
    strip.background = element_rect(
      color = "white", linewidth = 0.5, linetype = "solid"
    ),
    axis.line = element_line(linewidth = 0.5, colour = "black")
  )

# Open a plotting window of the requested size, then draw the figure.
# windows() exists only on Windows; dev.new() is the portable fallback.
if (.Platform$OS.type == "windows") {
  windows(width = 6, height = 5)
} else {
  dev.new(width = 6, height = 5)
}
print(KN_AGW)

# Folder for exported figures; change this to a directory on your machine.
output_dir <- "C:/Users/kimjk/Desktop/Coding_Output"
ggsave(
  file.path(output_dir, "KN_AGW.jpg"),
  plot = KN_AGW,
  width = 10 * 2.54, height = 7 * 2.54, units = "cm", dpi = 1000
)
<Full code>
https://github.com/agronomy4future/r_code/blob/main/Practices_in_Data_Normalization_using_normtools_in_R.ipynb





Comments are closed.