Illustrating Data Trends with a Line Graph in R Studio

Illustrating Data Trends with a Line Graph in R Studio


I’ll introduce how to create a line graph in R, using geom_line() within ggplot().

First, let’s load the file.

# Load the chlorophyll practice data set directly from GitHub.
library(readr)
github <- "https://raw.githubusercontent.com/agronomy4future/raw_data_practice/main/chlorophyll_contents_on_leaves.csv"
# Read the CSV through a URL connection (column-type messages silenced) and
# wrap the result in data.frame() so the rest of the script works with a
# base data frame.
dataA <- data.frame(
  read_csv(url(github), show_col_types = FALSE)
)
# Print the data to inspect it.
dataA

This data pertains to the changes in chlorophyll content and leaf greenness over time (days after planting). This dataset contains information from two distinct locations (Northern, Southern) and genotypes (CV1, CV2), each with three stress treatments (Control, Stress_1, Stress_2).

# Count the observations in every Location x Genotype x Treatment cell.
# Location is passed first so that rows are locations and columns are
# genotypes, which is the layout of the printed table.
table(dataA$Location, dataA$Genotype, dataA$Treatment)

, ,  = Control
              
                CV1 CV2
  Northern area  20  20
  Southern area  20  20

, ,  = Stress_1
               
                CV1 CV2
  Northern area  20  20
  Southern area  20  20

, ,  = Stress_2
               
                CV1 CV2
  Northern area  20  20
  Southern area  20  20

Now, I’d like to know how chlorophyll content changes under the stress treatments for each genotype at each location.



First, I will divide the data into 12 subsets (2 locations × 2 genotypes × 3 treatments) using subset().

# One subset per Location x Genotype x Treatment combination (12 in total).

# Northern area, genotype CV1
North_CV1_control <- subset(
  dataA, Location == "Northern area" & Genotype == "CV1" & Treatment == "Control"
)
North_CV1_stress1 <- subset(
  dataA, Location == "Northern area" & Genotype == "CV1" & Treatment == "Stress_1"
)
North_CV1_stress2 <- subset(
  dataA, Location == "Northern area" & Genotype == "CV1" & Treatment == "Stress_2"
)

# Northern area, genotype CV2
North_CV2_control <- subset(
  dataA, Location == "Northern area" & Genotype == "CV2" & Treatment == "Control"
)
North_CV2_stress1 <- subset(
  dataA, Location == "Northern area" & Genotype == "CV2" & Treatment == "Stress_1"
)
North_CV2_stress2 <- subset(
  dataA, Location == "Northern area" & Genotype == "CV2" & Treatment == "Stress_2"
)

# Southern area, genotype CV1
South_CV1_control <- subset(
  dataA, Location == "Southern area" & Genotype == "CV1" & Treatment == "Control"
)
South_CV1_stress1 <- subset(
  dataA, Location == "Southern area" & Genotype == "CV1" & Treatment == "Stress_1"
)
South_CV1_stress2 <- subset(
  dataA, Location == "Southern area" & Genotype == "CV1" & Treatment == "Stress_2"
)

# Southern area, genotype CV2
South_CV2_control <- subset(
  dataA, Location == "Southern area" & Genotype == "CV2" & Treatment == "Control"
)
South_CV2_stress1 <- subset(
  dataA, Location == "Southern area" & Genotype == "CV2" & Treatment == "Stress_1"
)
South_CV2_stress2 <- subset(
  dataA, Location == "Southern area" & Genotype == "CV2" & Treatment == "Stress_2"
)

I have divided the data into 12 parts like above. Now, I will use ggplot() to draw line graphs.

library(ggplot2)
# Open an 8 x 8 inch graphics window for the plot. dev.new() is available on
# every platform (windows() exists only on Windows); noRStudioGD = TRUE asks
# for a separate window rather than the RStudio Plots pane.
dev.new(width = 8, height = 8, noRStudioGD = TRUE)
# The x/y mapping is the same for every layer, so it is declared once in
# ggplot(); each geom_line() layer only supplies its own subset.
ggplot(mapping = aes(x = Days_after_planting, y = Chlorophyll_contents)) +
  geom_line(data = North_CV1_control) +
  geom_line(data = North_CV1_stress1) +
  geom_line(data = North_CV1_stress2) +
  geom_line(data = North_CV2_control) +
  geom_line(data = North_CV2_stress1) +
  geom_line(data = North_CV2_stress2) +
  geom_line(data = South_CV1_control) +
  geom_line(data = South_CV1_stress1) +
  geom_line(data = South_CV1_stress2) +
  geom_line(data = South_CV2_control) +
  geom_line(data = South_CV2_stress1) +
  geom_line(data = South_CV2_stress2)

What are your thoughts on this graph? It appears overly complex and doesn’t effectively display clear patterns. Furthermore, utilizing subset() for dividing the data into 12 datasets is impractical and time-consuming. Given this situation, what alternatives do we have? I recommend implementing facet_grid().


Graph Partitioning Using facet_grid() in R Studio


I recommend using the following code.

library(ggplot2)
# Open an 8 x 8 inch graphics window for the plot. dev.new() is available on
# every platform (windows() exists only on Windows); noRStudioGD = TRUE asks
# for a separate window rather than the RStudio Plots pane.
dev.new(width = 8, height = 8, noRStudioGD = TRUE)
ggplot(data = dataA, aes(x = Days_after_planting, y = Chlorophyll_contents)) +
  # One line per treatment, distinguished by colour.
  geom_line(aes(color = Treatment)) +
  # Colours are assigned to the treatments in the order of their levels.
  scale_color_manual(values = c("blue", "red", "black")) +
  # Panels: genotypes in rows, locations in columns.
  facet_grid(Genotype ~ Location) +
  scale_x_continuous(breaks = seq(0, 50, 10), limits = c(0, 50)) +
  scale_y_continuous(breaks = seq(0, 50, 10), limits = c(0, 50)) +
  labs(x = "Days_after_planting", y = "Chlorophyll_content") +
  theme_grey(base_size = 18, base_family = "serif") +
  theme(
    # Legend placed inside the plotting area, near the bottom-left corner.
    # NOTE(review): ggplot2 >= 3.5.0 prefers legend.position = "inside" plus
    # legend.position.inside for this; kept as is for older versions.
    legend.position = c(0.09, 0.08),
    legend.title = element_blank(),
    legend.key.size = unit(0.5, "cm"),
    legend.key = element_rect(color = alpha("white", .05), fill = alpha("white", .05)),
    legend.text = element_text(size = 15),
    legend.background = element_rect(fill = alpha("white", .05)),
    # linewidth (not the deprecated size) sets the border width, consistent
    # with element_line() below.
    strip.background = element_rect(color = "white", linewidth = 0.5, linetype = "solid"),
    axis.line = element_line(linewidth = 0.5, colour = "black")
  )
code summary: https://github.com/agronomy4future/r_code/blob/main/Illustrating_Data_Trends_with_a_Line_Graph_in_R_Studio.ipynb


Let’s decorate the graph further. I’ll add points and standard-error bars at each point.

library(ggplot2)
# Open an 11 x 9 inch graphics window for the plot. dev.new() is available on
# every platform (windows() exists only on Windows); noRStudioGD = TRUE asks
# for a separate window rather than the RStudio Plots pane.
dev.new(width = 11, height = 9, noRStudioGD = TRUE)
ggplot(data = dataA, aes(x = Days_after_planting, y = Chlorophyll_contents)) +
  # One line per treatment; data and x/y are inherited from ggplot().
  geom_line(aes(color = Treatment)) +
  # Filled point shapes with a black outline, one shape/fill per treatment.
  geom_point(aes(fill = Treatment, shape = Treatment), color = "black", size = 3) +
  # Mean +/- standard error. No position adjustment is applied so the bars
  # stay centred on the (un-dodged) points.
  geom_errorbar(
    aes(
      ymin = Chlorophyll_contents - Chlorophyll_contents_Std_error,
      ymax = Chlorophyll_contents + Chlorophyll_contents_Std_error
    ),
    width = 0.5, color = "Black"
  ) +
  # Colours, fills and shapes are assigned in the order of the treatment levels.
  scale_color_manual(values = c("blue", "red", "black")) +
  scale_fill_manual(values = c("blue", "red", "black")) +
  scale_shape_manual(values = c(21, 22, 24)) +
  # Panels: genotypes in rows, locations in columns.
  facet_grid(Genotype ~ Location) +
  scale_x_continuous(breaks = seq(0, 50, 10), limits = c(0, 50)) +
  scale_y_continuous(breaks = c(0, 10, 20, 30, 40, 50)) +
  labs(x = "Days_after_planting", y = "Chlorophyll_content") +
  theme_grey(base_size = 18, base_family = "serif") +
  theme(
    # Legend placed inside the plotting area, near the bottom-left corner.
    # NOTE(review): ggplot2 >= 3.5.0 prefers legend.position = "inside" plus
    # legend.position.inside for this; kept as is for older versions.
    legend.position = c(0.09, 0.08),
    legend.title = element_blank(),
    legend.key.size = unit(0.5, "cm"),
    legend.key = element_rect(color = alpha("white", .05), fill = alpha("white", .05)),
    legend.text = element_text(size = 15),
    legend.background = element_rect(fill = alpha("white", .05)),
    # linewidth (not the deprecated size) sets the border width, consistent
    # with element_line() below.
    strip.background = element_rect(color = "white", linewidth = 0.5, linetype = "solid"),
    axis.line = element_line(linewidth = 0.5, colour = "black")
  )


Leave a Reply

If you include a website address in the comment section, I cannot see your comment as it will be automatically deleted and will not be posted. Please refrain from including website addresses.