R语言农艺性状表型数据与环境关联互作代码分析
发表于:2025-11-07 作者:千家信息网编辑
千家信息网最后更新 2025年11月07日,这篇文章主要介绍了R语言农艺性状表型数据与环境关联互作代码分析的相关知识,内容详细易懂,操作简单快捷,具有一定借鉴价值,相信大家阅读完这篇R语言农艺性状表型数据与环境关联互作代码分析文章都会有所收获,
千家信息网最后更新 2025年11月07日R语言农艺性状表型数据与环境关联互作代码分析
这篇文章主要介绍了R语言农艺性状表型数据与环境关联互作代码分析的相关知识,内容详细易懂,操作简单快捷,具有一定借鉴价值,相信大家阅读完这篇R语言农艺性状表型数据与环境关联互作代码分析文章都会有所收获,下面我们一起来看看吧。
农艺性状表型数据与环境关联互作分析
local({r <- getOption("repos") r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/" options(repos=r)}) options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")#install.packages("GGEBiplots")#install.packages("GGEBiplotGUI")library(GGEBiplotGUI)library("GGEBiplots")library("reshape2")library(vegan)library(corrplot)library("psych")library(Cairo)setwd("D:/potato/")data1<-read.table("张北data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")data.t<-as.data.frame(t(data1))data.l<-melt(data.t,id.vars = c('年度', '性状'),variable.name = "基因型")mydata1<-dcast(data.l, `年度` + `基因型` ~ `性状`)#`单株产量` + `淀粉含量(%)` + `干物质含量(%)` + `平均出苗率(%)`+ `生育日数(天)`+ `株高`data2<-read.table("康保data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")data.t<-as.data.frame(t(data2))data.l<-melt(data.t,id.vars = c('年度', '性状'),variable.name = "基因型")mydata2<-dcast(data.l, `年度` + `基因型` ~ `性状`)#`单株产量` + `淀粉含量(%)` + `干物质含量(%)` + `平均出苗率(%)`+ `生育日数(天)` + `株高`#inter<-intersect(colnames(mydata1),colnames(mydata2))#去掉一些数据inter<-c("年度" , "基因型" , "单株产量" , "淀粉含量(%)" , "干物质含量(%)" , "平均出苗率(%)" , "生育日数(天)" , "株高" )mydata1.c<-mydata1[,inter]mydata2.c<-mydata2[,inter]mydata1.c$`环境`="张北"mydata2.c$`环境`="康保"mydata=rbind(mydata1.c,mydata2.c)########################计算平均值#traits<-c("单株产量" , "淀粉含量(%)" , "干物质含量(%)" , "平均出苗率(%)", "生育日数(天)" , "小区产量" ,"折合单位产量kg/hm", "株高")traits=c( "单株产量" , "淀粉含量(%)" , "干物质含量(%)" , "平均出苗率(%)" , "生育日数(天)" , "株高" )for (i in traits){ mydata[,i]=as.double(mydata[,i])}SD=round(apply(mydata[,traits],2,sd),2)MEAN=round(apply(mydata[,traits],2,mean),2)RANGE=round(apply(mydata[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多样性指数 H'`=H)write.csv(trait.mean,file="01.马铃薯表型性状统计.csv")#################################相关性分析occor = corr.test(mydata[,traits],use="pairwise",method="spearman",adjust="BH",alpha=.05)occor.r = occor$r # 取相关性矩阵R值occor.p = occor$p # 取相关性矩阵p值#设置色板palette_2 <- colorRampPalette(c("yellow","red"))(100)#绘图#png(file="traits_cor.png",w=10*300,h=10*300,res = 300)pdf(file="traits_cor.pdf",w=10,h=10,family="GB1")corrplot(occor.r, method = "square", type = "lower", order="AOE", col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r", title = "Traits corr", mar = c(2,4,4,1))corrplot(occor.r, method = "number", type = "upper",order="AOE", col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n", number.cex=1,mar = c(2,4,4,1))dev.off()write.csv(occor.p,file="02.相关性P值.csv")write.csv(occor.r,file="03.相关性R值.csv")########################分布范围柱状图#########################求平均genotype.mean=aggregate(mydata[,traits],by=list(`基因型`=mydata$`基因型`),mean)for(t in traits){ #t="平均出苗率(%)" d<- genotype.mean[,t] ff=gsub("\\(\\S+\\)", "", t) ff=gsub("kg/hm", "", ff) pdf(paste0(ff,"-表型性状值分布.pdf"),w=5,h=5,family="GB1") par(mar=c(6,4.1,2,4)) xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10), plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="") xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10), plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="") mtext(t,side=1,line=4.5) axis(side=1,xhist$mids,labels=F) b=(xhist$mids[2]-xhist$mids[1])/2 s=round(xhist$mids-b,2) s[1]=0 e=round(xhist$mids+b,2) lab=paste(s,e,sep="-") text(x=xhist$mids, y=-3, labels=lab,cex=1, xpd=TRUE, srt=45,adj=1) dd=density(d) par(new=T) plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "") axis(4, las=1,at=seq(from = 0, to = max(xhist$density), length.out = 6), labels=round(seq(0, max(xhist$density),length.out =6)/sum(xhist$density),2), col = 'red', col.axis = 'red') mtext("频率",side=4,line=3,col="red") box(bty="l") dev.off()}##############GGEbioplotsggdata=mydata[,c("环境","基因型","单株产量")]ggdata1=aggregate(`单株产量` ~ 基因型 + 环境, data = mydata, mean)gg.f=dcast(ggdata1,基因型 ~环境)row.names(gg.f)=gg.f[,1]gg.f=gg.f[,-1]*10#GGEBiplot(Data = gg.f)library(ggplot2)GGE1<-GGEModel(gg.f,centering = "tester", SVP = "symmetrical", scaling = "none")g1=GGEPlot(GGE1,type=6,sizeGen=4,sizeEnv = 2,largeSize=3)g1=g1+ labs(title = "")pdf(paste0("适应性分析bioplot.pdf"),w=5,h=5,family="GB1")print(g1)dev.off()g2=GGEPlot(GGE1,type=9,sizeGen=2,sizeEnv = 3,largeSize=3)g2=g2+ labs(title = "")pdf(paste0("丰产性-稳定性分析bioplot.pdf"),w=5,h=5,family="GB1")print(g2)dev.off()#################################表型主成分分析p=genotype.meanrow.names(p)=p[,1]p=p[,-1]pca = prcomp(t(p), scale=TRUE)#pca = prcomp(t(decostand(p, "hellinger")), scale=TRUE)ss=summary(pca)pc=pca$xxx=ss$importancerow.names(xx)<-c("特征值E","贡献率CR","累计贡献率CCR")pca.res=rbind(pc,xx)write.csv(pca.res,file="04.PCA分析结果.csv")#表型数据聚类分析dist_mat <- dist(genotype.mean[,2:5], method = "euclidean")clustering <- hclust(dist_mat, method = "complete")plot(clustering,labels=genotype.mean$基因型,xlab="sample",ylab="Height" ,main="")#根据进化树分组group.data=genotype.meangroup.data$group=as.factor(cutree(clustering,h=15))names=c("基因型" , "单株产量" , "淀粉含量" , "干物质含量", "平均出苗率" ,"生育日数" ,"株高" , "group" )colnames(group.data)=nameslibrary("ggpubr")phenotype=names[2:(length(names)-1)]test.res=data.frame()for(i in phenotype){ f=as.formula(paste0("`",i,"`~group")) print(f) res=compare_means(f, data = group.data,method="t.test") print(res) test.res=rbind(test.res,res)}write.csv(test.res,file="聚类树分组差异统计检验.csv")write.csv(group.data,file="聚类树分组信息表.csv")g1.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=mean)g2.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=sd)write.csv(g1.res,file="聚类树分组统计平均值.csv")write.csv(g2.res,file="聚类树分组统计标准差.csv")#表型数据在不同环境下变异for (i in traits){ mydata1[,i]=as.double(mydata1[,i])}SD=round(apply(mydata1[,traits],2,sd),2)MEAN=round(apply(mydata1[,traits],2,mean),2)RANGE=round(apply(mydata1[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata1[,traits], index = "shannon", MARGIN = 2),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多样性指数 H'`=H)write.csv(trait.mean,file="01.马铃薯表型性状统计(张北).csv")#表型数据在不同环境下变异for (i in traits){ mydata2[,i]=as.double(mydata2[,i])}SD=round(apply(mydata2[,traits],2,sd),2)MEAN=round(apply(mydata2[,traits],2,mean),2)RANGE=round(apply(mydata2[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata2[,traits], index = "shannon", MARGIN = 2),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多样性指数 H'`=H)write.csv(trait.mean,file="01.马铃薯表型性状统计(康保).csv")######关联分析mydata$年度=as.factor(mydata$年度)mydata$环境=as.factor(mydata$环境)F.res=data.frame()for (i in traits){ print(i) fl=c(as.formula(paste0("`",i,"`~基因型")), as.formula(paste0("`",i,"`~环境")), as.formula(paste0("`",i,"`~年度")), as.formula(paste0("`",i,"`~基因型*环境")), as.formula(paste0("`",i,"`~基因型*年度")) #as.formula(paste0("`",i,"`~基因型*年度*环境")) ) for(f in fl){ blp=lm(f,data=mydata) aa=summary(blp) # #print(aa) print(aa$df) print(f) fstatistic = aa$fstatistic p_value = pf(as.numeric(fstatistic[1]), as.numeric(fstatistic[2]), as.numeric(fstatistic[3]), lower.tail = FALSE) print(p_value) aa=data.frame(`公式`=as.character(f)[3],df=aa$df[1],F=fstatistic[1],P=p_value,`性状`=i) F.res=rbind(F.res,aa) # question1 <- readline("Would you like to proceed untill the loop ends? (Y/N)") # if(regexpr(question1, 'y', ignore.case = TRUE) == 1){ # continue = TRUE # next # } else{ # break # } } }write.csv(F.res,file="05.表型与基因型环境年度互作效应.csv")第二版版本,注意PCA分析升级,输入数据行为样本,列为表型;
local({r <- getOption("repos") r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/" options(repos=r)}) options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")#install.packages("GGEBiplots")#install.packages("GGEBiplotGUI")library(GGEBiplotGUI)library("GGEBiplots")library("reshape2")library(vegan)library(corrplot)library("psych")library(Cairo)setwd("D:/potato/第二次分析")mydata<-read.table("data.txt",header = T,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")########################计算平均值traits=colnames(mydata)for (i in traits){ mydata[,i]=as.double(mydata[,i])}SD=round(apply(mydata[,traits],2,sd),2)MEAN=round(apply(mydata[,traits],2,mean),2)RANGE=round(apply(mydata[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2)J=round(H/log(specnumber(t(mydata[,traits]))),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="~"),`H'`=H,J=J)write.csv(trait.mean,file="01.马铃薯表型性状统计.csv")########################分布范围柱状图#########################求平均for(t in traits){ ##t="顶小叶宽" #t="出苗率" d<- mydata[,t] pdf(paste0(t,"-表型性状值分布.pdf"),w=5,h=5,family="GB1") par(mar=c(6,4.1,2,4)) MIN=min(d) MAX=max(d) step=(MAX-MIN)/(length(d)^(1/2)+1) xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step), plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="") xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step), plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="green",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="") mtext(t,side=1,line=4.5) axis(side=1,xhist$mids,labels=F) s=round(xhist$mids-step,2) if (s[1]<0) {s[1]=0} e=round(xhist$mids+step,2) lab=paste(s,e,sep="-") text(x=xhist$mids, y=-3, labels=lab,cex=1, xpd=TRUE, srt=45,adj=1) axis(4, las=1,at=seq(from = 0, to = max(xhist$counts), length.out = 4), labels=paste0(round(seq(0, max(xhist$counts),length.out =4)/sum(xhist$counts),4)*100,"%"), col = 'red', col.axis = 'red') dd=density(d) par(new=T) plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "") #mtext("频率",side=4,line=3,col="red") box(bty="l") l.at="topright" if(t=="出苗率" || t=="叶缘形状"){ l.at="topleft" } legend(l.at,legend = c("直方图"), fill = c('green'), bty='n') legend(l.at,legend = c("正态图"), col = c("red"), lty=1, bty='n',inset = c(0,0.08), lwd=3) dev.off()}################相关性分析####################################################相关性分析occor = corr.test(mydata,use="pairwise",method="spearman",adjust="BH",alpha=.05)occor.r = occor$r # 取相关性矩阵R值occor.p = occor$p # 取相关性矩阵p值#设置色板palette_2 <- colorRampPalette(c("yellow","red"))(100)#绘图#png(file="traits_cor.png",w=10*300,h=10*300,res = 300)pdf(file="traits_cor.pdf",w=10,h=10,family="GB1")corrplot(occor.r, method = "square", type = "lower", order="AOE", col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r", title = "Traits corr", mar = c(2,8,4,1))corrplot(occor.r, method = "number", type = "upper",order="AOE", col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n", number.cex=1,mar = c(2,4,4,1))dev.off()write.csv(occor.p,file="02.相关性P值.csv")write.csv(occor.r,file="03.相关性R值.csv")###############因子分析###############mydata.l=melt(mydata)bt=bartlett.test(value~variable,data=mydata.l)kmo=KMO(mydata)sink("04.因子分析.txt")btkmosink()#################################表型主成分分析#pca = prcomp(mydata, scale=TRUE)pca = princomp(mydata, scores=TRUE, cor=TRUE)#pca = prcomp(t(mydata), scale=TRUE)fa1 = factanal(mydata, factor=2, rotation="varimax", scores="regression")fa1#write.csv(abs(fa1$loadings), "loadings.csv")pdf(file="04.PCA分析特征根碎石图.pdf",w=8,h=4,family="GB1")screeplot(pca, type="line", main="Scree Plot") dev.off()sink(file="04.PCA分析summary.txt")summary(pca)sink()pc=loadings(pca)[1:21,]pc=as.data.frame(pc)write.csv(pc,file="04.PCA分析结果.csv")library(RColorBrewer)mycolor=c(brewer.pal(9, "Set1"),brewer.pal(8, "Set2"),brewer.pal(9, "Paired"))pdf(file="04.PCA分析表型分布.pdf",w=7,h=6,family="GB1")par(mar=c(5,4,4,10))plot(x=pc[,1],y=pc[,2],cex=1.5,col=mycolor,pch=16,xlab="factor 1",ylab = "factor 2")abline(h=0,lty=1)abline(v=0,lty=1)legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)dev.off()#表型数据聚类分析dist_mat <- dist(mydata, method = "euclidean")clustering <- hclust(dist_mat, method = "complete")pdf(file="05.聚类图.pdf",w=15,h=4,family="GB1")plot(clustering,labels=rownames(mydata),cex=0.55,xlab="sample",ylab="Height" ,main="")abline(h=40,col="red")dev.off()#根据聚类树分组group.data=mydatagroup.data$group=as.factor(cutree(clustering,h=40))###################分组PCA图#################install.packages("ggfortify")library(ggfortify)pdf(file="04.PCA分析表型样本双序图.pdf",w=5,h=4,family="GB1")#par(mar=c(5,4,4,10)) #pcs=pca$scores #plot(x=pcs[,1],y=pcs[,2],cex=1.5,col=mycolor[group.data$group],pch=1,xlab="factor 1",ylab = "factor 2") #par(new=T) #plot(x=pc[,1],y=pc[,2],cex=1.5,col="blue",pch=16,xlab="factor 1",ylab = "factor 2")#legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)#legend("topright",legend =1:6 ,pch=16,col=mycolor[1:6],xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)#biplot(pca)g=autoplot(pca, data = group.data, colour = 'group', loadings = TRUE, loadings.colour = 'blue', loadings.label = TRUE, loadings.label.size = 3,size=2)#,frame = TRUE, frame.type = 'norm')g=g+scale_color_manual(name="group",values =mycolor)+ scale_fill_manual(name="group",values =mycolor)+ theme_bw()+ theme( panel.grid=element_blank(), axis.text.x=element_text(colour="black"), axis.text.y=element_text(colour="black"), panel.border=element_rect(colour = "black"), legend.key = element_blank(), plot.title = element_text(hjust = 0.5))gdev.off()library("ggpubr")phenotype=traitstest.res=data.frame()for(i in phenotype){ f=as.formula(paste0("`",i,"`~group")) print(f) fit=aov(f,data=group.data) ss=summary(fit) res=as.data.frame(ss[[1]]) res$`表型`=i rownames(res)=c("组间","组内") print(res) test.res=rbind(test.res,res)}colnames(test.res)=c("Df","平方和SS","均方MS","F","显著性","性状trait")write.csv(test.res,file="05.聚类树分组差异统计检验.csv")write.csv(group.data,file="05.聚类树分组信息表.csv")g1.res=aggregate(group.data[,traits],by=list(group.data$group),FUN=mean)Average=apply(group.data[,traits],2,mean)Average=c("Group.1"="总体平均值 Average",Average)g.mean=rbind(g1.res,t(as.data.frame(Average)))write.csv(t(g.mean),file="06.聚类树分组统计平均值.csv")关于"R语言农艺性状表型数据与环境关联互作代码分析"这篇文章的内容就介绍到这里,感谢各位的阅读!相信大家对"R语言农艺性状表型数据与环境关联互作代码分析"知识都有一定的了解,大家如果还想学习更多知识,欢迎关注行业资讯频道。
分析
表型
环境
性状
基因
含量
数据
年度
相关性
产量
分组
出苗率
平均值
统计
单株
关联
变异
日数
样本
淀粉
数据库的安全要保护哪些东西
数据库安全各自的含义是什么
生产安全数据库录入
数据库的安全性及管理
数据库安全策略包含哪些
海淀数据库安全审计系统
建立农村房屋安全信息数据库
易用的数据库客户端支持安全管理
连接数据库失败ssl安全错误
数据库的锁怎样保障安全
静安区品质软件开发技术指导
服务器开机启动缓慢
数据库技术与应用体会
公司邮箱服务器怎么填写
关于2g通信网络技术论文
温州软件开发得多少钱
青岛营销服务管理软件开发
tx网络安全创始人大神
互联网教育行业科技发展状况
写入数据库出错
软件开发优秀员工自我评估
网络安全指的是什么h
戴尔服务器如何查看主板型号
什么是网络安全中的鱼叉
软件开发的 相关书籍
云服务器的管理
信息与网络安全管理考试答案
投标单位数据库建立
无线网络技术连接项目描述
常用的数据库语言
ctf网络安全大赛啥意思
存薪水 类型 数据库
北京管理软件开发一般要多少钱
网络安全信息教育进校园
微服务 无服务器
网络安全童心向党四年级手抄报
深圳上位机软件开发公司
菏泽软件开发80亿投资
dns服务器地址湖南岳阳
软件开发设计创业