Page 4 of 4 FirstFirst ... 234
Results 31 to 39 of 39

Thread: Shell commands and R scripts for working with G25

  1. #31
    Registered Users
    Posts
    95
    Sex
    Ethnicity
    Finnish

    This makes a secondary PCA of the closest populations to the specified population. It sets the color of the points based on the distance to the specified population, and it draws a line between each point and its two closest neighbors.

    Code:
    library(tidyverse)
    
    nneigh=2
    xpc=1
    ypc=2
    
    t1=read.csv("https://drive.google.com/uc?export=download&id=1F2rKEVtu8nWSm7qFhxPU6UESQNsmA-sl",row.names=1)
    t2=read.csv("https://drive.google.com/uc?export=download&id=1wZr-UOve0KUKo_Qbgeo27m-CQncZWb8y",row.names=1)
    t=rbind(t1,t2)
    
    target="RUS_Bolshoy_Oleni_Ostrov"
    
    dist=as.matrix(dist(t))
    pick=head(sort(dist[target,]),80)
    t2=t[names(pick),]
    dist2=dist[target,names(pick)]
    
    max=max(dist2)
    # max=.25;dist2[dist2>max]=max # use a scale with a fixed maximum distance
    
    p=prcomp(t2)
    p2=as.data.frame(p$x)
    pct=paste0(colnames(p$x)," (",sprintf("%.1f",p$sdev/sum(p$sdev)*100),"%)")
    nk=16
    p2$k=cutree(hclust(dist(t2)),k=nk)
    
    dist3=as.data.frame(as.matrix(dist(t2)))
    seg0=lapply(1:nneigh+1,function(i)apply(dist3,1,function(x)unlist(p2[names(sort(x)[i]),c(xpc,ypc)],use.names=F))%>%t%>%cbind(p2[,c(xpc,ypc)]))
    seg=do.call(rbind,seg0)%>%setNames(paste0("V",1:4))
    
    ggplot(p2,aes(!!sym(paste0("PC",xpc)),!!sym(paste0("PC",ypc))))+
    geom_segment(data=seg,aes(x=V1,y=V2,xend=V3,yend=V4),color="gray80",size=.3)+
    geom_point(aes(color=dist2),size=.4)+
    geom_text(aes(label=rownames(t2),color=dist2),size=2,vjust=-.7)+
    labs(x=pct[xpc],y=pct[ypc])+
    scale_x_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.08))+
    scale_y_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.04))+
    scale_color_gradientn(colors=hcl(c(240,220,200,170,120,70,30,0,320,280)+15,100,70),name=paste("Distance to",target),breaks=seq(0,max,.05),limits=c(0,max))+
    guides(color=guide_colorbar(ticks=F))+
    theme(
      aspect.ratio=1/sqrt(2),
      axis.text=element_text(color="black",size=6),
      axis.ticks=element_line(size=.3,color="gray70"),
      axis.ticks.length=unit(-.12,"cm"),
      axis.text.x=element_text(margin=margin(.2,0,0,0,"cm")),
      axis.text.y=element_text(angle=90,vjust=1,hjust=.5,margin=margin(0,.2,0,0,"cm")),
      axis.title=element_text(color="black",size=8),
      legend.direction="horizontal",
      legend.justification="left",
      legend.key.height=unit(.5,"cm"),
      legend.key.width=unit(1,"cm"),
      legend.margin=margin(0,0,-7,0),
      legend.position="top",
      legend.spacing.y=unit(.05,"cm"),
      legend.text=element_text(size=6,vjust=.5),
      legend.title=element_text(size=8,vjust=.7),
      panel.background=element_rect(fill="white"),
      panel.border=element_rect(color="gray70",fill=NA,size=.5),
      panel.grid=element_blank()
    )
    
    ggsave("a.png",width=8,height=8)


    Quote Originally Posted by David Bush View Post
    > ggdraw(p)
    Error in ggdraw(p) : could not find function "ggdraw"
    > leg=get_legend(p)
    Error in get_legend(p) : could not find function "get_legend"
    > ggdraw(plot_grid(p+theme(legend.position="none"),l eg,ncol=1,rel_heights=c(1,.12)))
    Error in ggdraw(plot_grid(p + theme(legend.position = "none"), leg, ncol = 1, :
    could not find function "ggdraw"
    > ggsave("a.png",width=7,height=7)
    I forgot to add `library("cowplot")` to the script.
    Last edited by Nganasankhan; 05-02-2021 at 12:08 PM.

  2. The Following 2 Users Say Thank You to Nganasankhan For This Useful Post:

     David Bush (05-02-2021),  sheepslayer (05-04-2021)

  3. #32
    Registered Users
    Posts
    119
    Sex

    Quote Originally Posted by Nganasankhan View Post

    to add `library("cowplot")` to the script.
    Please provide a sample file for : "csv-table-copied-from-multi-tab"
    let the
    Because the script does not read my file
    Thank You

  4. #33
    Registered Users
    Posts
    95
    Sex
    Ethnicity
    Finnish

    Quote Originally Posted by David Bush View Post
    Please provide a sample file for : "csv-table-copied-from-multi-tab"
    let the
    Because the script does not read my file
    Thank You
    Here's an updated version of my script. It automatically sets the height of the plot. It reorders the rows based on the order of branches in a hierarchical clustering tree. It uses `vegan::reorder.hclust` to arrange branches with a higher proportion of the first components to the top and populations with a higher proportion of the last components to the bottom.

    Code:
    library(tidyverse)
    library(reshape2) # for melt
    library(colorspace) # for hex
    library(cowplot) # for plot_grid
    library(vegan) # for reorder.hclust
    
    t=read.csv("https://pastebin.com/raw/5TFeN1uV",header=T,check.names=F) # check.names=F disables replacing dashes in column names with periods
    
    avedist=t[nrow(t),2]
    t=t[-nrow(t),]
    
    name=paste0(t[,1]," (",sub("^0","",sprintf("%.3f",t[,2])),")")
    mat=t[,3:ncol(t)]/100
    
    hc=hclust(dist(mat)) # order based on hierarchical clustering
    hc=reorder(hc,as.matrix(mat)%*%seq(ncol(mat))^2)
    t=cbind(name,mat)[hc$order,]
    
    # t=t[order(t[,2]),] # order based on a single column
    
    t2=melt(t) # this uses melt because pivot_longer doesn't preserve the order of the columns
    lab=round(100*t2$value)
    lab[lab==0]=""
    t2$lab=lab
    
    p=ggplot(t2,aes(x=fct_rev(factor(name,level=t$name)),y=value,fill=variable))+
    geom_bar(stat="identity",width=1,position=position_fill(reverse=T))+
    geom_text(aes(label=lab),position=position_stack(vjust=.5,reverse=T),size=3.5)+
    ggtitle(paste("Average distance:",sub("^0","",sprintf("%.3f",avedist))))+
    coord_flip()+
    scale_x_discrete(expand=c(0,0))+ # remove gray area above and below the bars
    scale_y_discrete(expand=c(0,0))+ # remove gray area on the left and right side of the bars
    scale_fill_manual("legend",values=hex(HSV(c(0,20,40,90,180,210,240,270,330),.5,1)))+ # manual colors
    # scale_fill_manual(values=hex(HSV(head(seq(0,360,length.out=length(unique(t2$variable))+1),-1),.5,1)))+ # automatic colors
    guides(fill=guide_legend(ncol=3,byrow=F))+
    theme(
      axis.text=element_text(color="black",size=11),
      axis.text.x=element_blank(),
      axis.ticks=element_blank(),
      axis.title.x=element_blank(),
      axis.title.y=element_blank(),
      legend.direction="horizontal",
      legend.margin=margin(-6,0,0,0),
      legend.text=element_text(size=11),
      legend.title=element_blank(),
      legend.key=element_rect(fill=NA), # remove gray border around color squares
      legend.key.size=unit(.65,"cm"),
      plot.title=element_text(size=11),
      text=element_text(size=11)
    )
    
    hei=c(.3+.28*nrow(t),.1+.25*ceiling(length(unique(t2[!is.na(t2$value),2]))/3))
    plot_grid(p+theme(legend.position="none"),get_legend(p),ncol=1,rel_heights=hei)
    
    ggsave("a.png",width=6,height=sum(hei))

  5. The Following 4 Users Say Thank You to Nganasankhan For This Useful Post:

     David Bush (05-03-2021),  Kapisa (05-02-2021),  perttimp (05-04-2021),  sheepslayer (05-04-2021)

  6. #34
    Registered Users
    Posts
    119
    Sex

    Quote Originally Posted by Nganasankhan View Post
    Here's an updated version of my script. It automatically sets the height of the plot. It reorders the rows based on the order of branches in a hierarchical clustering tree. It uses `vegan::reorder.hclust` to arrange branches with a higher proportion of the first components to the top and populations with a higher proportion of the last components to the bottom.
    Thank you very much Dear Nganasankhan
    I notice that when the data is copied with option (copy table as csv), points are added that we have to delete so that the script can read them.


    Another issue:
    If the target populations are more than 9, they are not Plotted
    Last edited by David Bush; 05-03-2021 at 06:33 AM.

  7. The Following User Says Thank You to David Bush For This Useful Post:

     Nganasankhan (05-03-2021)

  8. #35
    Registered Users
    Posts
    95
    Sex
    Ethnicity
    Finnish

    Quote Originally Posted by David Bush View Post
    Another issue:
    If the target populations are more than 9, they are not Plotted
    You need to remove the `scale_fill_manual` line, or add more colors to it.

    I have no idea what those dots are. But you can try pressing COPY AS TSV instead, and change `read.csv` to `read.table`.

  9. The Following User Says Thank You to Nganasankhan For This Useful Post:

     David Bush (05-03-2021)

  10. #36
    Registered Users
    Posts
    119
    Sex

    Quote Originally Posted by Nganasankhan View Post
    The scripts in this post generate a heatmap based on pairwise distances between populations.
    excuse me
    w=d["Baltic_EST_Narva",]-d["RUS_Krasnoyarsk_BA",]
    > sort=reorder(hclust(dist(t)),wts=w)

    Error in unique.default(x, nmax = nmax) :
    unique() applies only to vectors

  11. The Following User Says Thank You to David Bush For This Useful Post:

     Nganasankhan (05-04-2021)

  12. #37
    Registered Users
    Posts
    95
    Sex
    Ethnicity
    Finnish

    Quote Originally Posted by David Bush View Post
    excuse me
    w=d["Baltic_EST_Narva",]-d["RUS_Krasnoyarsk_BA",]
    > sort=reorder(hclust(dist(t)),wts=w)

    Error in unique.default(x, nmax = nmax) :
    unique() applies only to vectors
    Thanks for pointing these out. I made an error, and the function `reorder.hclust` was from the package `vegan` and not from `ape`. You need to add this: `library(vegan)`.

  13. The Following User Says Thank You to Nganasankhan For This Useful Post:

     David Bush (05-04-2021)

  14. #38
    Registered Users
    Posts
    95
    Sex
    Ethnicity
    Finnish

    The script in this post uses `vegan::spantree` to draw a minimal spanning tree. The color of the points indicates the value of PC3.

    Code:
    library(tidyverse)
    library(vegan)
    
    xpc=1
    ypc=2
    zpc=3
    
    csv1=read.csv("https://drive.google.com/uc?export=download&id=1F2rKEVtu8nWSm7qFhxPU6UESQNsmA-sl",row.names=1)
    csv2=read.csv("https://drive.google.com/uc?export=download&id=1wZr-UOve0KUKo_Qbgeo27m-CQncZWb8y",row.names=1)
    t=rbind(csv1,csv2)
    
    dist=as.matrix(dist(t))
    pick=head(sort(dist["RUS_Kostenki14",]),80)
    t2=t[names(pick),]
    
    p=prcomp(t2)
    p2=as.data.frame(p$x)
    span=vegan::spantree(dist(t2))
    links=cbind(2:nrow(t2),span$kid)
    seg=cbind(p2[links[,1],c(xpc,ypc)],p2[links[,2],c(xpc,ypc)])%>%setNames(paste0("V",1:4))
    
    ggplot(p2,aes(!!sym(paste0("PC",xpc)),!!sym(paste0("PC",ypc))))+
    geom_segment(data=seg,aes(x=V1,y=V2,xend=V3,yend=V4),color="gray10",size=.3)+
    geom_point(aes(color=!!sym(paste0("PC",zpc))),size=.4)+
    geom_text(aes(label=rownames(t2),color=!!sym(paste0("PC",zpc))),size=2,vjust=-.7)+
    scale_x_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.08))+
    scale_y_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.04))+
    scale_color_gradientn(colors=hcl(seq(0,240)+15,80,80))+
    theme(
      aspect.ratio=1,
      axis.text=element_text(color="gray10",size=6),
      axis.ticks=element_line(size=.3,color="gray10"),
      axis.ticks.length=unit(-.12,"cm"),
      axis.text.x=element_text(margin=margin(.2,0,0,0,"cm")),
      axis.text.y=element_text(angle=90,vjust=1,hjust=.5,margin=margin(0,.2,0,0,"cm")),
      axis.title=element_blank(),
      legend.position="none",
      panel.background=element_rect(fill="gray40"),
      plot.background=element_rect(fill="gray40",color=NA),
      panel.border=element_rect(color="gray10",fill=NA,size=.5),
      panel.grid=element_blank()
    )
    
    ggsave("a.png",width=6,height=6)
    Here's plots of the closest populations to Kostenki14 (left side) and Tianyuan (right side).

    On the right side Tharu is close to Tianyuan on the first two PCs, but you can see that they are not close on PC3, because they have a fairly large difference in hue. Tharu's hue is about 28 and Tianyuan's hue is about 113, so their distance on PC3 is about 35% of the total range of PC3 (from (113-28)/240).

    Last edited by Nganasankhan; 05-04-2021 at 10:08 AM.

  15. The Following User Says Thank You to Nganasankhan For This Useful Post:

     David Bush (05-04-2021)

  16. #39
    Registered Users
    Posts
    119
    Sex

    Dear ganasankhan
    Beautiful art paintings
    Your work is very interesting and new


  17. The Following User Says Thank You to David Bush For This Useful Post:

     Nganasankhan (05-04-2021)

Page 4 of 4 FirstFirst ... 234

Similar Threads

  1. What Surnames Are You Working On?
    By msmarjoribanks in forum Genealogy
    Replies: 71
    Last Post: 03-09-2021, 08:30 AM
  2. Replies: 6
    Last Post: 09-27-2019, 10:31 PM
  3. Ancestry's DNA link not working.
    By rdegnen in forum AncestryDNA
    Replies: 4
    Last Post: 01-29-2019, 02:04 AM
  4. Replies: 3
    Last Post: 02-06-2016, 04:44 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •