Page 5 of 5 FirstFirst ... 345
Results 41 to 43 of 43

Thread: NW European samples on Voronoi tesselation

  1. #41
    Registered Users
    Posts
    88
    Sex
    Ethnicity
    Finnish

    Here's another version that uses a color palette with variation in chroma and luminance and not just hue.



    Code:
    library(tidyverse)
    library(ggforce)
    
    t=read.csv("individuals-to-plot-without-header-row",header=F,row.names=1)
    names(t)=paste0("PC",1:ncol(t))
    
    t=as.data.frame(prcomp(t)$x) # comment out in order to not do a secondary PCA
    
    pop=sub(":.*","",rownames(t))
    centers=aggregate(t,by=list(pop),mean)
    t$pop=pop
    
    set.seed(1488)
    color=as.factor(sample(seq(1,length(unique(t$pop)))))
    col=rbind(c(60,80),c(25,95),c(30,70),c(70,50),c(60,100),c(20,50))
    # col=rbind(c(60,80),c(25,95),c(25,60)) # simpler palette
    hues=max(ceiling(length(color)/nrow(col)),12)
    pal1=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],x[1],x[2])))
    pal2=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],.8*x[1],.3*x[2])))
    
    ggplot(t,aes(PC1,PC2,group=-1L))+
    geom_voronoi_tile(aes(fill=color[as.factor(pop)],color=color[as.factor(pop)]),size=.07,max.radius=.01)+
    geom_label(data=centers,aes(x=PC1,y=PC2,label=Group.1,color=color,fill=color),alpha=.7,size=2,label.r=unit(.08,"lines"),label.padding=unit(.08,"lines"),label.size=0)+
    coord_fixed()+
    scale_x_continuous(breaks=seq(-1,1,.1),expand=expansion(mult=.02))+
    scale_y_continuous(breaks=seq(-1,1,.1),expand=expansion(mult=.02))+
    scale_fill_manual(values=pal1)+
    scale_color_manual(values=pal2)+
    theme(
      axis.text=element_text(color="black",size=6),
      axis.ticks=element_blank(),
      axis.ticks.length=unit(0,"pt"),
      axis.title=element_text(color="black",size=8),
      legend.position="none",
      panel.background=element_rect(fill="white"),
      panel.border=element_rect(color="gray85",fill=NA,size=.4),
      panel.grid.major=element_line(color="gray85",size=.2)
    )
    
    ggsave("a.png",width=9,height=9)
    system("mogrify -trim -bordercolor white -border 16x16 a.png")
    Last edited by Nganasankhan; 04-18-2021 at 05:59 PM.

  2. The Following 2 Users Say Thank You to Nganasankhan For This Useful Post:

     Fitis (04-18-2021),  xerxez (04-18-2021)

  3. #42
    Registered Users
    Posts
    88
    Sex
    Ethnicity
    Finnish

    Here's a new version of my script that simultaneously generates images for multiple PCs. It also uses light labels for dark colors, and it automatically adjusts the radius of the circles based on the width and height of the plot area.

    Code:
    library(tidyverse)
    library(ggforce)
    
    f="latlong11"
    t=read.csv(paste0(f,".csv"),header=F,row.names=1)
    names(t)=paste0("PC",1:ncol(t))
    eig=head(as.double(readLines(paste0(f,".eval"))),ncol(t))
    pct=paste0("PC",seq(length(eig))," (",sprintf("%.1f",100*eig/sum(eig)),"%)")
    
    pop=sub(":.*","",rownames(t))
    pop=sub("\\.(DG|SDG|SG|WGA)","",pop)
    centers=aggregate(t,by=list(pop),mean)
    t$pop=pop
    
    set.seed(1488)
    color=as.factor(sample(seq(1,length(unique(t$pop)))))
    col=rbind(c(60,80),c(25,95),c(30,70),c(70,50),c(60,100),c(20,50))
    # col=rbind(c(60,80),c(25,95),c(25,60)) # simpler palette
    hues=max(ceiling(length(color)/nrow(col)),8)
    pal1=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],x[1],x[2])))
    # pal2=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],.8*x[1],.2*x[2]))) # dark text for every color
    pal2=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],.8*x[1],ifelse(x[2]>50,.2*x[2],100)))) # light text for dark colors
    # pal2=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],0,ifelse(x[2]>50,0,100)))) # only black or white text
    
    ranges=apply(t[,-ncol(t)],2,function(x)abs(max(x)-min(x)))
    
    for(i in seq(1,7,2)){
      p1=sym(paste0("PC",i))
      p2=sym(paste0("PC",i+1))
    
      maxrange=max(ranges[c(i,i+1)])
      lims=sapply(c(i,i+1),function(x)mean(range(t[,x]))+c(-.54*maxrange,.54*maxrange))
    
      ggplot(t,aes(!!p1,!!p2,group=-1L))+
      geom_voronoi_tile(aes(fill=color[as.factor(pop)],color=color[as.factor(pop)]),size=.07,max.radius=maxrange/40)+
      geom_label(data=centers,aes(x=!!p1,y=!!p2,label=Group.1,color=color,fill=color),alpha=.7,size=2,label.r=unit(.08,"lines"),label.padding=unit(.08,"lines"),label.size=0)+
      coord_fixed(xlim=lims[,1],ylim=lims[,2],expand=F)+
      # coord_fixed()+ # don't force plot area to be square
      labs(x=pct[i],y=pct[i+1])+
      scale_x_continuous(breaks=seq(-1,1,.05))+
      scale_y_continuous(breaks=seq(-1,1,.05))+
      scale_fill_manual(values=pal1)+
      scale_color_manual(values=pal2)+
      theme(
        axis.text=element_blank(),
        axis.ticks=element_blank(),
        axis.title=element_text(color="black",size=8),
        legend.position="none",
        panel.background=element_rect(fill="white"),
        panel.border=element_rect(color="gray85",fill=NA,size=.4),
        panel.grid.major=element_line(color="gray85",size=.2)
      )
    
      f=paste0("pc",i,".png")
      ggsave(f,width=7,height=7)
    }
    
    system("montage pc[1357].png -geometry +0+0 -tile 2x comb.png")
    The data is based on a SmartPCA run for modern samples with longitude between -20 and 65 and latitude over 36:

    Code:
    curl -LsO reichdata.hms.harvard.edu/pub/datasets/amh_repo/curated_releases/V44/V44.3/SHARE/public.dir/v44.3_HO_public.tar;tar -xf v44.3_HO_public.tar
    f=v44.3_HO_public;convertf -p <(printf %s\\n genotypename:\ $f.geno snpname:\ $f.snp indivname:\ $f.ind outputformat:\ PACKEDPED genotypeoutname:\ $f.bed snpoutname:\ $f.bim indivoutname:\ $f.fam)
    x=latlong13
    igno()(grep -Ev '\.REF|Ignore_|_dup|_contam|_father|_mother|_son|_daughter|_brother|_sister|_sibling|_twin|\.rel\.|\.fail\.|Neanderthal|Denisova|Vindija|Orangutang|Macaque|Marmoset|Primate_Chimp|hg19ref')
    sed 1d v44.3_HO_public.anno|sort -t$'\t' -rnk15|awk -F\\t '!a[$3]++&&$11>=36&&$12>=-20&&$12<=65&&$6==0{print$2,$8}'|igno|grep -Ev '_o|_lc'>$x.pick
    plink --allow-no-sex --bfile v44.3_HO_public --keep <(awk 'NR==FNR{a[$1];next}$2 in a' $x.pick v44.3_HO_public.fam) --make-bed --out $x
    smartpca -p <(printf %s\\n genotypename:\ $x.bed snpname:\ $x.bim indivname:\ $x.fam evecoutname:\ $x.evec evaloutname:\ $x.eval autoshrink:\ YES lsqproject:\ YES numoutevec:\ 20)
    awk 'NR==FNR{a[$1]=$2;next}{printf"%s:%s",a[$1],$1;for(i=2;i<=NF-1;i++)printf",%s",$i;print""}' $x.pick <(sed 1d $x.evec|cut -d: -f2)>$x.csv


    Russian_Archangelsk_Leshukonsky is even more eastern than Russian_Pinega on G25. However the Chuvashes in 1240K+HO are more western on average than the Chuvashes on G25. Almost all Bashkir samples are from Jeong 2019, where southern Bashkirs had much higher East Eurasian ancestry than northern Bashkirs.
    Last edited by Nganasankhan; 04-22-2021 at 04:06 PM.

  4. The Following User Says Thank You to Nganasankhan For This Useful Post:

     JMcB (04-22-2021)

  5. #43
    Registered Users
    Posts
    88
    Sex
    Ethnicity
    Finnish

    Here's Voronoi tesselation applied to the latitude and longitude coordinates of samples in the 1240K+HO dataset. This adds a small random factor to each coordinate in order to plot points with identical coordinates.

    You can see that most Siberian ancient DNA is from around latitudes 50-58, or from areas with a Turkic and Mongolic indigenous population. Ust'Ishim is one of the northernmost Siberian samples on this map, but even it is located close to where Siberian Tatars live today. Tyumen_HG is located further south close to the southernmost point of Tyumen Oblast, which is far from where the Uralic domain begins in the massive swamps of the Khanty-Mansi Autonomous Okrug of Tyumen Oblast.



    Code:
    curl -LsO reichdata.hms.harvard.edu/pub/datasets/amh_repo/curated_releases/V44/V44.3/SHARE/public.dir/v44.3_HO_public.anno
    awk -F\\t 'NR>1&&$11>50&&$12>-30&&$12<180{print$2,$8,$12,$11}' v44.3_HO_public.anno|grep -Ev '\.REF|rel\.|fail\.|Ignore_|_o|_lc|_outlier|_dup|_contam|_SomeAfrican|_father|_mother|_son|_daughter|_brother|_sister|_sibling|_twin|Neanderthal|Denisova|Vindija|Orangutang|Macaque|Marmoset|Primate_Chimp|hg19ref'|sed -E 's/\.(DG|SDG|SG|WGA)|_(possible|published)//g'>map
    Code:
    library(tidyverse)
    library(ggforce)
    
    t=read.table("map",header=F)
    t$V4=4*t$V4
    centers=aggregate(t[,3:4],by=list(t$V2),mean)
    
    t$V3=t$V3+runif(nrow(t))/1e5
    t$V4=t$V4+runif(nrow(t))/1e5
    
    set.seed(1488)
    color=as.factor(sample(seq(1,length(unique(t$V2)))))
    col=rbind(c(60,80),c(25,95),c(30,70),c(70,50),c(60,100),c(20,50))
    hues=max(ceiling(length(color)/nrow(col)),8)
    pal1=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],x[1],x[2])))
    pal2=as.vector(apply(col,1,function(x)hcl(seq(15,375,length=hues+1)[1:hues],.8*x[1],ifelse(x[2]>50,.2*x[2],100))))
    
    ggplot(t,aes(V3,V4,group=-1L))+
    geom_voronoi_tile(aes(fill=color[as.factor(V2)],color=color[as.factor(V2)]),size=.07,max.radius=3)+
    geom_label(data=centers,aes(x=V3,y=V4,label=Group.1,color=color,fill=color),alpha=.7,size=2,label.r=unit(.08,"lines"),label.padding=unit(.08,"lines"),label.size=0)+
    coord_fixed()+
    scale_x_continuous(breaks=seq(-1000,1000,20),expand=expansion(mult=.02))+
    scale_y_continuous(breaks=seq(-1000,1000,5)*4,expand=expansion(mult=.01),labels=seq(-1000,1000,5))+
    scale_fill_manual(values=pal1)+
    scale_color_manual(values=pal2)+
    theme(
      axis.text=element_text(color="black",size=6),
      axis.ticks=element_blank(),
      axis.ticks.length=unit(0,"pt"),
      axis.title=element_blank(),
      legend.position="none",
      panel.background=element_rect(fill="white"),
      panel.border=element_rect(color="gray85",fill=NA,size=.4),
      panel.grid.major=element_line(color="gray85",size=.2)
    )
    
    ggsave("a.png",width=14,height=14)
    system("mogrify -trim -bordercolor white -border 16x16 a.png")
    Here's another map for the whole world:

    Last edited by Nganasankhan; 04-22-2021 at 07:56 PM.

  6. The Following User Says Thank You to Nganasankhan For This Useful Post:

     sheepslayer (04-28-2021)

Page 5 of 5 FirstFirst ... 345

Similar Threads

  1. Clustering the sub modern European G25 samples
    By Huijbregts in forum Autosomal (auDNA)
    Replies: 65
    Last Post: 01-26-2021, 01:18 AM
  2. My current list of U106+ samples in aDNA samples
    By Bollox79 in forum Ancient (aDNA)
    Replies: 2
    Last Post: 04-23-2018, 12:51 AM
  3. My list of ancient European y-dna samples
    By venustas in forum Ancient (aDNA)
    Replies: 2
    Last Post: 10-14-2017, 05:19 AM
  4. Replies: 13
    Last Post: 09-13-2016, 05:21 PM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •