# Thread: Shell commands and R scripts for working with G25

1. This makes a secondary PCA of the closest populations to the specified population. It sets the color of the points based on the distance to the specified population, and it draws a line between each point and its two closest neighbors.

Code:
```library(tidyverse)

nneigh=2
xpc=1
ypc=2

t=rbind(t1,t2)

target="RUS_Bolshoy_Oleni_Ostrov"

dist=as.matrix(dist(t))
t2=t[names(pick),]
dist2=dist[target,names(pick)]

max=max(dist2)
# max=.25;dist2[dist2>max]=max # use a scale with a fixed maximum distance

p=prcomp(t2)
p2=as.data.frame(p\$x)
pct=paste0(colnames(p\$x)," (",sprintf("%.1f",p\$sdev/sum(p\$sdev)*100),"%)")
nk=16
p2\$k=cutree(hclust(dist(t2)),k=nk)

dist3=as.data.frame(as.matrix(dist(t2)))
seg0=lapply(1:nneigh+1,function(i)apply(dist3,1,function(x)unlist(p2[names(sort(x)[i]),c(xpc,ypc)],use.names=F))%>%t%>%cbind(p2[,c(xpc,ypc)]))
seg=do.call(rbind,seg0)%>%setNames(paste0("V",1:4))

ggplot(p2,aes(!!sym(paste0("PC",xpc)),!!sym(paste0("PC",ypc))))+
geom_segment(data=seg,aes(x=V1,y=V2,xend=V3,yend=V4),color="gray80",size=.3)+
geom_point(aes(color=dist2),size=.4)+
geom_text(aes(label=rownames(t2),color=dist2),size=2,vjust=-.7)+
labs(x=pct[xpc],y=pct[ypc])+
scale_x_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.08))+
scale_y_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.04))+
guides(color=guide_colorbar(ticks=F))+
theme(
aspect.ratio=1/sqrt(2),
axis.text=element_text(color="black",size=6),
axis.ticks=element_line(size=.3,color="gray70"),
axis.ticks.length=unit(-.12,"cm"),
axis.text.x=element_text(margin=margin(.2,0,0,0,"cm")),
axis.text.y=element_text(angle=90,vjust=1,hjust=.5,margin=margin(0,.2,0,0,"cm")),
axis.title=element_text(color="black",size=8),
legend.direction="horizontal",
legend.justification="left",
legend.key.height=unit(.5,"cm"),
legend.key.width=unit(1,"cm"),
legend.margin=margin(0,0,-7,0),
legend.position="top",
legend.spacing.y=unit(.05,"cm"),
legend.text=element_text(size=6,vjust=.5),
legend.title=element_text(size=8,vjust=.7),
panel.background=element_rect(fill="white"),
panel.border=element_rect(color="gray70",fill=NA,size=.5),
panel.grid=element_blank()
)

ggsave("a.png",width=8,height=8)```

Originally Posted by David Bush
> ggdraw(p)
Error in ggdraw(p) : could not find function "ggdraw"
> leg=get_legend(p)
Error in get_legend(p) : could not find function "get_legend"
> ggdraw(plot_grid(p+theme(legend.position="none"),l eg,ncol=1,rel_heights=c(1,.12)))
Error in ggdraw(plot_grid(p + theme(legend.position = "none"), leg, ncol = 1, :
could not find function "ggdraw"
> ggsave("a.png",width=7,height=7)
I forgot to add `library("cowplot")` to the script.

2. ## The Following 2 Users Say Thank You to Nganasankhan For This Useful Post:

David Bush (05-02-2021),  sheepslayer (05-04-2021)

3. Originally Posted by Nganasankhan

to add `library("cowplot")` to the script.
Please provide a sample file for : "csv-table-copied-from-multi-tab"
let the
Because the script does not read my file
Thank You

4. Originally Posted by David Bush
Please provide a sample file for : "csv-table-copied-from-multi-tab"
let the
Because the script does not read my file
Thank You
Here's an updated version of my script. It automatically sets the height of the plot. It reorders the rows based on the order of branches in a hierarchical clustering tree. It uses `vegan::reorder.hclust` to arrange branches with a higher proportion of the first components to the top and populations with a higher proportion of the last components to the bottom.

Code:
```library(tidyverse)
library(reshape2) # for melt
library(colorspace) # for hex
library(cowplot) # for plot_grid
library(vegan) # for reorder.hclust

avedist=t[nrow(t),2]
t=t[-nrow(t),]

name=paste0(t[,1]," (",sub("^0","",sprintf("%.3f",t[,2])),")")
mat=t[,3:ncol(t)]/100

hc=hclust(dist(mat)) # order based on hierarchical clustering
hc=reorder(hc,as.matrix(mat)%*%seq(ncol(mat))^2)
t=cbind(name,mat)[hc\$order,]

# t=t[order(t[,2]),] # order based on a single column

t2=melt(t) # this uses melt because pivot_longer doesn't preserve the order of the columns
lab=round(100*t2\$value)
lab[lab==0]=""
t2\$lab=lab

p=ggplot(t2,aes(x=fct_rev(factor(name,level=t\$name)),y=value,fill=variable))+
geom_bar(stat="identity",width=1,position=position_fill(reverse=T))+
geom_text(aes(label=lab),position=position_stack(vjust=.5,reverse=T),size=3.5)+
ggtitle(paste("Average distance:",sub("^0","",sprintf("%.3f",avedist))))+
coord_flip()+
scale_x_discrete(expand=c(0,0))+ # remove gray area above and below the bars
scale_y_discrete(expand=c(0,0))+ # remove gray area on the left and right side of the bars
scale_fill_manual("legend",values=hex(HSV(c(0,20,40,90,180,210,240,270,330),.5,1)))+ # manual colors
guides(fill=guide_legend(ncol=3,byrow=F))+
theme(
axis.text=element_text(color="black",size=11),
axis.text.x=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.direction="horizontal",
legend.margin=margin(-6,0,0,0),
legend.text=element_text(size=11),
legend.title=element_blank(),
legend.key=element_rect(fill=NA), # remove gray border around color squares
legend.key.size=unit(.65,"cm"),
plot.title=element_text(size=11),
text=element_text(size=11)
)

hei=c(.3+.28*nrow(t),.1+.25*ceiling(length(unique(t2[!is.na(t2\$value),2]))/3))
plot_grid(p+theme(legend.position="none"),get_legend(p),ncol=1,rel_heights=hei)

ggsave("a.png",width=6,height=sum(hei))```

5. ## The Following 4 Users Say Thank You to Nganasankhan For This Useful Post:

David Bush (05-03-2021),  Kapisa (05-02-2021),  perttimp (05-04-2021),  sheepslayer (05-04-2021)

6. Originally Posted by Nganasankhan
Here's an updated version of my script. It automatically sets the height of the plot. It reorders the rows based on the order of branches in a hierarchical clustering tree. It uses `vegan::reorder.hclust` to arrange branches with a higher proportion of the first components to the top and populations with a higher proportion of the last components to the bottom.
Thank you very much Dear Nganasankhan
I notice that when the data is copied with option (copy table as csv), points are added that we have to delete so that the script can read them.

Another issue:
If the target populations are more than 9, they are not Plotted

7. ## The Following User Says Thank You to David Bush For This Useful Post:

Nganasankhan (05-03-2021)

8. Originally Posted by David Bush
Another issue:
If the target populations are more than 9, they are not Plotted
You need to remove the `scale_fill_manual` line, or add more colors to it.

I have no idea what those dots are. But you can try pressing COPY AS TSV instead, and change `read.csv` to `read.table`.

9. ## The Following User Says Thank You to Nganasankhan For This Useful Post:

David Bush (05-03-2021)

10. Originally Posted by Nganasankhan
The scripts in this post generate a heatmap based on pairwise distances between populations.
excuse me
w=d["Baltic_EST_Narva",]-d["RUS_Krasnoyarsk_BA",]
> sort=reorder(hclust(dist(t)),wts=w)

Error in unique.default(x, nmax = nmax) :
unique() applies only to vectors

11. ## The Following User Says Thank You to David Bush For This Useful Post:

Nganasankhan (05-04-2021)

12. Originally Posted by David Bush
excuse me
w=d["Baltic_EST_Narva",]-d["RUS_Krasnoyarsk_BA",]
> sort=reorder(hclust(dist(t)),wts=w)

Error in unique.default(x, nmax = nmax) :
unique() applies only to vectors
Thanks for pointing these out. I made an error, and the function `reorder.hclust` was from the package `vegan` and not from `ape`. You need to add this: `library(vegan)`.

13. ## The Following User Says Thank You to Nganasankhan For This Useful Post:

David Bush (05-04-2021)

14. The script in this post uses `vegan::spantree` to draw a minimal spanning tree. The color of the points indicates the value of PC3.

Code:
```library(tidyverse)
library(vegan)

xpc=1
ypc=2
zpc=3

t=rbind(csv1,csv2)

dist=as.matrix(dist(t))
t2=t[names(pick),]

p=prcomp(t2)
p2=as.data.frame(p\$x)
span=vegan::spantree(dist(t2))

ggplot(p2,aes(!!sym(paste0("PC",xpc)),!!sym(paste0("PC",ypc))))+
geom_segment(data=seg,aes(x=V1,y=V2,xend=V3,yend=V4),color="gray10",size=.3)+
geom_point(aes(color=!!sym(paste0("PC",zpc))),size=.4)+
geom_text(aes(label=rownames(t2),color=!!sym(paste0("PC",zpc))),size=2,vjust=-.7)+
scale_x_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.08))+
scale_y_continuous(breaks=seq(-10,10,.05),expand=expansion(mult=.04))+
theme(
aspect.ratio=1,
axis.text=element_text(color="gray10",size=6),
axis.ticks=element_line(size=.3,color="gray10"),
axis.ticks.length=unit(-.12,"cm"),
axis.text.x=element_text(margin=margin(.2,0,0,0,"cm")),
axis.text.y=element_text(angle=90,vjust=1,hjust=.5,margin=margin(0,.2,0,0,"cm")),
axis.title=element_blank(),
legend.position="none",
panel.background=element_rect(fill="gray40"),
plot.background=element_rect(fill="gray40",color=NA),
panel.border=element_rect(color="gray10",fill=NA,size=.5),
panel.grid=element_blank()
)

ggsave("a.png",width=6,height=6)```
Here's plots of the closest populations to Kostenki14 (left side) and Tianyuan (right side).

On the right side Tharu is close to Tianyuan on the first two PCs, but you can see that they are not close on PC3, because they have a fairly large difference in hue. Tharu's hue is about 28 and Tianyuan's hue is about 113, so their distance on PC3 is about 35% of the total range of PC3 (from (113-28)/240).

15. ## The Following User Says Thank You to Nganasankhan For This Useful Post:

David Bush (05-04-2021)

16. Dear ganasankhan
Beautiful art paintings
Your work is very interesting and new

17. ## The Following User Says Thank You to David Bush For This Useful Post:

Nganasankhan (05-04-2021)

#### Posting Permissions

• You may not post new threads
• You may not post replies
• You may not post attachments
• You may not edit your posts
•