Hi Olgert,
One of our engineers created the script below and tested it here for
you. It appears to be working fine. The results are available at:
http://hgdownload-test.cse.ucsc.edu/goldenPath/hg19/vsMm10/reciprocalBest/.
If you have further questions, please reply to
gen...@soe.ucsc.edu.
#!/bin/csh -efx
# This script was automatically generated by
/cluster/bin/scripts/doRecipBest.pl
# It is to be executed on swarm in
/hive/data/genomes/hg19/bed/lastzMm10.2012-03-07/axtChain .
# It nets in both directions to get reciprocal best chains and nets.
# This script will fail if any of its commands fail.
cd /hive/data/genomes/hg19/bed/lastzMm10.2012-03-07/axtChain
# Swap hg19-best chains to be mm10-referenced:
chainStitchId hg19.mm10.over.chain.gz stdout \
| chainSwap stdin stdout \
| chainSort stdin mm10.hg19.tBest.chain
# Net those on mm10 to get mm10-ref'd reciprocal best net:
chainPreNet mm10.hg19.tBest.chain \
/hive/data/genomes/{mm10,hg19}/chrom.sizes stdout \
| chainNet -minSpace=1 -minScore=0 \
stdin /hive/data/genomes/{mm10,hg19}/chrom.sizes stdout /dev/null \
| netSyntenic stdin stdout \
| gzip -c > mm10.hg19.rbest.net.gz
# Extract mm10-ref'd reciprocal best chain:
netChainSubset mm10.hg19.rbest.net.gz mm10.hg19.tBest.chain stdout \
| chainStitchId stdin stdout \
| gzip -c > mm10.hg19.rbest.chain.gz
# Swap to get hg19-ref'd reciprocal best chain:
chainSwap mm10.hg19.rbest.chain.gz stdout \
| chainSort stdin stdout \
| gzip -c > hg19.mm10.rbest.chain.gz
# Net those on hg19 to get hg19-ref'd reciprocal best net:
chainPreNet hg19.mm10.rbest.chain.gz \
/hive/data/genomes/{hg19,mm10}/chrom.sizes stdout \
| chainNet -minSpace=1 -minScore=0 \
stdin /hive/data/genomes/{hg19,mm10}/chrom.sizes stdout /dev/null \
| netSyntenic stdin stdout \
| gzip -c > hg19.mm10.rbest.net.gz
# Clean up the one temp file and make md5sum:
rm mm10.hg19.tBest.chain
md5sum *.rbest.*.gz > md5sum.rbest.txt
# Create files for testing coverage of *.rbest.*.
netToBed -maxGap=1 mm10.hg19.rbest.net.gz mm10.hg19.rbest.net.bed
netToBed -maxGap=1 hg19.mm10.rbest.net.gz hg19.mm10.rbest.net.bed
chainToPsl mm10.hg19.rbest.chain.gz \
/hive/data/genomes/{mm10,hg19}/chrom.sizes \
/hive/data/genomes/mm10/mm10.2bit /hive/data/genomes/hg19/hg19.2bit \
mm10.hg19.rbest.chain.psl
chainToPsl hg19.mm10.rbest.chain.gz \
/hive/data/genomes/{hg19,mm10}/chrom.sizes \
/hive/data/genomes/hg19/hg19.2bit /hive/data/genomes/mm10/mm10.2bit \
hg19.mm10.rbest.chain.psl
# Verify that all coverage figures are equal:
set tChCov = `awk '{print $19;}' hg19.mm10.rbest.chain.psl | sed -e
's/,/\n/g' | awk 'BEGIN {N = 0;} {N += $1;} END {printf "%d\n", N;}'`
set qChCov = `awk '{print $19;}' mm10.hg19.rbest.chain.psl | sed -e
's/,/\n/g' | awk 'BEGIN {N = 0;} {N += $1;} END {printf "%d\n", N;}'`
set tNetCov = `awk 'BEGIN {N = 0;} {N += ($3 - $2);} END {printf "%d\n",
N;}' hg19.mm10.rbest.net.bed`
set qNetCov = `awk 'BEGIN {N = 0;} {N += ($3 - $2);} END {printf "%d\n",
N;}' mm10.hg19.rbest.net.bed`
if ($tChCov != $qChCov) then
echo "Warning: hg19 rbest chain coverage $tChCov != mm10 $qChCov"
endif
if ($tNetCov != $qNetCov) then
echo "Warning: hg19 rbest net coverage $tNetCov != mm10 $qNetCov"
endif
if ($tChCov != $tNetCov) then
echo "Warning: hg19 rbest chain coverage $tChCov != net cov $tNetCov"
endif
mkdir experiments
mv *.bed *.psl experiments
# Make rbest net axt's download: one .axt per hg19 seq.
netSplit hg19.mm10.rbest.net.gz rBestNet
chainSplit rBestChain hg19.mm10.rbest.chain.gz
cd ..
mkdir axtRBestNet
foreach f (axtChain/rBestNet/*.net)
netToAxt $f axtChain/rBestChain/$f:t:r.chain \
/hive/data/genomes/hg19/hg19.2bit /hive/data/genomes/mm10/mm10.2bit
stdout \
| axtSort stdin stdout \
| gzip -c > axtRBestNet/$f:t:r.hg19.mm10.net.axt.gz
end
# Make rbest mafNet for multiz: one .maf per hg19 seq.
mkdir mafRBestNet
foreach f (axtRBestNet/*.hg19.mm10.net.axt.gz)
axtToMaf -tPrefix=hg19. -qPrefix=mm10. $f \
/hive/data/genomes/{hg19,mm10}/chrom.sizes \
stdout \
| gzip -c > mafRBestNet/$f:t:r:r:r:r:r.maf.gz
end
--
Brooke Rhead
UCSC Genome Bioinformatics Group