In [1]:
# get clasp, genomes, halos
%cd /Users/schmackeroodle/jupyter_scaff/
/Users/schmackeroodle/jupyter_scaff
/Users/schmackeroodle/opt/anaconda3/envs/ancst_tutorial/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library. self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [2]:
%%bash
# get the results
curl -L -o out.tar.gz https://anchored.bioinf.uni-leipzig.de/sets/eeb0911b-8904-4617-ad43-5a391bbbe60e/download-small/
# extract the webserver output archive and see whats in there
tar -xzf out.tar.gz
# copy the data needed which is from https://zenodo.org/records/18005166
cp -r ~/Downloads/tutorial_aux_data/genomes/ out/utils/genomes/
# (extract and put into ~/Downloads or change path)
cp -r ~/Downloads/tutorial_aux_data/halos out/stable/synthology/
ls out/
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 291M 100 291M 0 0 6656k 0 0:00:44 0:00:44 --:--:-- 7919k0:00:44 --:--:-- 7510k
README requirements.txt scaffolder stable utils
In [3]:
%cd out/scaffolder
/Users/schmackeroodle/jupyter_scaff/out/scaffolder
/Users/schmackeroodle/opt/anaconda3/envs/ancst_tutorial/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library. self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [4]:
%%bash
# here we now scaffold the only non-chromsome-level assembly of this set: GCF_018153835.1 (NCBI ref genome of Drosophila eugracilis)
# there is a blossom5-v2.05 directory here which contains a linux-compiled version of the optimization program used for multo-ref scaffolding
# get it from here and comile if necessary
wget https://pub.ista.ac.at/~vnk/software/blossom5-v2.05.src.tar.gz
tar -xzf blossom5-v2.05.src.tar.gz
cd blossom5-v2.05.src
make clean && make
cd ..
# we put the rest of the species in a "refs.txt" file
grep -v "GCF_018153835.1" ../utils/orgs > refs.txt
# the multi-reference scaffolder can also be run without (unit) weights but lets use the anchor alignments as a proxy
# this way we get the total score of the alignmetns between two species divided by the total length of the two genomes per comparison
python3 get_weights.py GCF_018153835.1
# then we scaffold
python3 AncST_scaff.py GCF_018153835.1
--2025-12-30 14:02:20-- https://pub.ista.ac.at/~vnk/software/blossom5-v2.05.src.tar.gz
Resolving pub.ista.ac.at (pub.ista.ac.at)... 81.223.84.195
Connecting to pub.ista.ac.at (pub.ista.ac.at)|81.223.84.195|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 47922 (47K) [application/x-gzip]
Saving to: 'blossom5-v2.05.src.tar.gz.1'
0K .......... .......... .......... .......... ...... 100% 1.39M=0.03s
2025-12-30 14:02:21 (1.39 MB/s) - 'blossom5-v2.05.src.tar.gz.1' saved [47922/47922]
rm -f ./example.o ./misc.o ./PMduals.o ./PMexpand.o ./PMinit.o ./PMinterface.o ./PMmain.o ./PMrepair.o ./PMshrink.o MinCost/MinCost.o GEOM/GPMinit.o GEOM/GPMinterface.o GEOM/GPMkdtree.o GEOM/GPMmain.o blossom5
c++ -O3 -D_NDEBUG example.cpp -c -o example.o
c++ -O3 -D_NDEBUG misc.cpp -c -o misc.o
c++ -O3 -D_NDEBUG PMduals.cpp -c -o PMduals.o
c++ -O3 -D_NDEBUG PMexpand.cpp -c -o PMexpand.o
c++ -O3 -D_NDEBUG PMinit.cpp -c -o PMinit.o
c++ -O3 -D_NDEBUG PMinterface.cpp -c -o PMinterface.o
c++ -O3 -D_NDEBUG PMmain.cpp -c -o PMmain.o
c++ -O3 -D_NDEBUG PMrepair.cpp -c -o PMrepair.o
c++ -O3 -D_NDEBUG PMshrink.cpp -c -o PMshrink.o
c++ -O3 -D_NDEBUG MinCost/MinCost.cpp -c -o MinCost/MinCost.o
c++ -O3 -D_NDEBUG GEOM/GPMinit.cpp -c -o GEOM/GPMinit.o
c++ -O3 -D_NDEBUG GEOM/GPMinterface.cpp -c -o GEOM/GPMinterface.o
c++ -O3 -D_NDEBUG GEOM/GPMkdtree.cpp -c -o GEOM/GPMkdtree.o
c++ -O3 -D_NDEBUG GEOM/GPMmain.cpp -c -o GEOM/GPMmain.o
c++ -O3 -D_NDEBUG -o blossom5 ./example.o ./misc.o ./PMduals.o ./PMexpand.o ./PMinit.o ./PMinterface.o ./PMmain.o ./PMrepair.o ./PMshrink.o MinCost/MinCost.o GEOM/GPMinit.o GEOM/GPMinterface.o GEOM/GPMkdtree.o GEOM/GPMmain.o
20043415
2004341
200434
perfect matching with 300 nodes and 44700 edges
starting init...done [0.001 secs]. 2 trees
.0.
done [0.002 secs]. 32 grows, 0 expands, 10 shrinks
expands: [0.000 secs], shrinks: [0.000 secs], dual updates: [0.000 secs]
cost = -7980088.0
In [5]:
%%bash
# in the singles_out files we can view some results and aux files about the single-ref scaffolding
# in multi_out we can see the new scaffolds of the multi-ref run.
# here we can see that there are a bunch of contigs with divergently aligned reference chromosomes
tail -n 30 singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_*
echo
# we can make some initial observations:
# 1. D. eugracilis shows quite many ambiguously aligned contigs with GCF_018902025.1 - D. willistoni which is the species
# with the largest phylogenetic distance to D. eugracilis
# 2. NW_024572310.1 appears as divergently aligned with respect to 5 other species while most other contigs only appear max. 2 times
# in any of those pairwise relationships
grep -l "NW_024572310.1" singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_* | sed 's/.*_ref_//' | sed 's/.txt$//'
echo
# we can check where about in the phylogeny those 5 could be
grep --color=always "GCF_000001215.4\|GCF_004382195.2\|GCF_016746365.2\|GCF_018902025.1\|GCF_030179915.1" ../utils/NJTree.nwk
echo
grep --color=always "GCF_000001215.4\|GCF_004382195.2\|GCF_016746365.2\|GCF_018902025.1\|GCF_030179915.1" ../utils/UPGMATree.nwk
echo
# this pattern does not seem to clearly indicate that the chromosomes this contig aligns to may have undergone one specific rearrangement
# as could have been expected if they comprised one phylogenetic group which shares a particular ancestor
# thus, perhaps these are recurring rearrangements or the assemblies of this contig or the chromosomes are questionable
tail singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_0* | grep NW_024572310.1
echo
# the proportions of divergently aligned parts seem similar...perhaps we can inspect this visually...
# ...hence, lets now write it to a fasta file and submit it to the server (manually) with the rest of the Drosophilas (whose anchors are reused)
# that way we can better inspect the scaffolding output
# you need teh genome somewhere (here in ../utils/genomes/GCF_018153835.1.fasta)
python3 write_scaffolds_and_contigs_to_fasta.py\
../utils/genomes/GCF_018153835.1.fasta\
multi_out/scaffolds_names.out\
GCF_018153835.1_scaffolded.fasta
ls fastas/GCF_018153835.1_scaffolded.fasta
==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_000001215.4.txt <== NW_024572310.1: [NT_033779.5: 905.0,NT_033778.4: 345.0,] ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_004382195.2.txt <== NW_024572310.1: [NC_045949.1: 959.0,NC_045952.1: 361.0,] NW_024572805.1: [NC_045951.1: 756.0,NC_045952.1: 742.0,] ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_009870125.1.txt <== ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_016746365.2.txt <== NW_024571981.1: [NC_052527.2: 193405.09999999995,NC_052528.2: 342387.9,] NW_024571941.1: [NC_052527.2: 1368.0,NC_052528.2: 3332.6,] NW_024572605.1: [NC_052527.2: 541771.2999999999,NC_052528.2: 804988.2999999999,] NW_024572310.1: [NC_052528.2: 976.0,NW_025048801.1: 289.8,] ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_016746395.2.txt <== ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_017639315.1.txt <== NW_024573195.1: [NW_025319037.1: 7724.5,NW_025319038.1: 12438.900000000001,] NW_024573196.1: [NW_025319037.1: 1976.6,NW_025319038.1: 2637.3999999999996,] NW_024573032.1: [NW_025319038.1: 62.1,NC_057930.1: 172.0,] NW_024573369.1: [NC_057931.1: 51870.40000000001,NC_057932.1: 74477.00000000003,] NW_024573429.1: [NC_057931.1: 681.2,NC_057932.1: 1432.3,] NW_024573254.1: [NC_057931.1: 12870.500000000002,NC_057932.1: 20422.3,] NW_024573046.1: [NC_057931.1: 31853.600000000006,NC_057932.1: 22731.8,] NW_024573048.1: [NC_057931.1: 26772.000000000004,NC_057932.1: 16748.6,] NW_024572220.1: [NC_057931.1: 11429.6,NC_057932.1: 9490.5,] ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_018902025.1.txt <== NW_024572605.1: [NW_025814047.1: 180387.40000000002,NW_025814048.1: 100439.80000000006,] NW_024573812.1: [NW_025814047.1: 135066.60000000003,NW_025814048.1: 174841.80000000002,] NW_024573712.1: [NW_025814047.1: 23925.500000000004,NW_025814048.1: 17483.8,] NW_024571981.1: [NW_025814050.1: 120702.80000000008,NW_025814051.1: 61311.9,] NW_024573038.1: [NW_025814056.1: 128623.39999999998,NW_025814057.1: 152514.70000000007,] NW_024571881.1: [NW_025814056.1: 21190.3,NW_025814058.1: 13586.400000000001,] NW_024573036.1: [NW_025814050.1: 103832.10000000003,NW_025814051.1: 29175.000000000015,] NW_024573870.1: [NW_025814050.1: 34927.29999999999,NW_025814051.1: 18174.099999999995,] NW_024573018.1: [NW_025814047.1: 475.0,NW_025814128.1: 119.0,] NW_024573685.1: [NW_025814048.1: 183.0,NW_025814049.1: 320.8,] NW_024573052.1: [NW_025814052.1: 89031.29999999999,NW_025814053.1: 65014.50000000001,] NW_024572507.1: [NW_025814050.1: 2388.8,NW_025814051.1: 1945.5,] NW_024573209.1: [NW_025814050.1: 63836.30000000001,NW_025814051.1: 25896.69999999999,] NW_024573245.1: [NW_025814050.1: 43167.59999999999,NW_025814051.1: 22228.100000000002,] NW_024572310.1: [NW_025814050.1: 602.8,NW_025814051.1: 422.0,] NW_024573454.1: [NW_025814050.1: 37949.19999999999,NW_025814051.1: 16028.300000000001,] NW_024573287.1: [NW_025814050.1: 12051.0,NW_025814051.1: 16128.200000000003,] NW_024573369.1: [NW_025814052.1: 42716.40000000001,NW_025814053.1: 35109.2,] NW_024572672.1: [NW_025814056.1: 31065.600000000002,NW_025814057.1: 22964.90000000001,] NW_024573234.1: [NW_025814052.1: 3106.4999999999995,NW_025814053.1: 8905.700000000003,] NW_024573367.1: [NW_025814052.1: 4652.4,NW_025814053.1: 4614.3,] NW_024573030.1: [NW_025814052.1: 34718.99999999999,NW_025814053.1: 29661.099999999988,] NW_024573039.1: [NW_025814052.1: 46607.899999999994,NW_025814053.1: 32176.899999999994,] NW_024573046.1: [NW_025814052.1: 18748.6,NW_025814053.1: 10688.9,] NW_024573270.1: [NW_025814052.1: 25904.200000000004,NW_025814053.1: 20880.0,] NW_024572708.1: [NW_025814052.1: 29890.399999999994,NW_025814053.1: 9478.1,] NW_024572029.1: [NW_025814052.1: 26826.49999999999,NW_025814053.1: 7442.2,] NW_024572220.1: [NW_025814052.1: 3667.4999999999995,NW_025814053.1: 6578.6,NW_025814102.1: 2570.7,] NW_024572849.1: [NW_025814052.1: 12885.7,NW_025814053.1: 8496.2,] NW_024573415.1: [NW_025814052.1: 1417.0,NW_025814053.1: 650.0,] ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_030179895.1.txt <== NW_024573812.1: [NC_091728.1: 249628.4,NC_091729.1: 943910.5000000001,] NW_024573209.1: [NC_091728.1: 110888.50000000003,NC_091729.1: 93976.59999999999,] NW_024573454.1: [NC_091728.1: 85395.0,NC_091729.1: 32145.699999999993,] NW_024571941.1: [NC_091728.1: 1077.6,NC_091729.1: 2579.0,] NW_024573387.1: [NC_091728.1: 33502.6,NC_091729.1: 34024.999999999985,] NW_024573057.1: [NC_091728.1: 18323.50000000001,NC_091729.1: 19693.2,] NW_024571954.1: [NC_091728.1: 41066.3,NC_091729.1: 16331.6,] NW_024571771.1: [NC_091730.1: 213.0,NC_091731.1: 78.7,] ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_030179915.1.txt <== NW_024572310.1: [NC_091678.1: 1176.0,NC_091679.1: 473.0,] NW_024571771.1: [NC_091680.1: 172.0,NC_091681.1: 233.0,] ==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_030788295.1.txt <== NW_024572805.1: [NC_091544.1: 171.0,NW_027212828.1: 193.4,] GCF_000001215.4 GCF_004382195.2 GCF_016746365.2 GCF_018902025.1 GCF_030179915.1 (((GCF_000001215.4:0.43981,GCF_016746395.2:0.43774)Inner9:0.00438,GCF_004382195.2:0.43114)Inner10:0.00331,GCF_016746365.2:0.43861,(((dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:0.41588,dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:0.41576)Inner1:0.00209,GCF_018153835.1:0.41787)Inner2:0.03210,(((((GCF_030788295.1:0.47777,GCF_018902025.1:0.48284)Inner3:0.00487,GCF_009870125.1:0.47123)Inner4:0.00735,GCF_017639315.1:0.46996)Inner5:0.00228,GCF_030179895.1:0.46413)Inner6:0.00808,GCF_030179915.1:0.45477)Inner7:0.00224)Inner8:0.01084)Inner11:0.00000; (GCF_018902025.1:0.48030,(GCF_030788295.1:0.47778,(GCF_009870125.1:0.47379,(GCF_017639315.1:0.46755,(GCF_030179895.1:0.46277,(GCF_030179915.1:0.45369,(((dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:0.41582,dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:0.41582)Inner1:0.00207,GCF_018153835.1:0.41789)Inner2:0.03282,(GCF_016746365.2:0.44148,(GCF_000001215.4:0.43882,(GCF_004382195.2:0.43544,GCF_016746395.2:0.43544)Inner3:0.00338)Inner4:0.00266)Inner5:0.00923)Inner6:0.01221)Inner7:0.00908)Inner8:0.00478)Inner9:0.00624)Inner10:0.00399)Inner11:0.00252)Inner12:0.00000; NW_024572310.1: [NT_033779.5: 905.0,NT_033778.4: 345.0,] NW_024572310.1: [NC_045949.1: 959.0,NC_045952.1: 361.0,] NW_024572310.1: [NC_052528.2: 976.0,NW_025048801.1: 289.8,] NW_024572310.1: [NC_091678.1: 1176.0,NC_091679.1: 473.0,] fastas/GCF_018153835.1_scaffolded.fasta
In [6]:
%%bash
# here we computed the rest of the 11 species (including the contig version of D. eugracilis) and two scaffolded versions:
# (1) naively taking the output of the greedy algorithm and (2) excluding all contig which are ambiguously aligned to ref chromosomes
# (2) can be computed by just outcommenting line 207 where it says
# #and x in limited and c[enu+1] in limited:
# as we will see below including the ambiguously aligned contigs gives a biologically less plausible scaffolding result
cd ../..
mkdir -p scaffolded
cd scaffolded
# get the results
curl -L -o out.tar.gz https://anchored.bioinf.uni-leipzig.de/sets/eeb0911b-8904-4617-ad43-5a391bbbe60e/download-small/
# extract the webserver output archive and see whats in there
# and get the scaffolded genomes from zenodo at https://zenodo.org/uploads/18096371 (extract and put into ~/Downloads or change path)
tar -xzf out.tar.gz
cp -r ~/tutorial_aux_data/genomes/ out/utils/genomes/
cp -r ~/scaff_tut_aux/genomes/ out/utils/genomes/
cp -r ~/tutorial_aux_data/halos out/stable/synthology/
ls out/
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 291M 100 291M 0 0 9783k 0 0:00:30 0:00:30 --:--:-- 14.2M
README requirements.txt scaffolder stable utils
In [7]:
%cd ../../scaffolded/out/stable/MCScanX
/Users/schmackeroodle/jupyter_scaff/scaffolded/out/stable/MCScanX
/Users/schmackeroodle/opt/anaconda3/envs/ancst_tutorial/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library. self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [8]:
%%bash
# lets run MCScanX to draw the collinearity blocks and visually inspect the scaffolding results
~/MCScanX/MCScanX_h MCScanX
Reading homologs and pre-processing Generating homolog list 1269074 homologous pairs imported (0 discarded) 5695 pairwise comparisons 33238 alignments generated Pairwise collinear blocks written to MCScanX.collinearity [91.142 seconds elapsed] Writing multiple syntenic blocks to HTML files 10orgchr1.html 10orgchr100.html 10orgchr101.html 10orgchr113.html 10orgchr2.html 10orgchr3.html 10orgchr4.html 10orgchr47.html 10orgchr5.html 10orgchr52.html 10orgchr56.html 10orgchr58.html 10orgchr6.html 10orgchr61.html 10orgchr64.html 10orgchr66.html 10orgchr67.html 10orgchr68.html 10orgchr71.html 10orgchr72.html 10orgchr73.html 10orgchr79.html 10orgchr82.html 10orgchr83.html 10orgchr84.html 10orgchr93.html 10orgchr97.html 10orgchr98.html 10orgchr99.html 11orgchr1.html 11orgchr10.html 11orgchr100.html 11orgchr101.html 11orgchr102.html 11orgchr103.html 11orgchr104.html 11orgchr105.html 11orgchr106.html 11orgchr107.html 11orgchr108.html 11orgchr109.html 11orgchr11.html 11orgchr110.html 11orgchr111.html 11orgchr112.html 11orgchr113.html 11orgchr114.html 11orgchr115.html 11orgchr116.html 11orgchr117.html 11orgchr118.html 11orgchr119.html 11orgchr12.html 11orgchr120.html 11orgchr121.html 11orgchr122.html 11orgchr123.html 11orgchr124.html 11orgchr125.html 11orgchr126.html 11orgchr127.html 11orgchr129.html 11orgchr13.html 11orgchr130.html 11orgchr131.html 11orgchr132.html 11orgchr133.html 11orgchr134.html 11orgchr135.html 11orgchr136.html 11orgchr137.html 11orgchr138.html 11orgchr139.html 11orgchr14.html 11orgchr140.html 11orgchr141.html 11orgchr142.html 11orgchr143.html 11orgchr144.html 11orgchr145.html 11orgchr146.html 11orgchr147.html 11orgchr148.html 11orgchr149.html 11orgchr15.html 11orgchr150.html 11orgchr151.html 11orgchr153.html 11orgchr154.html 11orgchr155.html 11orgchr156.html 11orgchr157.html 11orgchr158.html 11orgchr159.html 11orgchr16.html 11orgchr160.html 11orgchr161.html 11orgchr162.html 11orgchr163.html 11orgchr164.html 11orgchr165.html 11orgchr166.html 11orgchr167.html 11orgchr168.html 11orgchr169.html 11orgchr17.html 11orgchr170.html 11orgchr171.html 11orgchr172.html 11orgchr173.html 11orgchr174.html 11orgchr175.html 11orgchr176.html 11orgchr177.html 11orgchr178.html 11orgchr179.html 11orgchr18.html 11orgchr180.html 11orgchr181.html 11orgchr182.html 11orgchr183.html 11orgchr184.html 11orgchr185.html 11orgchr186.html 11orgchr187.html 11orgchr188.html 11orgchr189.html 11orgchr19.html 11orgchr190.html 11orgchr191.html 11orgchr192.html 11orgchr193.html 11orgchr194.html 11orgchr195.html 11orgchr196.html 11orgchr197.html 11orgchr198.html 11orgchr199.html 11orgchr2.html 11orgchr20.html 11orgchr200.html 11orgchr201.html 11orgchr202.html 11orgchr203.html 11orgchr204.html 11orgchr205.html 11orgchr206.html 11orgchr207.html 11orgchr208.html 11orgchr209.html 11orgchr21.html 11orgchr210.html 11orgchr211.html 11orgchr212.html 11orgchr213.html 11orgchr214.html 11orgchr215.html 11orgchr216.html 11orgchr217.html 11orgchr218.html 11orgchr219.html 11orgchr22.html 11orgchr220.html 11orgchr221.html 11orgchr222.html 11orgchr223.html 11orgchr224.html 11orgchr225.html 11orgchr226.html 11orgchr227.html 11orgchr228.html 11orgchr229.html 11orgchr23.html 11orgchr230.html 11orgchr231.html 11orgchr232.html 11orgchr233.html 11orgchr234.html 11orgchr235.html 11orgchr236.html 11orgchr237.html 11orgchr238.html 11orgchr239.html 11orgchr24.html 11orgchr240.html 11orgchr241.html 11orgchr242.html 11orgchr243.html 11orgchr244.html 11orgchr245.html 11orgchr246.html 11orgchr247.html 11orgchr248.html 11orgchr249.html 11orgchr25.html 11orgchr250.html 11orgchr251.html 11orgchr252.html 11orgchr253.html 11orgchr254.html 11orgchr255.html 11orgchr256.html 11orgchr257.html 11orgchr258.html 11orgchr259.html 11orgchr26.html 11orgchr260.html 11orgchr261.html 11orgchr262.html 11orgchr263.html 11orgchr264.html 11orgchr265.html 11orgchr266.html 11orgchr267.html 11orgchr268.html 11orgchr269.html 11orgchr27.html 11orgchr270.html 11orgchr271.html 11orgchr272.html 11orgchr273.html 11orgchr274.html 11orgchr275.html 11orgchr276.html 11orgchr277.html 11orgchr278.html 11orgchr279.html 11orgchr28.html 11orgchr280.html 11orgchr281.html 11orgchr282.html 11orgchr283.html 11orgchr284.html 11orgchr285.html 11orgchr286.html 11orgchr287.html 11orgchr288.html 11orgchr289.html 11orgchr29.html 11orgchr290.html 11orgchr291.html 11orgchr292.html 11orgchr293.html 11orgchr294.html 11orgchr295.html 11orgchr296.html 11orgchr297.html 11orgchr298.html 11orgchr299.html 11orgchr3.html 11orgchr30.html 11orgchr300.html 11orgchr301.html 11orgchr302.html 11orgchr303.html 11orgchr304.html 11orgchr305.html 11orgchr306.html 11orgchr307.html 11orgchr308.html 11orgchr309.html 11orgchr31.html 11orgchr310.html 11orgchr311.html 11orgchr312.html 11orgchr313.html 11orgchr314.html 11orgchr315.html 11orgchr316.html 11orgchr317.html 11orgchr318.html 11orgchr319.html 11orgchr32.html 11orgchr320.html 11orgchr321.html 11orgchr322.html 11orgchr323.html 11orgchr324.html 11orgchr325.html 11orgchr326.html 11orgchr327.html 11orgchr328.html 11orgchr329.html 11orgchr33.html 11orgchr330.html 11orgchr331.html 11orgchr332.html 11orgchr333.html 11orgchr334.html 11orgchr335.html 11orgchr337.html 11orgchr338.html 11orgchr339.html 11orgchr34.html 11orgchr340.html 11orgchr341.html 11orgchr342.html 11orgchr343.html 11orgchr344.html 11orgchr345.html 11orgchr346.html 11orgchr347.html 11orgchr348.html 11orgchr349.html 11orgchr35.html 11orgchr350.html 11orgchr351.html 11orgchr352.html 11orgchr353.html 11orgchr354.html 11orgchr355.html 11orgchr356.html 11orgchr357.html 11orgchr358.html 11orgchr359.html 11orgchr36.html 11orgchr360.html 11orgchr361.html 11orgchr362.html 11orgchr363.html 11orgchr364.html 11orgchr365.html 11orgchr366.html 11orgchr367.html 11orgchr368.html 11orgchr369.html 11orgchr37.html 11orgchr370.html 11orgchr371.html 11orgchr372.html 11orgchr373.html 11orgchr374.html 11orgchr375.html 11orgchr376.html 11orgchr377.html 11orgchr378.html 11orgchr379.html 11orgchr38.html 11orgchr380.html 11orgchr381.html 11orgchr382.html 11orgchr383.html 11orgchr384.html 11orgchr385.html 11orgchr386.html 11orgchr387.html 11orgchr388.html 11orgchr389.html 11orgchr39.html 11orgchr390.html 11orgchr391.html 11orgchr392.html 11orgchr393.html 11orgchr394.html 11orgchr395.html 11orgchr396.html 11orgchr397.html 11orgchr398.html 11orgchr399.html 11orgchr4.html 11orgchr40.html 11orgchr400.html 11orgchr401.html 11orgchr402.html 11orgchr403.html 11orgchr404.html 11orgchr405.html 11orgchr406.html 11orgchr407.html 11orgchr408.html 11orgchr409.html 11orgchr41.html 11orgchr410.html 11orgchr411.html 11orgchr412.html 11orgchr413.html 11orgchr414.html 11orgchr415.html 11orgchr416.html 11orgchr417.html 11orgchr418.html 11orgchr419.html 11orgchr42.html 11orgchr420.html 11orgchr421.html 11orgchr422.html 11orgchr423.html 11orgchr424.html 11orgchr425.html 11orgchr426.html 11orgchr427.html 11orgchr428.html 11orgchr429.html 11orgchr43.html 11orgchr430.html 11orgchr431.html 11orgchr432.html 11orgchr433.html 11orgchr434.html 11orgchr435.html 11orgchr436.html 11orgchr437.html 11orgchr438.html 11orgchr439.html 11orgchr44.html 11orgchr440.html 11orgchr441.html 11orgchr442.html 11orgchr443.html 11orgchr444.html 11orgchr445.html 11orgchr446.html 11orgchr448.html 11orgchr449.html 11orgchr45.html 11orgchr450.html 11orgchr451.html 11orgchr452.html 11orgchr453.html 11orgchr454.html 11orgchr455.html 11orgchr456.html 11orgchr457.html 11orgchr458.html 11orgchr459.html 11orgchr46.html 11orgchr460.html 11orgchr461.html 11orgchr462.html 11orgchr463.html 11orgchr464.html 11orgchr465.html 11orgchr466.html 11orgchr467.html 11orgchr468.html 11orgchr469.html 11orgchr47.html 11orgchr470.html 11orgchr471.html 11orgchr472.html 11orgchr473.html 11orgchr474.html 11orgchr48.html 11orgchr49.html 11orgchr5.html 11orgchr51.html 11orgchr52.html 11orgchr53.html 11orgchr54.html 11orgchr55.html 11orgchr56.html 11orgchr57.html 11orgchr58.html 11orgchr59.html 11orgchr6.html 11orgchr60.html 11orgchr61.html 11orgchr62.html 11orgchr63.html 11orgchr64.html 11orgchr65.html 11orgchr66.html 11orgchr67.html 11orgchr68.html 11orgchr69.html 11orgchr7.html 11orgchr70.html 11orgchr71.html 11orgchr72.html 11orgchr73.html 11orgchr74.html 11orgchr75.html 11orgchr76.html 11orgchr77.html 11orgchr78.html 11orgchr79.html 11orgchr8.html 11orgchr80.html 11orgchr81.html 11orgchr82.html 11orgchr83.html 11orgchr84.html 11orgchr85.html 11orgchr86.html 11orgchr87.html 11orgchr88.html 11orgchr89.html 11orgchr9.html 11orgchr90.html 11orgchr91.html 11orgchr92.html 11orgchr93.html 11orgchr94.html 11orgchr95.html 11orgchr96.html 11orgchr97.html 11orgchr98.html 11orgchr99.html 12orgchr1.html 12orgchr10.html 12orgchr100.html 12orgchr101.html 12orgchr102.html 12orgchr103.html 12orgchr104.html 12orgchr105.html 12orgchr106.html 12orgchr108.html 12orgchr109.html 12orgchr11.html 12orgchr110.html 12orgchr111.html 12orgchr112.html 12orgchr113.html 12orgchr114.html 12orgchr115.html 12orgchr116.html 12orgchr117.html 12orgchr118.html 12orgchr119.html 12orgchr12.html 12orgchr120.html 12orgchr121.html 12orgchr122.html 12orgchr123.html 12orgchr124.html 12orgchr125.html 12orgchr126.html 12orgchr127.html 12orgchr128.html 12orgchr129.html 12orgchr13.html 12orgchr130.html 12orgchr131.html 12orgchr132.html 12orgchr133.html 12orgchr134.html 12orgchr135.html 12orgchr136.html 12orgchr137.html 12orgchr138.html 12orgchr139.html 12orgchr14.html 12orgchr140.html 12orgchr141.html 12orgchr142.html 12orgchr143.html 12orgchr144.html 12orgchr145.html 12orgchr146.html 12orgchr147.html 12orgchr148.html 12orgchr149.html 12orgchr15.html 12orgchr150.html 12orgchr151.html 12orgchr152.html 12orgchr153.html 12orgchr154.html 12orgchr155.html 12orgchr156.html 12orgchr157.html 12orgchr158.html 12orgchr159.html 12orgchr16.html 12orgchr160.html 12orgchr161.html 12orgchr162.html 12orgchr163.html 12orgchr164.html 12orgchr165.html 12orgchr166.html 12orgchr167.html 12orgchr168.html 12orgchr169.html 12orgchr17.html 12orgchr170.html 12orgchr171.html 12orgchr172.html 12orgchr173.html 12orgchr174.html 12orgchr175.html 12orgchr176.html 12orgchr177.html 12orgchr178.html 12orgchr179.html 12orgchr18.html 12orgchr180.html 12orgchr181.html 12orgchr182.html 12orgchr183.html 12orgchr184.html 12orgchr185.html 12orgchr186.html 12orgchr187.html 12orgchr188.html 12orgchr189.html 12orgchr19.html 12orgchr190.html 12orgchr191.html 12orgchr192.html 12orgchr193.html 12orgchr194.html 12orgchr195.html 12orgchr196.html 12orgchr197.html 12orgchr198.html 12orgchr199.html 12orgchr2.html 12orgchr20.html 12orgchr200.html 12orgchr201.html 12orgchr202.html 12orgchr203.html 12orgchr204.html 12orgchr205.html 12orgchr206.html 12orgchr207.html 12orgchr208.html 12orgchr209.html 12orgchr21.html 12orgchr210.html 12orgchr211.html 12orgchr212.html 12orgchr213.html 12orgchr214.html 12orgchr215.html 12orgchr216.html 12orgchr217.html 12orgchr218.html 12orgchr219.html 12orgchr22.html 12orgchr220.html 12orgchr221.html 12orgchr222.html 12orgchr223.html 12orgchr224.html 12orgchr225.html 12orgchr226.html 12orgchr227.html 12orgchr228.html 12orgchr229.html 12orgchr23.html 12orgchr230.html 12orgchr231.html 12orgchr232.html 12orgchr233.html 12orgchr234.html 12orgchr235.html 12orgchr236.html 12orgchr237.html 12orgchr238.html 12orgchr239.html 12orgchr24.html 12orgchr240.html 12orgchr241.html 12orgchr242.html 12orgchr243.html 12orgchr244.html 12orgchr245.html 12orgchr246.html 12orgchr248.html 12orgchr249.html 12orgchr25.html 12orgchr250.html 12orgchr251.html 12orgchr252.html 12orgchr253.html 12orgchr254.html 12orgchr255.html 12orgchr256.html 12orgchr257.html 12orgchr258.html 12orgchr259.html 12orgchr26.html 12orgchr260.html 12orgchr261.html 12orgchr262.html 12orgchr263.html 12orgchr264.html 12orgchr265.html 12orgchr266.html 12orgchr267.html 12orgchr268.html 12orgchr269.html 12orgchr27.html 12orgchr270.html 12orgchr271.html 12orgchr272.html 12orgchr273.html 12orgchr274.html 12orgchr275.html 12orgchr276.html 12orgchr277.html 12orgchr278.html 12orgchr279.html 12orgchr28.html 12orgchr280.html 12orgchr281.html 12orgchr282.html 12orgchr283.html 12orgchr284.html 12orgchr285.html 12orgchr286.html 12orgchr287.html 12orgchr288.html 12orgchr289.html 12orgchr29.html 12orgchr290.html 12orgchr291.html 12orgchr292.html 12orgchr293.html 12orgchr294.html 12orgchr295.html 12orgchr296.html 12orgchr297.html 12orgchr298.html 12orgchr299.html 12orgchr3.html 12orgchr30.html 12orgchr300.html 12orgchr301.html 12orgchr302.html 12orgchr303.html 12orgchr304.html 12orgchr305.html 12orgchr306.html 12orgchr307.html 12orgchr308.html 12orgchr309.html 12orgchr31.html 12orgchr310.html 12orgchr311.html 12orgchr312.html 12orgchr313.html 12orgchr314.html 12orgchr315.html 12orgchr316.html 12orgchr317.html 12orgchr318.html 12orgchr32.html 12orgchr320.html 12orgchr321.html 12orgchr322.html 12orgchr323.html 12orgchr324.html 12orgchr325.html 12orgchr326.html 12orgchr327.html 12orgchr328.html 12orgchr329.html 12orgchr33.html 12orgchr330.html 12orgchr331.html 12orgchr332.html 12orgchr333.html 12orgchr334.html 12orgchr335.html 12orgchr336.html 12orgchr337.html 12orgchr338.html 12orgchr34.html 12orgchr35.html 12orgchr36.html 12orgchr37.html 12orgchr38.html 12orgchr39.html 12orgchr4.html 12orgchr40.html 12orgchr41.html 12orgchr43.html 12orgchr44.html 12orgchr45.html 12orgchr46.html 12orgchr47.html 12orgchr48.html 12orgchr49.html 12orgchr5.html 12orgchr50.html 12orgchr51.html 12orgchr52.html 12orgchr53.html 12orgchr54.html 12orgchr55.html 12orgchr56.html 12orgchr57.html 12orgchr58.html 12orgchr59.html 12orgchr6.html 12orgchr60.html 12orgchr61.html 12orgchr62.html 12orgchr63.html 12orgchr64.html 12orgchr65.html 12orgchr66.html 12orgchr67.html 12orgchr68.html 12orgchr69.html 12orgchr7.html 12orgchr70.html 12orgchr71.html 12orgchr72.html 12orgchr73.html 12orgchr74.html 12orgchr75.html 12orgchr76.html 12orgchr77.html 12orgchr78.html 12orgchr79.html 12orgchr8.html 12orgchr80.html 12orgchr81.html 12orgchr82.html 12orgchr83.html 12orgchr84.html 12orgchr85.html 12orgchr86.html 12orgchr87.html 12orgchr88.html 12orgchr89.html 12orgchr9.html 12orgchr90.html 12orgchr91.html 12orgchr92.html 12orgchr93.html 12orgchr94.html 12orgchr95.html 12orgchr96.html 12orgchr97.html 12orgchr98.html 12orgchr99.html 13orgchr1.html 13orgchr10.html 13orgchr100.html 13orgchr101.html 13orgchr102.html 13orgchr103.html 13orgchr104.html 13orgchr105.html 13orgchr107.html 13orgchr108.html 13orgchr109.html 13orgchr11.html 13orgchr110.html 13orgchr111.html 13orgchr112.html 13orgchr113.html 13orgchr114.html 13orgchr115.html 13orgchr116.html 13orgchr117.html 13orgchr118.html 13orgchr119.html 13orgchr12.html 13orgchr120.html 13orgchr121.html 13orgchr122.html 13orgchr123.html 13orgchr124.html 13orgchr125.html 13orgchr126.html 13orgchr127.html 13orgchr128.html 13orgchr129.html 13orgchr13.html 13orgchr130.html 13orgchr131.html 13orgchr132.html 13orgchr133.html 13orgchr134.html 13orgchr135.html 13orgchr136.html 13orgchr137.html 13orgchr138.html 13orgchr139.html 13orgchr14.html 13orgchr140.html 13orgchr141.html 13orgchr142.html 13orgchr143.html 13orgchr144.html 13orgchr145.html 13orgchr146.html 13orgchr147.html 13orgchr148.html 13orgchr149.html 13orgchr15.html 13orgchr150.html 13orgchr151.html 13orgchr152.html 13orgchr153.html 13orgchr154.html 13orgchr155.html 13orgchr156.html 13orgchr157.html 13orgchr158.html 13orgchr159.html 13orgchr16.html 13orgchr160.html 13orgchr161.html 13orgchr162.html 13orgchr163.html 13orgchr164.html 13orgchr165.html 13orgchr166.html 13orgchr167.html 13orgchr168.html 13orgchr169.html 13orgchr17.html 13orgchr170.html 13orgchr171.html 13orgchr172.html 13orgchr173.html 13orgchr174.html 13orgchr175.html 13orgchr176.html 13orgchr177.html 13orgchr178.html 13orgchr179.html 13orgchr18.html 13orgchr180.html 13orgchr181.html 13orgchr182.html 13orgchr183.html 13orgchr184.html 13orgchr185.html 13orgchr186.html 13orgchr187.html 13orgchr188.html 13orgchr189.html 13orgchr19.html 13orgchr190.html 13orgchr191.html 13orgchr192.html 13orgchr193.html 13orgchr194.html 13orgchr195.html 13orgchr196.html 13orgchr197.html 13orgchr198.html 13orgchr199.html 13orgchr2.html 13orgchr20.html 13orgchr200.html 13orgchr201.html 13orgchr202.html 13orgchr203.html 13orgchr204.html 13orgchr205.html 13orgchr206.html 13orgchr207.html 13orgchr208.html 13orgchr209.html 13orgchr21.html 13orgchr210.html 13orgchr211.html 13orgchr212.html 13orgchr213.html 13orgchr214.html 13orgchr215.html 13orgchr216.html 13orgchr217.html 13orgchr218.html 13orgchr219.html 13orgchr22.html 13orgchr220.html 13orgchr221.html 13orgchr222.html 13orgchr223.html 13orgchr224.html 13orgchr225.html 13orgchr226.html 13orgchr227.html 13orgchr228.html 13orgchr229.html 13orgchr23.html 13orgchr230.html 13orgchr231.html 13orgchr232.html 13orgchr233.html 13orgchr234.html 13orgchr235.html 13orgchr236.html 13orgchr237.html 13orgchr238.html 13orgchr239.html 13orgchr24.html 13orgchr240.html 13orgchr241.html 13orgchr242.html 13orgchr243.html 13orgchr244.html 13orgchr246.html 13orgchr247.html 13orgchr248.html 13orgchr249.html 13orgchr25.html 13orgchr250.html 13orgchr251.html 13orgchr252.html 13orgchr253.html 13orgchr254.html 13orgchr255.html 13orgchr256.html 13orgchr257.html 13orgchr258.html 13orgchr259.html 13orgchr26.html 13orgchr260.html 13orgchr261.html 13orgchr262.html 13orgchr263.html 13orgchr264.html 13orgchr265.html 13orgchr266.html 13orgchr267.html 13orgchr268.html 13orgchr269.html 13orgchr27.html 13orgchr270.html 13orgchr271.html 13orgchr272.html 13orgchr273.html 13orgchr274.html 13orgchr275.html 13orgchr276.html 13orgchr277.html 13orgchr278.html 13orgchr279.html 13orgchr28.html 13orgchr280.html 13orgchr281.html 13orgchr282.html 13orgchr283.html 13orgchr284.html 13orgchr285.html 13orgchr286.html 13orgchr287.html 13orgchr288.html 13orgchr289.html 13orgchr29.html 13orgchr290.html 13orgchr291.html 13orgchr292.html 13orgchr293.html 13orgchr294.html 13orgchr295.html 13orgchr296.html 13orgchr297.html 13orgchr298.html 13orgchr299.html 13orgchr3.html 13orgchr30.html 13orgchr300.html 13orgchr301.html 13orgchr302.html 13orgchr303.html 13orgchr304.html 13orgchr305.html 13orgchr306.html 13orgchr307.html 13orgchr308.html 13orgchr309.html 13orgchr31.html 13orgchr310.html 13orgchr311.html 13orgchr312.html 13orgchr313.html 13orgchr314.html 13orgchr315.html 13orgchr316.html 13orgchr318.html 13orgchr319.html 13orgchr32.html 13orgchr320.html 13orgchr321.html 13orgchr322.html 13orgchr323.html 13orgchr324.html 13orgchr325.html 13orgchr326.html 13orgchr327.html 13orgchr328.html 13orgchr329.html 13orgchr33.html 13orgchr330.html 13orgchr331.html 13orgchr332.html 13orgchr333.html 13orgchr334.html 13orgchr335.html 13orgchr336.html 13orgchr34.html 13orgchr35.html 13orgchr36.html 13orgchr37.html 13orgchr38.html 13orgchr39.html 13orgchr4.html 13orgchr40.html 13orgchr42.html 13orgchr43.html 13orgchr44.html 13orgchr45.html 13orgchr46.html 13orgchr47.html 13orgchr48.html 13orgchr49.html 13orgchr5.html 13orgchr50.html 13orgchr51.html 13orgchr52.html 13orgchr53.html 13orgchr54.html 13orgchr55.html 13orgchr56.html 13orgchr57.html 13orgchr58.html 13orgchr59.html 13orgchr6.html 13orgchr60.html 13orgchr61.html 13orgchr62.html 13orgchr63.html 13orgchr64.html 13orgchr65.html 13orgchr66.html 13orgchr67.html 13orgchr68.html 13orgchr69.html 13orgchr7.html 13orgchr70.html 13orgchr71.html 13orgchr72.html 13orgchr73.html 13orgchr74.html 13orgchr75.html 13orgchr76.html 13orgchr77.html 13orgchr78.html 13orgchr79.html 13orgchr8.html 13orgchr80.html 13orgchr81.html 13orgchr82.html 13orgchr83.html 13orgchr84.html 13orgchr85.html 13orgchr86.html 13orgchr87.html 13orgchr88.html 13orgchr89.html 13orgchr9.html 13orgchr90.html 13orgchr91.html 13orgchr92.html 13orgchr93.html 13orgchr94.html 13orgchr95.html 13orgchr96.html 13orgchr97.html 13orgchr98.html 13orgchr99.html 1orgchr1.html 1orgchr10.html 1orgchr11.html 1orgchr12.html 1orgchr13.html 1orgchr14.html 1orgchr2.html 1orgchr3.html 1orgchr4.html 1orgchr5.html 1orgchr6.html 1orgchr7.html 1orgchr8.html 2orgchr1.html 2orgchr16.html 2orgchr2.html 2orgchr25.html 2orgchr26.html 2orgchr27.html 2orgchr28.html 2orgchr29.html 2orgchr3.html 2orgchr30.html 2orgchr31.html 2orgchr32.html 2orgchr35.html 2orgchr39.html 2orgchr4.html 2orgchr44.html 2orgchr45.html 2orgchr5.html 2orgchr6.html 2orgchr7.html 3orgchr1.html 3orgchr10.html 3orgchr13.html 3orgchr18.html 3orgchr19.html 3orgchr2.html 3orgchr26.html 3orgchr3.html 3orgchr4.html 3orgchr5.html 3orgchr6.html 3orgchr7.html 4orgchr1.html 4orgchr11.html 4orgchr12.html 4orgchr13.html 4orgchr14.html 4orgchr15.html 4orgchr17.html 4orgchr2.html 4orgchr3.html 4orgchr4.html 4orgchr5.html 4orgchr6.html 4orgchr7.html 4orgchr9.html 5orgchr1.html 5orgchr2.html 5orgchr3.html 5orgchr4.html 5orgchr5.html 5orgchr6.html 6orgchr1.html 6orgchr12.html 6orgchr17.html 6orgchr2.html 6orgchr3.html 6orgchr4.html 6orgchr5.html 6orgchr6.html 7orgchr1.html 7orgchr11.html 7orgchr12.html 7orgchr13.html 7orgchr14.html 7orgchr16.html 7orgchr2.html 7orgchr23.html 7orgchr34.html 7orgchr35.html 7orgchr36.html 7orgchr38.html 7orgchr4.html 7orgchr42.html 7orgchr43.html 7orgchr44.html 7orgchr45.html 7orgchr46.html 7orgchr48.html 7orgchr50.html 7orgchr53.html 7orgchr56.html 7orgchr57.html 7orgchr59.html 7orgchr61.html 7orgchr8.html 8orgchr1.html 8orgchr2.html 8orgchr3.html 8orgchr4.html 8orgchr5.html 8orgchr7.html 9orgchr1.html 9orgchr10.html 9orgchr107.html 9orgchr11.html 9orgchr12.html 9orgchr126.html 9orgchr13.html 9orgchr135.html 9orgchr14.html 9orgchr140.html 9orgchr146.html 9orgchr15.html 9orgchr152.html 9orgchr16.html 9orgchr161.html 9orgchr168.html 9orgchr177.html 9orgchr178.html 9orgchr179.html 9orgchr18.html 9orgchr186.html 9orgchr194.html 9orgchr197.html 9orgchr2.html 9orgchr202.html 9orgchr209.html 9orgchr21.html 9orgchr236.html 9orgchr238.html 9orgchr243.html 9orgchr247.html 9orgchr251.html 9orgchr268.html 9orgchr3.html 9orgchr318.html 9orgchr321.html 9orgchr333.html 9orgchr34.html 9orgchr38.html 9orgchr4.html 9orgchr44.html 9orgchr45.html 9orgchr5.html 9orgchr53.html 9orgchr55.html 9orgchr6.html 9orgchr67.html 9orgchr7.html 9orgchr74.html 9orgchr8.html 9orgchr86.html 9orgchr89.html 9orgchr9.html Print statistics: Species # of collinear homolog pairs # of homolog pairs Percentage 10&11 9614 11607 82.8293 10&12 9649 11591 83.2456 10&13 9685 11590 83.5634 10&1o 8891 10892 81.6287 10&2o 9226 11195 82.4118 10&3o 9443 11468 82.3422 10&4o 8658 10724 80.7348 10&5o 11546 13676 84.4253 10&6o 10463 12730 82.1917 10&7o 10871 13008 83.5716 10&8o 8957 10806 82.8891 10&9o 10099 12889 78.3536 11&12 29907 30465 98.1684 11&13 29915 30480 98.1463 11&1o 16741 16899 99.065 11&2o 17246 17418 99.0125 11&3o 17781 17961 98.9978 11&4o 16704 16890 98.8988 11&5o 21115 21382 98.7513 11&6o 17321 18236 94.9825 11&7o 17019 17798 95.6231 11&8o 11240 12380 90.7916 11&9o 9937 12517 79.388 12&13 30546 31034 98.4275 12&1o 16755 16855 99.4067 12&2o 17336 17448 99.3581 12&3o 17848 17975 99.2935 12&4o 16819 16948 99.2388 12&5o 21083 21334 98.8235 12&6o 17358 18219 95.2742 12&7o 17059 17764 96.0313 12&8o 11270 12362 91.1665 12&9o 10001 12464 80.2391 13&1o 16754 16865 99.3418 13&2o 17381 17514 99.2406 13&3o 17823 17953 99.2759 13&4o 16795 16931 99.1967 13&5o 21076 21323 98.8416 13&6o 17393 18236 95.3773 13&7o 17038 17747 96.005 13&8o 11328 12415 91.2445 13&9o 9929 12446 79.7766 1o&2o 19856 19875 99.9044 1o&3o 20979 21029 99.7622 1o&4o 17559 17627 99.6142 1o&5o 18412 18642 98.7662 1o&6o 15185 16012 94.8351 1o&7o 14527 15267 95.1529 1o&8o 10701 11785 90.8019 1o&9o 8605 11081 77.6554 2o&3o 20158 20239 99.5998 2o&4o 18272 18359 99.5261 2o&5o 19723 19954 98.8423 2o&6o 16413 17228 95.2693 2o&7o 16246 16937 95.9202 2o&8o 10913 12045 90.6019 2o&9o 9628 11927 80.7244 3o&4o 18662 18730 99.6369 3o&5o 20101 20345 98.8007 3o&6o 16488 17367 94.9387 3o&7o 15895 16630 95.5803 3o&8o 11237 12339 91.069 3o&9o 9279 11810 78.569 4o&5o 19051 19291 98.7559 4o&6o 15799 16716 94.5142 4o&7o 15317 16098 95.1485 4o&8o 10585 11756 90.0391 4o&9o 8905 11375 78.2857 5o&6o 21546 22490 95.8026 5o&7o 20931 21787 96.0711 5o&8o 13118 14449 90.7883 5o&9o 11935 14659 81.4176 6o&7o 19551 20550 95.1387 6o&8o 12132 13555 89.502 6o&9o 11078 13804 80.2521 7o&8o 12279 13457 91.2462 7o&9o 11596 14199 81.6677 8o&9o 8625 11225 76.8374 Done! [18.956 seconds elapsed]
In [9]:
# lets now draw mcdraw images but in a hacky way:
# i outcommented line 48 and 49 of mcdraw.py to avoid automatic computation of greedy drawing order and
# to avoid parsing the blocks every time. additionally i changed the script to load the homology dict from memory but that is not necessary
# thats why get_mc_blocks.py and get_orders.py are run separately
# now we just make manual orders_orgs to draw the scaffolded genomes against one ref always
# since we have the (1) scaffolded version with all contigs, also the ones which ambiguously align to more than one ref chromosome and
# (2) the "limited" scaffolded version which excludes those contigs, we always draw both of them with one of the reference species
# as already mentioned above, the naively scaffolded version shows a fusion of two chromosomes
# if we do not take ambiguously aligned contigs, this fusion does not happen
# its generally not clear what caused this fusion without digging deep into the data
# all single-ref scenarios only place a contig with respect its primarily mapped ref chrs
# thus, the maximum matching (Blossom part) must introduce one or more edges connecting contigs which actually probably belong
# to different chrs
# most likely this happened because of the primary chromosomes from two or more ref species for one or more particular contigs
# comprise two or more biologically different chromosomes (so for Drosophilas one contig from D. eugracilis may primarily
# map to 2L for one pairwise comparison while it maps to 3R in the other and then in the Blossom graph these connections
# could both be established on the two ends of the contig and if this leads to an optimal solution, the results is a
# probably wrongly fused chromosome
import subprocess
import os
import shutil
from IPython.display import SVG, display
from pathlib import Path
os.makedirs('singles_out', exist_ok=True)
os.makedirs('orders', exist_ok=True)
os.makedirs('simple_maps', exist_ok=True)
# Configuration
THRESHOLD = '1000000'
# Step 1: Parse blocks once (creates naive_blocks.pickle)
print("Parsing MCScanX blocks...")
subprocess.run(['python3', 'get_mc_blocks.py', 'MCScanX.collinearity'], check=True)
print("Done!\n")
# Read all orgs
with open('orgs') as f:
all_orgs = [line.strip() for line in f if line.strip()]
# Fixed species for 3-genome comparison
scaffolded = 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded'
scaffolded_limited = 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited'
# Step 2: Generate simple_maps for all organisms once
print("Generating simple_maps for all organisms...")
for org in all_orgs:
if org == scaffolded or org == scaffolded_limited:
continue # scaffolded and scaffolded_limited handled specially, skip
if not os.path.exists(f'simple_maps/{org}'):
print(f" {org} -> scaffolded...")
subprocess.run(['python3', 'sc_mcdraw.py', scaffolded, org, '1', THRESHOLD],
check=True, capture_output=True)
# Also pre-generate scaffolded_limited -> org mapping for correct coloring
if not os.path.exists(f'simple_maps/scaffolded_limited_vs_{org}'):
print(f" scaffolded_limited -> {org}...")
subprocess.run(['python3', 'sc_mcdraw.py', org, scaffolded_limited, '0', THRESHOLD],
check=True, capture_output=True)
# Save with unique names to avoid overwriting
os.rename(f'simple_maps/{scaffolded_limited}', f'simple_maps/scaffolded_limited_vs_{org}')
if os.path.exists(f'orders/{scaffolded_limited}'):
os.rename(f'orders/{scaffolded_limited}', f'orders/scaffolded_limited_vs_{org}')
print("Done!\n")
# Step 3: Create output directory for individual comparisons
os.makedirs('pairwise_comparisons', exist_ok=True)
# Step 4: Loop through each organism and generate 3-way comparisons
for org in all_orgs:
if org == scaffolded or org == scaffolded_limited:
continue
# Skip if no simple_maps file exists (filtered by threshold)
if not os.path.exists(f'simple_maps/{org}'):
print(f"Skipping {org} (no data above threshold)")
continue
print(f"Drawing: {scaffolded} -> {org} -> {scaffolded_limited}...")
# Copy pre-generated files for scaffolded_limited -> org (for correct coloring and ordering)
shutil.copy(f'simple_maps/scaffolded_limited_vs_{org}', f'simple_maps/{scaffolded_limited}')
shutil.copy(f'orders/scaffolded_limited_vs_{org}', f'orders/{scaffolded_limited}')
# Create orders_orgs with 3-genome chain
with open('orders_orgs', 'w') as f:
f.write(f"{org}\t{scaffolded}\n")
f.write(f"{scaffolded_limited}\t{org}\n")
# Run mcdraw
subprocess.run([
'python3', 'mcdraw.py',
'--mcscanx_file', 'MCScanX.collinearity',
'--threshold_chr', THRESHOLD
], check=True)
# Rename output to preserve it
output_name = f"pairwise_comparisons/{org}_3way.svg"
os.rename('mcscx_blocks.svg', output_name)
print(f" Saved to {output_name}\n")
# Display all resulting images
print("\n" + "="*80)
print("DISPLAYING ALL PAIRWISE COMPARISONS")
print("="*80 + "\n")
comparison_dir = Path('pairwise_comparisons')
for svg_file in sorted(comparison_dir.glob('*.svg')):
print(f"\n{svg_file.name}")
print("-" * 80)
display(SVG(filename=str(svg_file)))
Parsing MCScanX blocks... Done! Generating simple_maps for all organisms... Done! Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_016746395.2 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_016746395.2_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_004382195.2 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_004382195.2_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_000001215.4 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_000001215.4_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_016746365.2 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_016746365.2_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_030179915.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_030179915.1_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_030179895.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_030179895.1_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_017639315.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_017639315.1_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_009870125.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_009870125.1_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_018902025.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_018902025.1_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_030788295.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_030788295.1_3way.svg Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_018153835.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited... Saved to pairwise_comparisons/GCF_018153835.1_3way.svg ================================================================================ DISPLAYING ALL PAIRWISE COMPARISONS ================================================================================ GCF_000001215.4_3way.svg --------------------------------------------------------------------------------
GCF_004382195.2_3way.svg --------------------------------------------------------------------------------
GCF_009870125.1_3way.svg --------------------------------------------------------------------------------
GCF_016746365.2_3way.svg --------------------------------------------------------------------------------
GCF_016746395.2_3way.svg --------------------------------------------------------------------------------
GCF_017639315.1_3way.svg --------------------------------------------------------------------------------
GCF_018153835.1_3way.svg --------------------------------------------------------------------------------
GCF_018902025.1_3way.svg --------------------------------------------------------------------------------
GCF_030179895.1_3way.svg --------------------------------------------------------------------------------
GCF_030179915.1_3way.svg --------------------------------------------------------------------------------
GCF_030788295.1_3way.svg --------------------------------------------------------------------------------