• Home
  • Species
  • Sets
  • Create New Job
  • Tutorial
  • Scaffolder Tutorial
  • Help
In [1]:
# get clasp, genomes, halos

%cd /Users/schmackeroodle/jupyter_scaff/
/Users/schmackeroodle/jupyter_scaff
/Users/schmackeroodle/opt/anaconda3/envs/ancst_tutorial/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [2]:
%%bash

# get the results
curl -L -o out.tar.gz https://anchored.bioinf.uni-leipzig.de/sets/eeb0911b-8904-4617-ad43-5a391bbbe60e/download-small/
# extract the webserver output archive and see whats in there
tar -xzf out.tar.gz
# copy the data needed which is from https://zenodo.org/records/18005166
cp -r ~/Downloads/tutorial_aux_data/genomes/ out/utils/genomes/
# (extract and put into ~/Downloads or change path)
cp -r ~/Downloads/tutorial_aux_data/halos out/stable/synthology/
ls out/
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  291M  100  291M    0     0  6656k      0  0:00:44  0:00:44 --:--:-- 7919k0:00:44 --:--:-- 7510k
README
requirements.txt
scaffolder
stable
utils
In [3]:
%cd out/scaffolder
/Users/schmackeroodle/jupyter_scaff/out/scaffolder
/Users/schmackeroodle/opt/anaconda3/envs/ancst_tutorial/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [4]:
%%bash

# here we now scaffold the only non-chromsome-level assembly of this set: GCF_018153835.1 (NCBI ref genome of Drosophila eugracilis)

# there is a blossom5-v2.05 directory here which contains a linux-compiled version of the optimization program used for multo-ref scaffolding
# get it from here and comile if necessary
wget https://pub.ista.ac.at/~vnk/software/blossom5-v2.05.src.tar.gz
tar -xzf blossom5-v2.05.src.tar.gz
cd blossom5-v2.05.src
make clean && make
cd ..

# we put the rest of the species in a "refs.txt" file
grep -v "GCF_018153835.1" ../utils/orgs > refs.txt
# the multi-reference scaffolder can also be run without (unit) weights but lets use the anchor alignments as a proxy
# this way we get the total score of the alignmetns between two species divided by the total length of the two genomes per comparison
python3 get_weights.py GCF_018153835.1
# then we scaffold
python3 AncST_scaff.py GCF_018153835.1
--2025-12-30 14:02:20--  https://pub.ista.ac.at/~vnk/software/blossom5-v2.05.src.tar.gz
Resolving pub.ista.ac.at (pub.ista.ac.at)... 81.223.84.195
Connecting to pub.ista.ac.at (pub.ista.ac.at)|81.223.84.195|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 47922 (47K) [application/x-gzip]
Saving to: 'blossom5-v2.05.src.tar.gz.1'

     0K .......... .......... .......... .......... ......    100% 1.39M=0.03s

2025-12-30 14:02:21 (1.39 MB/s) - 'blossom5-v2.05.src.tar.gz.1' saved [47922/47922]

rm -f  ./example.o  ./misc.o  ./PMduals.o  ./PMexpand.o  ./PMinit.o  ./PMinterface.o  ./PMmain.o  ./PMrepair.o  ./PMshrink.o  MinCost/MinCost.o  GEOM/GPMinit.o  GEOM/GPMinterface.o  GEOM/GPMkdtree.o  GEOM/GPMmain.o blossom5
c++ -O3 -D_NDEBUG  example.cpp -c -o example.o
c++ -O3 -D_NDEBUG  misc.cpp -c -o misc.o
c++ -O3 -D_NDEBUG  PMduals.cpp -c -o PMduals.o
c++ -O3 -D_NDEBUG  PMexpand.cpp -c -o PMexpand.o
c++ -O3 -D_NDEBUG  PMinit.cpp -c -o PMinit.o
c++ -O3 -D_NDEBUG  PMinterface.cpp -c -o PMinterface.o
c++ -O3 -D_NDEBUG  PMmain.cpp -c -o PMmain.o
c++ -O3 -D_NDEBUG  PMrepair.cpp -c -o PMrepair.o
c++ -O3 -D_NDEBUG  PMshrink.cpp -c -o PMshrink.o
c++ -O3 -D_NDEBUG  MinCost/MinCost.cpp -c -o MinCost/MinCost.o
c++ -O3 -D_NDEBUG  GEOM/GPMinit.cpp -c -o GEOM/GPMinit.o
c++ -O3 -D_NDEBUG  GEOM/GPMinterface.cpp -c -o GEOM/GPMinterface.o
c++ -O3 -D_NDEBUG  GEOM/GPMkdtree.cpp -c -o GEOM/GPMkdtree.o
c++ -O3 -D_NDEBUG  GEOM/GPMmain.cpp -c -o GEOM/GPMmain.o
c++ -O3 -D_NDEBUG  -o blossom5  ./example.o  ./misc.o  ./PMduals.o  ./PMexpand.o  ./PMinit.o  ./PMinterface.o  ./PMmain.o  ./PMrepair.o  ./PMshrink.o  MinCost/MinCost.o  GEOM/GPMinit.o  GEOM/GPMinterface.o  GEOM/GPMkdtree.o  GEOM/GPMmain.o 
20043415
2004341
200434
perfect matching with 300 nodes and 44700 edges
    starting init...done [0.001 secs]. 2 trees
    .0.
done [0.002 secs]. 32 grows, 0 expands, 10 shrinks
    expands: [0.000 secs], shrinks: [0.000 secs], dual updates: [0.000 secs]
cost = -7980088.0
In [5]:
%%bash

# in the singles_out files we can view some results and aux files about the single-ref scaffolding
# in multi_out we can see the new scaffolds of the multi-ref run.

# here we can see that there are a bunch of contigs with divergently aligned reference chromosomes
tail -n 30 singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_*
echo
# we can make some initial observations:
# 1. D. eugracilis shows quite many ambiguously aligned contigs with GCF_018902025.1 - D. willistoni which is the species 
# with the largest phylogenetic distance to D. eugracilis

# 2. NW_024572310.1 appears as divergently aligned with respect to 5 other species while most other contigs only appear max. 2 times 
# in any of those pairwise relationships
grep -l "NW_024572310.1" singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_* | sed 's/.*_ref_//' | sed 's/.txt$//'
echo
# we can check where about in the phylogeny those 5 could be 
grep --color=always "GCF_000001215.4\|GCF_004382195.2\|GCF_016746365.2\|GCF_018902025.1\|GCF_030179915.1" ../utils/NJTree.nwk
echo
grep --color=always "GCF_000001215.4\|GCF_004382195.2\|GCF_016746365.2\|GCF_018902025.1\|GCF_030179915.1" ../utils/UPGMATree.nwk
echo
# this pattern does not seem to clearly indicate that the chromosomes this contig aligns to may have undergone one specific rearrangement
# as could have been expected if they comprised one phylogenetic group which shares a particular ancestor
# thus, perhaps these are recurring rearrangements or the assemblies of this contig or the chromosomes are questionable
tail singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_0* | grep NW_024572310.1
echo
# the proportions of divergently aligned parts seem similar...perhaps we can inspect this visually...

# ...hence, lets now write it to a fasta file and submit it to the server (manually) with the rest of the Drosophilas (whose anchors are reused)
# that way we can better inspect the scaffolding output
# you need teh genome somewhere (here in ../utils/genomes/GCF_018153835.1.fasta)
python3 write_scaffolds_and_contigs_to_fasta.py\
        ../utils/genomes/GCF_018153835.1.fasta\
        multi_out/scaffolds_names.out\
        GCF_018153835.1_scaffolded.fasta
ls fastas/GCF_018153835.1_scaffolded.fasta
==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_000001215.4.txt <==
NW_024572310.1: [NT_033779.5: 905.0,NT_033778.4: 345.0,]

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_004382195.2.txt <==
NW_024572310.1: [NC_045949.1: 959.0,NC_045952.1: 361.0,]
NW_024572805.1: [NC_045951.1: 756.0,NC_045952.1: 742.0,]

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_009870125.1.txt <==

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_016746365.2.txt <==
NW_024571981.1: [NC_052527.2: 193405.09999999995,NC_052528.2: 342387.9,]
NW_024571941.1: [NC_052527.2: 1368.0,NC_052528.2: 3332.6,]
NW_024572605.1: [NC_052527.2: 541771.2999999999,NC_052528.2: 804988.2999999999,]
NW_024572310.1: [NC_052528.2: 976.0,NW_025048801.1: 289.8,]

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_016746395.2.txt <==

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_017639315.1.txt <==
NW_024573195.1: [NW_025319037.1: 7724.5,NW_025319038.1: 12438.900000000001,]
NW_024573196.1: [NW_025319037.1: 1976.6,NW_025319038.1: 2637.3999999999996,]
NW_024573032.1: [NW_025319038.1: 62.1,NC_057930.1: 172.0,]
NW_024573369.1: [NC_057931.1: 51870.40000000001,NC_057932.1: 74477.00000000003,]
NW_024573429.1: [NC_057931.1: 681.2,NC_057932.1: 1432.3,]
NW_024573254.1: [NC_057931.1: 12870.500000000002,NC_057932.1: 20422.3,]
NW_024573046.1: [NC_057931.1: 31853.600000000006,NC_057932.1: 22731.8,]
NW_024573048.1: [NC_057931.1: 26772.000000000004,NC_057932.1: 16748.6,]
NW_024572220.1: [NC_057931.1: 11429.6,NC_057932.1: 9490.5,]

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_018902025.1.txt <==
NW_024572605.1: [NW_025814047.1: 180387.40000000002,NW_025814048.1: 100439.80000000006,]
NW_024573812.1: [NW_025814047.1: 135066.60000000003,NW_025814048.1: 174841.80000000002,]
NW_024573712.1: [NW_025814047.1: 23925.500000000004,NW_025814048.1: 17483.8,]
NW_024571981.1: [NW_025814050.1: 120702.80000000008,NW_025814051.1: 61311.9,]
NW_024573038.1: [NW_025814056.1: 128623.39999999998,NW_025814057.1: 152514.70000000007,]
NW_024571881.1: [NW_025814056.1: 21190.3,NW_025814058.1: 13586.400000000001,]
NW_024573036.1: [NW_025814050.1: 103832.10000000003,NW_025814051.1: 29175.000000000015,]
NW_024573870.1: [NW_025814050.1: 34927.29999999999,NW_025814051.1: 18174.099999999995,]
NW_024573018.1: [NW_025814047.1: 475.0,NW_025814128.1: 119.0,]
NW_024573685.1: [NW_025814048.1: 183.0,NW_025814049.1: 320.8,]
NW_024573052.1: [NW_025814052.1: 89031.29999999999,NW_025814053.1: 65014.50000000001,]
NW_024572507.1: [NW_025814050.1: 2388.8,NW_025814051.1: 1945.5,]
NW_024573209.1: [NW_025814050.1: 63836.30000000001,NW_025814051.1: 25896.69999999999,]
NW_024573245.1: [NW_025814050.1: 43167.59999999999,NW_025814051.1: 22228.100000000002,]
NW_024572310.1: [NW_025814050.1: 602.8,NW_025814051.1: 422.0,]
NW_024573454.1: [NW_025814050.1: 37949.19999999999,NW_025814051.1: 16028.300000000001,]
NW_024573287.1: [NW_025814050.1: 12051.0,NW_025814051.1: 16128.200000000003,]
NW_024573369.1: [NW_025814052.1: 42716.40000000001,NW_025814053.1: 35109.2,]
NW_024572672.1: [NW_025814056.1: 31065.600000000002,NW_025814057.1: 22964.90000000001,]
NW_024573234.1: [NW_025814052.1: 3106.4999999999995,NW_025814053.1: 8905.700000000003,]
NW_024573367.1: [NW_025814052.1: 4652.4,NW_025814053.1: 4614.3,]
NW_024573030.1: [NW_025814052.1: 34718.99999999999,NW_025814053.1: 29661.099999999988,]
NW_024573039.1: [NW_025814052.1: 46607.899999999994,NW_025814053.1: 32176.899999999994,]
NW_024573046.1: [NW_025814052.1: 18748.6,NW_025814053.1: 10688.9,]
NW_024573270.1: [NW_025814052.1: 25904.200000000004,NW_025814053.1: 20880.0,]
NW_024572708.1: [NW_025814052.1: 29890.399999999994,NW_025814053.1: 9478.1,]
NW_024572029.1: [NW_025814052.1: 26826.49999999999,NW_025814053.1: 7442.2,]
NW_024572220.1: [NW_025814052.1: 3667.4999999999995,NW_025814053.1: 6578.6,NW_025814102.1: 2570.7,]
NW_024572849.1: [NW_025814052.1: 12885.7,NW_025814053.1: 8496.2,]
NW_024573415.1: [NW_025814052.1: 1417.0,NW_025814053.1: 650.0,]

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_030179895.1.txt <==
NW_024573812.1: [NC_091728.1: 249628.4,NC_091729.1: 943910.5000000001,]
NW_024573209.1: [NC_091728.1: 110888.50000000003,NC_091729.1: 93976.59999999999,]
NW_024573454.1: [NC_091728.1: 85395.0,NC_091729.1: 32145.699999999993,]
NW_024571941.1: [NC_091728.1: 1077.6,NC_091729.1: 2579.0,]
NW_024573387.1: [NC_091728.1: 33502.6,NC_091729.1: 34024.999999999985,]
NW_024573057.1: [NC_091728.1: 18323.50000000001,NC_091729.1: 19693.2,]
NW_024571954.1: [NC_091728.1: 41066.3,NC_091729.1: 16331.6,]
NW_024571771.1: [NC_091730.1: 213.0,NC_091731.1: 78.7,]

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_030179915.1.txt <==
NW_024572310.1: [NC_091678.1: 1176.0,NC_091679.1: 473.0,]
NW_024571771.1: [NC_091680.1: 172.0,NC_091681.1: 233.0,]

==> singles_out/divergently_aligned_contigs_target_GCF_018153835.1_from_AncST_ref_GCF_030788295.1.txt <==
NW_024572805.1: [NC_091544.1: 171.0,NW_027212828.1: 193.4,]

GCF_000001215.4
GCF_004382195.2
GCF_016746365.2
GCF_018902025.1
GCF_030179915.1

(((GCF_000001215.4:0.43981,GCF_016746395.2:0.43774)Inner9:0.00438,GCF_004382195.2:0.43114)Inner10:0.00331,GCF_016746365.2:0.43861,(((dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:0.41588,dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:0.41576)Inner1:0.00209,GCF_018153835.1:0.41787)Inner2:0.03210,(((((GCF_030788295.1:0.47777,GCF_018902025.1:0.48284)Inner3:0.00487,GCF_009870125.1:0.47123)Inner4:0.00735,GCF_017639315.1:0.46996)Inner5:0.00228,GCF_030179895.1:0.46413)Inner6:0.00808,GCF_030179915.1:0.45477)Inner7:0.00224)Inner8:0.01084)Inner11:0.00000;

(GCF_018902025.1:0.48030,(GCF_030788295.1:0.47778,(GCF_009870125.1:0.47379,(GCF_017639315.1:0.46755,(GCF_030179895.1:0.46277,(GCF_030179915.1:0.45369,(((dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:0.41582,dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:0.41582)Inner1:0.00207,GCF_018153835.1:0.41789)Inner2:0.03282,(GCF_016746365.2:0.44148,(GCF_000001215.4:0.43882,(GCF_004382195.2:0.43544,GCF_016746395.2:0.43544)Inner3:0.00338)Inner4:0.00266)Inner5:0.00923)Inner6:0.01221)Inner7:0.00908)Inner8:0.00478)Inner9:0.00624)Inner10:0.00399)Inner11:0.00252)Inner12:0.00000;

NW_024572310.1: [NT_033779.5: 905.0,NT_033778.4: 345.0,]
NW_024572310.1: [NC_045949.1: 959.0,NC_045952.1: 361.0,]
NW_024572310.1: [NC_052528.2: 976.0,NW_025048801.1: 289.8,]
NW_024572310.1: [NC_091678.1: 1176.0,NC_091679.1: 473.0,]

fastas/GCF_018153835.1_scaffolded.fasta
In [6]:
%%bash

# here we computed the rest of the 11 species (including the contig version of D. eugracilis) and two scaffolded versions: 
# (1) naively taking the output of the greedy algorithm and (2) excluding all contig which are ambiguously aligned to ref chromosomes
# (2) can be computed by just outcommenting line 207 where it says
# #and x in limited and c[enu+1] in limited:
# as we will see below including the ambiguously aligned contigs gives a biologically less plausible scaffolding result

cd ../..
mkdir -p scaffolded
cd scaffolded
# get the results
curl -L -o out.tar.gz https://anchored.bioinf.uni-leipzig.de/sets/eeb0911b-8904-4617-ad43-5a391bbbe60e/download-small/
# extract the webserver output archive and see whats in there
# and get the scaffolded genomes from zenodo at https://zenodo.org/uploads/18096371 (extract and put into ~/Downloads or change path)
tar -xzf out.tar.gz
cp -r ~/tutorial_aux_data/genomes/ out/utils/genomes/
cp -r ~/scaff_tut_aux/genomes/ out/utils/genomes/
cp -r ~/tutorial_aux_data/halos out/stable/synthology/
ls out/
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  291M  100  291M    0     0  9783k      0  0:00:30  0:00:30 --:--:-- 14.2M
README
requirements.txt
scaffolder
stable
utils
In [7]:
%cd ../../scaffolded/out/stable/MCScanX
/Users/schmackeroodle/jupyter_scaff/scaffolded/out/stable/MCScanX
/Users/schmackeroodle/opt/anaconda3/envs/ancst_tutorial/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [8]:
%%bash
# lets run MCScanX to draw the collinearity blocks and visually inspect the scaffolding results
~/MCScanX/MCScanX_h MCScanX
Reading homologs and pre-processing
Generating homolog list
1269074 homologous pairs imported (0 discarded)
5695 pairwise comparisons
33238 alignments generated
Pairwise collinear blocks written to MCScanX.collinearity [91.142 seconds elapsed]
Writing multiple syntenic blocks to HTML files
10orgchr1.html
10orgchr100.html
10orgchr101.html
10orgchr113.html
10orgchr2.html
10orgchr3.html
10orgchr4.html
10orgchr47.html
10orgchr5.html
10orgchr52.html
10orgchr56.html
10orgchr58.html
10orgchr6.html
10orgchr61.html
10orgchr64.html
10orgchr66.html
10orgchr67.html
10orgchr68.html
10orgchr71.html
10orgchr72.html
10orgchr73.html
10orgchr79.html
10orgchr82.html
10orgchr83.html
10orgchr84.html
10orgchr93.html
10orgchr97.html
10orgchr98.html
10orgchr99.html
11orgchr1.html
11orgchr10.html
11orgchr100.html
11orgchr101.html
11orgchr102.html
11orgchr103.html
11orgchr104.html
11orgchr105.html
11orgchr106.html
11orgchr107.html
11orgchr108.html
11orgchr109.html
11orgchr11.html
11orgchr110.html
11orgchr111.html
11orgchr112.html
11orgchr113.html
11orgchr114.html
11orgchr115.html
11orgchr116.html
11orgchr117.html
11orgchr118.html
11orgchr119.html
11orgchr12.html
11orgchr120.html
11orgchr121.html
11orgchr122.html
11orgchr123.html
11orgchr124.html
11orgchr125.html
11orgchr126.html
11orgchr127.html
11orgchr129.html
11orgchr13.html
11orgchr130.html
11orgchr131.html
11orgchr132.html
11orgchr133.html
11orgchr134.html
11orgchr135.html
11orgchr136.html
11orgchr137.html
11orgchr138.html
11orgchr139.html
11orgchr14.html
11orgchr140.html
11orgchr141.html
11orgchr142.html
11orgchr143.html
11orgchr144.html
11orgchr145.html
11orgchr146.html
11orgchr147.html
11orgchr148.html
11orgchr149.html
11orgchr15.html
11orgchr150.html
11orgchr151.html
11orgchr153.html
11orgchr154.html
11orgchr155.html
11orgchr156.html
11orgchr157.html
11orgchr158.html
11orgchr159.html
11orgchr16.html
11orgchr160.html
11orgchr161.html
11orgchr162.html
11orgchr163.html
11orgchr164.html
11orgchr165.html
11orgchr166.html
11orgchr167.html
11orgchr168.html
11orgchr169.html
11orgchr17.html
11orgchr170.html
11orgchr171.html
11orgchr172.html
11orgchr173.html
11orgchr174.html
11orgchr175.html
11orgchr176.html
11orgchr177.html
11orgchr178.html
11orgchr179.html
11orgchr18.html
11orgchr180.html
11orgchr181.html
11orgchr182.html
11orgchr183.html
11orgchr184.html
11orgchr185.html
11orgchr186.html
11orgchr187.html
11orgchr188.html
11orgchr189.html
11orgchr19.html
11orgchr190.html
11orgchr191.html
11orgchr192.html
11orgchr193.html
11orgchr194.html
11orgchr195.html
11orgchr196.html
11orgchr197.html
11orgchr198.html
11orgchr199.html
11orgchr2.html
11orgchr20.html
11orgchr200.html
11orgchr201.html
11orgchr202.html
11orgchr203.html
11orgchr204.html
11orgchr205.html
11orgchr206.html
11orgchr207.html
11orgchr208.html
11orgchr209.html
11orgchr21.html
11orgchr210.html
11orgchr211.html
11orgchr212.html
11orgchr213.html
11orgchr214.html
11orgchr215.html
11orgchr216.html
11orgchr217.html
11orgchr218.html
11orgchr219.html
11orgchr22.html
11orgchr220.html
11orgchr221.html
11orgchr222.html
11orgchr223.html
11orgchr224.html
11orgchr225.html
11orgchr226.html
11orgchr227.html
11orgchr228.html
11orgchr229.html
11orgchr23.html
11orgchr230.html
11orgchr231.html
11orgchr232.html
11orgchr233.html
11orgchr234.html
11orgchr235.html
11orgchr236.html
11orgchr237.html
11orgchr238.html
11orgchr239.html
11orgchr24.html
11orgchr240.html
11orgchr241.html
11orgchr242.html
11orgchr243.html
11orgchr244.html
11orgchr245.html
11orgchr246.html
11orgchr247.html
11orgchr248.html
11orgchr249.html
11orgchr25.html
11orgchr250.html
11orgchr251.html
11orgchr252.html
11orgchr253.html
11orgchr254.html
11orgchr255.html
11orgchr256.html
11orgchr257.html
11orgchr258.html
11orgchr259.html
11orgchr26.html
11orgchr260.html
11orgchr261.html
11orgchr262.html
11orgchr263.html
11orgchr264.html
11orgchr265.html
11orgchr266.html
11orgchr267.html
11orgchr268.html
11orgchr269.html
11orgchr27.html
11orgchr270.html
11orgchr271.html
11orgchr272.html
11orgchr273.html
11orgchr274.html
11orgchr275.html
11orgchr276.html
11orgchr277.html
11orgchr278.html
11orgchr279.html
11orgchr28.html
11orgchr280.html
11orgchr281.html
11orgchr282.html
11orgchr283.html
11orgchr284.html
11orgchr285.html
11orgchr286.html
11orgchr287.html
11orgchr288.html
11orgchr289.html
11orgchr29.html
11orgchr290.html
11orgchr291.html
11orgchr292.html
11orgchr293.html
11orgchr294.html
11orgchr295.html
11orgchr296.html
11orgchr297.html
11orgchr298.html
11orgchr299.html
11orgchr3.html
11orgchr30.html
11orgchr300.html
11orgchr301.html
11orgchr302.html
11orgchr303.html
11orgchr304.html
11orgchr305.html
11orgchr306.html
11orgchr307.html
11orgchr308.html
11orgchr309.html
11orgchr31.html
11orgchr310.html
11orgchr311.html
11orgchr312.html
11orgchr313.html
11orgchr314.html
11orgchr315.html
11orgchr316.html
11orgchr317.html
11orgchr318.html
11orgchr319.html
11orgchr32.html
11orgchr320.html
11orgchr321.html
11orgchr322.html
11orgchr323.html
11orgchr324.html
11orgchr325.html
11orgchr326.html
11orgchr327.html
11orgchr328.html
11orgchr329.html
11orgchr33.html
11orgchr330.html
11orgchr331.html
11orgchr332.html
11orgchr333.html
11orgchr334.html
11orgchr335.html
11orgchr337.html
11orgchr338.html
11orgchr339.html
11orgchr34.html
11orgchr340.html
11orgchr341.html
11orgchr342.html
11orgchr343.html
11orgchr344.html
11orgchr345.html
11orgchr346.html
11orgchr347.html
11orgchr348.html
11orgchr349.html
11orgchr35.html
11orgchr350.html
11orgchr351.html
11orgchr352.html
11orgchr353.html
11orgchr354.html
11orgchr355.html
11orgchr356.html
11orgchr357.html
11orgchr358.html
11orgchr359.html
11orgchr36.html
11orgchr360.html
11orgchr361.html
11orgchr362.html
11orgchr363.html
11orgchr364.html
11orgchr365.html
11orgchr366.html
11orgchr367.html
11orgchr368.html
11orgchr369.html
11orgchr37.html
11orgchr370.html
11orgchr371.html
11orgchr372.html
11orgchr373.html
11orgchr374.html
11orgchr375.html
11orgchr376.html
11orgchr377.html
11orgchr378.html
11orgchr379.html
11orgchr38.html
11orgchr380.html
11orgchr381.html
11orgchr382.html
11orgchr383.html
11orgchr384.html
11orgchr385.html
11orgchr386.html
11orgchr387.html
11orgchr388.html
11orgchr389.html
11orgchr39.html
11orgchr390.html
11orgchr391.html
11orgchr392.html
11orgchr393.html
11orgchr394.html
11orgchr395.html
11orgchr396.html
11orgchr397.html
11orgchr398.html
11orgchr399.html
11orgchr4.html
11orgchr40.html
11orgchr400.html
11orgchr401.html
11orgchr402.html
11orgchr403.html
11orgchr404.html
11orgchr405.html
11orgchr406.html
11orgchr407.html
11orgchr408.html
11orgchr409.html
11orgchr41.html
11orgchr410.html
11orgchr411.html
11orgchr412.html
11orgchr413.html
11orgchr414.html
11orgchr415.html
11orgchr416.html
11orgchr417.html
11orgchr418.html
11orgchr419.html
11orgchr42.html
11orgchr420.html
11orgchr421.html
11orgchr422.html
11orgchr423.html
11orgchr424.html
11orgchr425.html
11orgchr426.html
11orgchr427.html
11orgchr428.html
11orgchr429.html
11orgchr43.html
11orgchr430.html
11orgchr431.html
11orgchr432.html
11orgchr433.html
11orgchr434.html
11orgchr435.html
11orgchr436.html
11orgchr437.html
11orgchr438.html
11orgchr439.html
11orgchr44.html
11orgchr440.html
11orgchr441.html
11orgchr442.html
11orgchr443.html
11orgchr444.html
11orgchr445.html
11orgchr446.html
11orgchr448.html
11orgchr449.html
11orgchr45.html
11orgchr450.html
11orgchr451.html
11orgchr452.html
11orgchr453.html
11orgchr454.html
11orgchr455.html
11orgchr456.html
11orgchr457.html
11orgchr458.html
11orgchr459.html
11orgchr46.html
11orgchr460.html
11orgchr461.html
11orgchr462.html
11orgchr463.html
11orgchr464.html
11orgchr465.html
11orgchr466.html
11orgchr467.html
11orgchr468.html
11orgchr469.html
11orgchr47.html
11orgchr470.html
11orgchr471.html
11orgchr472.html
11orgchr473.html
11orgchr474.html
11orgchr48.html
11orgchr49.html
11orgchr5.html
11orgchr51.html
11orgchr52.html
11orgchr53.html
11orgchr54.html
11orgchr55.html
11orgchr56.html
11orgchr57.html
11orgchr58.html
11orgchr59.html
11orgchr6.html
11orgchr60.html
11orgchr61.html
11orgchr62.html
11orgchr63.html
11orgchr64.html
11orgchr65.html
11orgchr66.html
11orgchr67.html
11orgchr68.html
11orgchr69.html
11orgchr7.html
11orgchr70.html
11orgchr71.html
11orgchr72.html
11orgchr73.html
11orgchr74.html
11orgchr75.html
11orgchr76.html
11orgchr77.html
11orgchr78.html
11orgchr79.html
11orgchr8.html
11orgchr80.html
11orgchr81.html
11orgchr82.html
11orgchr83.html
11orgchr84.html
11orgchr85.html
11orgchr86.html
11orgchr87.html
11orgchr88.html
11orgchr89.html
11orgchr9.html
11orgchr90.html
11orgchr91.html
11orgchr92.html
11orgchr93.html
11orgchr94.html
11orgchr95.html
11orgchr96.html
11orgchr97.html
11orgchr98.html
11orgchr99.html
12orgchr1.html
12orgchr10.html
12orgchr100.html
12orgchr101.html
12orgchr102.html
12orgchr103.html
12orgchr104.html
12orgchr105.html
12orgchr106.html
12orgchr108.html
12orgchr109.html
12orgchr11.html
12orgchr110.html
12orgchr111.html
12orgchr112.html
12orgchr113.html
12orgchr114.html
12orgchr115.html
12orgchr116.html
12orgchr117.html
12orgchr118.html
12orgchr119.html
12orgchr12.html
12orgchr120.html
12orgchr121.html
12orgchr122.html
12orgchr123.html
12orgchr124.html
12orgchr125.html
12orgchr126.html
12orgchr127.html
12orgchr128.html
12orgchr129.html
12orgchr13.html
12orgchr130.html
12orgchr131.html
12orgchr132.html
12orgchr133.html
12orgchr134.html
12orgchr135.html
12orgchr136.html
12orgchr137.html
12orgchr138.html
12orgchr139.html
12orgchr14.html
12orgchr140.html
12orgchr141.html
12orgchr142.html
12orgchr143.html
12orgchr144.html
12orgchr145.html
12orgchr146.html
12orgchr147.html
12orgchr148.html
12orgchr149.html
12orgchr15.html
12orgchr150.html
12orgchr151.html
12orgchr152.html
12orgchr153.html
12orgchr154.html
12orgchr155.html
12orgchr156.html
12orgchr157.html
12orgchr158.html
12orgchr159.html
12orgchr16.html
12orgchr160.html
12orgchr161.html
12orgchr162.html
12orgchr163.html
12orgchr164.html
12orgchr165.html
12orgchr166.html
12orgchr167.html
12orgchr168.html
12orgchr169.html
12orgchr17.html
12orgchr170.html
12orgchr171.html
12orgchr172.html
12orgchr173.html
12orgchr174.html
12orgchr175.html
12orgchr176.html
12orgchr177.html
12orgchr178.html
12orgchr179.html
12orgchr18.html
12orgchr180.html
12orgchr181.html
12orgchr182.html
12orgchr183.html
12orgchr184.html
12orgchr185.html
12orgchr186.html
12orgchr187.html
12orgchr188.html
12orgchr189.html
12orgchr19.html
12orgchr190.html
12orgchr191.html
12orgchr192.html
12orgchr193.html
12orgchr194.html
12orgchr195.html
12orgchr196.html
12orgchr197.html
12orgchr198.html
12orgchr199.html
12orgchr2.html
12orgchr20.html
12orgchr200.html
12orgchr201.html
12orgchr202.html
12orgchr203.html
12orgchr204.html
12orgchr205.html
12orgchr206.html
12orgchr207.html
12orgchr208.html
12orgchr209.html
12orgchr21.html
12orgchr210.html
12orgchr211.html
12orgchr212.html
12orgchr213.html
12orgchr214.html
12orgchr215.html
12orgchr216.html
12orgchr217.html
12orgchr218.html
12orgchr219.html
12orgchr22.html
12orgchr220.html
12orgchr221.html
12orgchr222.html
12orgchr223.html
12orgchr224.html
12orgchr225.html
12orgchr226.html
12orgchr227.html
12orgchr228.html
12orgchr229.html
12orgchr23.html
12orgchr230.html
12orgchr231.html
12orgchr232.html
12orgchr233.html
12orgchr234.html
12orgchr235.html
12orgchr236.html
12orgchr237.html
12orgchr238.html
12orgchr239.html
12orgchr24.html
12orgchr240.html
12orgchr241.html
12orgchr242.html
12orgchr243.html
12orgchr244.html
12orgchr245.html
12orgchr246.html
12orgchr248.html
12orgchr249.html
12orgchr25.html
12orgchr250.html
12orgchr251.html
12orgchr252.html
12orgchr253.html
12orgchr254.html
12orgchr255.html
12orgchr256.html
12orgchr257.html
12orgchr258.html
12orgchr259.html
12orgchr26.html
12orgchr260.html
12orgchr261.html
12orgchr262.html
12orgchr263.html
12orgchr264.html
12orgchr265.html
12orgchr266.html
12orgchr267.html
12orgchr268.html
12orgchr269.html
12orgchr27.html
12orgchr270.html
12orgchr271.html
12orgchr272.html
12orgchr273.html
12orgchr274.html
12orgchr275.html
12orgchr276.html
12orgchr277.html
12orgchr278.html
12orgchr279.html
12orgchr28.html
12orgchr280.html
12orgchr281.html
12orgchr282.html
12orgchr283.html
12orgchr284.html
12orgchr285.html
12orgchr286.html
12orgchr287.html
12orgchr288.html
12orgchr289.html
12orgchr29.html
12orgchr290.html
12orgchr291.html
12orgchr292.html
12orgchr293.html
12orgchr294.html
12orgchr295.html
12orgchr296.html
12orgchr297.html
12orgchr298.html
12orgchr299.html
12orgchr3.html
12orgchr30.html
12orgchr300.html
12orgchr301.html
12orgchr302.html
12orgchr303.html
12orgchr304.html
12orgchr305.html
12orgchr306.html
12orgchr307.html
12orgchr308.html
12orgchr309.html
12orgchr31.html
12orgchr310.html
12orgchr311.html
12orgchr312.html
12orgchr313.html
12orgchr314.html
12orgchr315.html
12orgchr316.html
12orgchr317.html
12orgchr318.html
12orgchr32.html
12orgchr320.html
12orgchr321.html
12orgchr322.html
12orgchr323.html
12orgchr324.html
12orgchr325.html
12orgchr326.html
12orgchr327.html
12orgchr328.html
12orgchr329.html
12orgchr33.html
12orgchr330.html
12orgchr331.html
12orgchr332.html
12orgchr333.html
12orgchr334.html
12orgchr335.html
12orgchr336.html
12orgchr337.html
12orgchr338.html
12orgchr34.html
12orgchr35.html
12orgchr36.html
12orgchr37.html
12orgchr38.html
12orgchr39.html
12orgchr4.html
12orgchr40.html
12orgchr41.html
12orgchr43.html
12orgchr44.html
12orgchr45.html
12orgchr46.html
12orgchr47.html
12orgchr48.html
12orgchr49.html
12orgchr5.html
12orgchr50.html
12orgchr51.html
12orgchr52.html
12orgchr53.html
12orgchr54.html
12orgchr55.html
12orgchr56.html
12orgchr57.html
12orgchr58.html
12orgchr59.html
12orgchr6.html
12orgchr60.html
12orgchr61.html
12orgchr62.html
12orgchr63.html
12orgchr64.html
12orgchr65.html
12orgchr66.html
12orgchr67.html
12orgchr68.html
12orgchr69.html
12orgchr7.html
12orgchr70.html
12orgchr71.html
12orgchr72.html
12orgchr73.html
12orgchr74.html
12orgchr75.html
12orgchr76.html
12orgchr77.html
12orgchr78.html
12orgchr79.html
12orgchr8.html
12orgchr80.html
12orgchr81.html
12orgchr82.html
12orgchr83.html
12orgchr84.html
12orgchr85.html
12orgchr86.html
12orgchr87.html
12orgchr88.html
12orgchr89.html
12orgchr9.html
12orgchr90.html
12orgchr91.html
12orgchr92.html
12orgchr93.html
12orgchr94.html
12orgchr95.html
12orgchr96.html
12orgchr97.html
12orgchr98.html
12orgchr99.html
13orgchr1.html
13orgchr10.html
13orgchr100.html
13orgchr101.html
13orgchr102.html
13orgchr103.html
13orgchr104.html
13orgchr105.html
13orgchr107.html
13orgchr108.html
13orgchr109.html
13orgchr11.html
13orgchr110.html
13orgchr111.html
13orgchr112.html
13orgchr113.html
13orgchr114.html
13orgchr115.html
13orgchr116.html
13orgchr117.html
13orgchr118.html
13orgchr119.html
13orgchr12.html
13orgchr120.html
13orgchr121.html
13orgchr122.html
13orgchr123.html
13orgchr124.html
13orgchr125.html
13orgchr126.html
13orgchr127.html
13orgchr128.html
13orgchr129.html
13orgchr13.html
13orgchr130.html
13orgchr131.html
13orgchr132.html
13orgchr133.html
13orgchr134.html
13orgchr135.html
13orgchr136.html
13orgchr137.html
13orgchr138.html
13orgchr139.html
13orgchr14.html
13orgchr140.html
13orgchr141.html
13orgchr142.html
13orgchr143.html
13orgchr144.html
13orgchr145.html
13orgchr146.html
13orgchr147.html
13orgchr148.html
13orgchr149.html
13orgchr15.html
13orgchr150.html
13orgchr151.html
13orgchr152.html
13orgchr153.html
13orgchr154.html
13orgchr155.html
13orgchr156.html
13orgchr157.html
13orgchr158.html
13orgchr159.html
13orgchr16.html
13orgchr160.html
13orgchr161.html
13orgchr162.html
13orgchr163.html
13orgchr164.html
13orgchr165.html
13orgchr166.html
13orgchr167.html
13orgchr168.html
13orgchr169.html
13orgchr17.html
13orgchr170.html
13orgchr171.html
13orgchr172.html
13orgchr173.html
13orgchr174.html
13orgchr175.html
13orgchr176.html
13orgchr177.html
13orgchr178.html
13orgchr179.html
13orgchr18.html
13orgchr180.html
13orgchr181.html
13orgchr182.html
13orgchr183.html
13orgchr184.html
13orgchr185.html
13orgchr186.html
13orgchr187.html
13orgchr188.html
13orgchr189.html
13orgchr19.html
13orgchr190.html
13orgchr191.html
13orgchr192.html
13orgchr193.html
13orgchr194.html
13orgchr195.html
13orgchr196.html
13orgchr197.html
13orgchr198.html
13orgchr199.html
13orgchr2.html
13orgchr20.html
13orgchr200.html
13orgchr201.html
13orgchr202.html
13orgchr203.html
13orgchr204.html
13orgchr205.html
13orgchr206.html
13orgchr207.html
13orgchr208.html
13orgchr209.html
13orgchr21.html
13orgchr210.html
13orgchr211.html
13orgchr212.html
13orgchr213.html
13orgchr214.html
13orgchr215.html
13orgchr216.html
13orgchr217.html
13orgchr218.html
13orgchr219.html
13orgchr22.html
13orgchr220.html
13orgchr221.html
13orgchr222.html
13orgchr223.html
13orgchr224.html
13orgchr225.html
13orgchr226.html
13orgchr227.html
13orgchr228.html
13orgchr229.html
13orgchr23.html
13orgchr230.html
13orgchr231.html
13orgchr232.html
13orgchr233.html
13orgchr234.html
13orgchr235.html
13orgchr236.html
13orgchr237.html
13orgchr238.html
13orgchr239.html
13orgchr24.html
13orgchr240.html
13orgchr241.html
13orgchr242.html
13orgchr243.html
13orgchr244.html
13orgchr246.html
13orgchr247.html
13orgchr248.html
13orgchr249.html
13orgchr25.html
13orgchr250.html
13orgchr251.html
13orgchr252.html
13orgchr253.html
13orgchr254.html
13orgchr255.html
13orgchr256.html
13orgchr257.html
13orgchr258.html
13orgchr259.html
13orgchr26.html
13orgchr260.html
13orgchr261.html
13orgchr262.html
13orgchr263.html
13orgchr264.html
13orgchr265.html
13orgchr266.html
13orgchr267.html
13orgchr268.html
13orgchr269.html
13orgchr27.html
13orgchr270.html
13orgchr271.html
13orgchr272.html
13orgchr273.html
13orgchr274.html
13orgchr275.html
13orgchr276.html
13orgchr277.html
13orgchr278.html
13orgchr279.html
13orgchr28.html
13orgchr280.html
13orgchr281.html
13orgchr282.html
13orgchr283.html
13orgchr284.html
13orgchr285.html
13orgchr286.html
13orgchr287.html
13orgchr288.html
13orgchr289.html
13orgchr29.html
13orgchr290.html
13orgchr291.html
13orgchr292.html
13orgchr293.html
13orgchr294.html
13orgchr295.html
13orgchr296.html
13orgchr297.html
13orgchr298.html
13orgchr299.html
13orgchr3.html
13orgchr30.html
13orgchr300.html
13orgchr301.html
13orgchr302.html
13orgchr303.html
13orgchr304.html
13orgchr305.html
13orgchr306.html
13orgchr307.html
13orgchr308.html
13orgchr309.html
13orgchr31.html
13orgchr310.html
13orgchr311.html
13orgchr312.html
13orgchr313.html
13orgchr314.html
13orgchr315.html
13orgchr316.html
13orgchr318.html
13orgchr319.html
13orgchr32.html
13orgchr320.html
13orgchr321.html
13orgchr322.html
13orgchr323.html
13orgchr324.html
13orgchr325.html
13orgchr326.html
13orgchr327.html
13orgchr328.html
13orgchr329.html
13orgchr33.html
13orgchr330.html
13orgchr331.html
13orgchr332.html
13orgchr333.html
13orgchr334.html
13orgchr335.html
13orgchr336.html
13orgchr34.html
13orgchr35.html
13orgchr36.html
13orgchr37.html
13orgchr38.html
13orgchr39.html
13orgchr4.html
13orgchr40.html
13orgchr42.html
13orgchr43.html
13orgchr44.html
13orgchr45.html
13orgchr46.html
13orgchr47.html
13orgchr48.html
13orgchr49.html
13orgchr5.html
13orgchr50.html
13orgchr51.html
13orgchr52.html
13orgchr53.html
13orgchr54.html
13orgchr55.html
13orgchr56.html
13orgchr57.html
13orgchr58.html
13orgchr59.html
13orgchr6.html
13orgchr60.html
13orgchr61.html
13orgchr62.html
13orgchr63.html
13orgchr64.html
13orgchr65.html
13orgchr66.html
13orgchr67.html
13orgchr68.html
13orgchr69.html
13orgchr7.html
13orgchr70.html
13orgchr71.html
13orgchr72.html
13orgchr73.html
13orgchr74.html
13orgchr75.html
13orgchr76.html
13orgchr77.html
13orgchr78.html
13orgchr79.html
13orgchr8.html
13orgchr80.html
13orgchr81.html
13orgchr82.html
13orgchr83.html
13orgchr84.html
13orgchr85.html
13orgchr86.html
13orgchr87.html
13orgchr88.html
13orgchr89.html
13orgchr9.html
13orgchr90.html
13orgchr91.html
13orgchr92.html
13orgchr93.html
13orgchr94.html
13orgchr95.html
13orgchr96.html
13orgchr97.html
13orgchr98.html
13orgchr99.html
1orgchr1.html
1orgchr10.html
1orgchr11.html
1orgchr12.html
1orgchr13.html
1orgchr14.html
1orgchr2.html
1orgchr3.html
1orgchr4.html
1orgchr5.html
1orgchr6.html
1orgchr7.html
1orgchr8.html
2orgchr1.html
2orgchr16.html
2orgchr2.html
2orgchr25.html
2orgchr26.html
2orgchr27.html
2orgchr28.html
2orgchr29.html
2orgchr3.html
2orgchr30.html
2orgchr31.html
2orgchr32.html
2orgchr35.html
2orgchr39.html
2orgchr4.html
2orgchr44.html
2orgchr45.html
2orgchr5.html
2orgchr6.html
2orgchr7.html
3orgchr1.html
3orgchr10.html
3orgchr13.html
3orgchr18.html
3orgchr19.html
3orgchr2.html
3orgchr26.html
3orgchr3.html
3orgchr4.html
3orgchr5.html
3orgchr6.html
3orgchr7.html
4orgchr1.html
4orgchr11.html
4orgchr12.html
4orgchr13.html
4orgchr14.html
4orgchr15.html
4orgchr17.html
4orgchr2.html
4orgchr3.html
4orgchr4.html
4orgchr5.html
4orgchr6.html
4orgchr7.html
4orgchr9.html
5orgchr1.html
5orgchr2.html
5orgchr3.html
5orgchr4.html
5orgchr5.html
5orgchr6.html
6orgchr1.html
6orgchr12.html
6orgchr17.html
6orgchr2.html
6orgchr3.html
6orgchr4.html
6orgchr5.html
6orgchr6.html
7orgchr1.html
7orgchr11.html
7orgchr12.html
7orgchr13.html
7orgchr14.html
7orgchr16.html
7orgchr2.html
7orgchr23.html
7orgchr34.html
7orgchr35.html
7orgchr36.html
7orgchr38.html
7orgchr4.html
7orgchr42.html
7orgchr43.html
7orgchr44.html
7orgchr45.html
7orgchr46.html
7orgchr48.html
7orgchr50.html
7orgchr53.html
7orgchr56.html
7orgchr57.html
7orgchr59.html
7orgchr61.html
7orgchr8.html
8orgchr1.html
8orgchr2.html
8orgchr3.html
8orgchr4.html
8orgchr5.html
8orgchr7.html
9orgchr1.html
9orgchr10.html
9orgchr107.html
9orgchr11.html
9orgchr12.html
9orgchr126.html
9orgchr13.html
9orgchr135.html
9orgchr14.html
9orgchr140.html
9orgchr146.html
9orgchr15.html
9orgchr152.html
9orgchr16.html
9orgchr161.html
9orgchr168.html
9orgchr177.html
9orgchr178.html
9orgchr179.html
9orgchr18.html
9orgchr186.html
9orgchr194.html
9orgchr197.html
9orgchr2.html
9orgchr202.html
9orgchr209.html
9orgchr21.html
9orgchr236.html
9orgchr238.html
9orgchr243.html
9orgchr247.html
9orgchr251.html
9orgchr268.html
9orgchr3.html
9orgchr318.html
9orgchr321.html
9orgchr333.html
9orgchr34.html
9orgchr38.html
9orgchr4.html
9orgchr44.html
9orgchr45.html
9orgchr5.html
9orgchr53.html
9orgchr55.html
9orgchr6.html
9orgchr67.html
9orgchr7.html
9orgchr74.html
9orgchr8.html
9orgchr86.html
9orgchr89.html
9orgchr9.html
Print statistics:
Species	# of collinear homolog pairs	# of homolog pairs	Percentage
10&11	9614	11607	82.8293
10&12	9649	11591	83.2456
10&13	9685	11590	83.5634
10&1o	8891	10892	81.6287
10&2o	9226	11195	82.4118
10&3o	9443	11468	82.3422
10&4o	8658	10724	80.7348
10&5o	11546	13676	84.4253
10&6o	10463	12730	82.1917
10&7o	10871	13008	83.5716
10&8o	8957	10806	82.8891
10&9o	10099	12889	78.3536
11&12	29907	30465	98.1684
11&13	29915	30480	98.1463
11&1o	16741	16899	99.065
11&2o	17246	17418	99.0125
11&3o	17781	17961	98.9978
11&4o	16704	16890	98.8988
11&5o	21115	21382	98.7513
11&6o	17321	18236	94.9825
11&7o	17019	17798	95.6231
11&8o	11240	12380	90.7916
11&9o	9937	12517	79.388
12&13	30546	31034	98.4275
12&1o	16755	16855	99.4067
12&2o	17336	17448	99.3581
12&3o	17848	17975	99.2935
12&4o	16819	16948	99.2388
12&5o	21083	21334	98.8235
12&6o	17358	18219	95.2742
12&7o	17059	17764	96.0313
12&8o	11270	12362	91.1665
12&9o	10001	12464	80.2391
13&1o	16754	16865	99.3418
13&2o	17381	17514	99.2406
13&3o	17823	17953	99.2759
13&4o	16795	16931	99.1967
13&5o	21076	21323	98.8416
13&6o	17393	18236	95.3773
13&7o	17038	17747	96.005
13&8o	11328	12415	91.2445
13&9o	9929	12446	79.7766
1o&2o	19856	19875	99.9044
1o&3o	20979	21029	99.7622
1o&4o	17559	17627	99.6142
1o&5o	18412	18642	98.7662
1o&6o	15185	16012	94.8351
1o&7o	14527	15267	95.1529
1o&8o	10701	11785	90.8019
1o&9o	8605	11081	77.6554
2o&3o	20158	20239	99.5998
2o&4o	18272	18359	99.5261
2o&5o	19723	19954	98.8423
2o&6o	16413	17228	95.2693
2o&7o	16246	16937	95.9202
2o&8o	10913	12045	90.6019
2o&9o	9628	11927	80.7244
3o&4o	18662	18730	99.6369
3o&5o	20101	20345	98.8007
3o&6o	16488	17367	94.9387
3o&7o	15895	16630	95.5803
3o&8o	11237	12339	91.069
3o&9o	9279	11810	78.569
4o&5o	19051	19291	98.7559
4o&6o	15799	16716	94.5142
4o&7o	15317	16098	95.1485
4o&8o	10585	11756	90.0391
4o&9o	8905	11375	78.2857
5o&6o	21546	22490	95.8026
5o&7o	20931	21787	96.0711
5o&8o	13118	14449	90.7883
5o&9o	11935	14659	81.4176
6o&7o	19551	20550	95.1387
6o&8o	12132	13555	89.502
6o&9o	11078	13804	80.2521
7o&8o	12279	13457	91.2462
7o&9o	11596	14199	81.6677
8o&9o	8625	11225	76.8374
Done! [18.956 seconds elapsed]
In [9]:
# lets now draw mcdraw images but in a hacky way:
# i outcommented line 48 and 49 of mcdraw.py to avoid automatic computation of greedy drawing order and 
# to avoid parsing the blocks every time. additionally i changed the script to load the homology dict from memory but that is not necessary
# thats why get_mc_blocks.py and get_orders.py are run separately
# now we just make manual orders_orgs to draw the scaffolded genomes against one ref always
# since we have the (1) scaffolded version with all contigs, also the ones which ambiguously align to more than one ref chromosome and 
# (2) the "limited" scaffolded version which excludes those contigs, we always draw both of them with one of the reference species
# as already mentioned above, the naively scaffolded version shows a fusion of two chromosomes
# if we do not take ambiguously aligned contigs, this fusion does not happen
# its generally not clear what caused this fusion without digging deep into the data
# all single-ref scenarios only place a contig with respect its primarily mapped ref chrs
# thus, the maximum matching (Blossom part) must introduce one or more edges connecting contigs which actually probably belong 
# to different chrs
# most likely this happened because of the primary chromosomes from two or more ref species for one or more particular contigs 
# comprise two or more biologically different chromosomes (so for Drosophilas one contig from D. eugracilis may primarily 
# map to 2L for one pairwise comparison while it maps to 3R in the other and then in the Blossom graph these connections 
# could both be established on the two ends of the contig and if this leads to an optimal solution, the results is a 
# probably wrongly fused chromosome

import subprocess
import os
import shutil
from IPython.display import SVG, display
from pathlib import Path

os.makedirs('singles_out', exist_ok=True)
os.makedirs('orders', exist_ok=True)
os.makedirs('simple_maps', exist_ok=True)

# Configuration
THRESHOLD = '1000000'

# Step 1: Parse blocks once (creates naive_blocks.pickle)
print("Parsing MCScanX blocks...")
subprocess.run(['python3', 'get_mc_blocks.py', 'MCScanX.collinearity'], check=True)
print("Done!\n")

# Read all orgs
with open('orgs') as f:
    all_orgs = [line.strip() for line in f if line.strip()]

# Fixed species for 3-genome comparison
scaffolded = 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded'
scaffolded_limited = 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited'

# Step 2: Generate simple_maps for all organisms once
print("Generating simple_maps for all organisms...")
for org in all_orgs:
    if org == scaffolded or org == scaffolded_limited:
        continue  # scaffolded and scaffolded_limited handled specially, skip
    if not os.path.exists(f'simple_maps/{org}'):
        print(f"  {org} -> scaffolded...")
        subprocess.run(['python3', 'sc_mcdraw.py', scaffolded, org, '1', THRESHOLD],
                      check=True, capture_output=True)
    # Also pre-generate scaffolded_limited -> org mapping for correct coloring
    if not os.path.exists(f'simple_maps/scaffolded_limited_vs_{org}'):
        print(f"  scaffolded_limited -> {org}...")
        subprocess.run(['python3', 'sc_mcdraw.py', org, scaffolded_limited, '0', THRESHOLD],
                      check=True, capture_output=True)
        # Save with unique names to avoid overwriting
        os.rename(f'simple_maps/{scaffolded_limited}', f'simple_maps/scaffolded_limited_vs_{org}')
        if os.path.exists(f'orders/{scaffolded_limited}'):
            os.rename(f'orders/{scaffolded_limited}', f'orders/scaffolded_limited_vs_{org}')
print("Done!\n")

# Step 3: Create output directory for individual comparisons
os.makedirs('pairwise_comparisons', exist_ok=True)

# Step 4: Loop through each organism and generate 3-way comparisons
for org in all_orgs:
    if org == scaffolded or org == scaffolded_limited:
        continue

    # Skip if no simple_maps file exists (filtered by threshold)
    if not os.path.exists(f'simple_maps/{org}'):
        print(f"Skipping {org} (no data above threshold)")
        continue

    print(f"Drawing: {scaffolded} -> {org} -> {scaffolded_limited}...")

    # Copy pre-generated files for scaffolded_limited -> org (for correct coloring and ordering)
    shutil.copy(f'simple_maps/scaffolded_limited_vs_{org}', f'simple_maps/{scaffolded_limited}')
    shutil.copy(f'orders/scaffolded_limited_vs_{org}', f'orders/{scaffolded_limited}')

    # Create orders_orgs with 3-genome chain
    with open('orders_orgs', 'w') as f:
        f.write(f"{org}\t{scaffolded}\n")
        f.write(f"{scaffolded_limited}\t{org}\n")

    # Run mcdraw
    subprocess.run([
        'python3', 'mcdraw.py',
        '--mcscanx_file', 'MCScanX.collinearity',
        '--threshold_chr', THRESHOLD
    ], check=True)

    # Rename output to preserve it
    output_name = f"pairwise_comparisons/{org}_3way.svg"
    os.rename('mcscx_blocks.svg', output_name)
    print(f"  Saved to {output_name}\n")

# Display all resulting images
print("\n" + "="*80)
print("DISPLAYING ALL PAIRWISE COMPARISONS")
print("="*80 + "\n")

comparison_dir = Path('pairwise_comparisons')
for svg_file in sorted(comparison_dir.glob('*.svg')):
    print(f"\n{svg_file.name}")
    print("-" * 80)
    display(SVG(filename=str(svg_file)))
Parsing MCScanX blocks...
Done!

Generating simple_maps for all organisms...
Done!

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_016746395.2 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_016746395.2_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_004382195.2 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_004382195.2_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_000001215.4 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_000001215.4_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_016746365.2 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_016746365.2_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_030179915.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_030179915.1_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_030179895.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_030179895.1_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_017639315.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_017639315.1_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_009870125.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_009870125.1_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_018902025.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_018902025.1_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_030788295.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_030788295.1_3way.svg

Drawing: dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> GCF_018153835.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited...
  Saved to pairwise_comparisons/GCF_018153835.1_3way.svg


================================================================================
DISPLAYING ALL PAIRWISE COMPARISONS
================================================================================


GCF_000001215.4_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_004382195.2_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_009870125.1_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_016746365.2_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_016746395.2_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_017639315.1_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_018153835.1_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_018902025.1_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_030179895.1_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_030179915.1_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
GCF_030788295.1_3way.svg
--------------------------------------------------------------------------------
No description has been provided for this image
In [10]:
# lets draw some microsynteny images for the same family of proteins as in the tutorial with the scaffolded version(s) of D. eugracilis too
# the data (halos) are found at https://zenodo.org/records/18005166
In [11]:
%cd ../synthology
/Users/schmackeroodle/jupyter_scaff/scaffolded/out/stable/synthology
/Users/schmackeroodle/opt/anaconda3/envs/ancst_tutorial/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library.
  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [12]:
%%bash

# although here we could attempt to liftover the annotations from the contig version of D. eugracilis and use 
# protein blast to search for potential homologs of the halo family proteins but we can also use the nucleotide 
# based pipeline introduced in the tutorial. lets mix the two which would be the method of choice if you have new 
# genomes without annotations and trusted ones

python3 get_syn_regions.py --cores 8 --iter 20 --threshold_new_region 1000000 --anchor_scope 500000 --gff halos/gff
# and lets filter by the 2 species we are actually intered in/do not have annotations for
# the get syn regions is still better run with all species considered as then syntenic regions are discovered transivitely
grep -E "^(dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded|dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited)" syntenic_regions_succinct > syntenic_regions_2species
cat syntenic_regions_2species
Loaded 122 elements from GFF files
Total elements to process: 122
NT_033779.5 in genome GCF_000001215.4
NT_033779.5 in genome GCF_000001215.4
NT_037436.4 in genome GCF_000001215.4
NT_037436.4 in genome GCF_000001215.4
NT_037436.4 in genome GCF_000001215.4
NT_037436.4 in genome GCF_000001215.4
NT_037436.4 in genome GCF_000001215.4
NT_037436.4 in genome GCF_000001215.4
NT_037436.4 in genome GCF_000001215.4
NC_052527.2 in genome GCF_016746365.2
NC_052527.2 in genome GCF_016746365.2
NC_052529.2 in genome GCF_016746365.2
NC_052529.2 in genome GCF_016746365.2
NC_052529.2 in genome GCF_016746365.2
NC_052529.2 in genome GCF_016746365.2
NC_052529.2 in genome GCF_016746365.2
NC_052529.2 in genome GCF_016746365.2
NC_052529.2 in genome GCF_016746365.2
NW_024571981.1 in genome GCF_018153835.1
NW_024572964.1 in genome GCF_018153835.1
NW_024572964.1 in genome GCF_018153835.1
NW_024572964.1 in genome GCF_018153835.1
NW_024572964.1 in genome GCF_018153835.1
NW_024572964.1 in genome GCF_018153835.1
NW_024572964.1 in genome GCF_018153835.1
NW_024572964.1 in genome GCF_018153835.1
NW_024573038.1 in genome GCF_018153835.1
NW_024573454.1 in genome GCF_018153835.1
NC_045949.1 in genome GCF_004382195.2
NC_045949.1 in genome GCF_004382195.2
NC_045951.1 in genome GCF_004382195.2
NC_045951.1 in genome GCF_004382195.2
NC_045951.1 in genome GCF_004382195.2
NC_045951.1 in genome GCF_004382195.2
NC_045951.1 in genome GCF_004382195.2
NC_045951.1 in genome GCF_004382195.2
NC_045951.1 in genome GCF_004382195.2
NW_025814050.1 in genome GCF_018902025.1
NW_025814050.1 in genome GCF_018902025.1
NW_025814050.1 in genome GCF_018902025.1
NW_025814050.1 in genome GCF_018902025.1
NW_025814050.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NW_025814057.1 in genome GCF_018902025.1
NC_057928.1 in genome GCF_017639315.1
NC_057928.1 in genome GCF_017639315.1
NC_057928.1 in genome GCF_017639315.1
NC_057928.1 in genome GCF_017639315.1
NC_057928.1 in genome GCF_017639315.1
NC_057928.1 in genome GCF_017639315.1
NC_057928.1 in genome GCF_017639315.1
NC_057930.1 in genome GCF_017639315.1
NC_057930.1 in genome GCF_017639315.1
NC_046681.1 in genome GCF_009870125.1
NC_046681.1 in genome GCF_009870125.1
NC_046681.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_046683.1 in genome GCF_009870125.1
NC_052520.2 in genome GCF_016746395.2
NC_052520.2 in genome GCF_016746395.2
NC_052522.2 in genome GCF_016746395.2
NC_052522.2 in genome GCF_016746395.2
NC_052522.2 in genome GCF_016746395.2
NC_052522.2 in genome GCF_016746395.2
NC_052522.2 in genome GCF_016746395.2
NC_052522.2 in genome GCF_016746395.2
NC_052522.2 in genome GCF_016746395.2
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091545.1 in genome GCF_030788295.1
NC_091546.1 in genome GCF_030788295.1
NC_091546.1 in genome GCF_030788295.1
NW_027212798.1 in genome GCF_030788295.1
NW_027212798.1 in genome GCF_030788295.1
NW_027212798.1 in genome GCF_030788295.1
NW_027212798.1 in genome GCF_030788295.1
NW_027212798.1 in genome GCF_030788295.1
NC_091728.1 in genome GCF_030179895.1
NC_091728.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091730.1 in genome GCF_030179895.1
NC_091678.1 in genome GCF_030179915.1
NC_091678.1 in genome GCF_030179915.1
NC_091680.1 in genome GCF_030179915.1
NC_091680.1 in genome GCF_030179915.1
NC_091680.1 in genome GCF_030179915.1
NC_091680.1 in genome GCF_030179915.1
NC_091680.1 in genome GCF_030179915.1
NC_091680.1 in genome GCF_030179915.1
NC_091680.1 in genome GCF_030179915.1
iteration no 1
found 503 new regions after iteration 1
iteration no 2
found 8 new regions after iteration 2
iteration no 3
found 0 new regions after iteration 3
exiting before iteration no 4 since no new regions found
writing final output file
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	24341200	24572982	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	25648989	25684550	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	26566181	26644300	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	27193981	27447081	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	28077481	28777531	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	28974231	29177531	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	29389981	30505881	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	31705252	31905450	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	33089500	33249895	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	34709950	35707900	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	37012400	37046000	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	38059652	38189000	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	38625500	38980200	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	39379700	39485650	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	39755236	40164901	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	40820500	40911449	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	41651900	42269759	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	42455832	43395382	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	44036350	44336300	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	45334250	45720046	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_1	46363550	46925912	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	7445	136695	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	654650	891100	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	1560850	1665700	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	2863200	3398550	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	4235495	5450400	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	6054300	6092845	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	6422600	6615295	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	6950895	7244245	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	7490950	7557900	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	7820350	7909450	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	8614215	9239445	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	10023545	10131345	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	10615495	10645295	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	12330503	13949903	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	16595903	16720753	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	17774153	17990545	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	18164953	18228195	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	18752903	18811445	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	18970645	19095853	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	19438553	19689045	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	21561803	22063253	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	22436745	22590353	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded	scaffold_5	23037453	23360045	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	0	560377	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	1204877	1590608	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	2589258	2886327	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	3529966	4469616	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	4655790	5272453	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	6014035	6104077	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	6760663	7165077	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	7430327	7544127	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	7943026	8293477	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	8733077	8865118	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	9878218	9911668	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	11216568	12214727	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	13675605	13834377	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	15017654	15219204	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	16419557	17535407	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	17746177	17950527	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	18146007	18848057	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	19478507	19730607	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	20279377	20358257	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	21239227	21276581	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_2	22351773	22583725	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	7450	136724	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	654650	891100	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	1560824	1665700	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	2863200	3398550	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	4235524	5450400	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	6054300	6092850	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	6422600	6615324	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	6950924	7244424	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	7490950	7557900	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	7820350	7909124	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	8614215	9238174	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	10023574	10132874	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	10615524	10645303	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	12330503	13949903	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	16595903	16720753	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	17774153	17990374	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	18164953	18228224	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	18752903	18811424	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	18970674	19095853	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	19438553	19689074	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	21561774	22063253	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	22436774	22590353	NO BP
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited	scaffold_6	23037453	23360074	NO BP
In [13]:
%%bash

# lets execute the scripts to find potential hits in the two scaffolded versions

python3 extract_syntenic_fastas.py syntenic_regions_2species ../../utils/genomes syn_regions
cat halos/proteins/* > all_halos.faa
python3 run_blast_on_regions.py all_halos.faa tblastn --fastas_dir syn_regions/  --output_dir syn_regions_blast_out --clasp_path ~/clasp.x  --cores 8 --evalue 0.1 --word_size 2
python3 blast_results_to_synthology.py --deduplicate_overlaps --genomes_dir ../../utils/genomes --output_dir halos_from_syn_regions --mode protein syn_regions_blast_out
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: 44 regions extracted
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: 44 regions extracted
2 organisms with FASTA files
Organisms: ['dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded', 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited']
Done
2 organisms with temp_coords files
Deduplication of overlapping hits is ENABLED
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: Deduplicated 700 hits to 15 (685 merged)
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: 15 sequences extracted
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: Deduplicated 700 hits to 15 (685 merged)
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: 15 sequences extracted
Done
In [22]:
# now lets copy the other gffs and draw some images
# (i uncommented the link warning in the except clause at the end of the file to make the output shorter)
!cp halos/gff/* halos_from_syn_regions/gff/
!python3 draw_verbose.py 250000 --draw-all-elements --gff-dir halos_from_syn_regions/gff --alignments-file ../../utils/pairwise_alignments_table --auto-order

# here you can see that the scaffolded versions contribute one scaffold each corresponding to the whole syntenic region of interest
# while in the original D. eugracilis contig-level assembly this region is covered by 2 contigs (component 3)
# also, there are two more hits found based on sequence similarity which are in between the two canonical genes which are also found for 
# the scaffolded version (and present from the original annotations for the other species)

from IPython.display import SVG, display, Markdown
import glob
import os

svg_files = sorted(glob.glob('images_draw_verbose/250000/*.svg'))

for svg_file in svg_files:
    display(Markdown(f"### {os.path.basename(svg_file)}"))
    display(SVG(svg_file))
Auto-generating plotting_order...
Loaded 13 organisms from ../../utils/orgs
Calculating alignment coverage between organisms...
Generated plotting_order with 1811 chromosome tracks
Organism order: GCF_016746395.2 -> GCF_004382195.2 -> GCF_016746365.2 -> GCF_030179915.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited -> GCF_018153835.1 -> GCF_000001215.4 -> GCF_030179895.1 -> GCF_017639315.1 -> GCF_009870125.1 -> GCF_018902025.1 -> GCF_030788295.1
Parsing GFF files from halos_from_syn_regions/gff...
Found 13 organisms with 29 chromosomes
Using default reference: ('GCF_000001215.4', 'NT_033779.5', '1517732')
Finding all connected components (--draw-all-elements flag set)
Found 4 connected components
  Component 1: 83 elements across 13 chromosomes
    Chromosomes: {'GCF_004382195.2:NC_045951.1': 6, 'GCF_030179915.1:NC_091680.1': 6, 'GCF_009870125.1:NC_046683.1': 8, 'GCF_030788295.1:NC_091545.1': 7, 'GCF_000001215.4:NT_037436.4': 6}
  Component 2: 22 elements across 13 chromosomes
    Chromosomes: {'GCF_016746365.2:NC_052529.2': 1, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:scaffold_6': 4, 'GCF_018902025.1:NW_025814057.1': 4, 'GCF_004382195.2:NC_045951.1': 1, 'GCF_018153835.1:NW_024573038.1': 1}
  Component 3: 33 elements across 14 chromosomes
    Chromosomes: {'GCF_009870125.1:NC_046681.1': 3, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:scaffold_2': 4, 'GCF_016746395.2:NC_052520.2': 2, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:scaffold_1': 4, 'GCF_030179895.1:NC_091728.1': 2}
  Component 4: 2 elements across 2 chromosomes
    Chromosomes: {'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:scaffold_2': 1, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:scaffold_1': 1}

=== Processing Component 1/4 (83 elements) ===
After filtering, bib contains 13 chromosome entries
  GCF_016746395.2: ['NC_052522.2']
  GCF_004382195.2: ['NC_045951.1']
  GCF_000001215.4: ['NT_037436.4']
  GCF_016746365.2: ['NC_052529.2']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_5']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_6']
  GCF_018153835.1: ['NW_024572964.1']
  GCF_030179915.1: ['NC_091680.1']
  GCF_009870125.1: ['NC_046683.1']
  GCF_030788295.1: ['NC_091545.1']
  GCF_018902025.1: ['NW_025814057.1']
  GCF_030179895.1: ['NC_091730.1']
  GCF_017639315.1: ['NC_057928.1']
Final plotting order has 13 tracks
  (11 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_1_of_4_ref_GCF_004382195.2_NC_045951.1_4661886.svg

=== Processing Component 2/4 (22 elements) ===
After filtering, bib contains 13 chromosome entries
  GCF_016746395.2: ['NC_052522.2']
  GCF_004382195.2: ['NC_045951.1']
  GCF_000001215.4: ['NT_037436.4']
  GCF_016746365.2: ['NC_052529.2']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_5']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_6']
  GCF_018153835.1: ['NW_024573038.1']
  GCF_030179915.1: ['NC_091680.1']
  GCF_009870125.1: ['NC_046683.1']
  GCF_030788295.1: ['NC_091545.1']
  GCF_018902025.1: ['NW_025814057.1']
  GCF_030179895.1: ['NC_091730.1']
  GCF_017639315.1: ['NC_057928.1']
Final plotting order has 13 tracks
  (10 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_2_of_4_ref_GCF_016746365.2_NC_052529.2_13569546.svg

=== Processing Component 3/4 (33 elements) ===
After filtering, bib contains 14 chromosome entries
  GCF_016746395.2: ['NC_052520.2']
  GCF_004382195.2: ['NC_045949.1']
  GCF_000001215.4: ['NT_033779.5']
  GCF_016746365.2: ['NC_052527.2']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_1']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_2']
  GCF_018153835.1: ['NW_024573454.1', 'NW_024571981.1']
  GCF_030179915.1: ['NC_091678.1']
  GCF_009870125.1: ['NC_046681.1']
  GCF_030788295.1: ['NC_091546.1']
  GCF_018902025.1: ['NW_025814050.1']
  GCF_030179895.1: ['NC_091728.1']
  GCF_017639315.1: ['NC_057930.1']
Final plotting order has 14 tracks
  (8 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_3_of_4_ref_GCF_009870125.1_NC_046681.1_9910077.svg

=== Processing Component 4/4 (2 elements) ===
After filtering, bib contains 2 chromosome entries
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_1']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_2']
Final plotting order has 2 tracks
  (1 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_4_of_4_ref_dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited_scaffold_2_8246487.svg

component_1_of_4_ref_GCF_004382195.2_NC_045951.1_4661886.svg¶

No description has been provided for this image

component_2_of_4_ref_GCF_016746365.2_NC_052529.2_13569546.svg¶

No description has been provided for this image

component_3_of_4_ref_GCF_009870125.1_NC_046681.1_9910077.svg¶

No description has been provided for this image

component_4_of_4_ref_dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited_scaffold_2_8246487.svg¶

No description has been provided for this image
In [15]:
%%bash

# to check if the two additional hits are specific to the scaffolded versions or can actually also be traced down in 
# the other genomes, lets actually analyse all syntenic regions with tblastn to find potential hits from scratch

python3 extract_syntenic_fastas.py syntenic_regions_succinct ../../utils/genomes syn_regions_all
python3 run_blast_on_regions.py all_halos.faa tblastn --fastas_dir syn_regions_all/  --output_dir syn_regions_all_blast_out --clasp_path ~/clasp.x  --cores 8 --evalue 0.1 --word_size 2
python3 blast_results_to_synthology.py --deduplicate_overlaps --genomes_dir ../../utils/genomes --output_dir halos_from_syn_regions_all --mode protein syn_regions_all_blast_out
GCF_000001215.4: 40 regions extracted
GCF_016746365.2: 38 regions extracted
GCF_018153835.1: 55 regions extracted
GCF_004382195.2: 38 regions extracted
GCF_018902025.1: 66 regions extracted
GCF_017639315.1: 43 regions extracted
GCF_009870125.1: 43 regions extracted
GCF_016746395.2: 39 regions extracted
GCF_030788295.1: 61 regions extracted
GCF_030179895.1: 51 regions extracted
GCF_030179915.1: 50 regions extracted
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: 44 regions extracted
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: 44 regions extracted
13 organisms with FASTA files
Organisms: ['GCF_000001215.4', 'GCF_030788295.1', 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded', 'GCF_030179895.1', 'GCF_009870125.1', 'GCF_018902025.1', 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited', 'GCF_016746395.2', 'GCF_017639315.1', 'GCF_030179915.1', 'GCF_018153835.1', 'GCF_016746365.2', 'GCF_004382195.2']
Done
13 organisms with temp_coords files
Deduplication of overlapping hits is ENABLED
  GCF_009870125.1: Deduplicated 720 hits to 16 (704 merged)
GCF_009870125.1: 16 sequences extracted
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: Deduplicated 700 hits to 15 (685 merged)
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: 15 sequences extracted
  GCF_018902025.1: Deduplicated 673 hits to 14 (659 merged)
GCF_018902025.1: 14 sequences extracted
  GCF_030179895.1: Deduplicated 703 hits to 15 (688 merged)
GCF_030179895.1: 15 sequences extracted
  GCF_030788295.1: Deduplicated 1098 hits to 19 (1079 merged)
GCF_030788295.1: 19 sequences extracted
  GCF_000001215.4: Deduplicated 747 hits to 13 (734 merged)
GCF_000001215.4: 13 sequences extracted
  GCF_016746365.2: Deduplicated 748 hits to 12 (736 merged)
GCF_016746365.2: 12 sequences extracted
  GCF_004382195.2: Deduplicated 748 hits to 12 (736 merged)
GCF_004382195.2: 12 sequences extracted
  GCF_016746395.2: Deduplicated 745 hits to 12 (733 merged)
GCF_016746395.2: 12 sequences extracted
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: Deduplicated 700 hits to 15 (685 merged)
dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: 15 sequences extracted
  GCF_017639315.1: Deduplicated 750 hits to 13 (737 merged)
GCF_017639315.1: 13 sequences extracted
  GCF_030179915.1: Deduplicated 688 hits to 15 (673 merged)
GCF_030179915.1: 15 sequences extracted
  GCF_018153835.1: Deduplicated 700 hits to 14 (686 merged)
GCF_018153835.1: 14 sequences extracted
Done
In [23]:
# draw again
!rm images_draw_verbose/250000/component_*
!python3 draw_verbose.py 250000 --draw-all-elements --gff-dir halos_from_syn_regions_all/gff --alignments-file ../../utils/pairwise_alignments_table --auto-order

# here (component_1_of_4_ref_dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited_scaffold_2_5004486.svg)
# we see that now this additional region with a hit in also found in the original genome of D. eugracilis
# also this additional hit shows orthology based on the color of the anchors (which would show alignments 
# if the respective tracks were stacked) is present in D. yakuba (GCF_016746365.2)

from IPython.display import SVG, display, Markdown
import glob
import os

svg_files = sorted(glob.glob('images_draw_verbose/250000/*.svg'))

for svg_file in svg_files:
    display(Markdown(f"### {os.path.basename(svg_file)}"))
    display(SVG(svg_file))
Auto-generating plotting_order...
Loaded 13 organisms from ../../utils/orgs
Calculating alignment coverage between organisms...
Generated plotting_order with 1811 chromosome tracks
Organism order: GCF_016746395.2 -> GCF_004382195.2 -> GCF_016746365.2 -> GCF_030179915.1 -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded -> dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited -> GCF_018153835.1 -> GCF_000001215.4 -> GCF_030179895.1 -> GCF_017639315.1 -> GCF_009870125.1 -> GCF_018902025.1 -> GCF_030788295.1
Parsing GFF files from halos_from_syn_regions_all/gff...
Found 13 organisms with 31 chromosomes
Using default reference: ('GCF_000001215.4', 'NT_033779.5', '5552091')
Finding all connected components (--draw-all-elements flag set)
Found 4 connected components
  Component 1: 44 elements across 13 chromosomes
    Chromosomes: {'GCF_009870125.1:NC_046683.1': 3, 'GCF_018902025.1:NW_025814057.1': 4, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:scaffold_5': 4, 'GCF_018153835.1:NW_024573038.1': 3, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:scaffold_6': 4}
  Component 2: 89 elements across 13 chromosomes
    Chromosomes: {'GCF_030788295.1:NC_091545.1': 9, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:scaffold_5': 6, 'GCF_009870125.1:NC_046683.1': 8, 'GCF_004382195.2:NC_045951.1': 6, 'GCF_030179915.1:NC_091680.1': 7}
  Component 3: 43 elements across 15 chromosomes
    Chromosomes: {'GCF_004382195.2:NC_045949.1': 3, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:scaffold_2': 4, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:scaffold_1': 4, 'GCF_030179895.1:NC_091728.1': 5, 'GCF_030179915.1:NC_091678.1': 2}
  Component 4: 3 elements across 3 chromosomes
    Chromosomes: {'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited:scaffold_2': 1, 'dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded:scaffold_1': 1, 'GCF_018153835.1:NW_024573245.1': 1}

=== Processing Component 1/4 (44 elements) ===
After filtering, bib contains 13 chromosome entries
  GCF_016746395.2: ['NC_052522.2']
  GCF_004382195.2: ['NC_045951.1']
  GCF_000001215.4: ['NT_037436.4']
  GCF_016746365.2: ['NC_052529.2']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_5']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_6']
  GCF_018153835.1: ['NW_024573038.1']
  GCF_030179915.1: ['NC_091680.1']
  GCF_009870125.1: ['NC_046683.1']
  GCF_030788295.1: ['NC_091545.1']
  GCF_018902025.1: ['NW_025814057.1']
  GCF_030179895.1: ['NC_091730.1']
  GCF_017639315.1: ['NC_057928.1']
Final plotting order has 13 tracks
  (11 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_1_of_4_ref_GCF_009870125.1_NC_046683.1_48191116.svg

=== Processing Component 2/4 (89 elements) ===
After filtering, bib contains 13 chromosome entries
  GCF_016746395.2: ['NC_052522.2']
  GCF_004382195.2: ['NC_045951.1']
  GCF_000001215.4: ['NT_037436.4']
  GCF_016746365.2: ['NC_052529.2']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_5']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_6']
  GCF_018153835.1: ['NW_024572964.1']
  GCF_030179915.1: ['NC_091680.1']
  GCF_009870125.1: ['NC_046683.1']
  GCF_030788295.1: ['NC_091545.1']
  GCF_018902025.1: ['NW_025814057.1']
  GCF_030179895.1: ['NC_091730.1']
  GCF_017639315.1: ['NC_057928.1']
Final plotting order has 13 tracks
  (11 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_2_of_4_ref_GCF_030788295.1_NC_091545.1_1206875.svg

=== Processing Component 3/4 (43 elements) ===
After filtering, bib contains 15 chromosome entries
  GCF_016746395.2: ['NC_052520.2']
  GCF_004382195.2: ['NC_045949.1']
  GCF_000001215.4: ['NT_033779.5']
  GCF_016746365.2: ['NC_052527.2']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_1']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_2']
  GCF_018153835.1: ['NW_024573454.1', 'NW_024571981.1', 'NW_024572675.1']
  GCF_030179915.1: ['NC_091678.1']
  GCF_009870125.1: ['NC_046681.1']
  GCF_030788295.1: ['NC_091546.1']
  GCF_018902025.1: ['NW_025814050.1']
  GCF_030179895.1: ['NC_091728.1']
  GCF_017639315.1: ['NC_057930.1']
Final plotting order has 15 tracks
  (8 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_3_of_4_ref_GCF_004382195.2_NC_045949.1_5461905.svg

=== Processing Component 4/4 (3 elements) ===
After filtering, bib contains 3 chromosome entries
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded: ['scaffold_1']
  dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited: ['scaffold_2']
  GCF_018153835.1: ['NW_024573245.1']
Final plotting order has 3 tracks
  (1 with default forward orientation)
Saved figure to images_draw_verbose/250000/component_4_of_4_ref_dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited_scaffold_2_8246487.svg

component_1_of_4_ref_GCF_009870125.1_NC_046683.1_48191116.svg¶

No description has been provided for this image

component_2_of_4_ref_GCF_030788295.1_NC_091545.1_1206875.svg¶

No description has been provided for this image

component_3_of_4_ref_GCF_004382195.2_NC_045949.1_5461905.svg¶

No description has been provided for this image

component_4_of_4_ref_dee0396f-f6ba-4ef8-b178-1cfd2589e381_GCF_018153835.1_scaffolded_limited_scaffold_2_8246487.svg¶

No description has been provided for this image
In [ ]: