In [4]:
# Report the version of the interpreter that is actually running this kernel.
# A shell-level `python` is looked up on PATH and may be a different
# installation from the one executing the notebook.
from platform import python_version
print("Python", python_version())
Python 3.9.1
In [3]:
!pip freeze
appdirs==1.4.4
apsw==3.34.0.post1
argon2-cffi==20.1.0
asn1crypto==1.4.0
async-generator==1.10
attrs==20.3.0
audioread==2.1.9
autograd==1.3
backcall==0.2.0
beautifulsoup4==4.9.3
bleach==3.3.0
bokeh==2.2.3
Brlapi==0.8.2
btrfsutil==5.10.1
CacheControl==0.12.6
cffi==1.14.5
chardet==3.0.4
click==7.1.2
cloudpickle==1.6.0
cma==2.7.0
colorama==0.4.4
contextlib2==0.6.0.post1
cryptography==3.3.1
css-parser==1.0.6
cssselect==1.1.0
cycler==0.10.0
Cython==0.29.21
dask==2020.12.0
decorator==4.4.2
defusedxml==0.6.0
distlib==0.3.1
distributed==2020.12.0
distro==1.5.0
dnspython==1.16.0
docopt==0.6.2
entrypoints==0.3
evdev==1.4.0
feedparser==5.2.1
fsspec==0.8.5
future==0.18.2
HeapDict==1.0.1
html2text==2020.1.16
html5-parser==0.4.9
html5lib==1.1
idna==2.10
ifaddr==0.1.7
import-ipynb==0.1.3
importlib-metadata==3.4.0
ipykernel==5.4.2
ipython==7.19.0
ipython-genutils==0.2.0
ipywidgets==7.6.2
isc==2.0
jedi==0.17.2
Jinja2==2.11.3
joblib==1.0.0
jsonschema==3.2.0
jupyter-client==6.1.7
jupyter-console==6.2.0
jupyter-core==4.6.3
jupyterlab-pygments==0.1.2
keyutils==0.6
kiwisolver==1.3.1
lensfun==0.3.95
libfdt==1.6.0
librosa==0.8.0
libvirt-python==6.4.0
lit==0.10.1.dev0
llvmlite==0.34.0
locket==0.2.0
louis==3.16.0
lutris==0.5.8.3
lxml==4.6.2
Markdown==3.3.3
MarkupSafe==1.1.1
matplotlib==3.3.4
mechanize==0.4.5
meson==0.57.0
mistune==0.8.4
msgpack==1.0.2
nbclient==0.5.1
nbconvert==5.6.1
nbformat==5.0.8
nest-asyncio==1.4.3
netifaces==0.10.9
networkx==2.5
nftables==0.1
notebook==6.2.0
numba==0.51.2
numpy==1.20.0
ordered-set==4.0.2
packaging==20.9
pandas==1.1.5
pandocfilters==1.4.3
parso==0.7.1
partd==1.1.0
pep517==0.9.1
pexpect==4.8.0
pickleshare==0.7.5
Pillow==8.1.0
plotly==4.14.1
ply==3.11
pomegranate==0.14.0
pooch==1.3.0
progress==1.5
prometheus-client==0.9.0
prompt-toolkit==3.0.16
psutil==5.8.0
ptyprocess==0.7.0
py7zr==0.11.3
pybind11==2.6.2
pycairo==1.20.0
pychm==0.8.6
pycparser==2.20
pycryptodome==3.10.1
pydub==0.24.1
Pygments==2.8.0
PyGObject==3.38.0
pymoo==0.4.2.1
pyOpenSSL==20.0.1
pyparsing==2.4.7
PyQt5==5.15.2
PyQt5-sip==12.8.1
PyQtWebEngine==5.15.2
pyrsistent==0.17.3
python-dateutil==2.8.1
pytz==2020.4
pyxdg==0.26
PyYAML==5.3.1
pyzmq==20.0.0
regex==2020.11.13
requests==2.25.1
resampy==0.2.2
resolvelib==0.5.4
retrying==1.3.3
scikit-learn==0.24.0
scipy==1.6.1
seaborn==0.11.1
Send2Trash==1.5.0
six==1.15.0
slip==0.6.5
slip.dbus==0.6.5
sortedcontainers==2.3.0
SoundFile==0.10.3.post1
soupsieve==2.2
sox==1.4.1
tblib==1.7.0
terminado==0.9.2
testpath==0.4.4
texttable==1.6.3
threadpoolctl==2.1.0
toml==0.10.2
toolz==0.11.1
tornado==6.1
tqdm==4.55.1
traitlets==5.0.5
typing-extensions==3.7.4.3
udiskie==2.3.2
unrardll==0.1.4
urllib3==1.26.3
wcwidth==0.2.5
webencodings==0.5.1
widgetsnbextension==3.5.1
xgboost==1.3.3
youtube-dl==2021.2.10
zeroconf==0.28.8
zict==2.0.0
zipp==3.4.0
In [2]:
# Imported for its side effect: it lets `import` load .ipynb notebooks
# (presumably `common.SGC` below is such a notebook - confirm in the repo).
import import_ipynb
import os, sys
# Add the parent directory of sys.path[0] to the module search path so that
# the `common` package can be resolved from this notebook's location.
sys.path.append(os.path.abspath(os.path.join(sys.path[0], '..')))
import common.SGC as SGC
import itertools
import numpy as np
import matplotlib.pyplot as plt
importing Jupyter notebook from /mnt/DANE/Dokumenty/Informatyka/Projekty/optimal-egc/Notebooks/common/SGC.ipynb
In [ ]:
# Module-level DNA alphabet: A, T, C, G.
nucleotides = list('ATCG')
In [2]:
def codonOrdering():
    '''Yield every codon ordering induced by a nucleotide and a position order.

    For each of the 24 permutations of the nucleotides A, T, G, C and each
    of the 6 permutations of the positions (1, 2, 3) a tuple is yielded:

    - nucOrder: tuple with the nucleotide permutation,
    - positionOrder: tuple with the position permutation (1-based),
    - ndarray of shape (64, 3) whose rows are the codons; column j holds
      the nucleotides taken from position positionOrder[j] of the
      triplets enumerated in nucOrder order.
    '''
    bases = ['A', 'T', 'G', 'C']
    for nucOrder in itertools.permutations(bases):
        # One row per codon position, one column per triplet (shape 3 x 64).
        perPosition = np.array(list(itertools.product(nucOrder, repeat=3))).T
        for positionOrder in itertools.permutations([1, 2, 3]):
            rowPick = [position - 1 for position in positionOrder]
            reordered = perPosition[rowPick].T
            yield (nucOrder, positionOrder, reordered)
In [25]:
def calculateBlockConductance(setOfCodons, alphabet=('A', 'T', 'C', 'G')):
    """Return the conductance of one block of codons.

    Two codons are neighbours when they differ in exactly one of the three
    positions. The result is the number of (codon, neighbour) pairs whose
    neighbour lies outside the block, divided by the total number of
    neighbours of the block's codons (9 per codon for a 4-letter alphabet).

    Parameters
    ----------
    setOfCodons : iterable of str
        Three-letter codons that form the block. The iterable is
        materialised as a set, so a repeated codon counts once.
    alphabet : iterable of str, optional
        Letters that may be substituted at each position. Defaults to the
        DNA nucleotides, so the function does not depend on any
        module-level variable having been defined beforehand.

    Returns
    -------
    float
        Value between 0 (no substitution leaves the block) and 1 (every
        substitution leaves the block).

    Raises
    ------
    ZeroDivisionError
        If the block is empty.
    """
    codonLength = 3
    block = set(setOfCodons)
    # Drop repeated letters but keep their order, so the degree is exact.
    letters = tuple(dict.fromkeys(alphabet))
    sumOfOutEdges = 0
    for codon in block:
        for pos in range(codonLength):
            for nuc in letters:
                if nuc == codon[pos]:
                    # Substituting a letter by itself gives the codon back,
                    # which is inside the block by definition.
                    continue
                neighbour = codon[:pos] + nuc + codon[pos + 1:]
                if neighbour not in block:
                    sumOfOutEdges += 1
    neighboursPerCodon = codonLength * (len(letters) - 1)
    return sumOfOutEdges / (len(block) * neighboursPerCodon)
In [1]:
def generateCodonsAndAA(nucOrder,posOrder):
    """Build the 64 codons, and their amino acids, for a given ordering.

    The triplets over nucOrder are enumerated with itertools.product and
    their columns are rearranged so that column j of the result is taken
    from position posOrder[j] (1-based) of each triplet.

    Returns a tuple (codons, aa) of two ndarrays of length 64: the codon
    strings and the entries of SGC.codonToAmin looked up for each codon.
    """
    baseTriplets = np.array(list(itertools.product(nucOrder, repeat=3)))
    # posOrder is 1-based; convert it to column indices into the triplets.
    columnPick = [posOrder[0] - 1, posOrder[1] - 1, posOrder[2] - 1]
    rearranged = baseTriplets[:, columnPick]
    codonStrings = []
    for row in range(64):
        codonStrings.append(''.join(rearranged[row, :]))
    codons = np.array(codonStrings)
    aminoAcids = [SGC.codonToAmin[codon] for codon in codons]
    return (codons, np.array(aminoAcids))
In [2]:
def avarangeConductance(codons, aa):
    """Return the mean block conductance of a (partial) code.

    codons[i] is the codon assigned to amino acid aa[i]. Codons sharing an
    amino acid form one block; the conductance of every block is computed
    with calculateBlockConductance and the values are averaged over the
    blocks. Returns 0 when aa is empty.
    """
    if len(aa) == 0:
        return 0

    # amino acid -> set of codons assigned to it
    blocksByAmino = {}
    for idx, amino in enumerate(aa):
        blocksByAmino.setdefault(amino, set()).add(codons[idx])

    total = sum(calculateBlockConductance(block) for block in blocksByAmino.values())
    return total / len(blocksByAmino)
In [12]:
def generateAllMinCodes():
    """Group the orderings from codonOrdering by how many codons they need.

    For every ordering the codons are read in order until as many distinct
    entries of SGC.codonToAmin have been seen as there are keys in
    SGC.aminToCodons. The 1-based number of codons read at that moment is
    the dictionary key; the value is the set of (nucOrder, posOrder) pairs
    that reach completeness at exactly that count. An ordering that never
    reaches it is left out.
    """
    required = len(SGC.aminToCodons)
    codes = {}
    for nucOrder, posOrder, codonMatrix in codonOrdering():
        seen = set()
        for usedCodons, row in enumerate(codonMatrix, start=1):
            amino = SGC.codonToAmin[''.join(row)]
            if amino in seen:
                continue
            seen.add(amino)
            if len(seen) == required:
                codes.setdefault(usedCodons, set()).add((nucOrder, posOrder))
                break
    return codes
In [1]:
def balance(condOfOld, condOfNew):
    """Return the balance between the two parts of a code.

    The balance is the average conductance of the new part divided by the
    average conductance of the old part. A zero condOfOld raises
    ZeroDivisionError.
    """
    ratio = condOfNew / condOfOld
    return ratio
In [2]:
def minAvgCondForCodes(K, returnMinCode=False):
    """Find the smallest average conductance of an old code part of length K.

    Parameters:
    K - length of the old part of the code, i.e. its first K codons in the
        ordering produced by generateCodonsAndAA.
    returnMinCode - when true, the (nucOrder, posOrder) pair that attains
        the minimum is returned as a second value.

    Returns:
    The minimum of avarangeConductance over the first K codons, taken over
    every ordering that generateAllMinCodes reports as complete within at
    most K codons. The search starts from the sentinel value 2, so when no
    ordering qualifies the result is 2 (and the code is None).
    """
    candidates = generateAllMinCodes()
    best = 2
    bestCode = None
    for k in range(K + 1):
        for nucOrder, posOrder in candidates.get(k, ()):
            codons, aa = generateCodonsAndAA(nucOrder, posOrder)
            current = avarangeConductance(codons[:K], aa[:K])
            if current < best:
                best = current
                bestCode = (nucOrder, posOrder)
    return (best, bestCode) if returnMinCode else best
In [1]:
def complementCode(freeCodons):
    """Group codons that do not encode amino acids into blocks.

    Codons that agree on everything except their last nucleotide end up in
    the same block. Returns a dictionary that maps the shared prefix
    (the codon without its last element) to the set of codons having it.
    """
    grouped = {}
    for freeCodon in freeCodons:
        prefix = freeCodon[:-1]
        grouped.setdefault(prefix, set()).add(freeCodon)
    return grouped
In [3]:
def printCode(nucOrder, posOrder, k, file=sys.stdout, header=None):
    """Write a LaTeX tabular with the codon table of a partial code.

    The code consists of the first k codons (and their amino acids) of the
    ordering returned by generateCodonsAndAA(nucOrder, posOrder). The table
    lists all 64 codons with T shown as U: rows run over the first and
    third nucleotide, columns over the second one, each in the order
    T, C, A, G. A codon that belongs to the code is typeset in red and
    followed by its amino acid; any other codon (or one whose amino acid
    is the empty string) is typeset in blue.

    file - stream the table is written to. NOTE: the default is the
        sys.stdout object that existed when this function was defined.
    header - optional caption placed in a row spanning all four columns.
    """
    allCodons, allAminos = generateCodonsAndAA(nucOrder, posOrder)
    usedCodons = allCodons[:k]
    usedAminos = allAminos[:k]
    tableBases = ('T', 'C', 'A', 'G')
    lastColumn = 'G'

    def emit(*parts, **options):
        # Every piece of output goes to the requested stream.
        print(*parts, file=file, **options)

    emit(r"\begin{tabular}{|l|l|l|l|}")
    if header is not None:
        emit(r"\hline")
        emit(r"\multicolumn{4}{|c|}{\tiny{", header, r"}}\\")

    emit(r"\hline")
    for first in tableBases:
        for third in tableBases:
            for second in tableBases:
                dnaCodon = first + second + third
                hits = usedCodons == dnaCodon
                amino = usedAminos[hits].item() if np.any(hits) else ''
                # The table is printed in RNA notation.
                rnaCodon = dnaCodon.replace('T', 'U')
                if amino == '':
                    emit(r"\textcolor{blue}{", rnaCodon, "}", end="")
                else:
                    emit(r"\textcolor{red}{", rnaCodon, "}", amino, end="")
                if second == lastColumn:
                    # Last column: close the table row.
                    emit(r"\\")
                else:
                    emit(" & ", end='')
        emit(r"\hline")
    emit(r"\end{tabular}")
In [ ]: