!python --version
!pip freeze
import import_ipynb
import os, sys
sys.path.append(os.path.abspath(os.path.join(sys.path[0], '..')))
import common.SGC as SGC
import itertools
import numpy as np
import matplotlib.pyplot as plt
nucleotides=['A','T','C','G']
def codonOrdering():
'''Generator function which return tuples with:
-nucleotide order
-position order
-codons sorted based on nucleotide and position order i ndarray.'''
nucleotides=['A','T','G','C']
for nucOrder in itertools.permutations(nucleotides):
triplets=list(itertools.product(nucOrder,repeat=3))
positionPerGen=itertools.permutations([list(np.array(triplets).T[i]) for i in range(3)])
for positionOrder in itertools.permutations([1,2,3]):
positionPer=np.array(next(positionPerGen)).T
yield (nucOrder,positionOrder,positionPer)
def calculateBlockConductance(setOfCodons):
"""Take set of all codons which are in one block
and calculate conductance."""
sumOfOutEdges=0
for codon in setOfCodons:
for pos in range(3):
for nuc in nucleotides:
nucList=list(codon)
nucList[pos]=nuc
newCodon="".join(nucList)
if newCodon not in setOfCodons:
sumOfOutEdges+=1
return sumOfOutEdges/(len(setOfCodons)*9)
def generateCodonsAndAA(nucOrder,posOrder):
"""Take nucleotide order and position order, and
return codons and aminoacids in defined order."""
triplets=list(itertools.product(nucOrder,repeat=3))
triplets=np.array(triplets)
codons=np.vstack((triplets[:,posOrder[0]-1],triplets[:,posOrder[1]-1],triplets[:,posOrder[2]-1])).T
codons=np.array([''.join(codons[i,:]) for i in range(64)])
aa=np.array([SGC.codonToAmin[cod] for cod in codons])
return (codons,aa)
def avarangeConductance(codons, aa):
"""Take codons and coresponding aminoacids and
calculate avarange conductance."""
if len(aa)==0:
return 0
countOfAA={}
for i in range(len(aa)):
if aa[i] not in countOfAA:
countOfAA[aa[i]]=set()
countOfAA[aa[i]].add(codons[i])
sumOfConductance=0
for a in countOfAA:
sumOfConductance+=calculateBlockConductance(countOfAA[a])
return sumOfConductance/len(countOfAA)
def generateAllMinCodes():
"""For each code defined by lexicographic order check how many codons
are needed to encode all aminoacids. Return dictionary of minCodons:set of code"""
licznik=0
codes={}
for order in codonOrdering():
licznik+=1
codonNucOrder=order[2]
codonOrder=[''.join(codonNucOrder[i]) for i in range(64)]
foundedAA=set()
i=0
for codon in codonOrder:
i+=1
if SGC.codonToAmin[codon] not in foundedAA:
foundedAA.add(SGC.codonToAmin[codon])
if len(foundedAA) == len(SGC.aminToCodons):
if i not in codes:
codes[i]=set()
codes[i].add((order[0],order[1]))
break
return codes
def balance(condOfOld, condOfNew):
"""Take avg. conductance of old part and avg. conductance of new part
and return balance between this two parts."""
return condOfNew/condOfOld
def minAvgCondForCodes(K, returnMinCode=False):
"""
Take:
K - length of old part of code. This part have K first codons in leksicographic order.
returnMinCode - if true return (nucOrder,posOrder) as second argument
Return:
minAvgCond - minimum of avg. conductance for old code part calculated from all possible codes,
which with K codons encode all standard aminoacids.
"""
minCodes=generateAllMinCodes()
minAvg=2
minCode=None
for k in range(K+1):
if k not in minCodes:
continue
for nucOrder, posOrder in minCodes[k]:
codons,aa=generateCodonsAndAA(nucOrder, posOrder)
avgCond=avarangeConductance(codons[:K],aa[:K])
if minAvg>avgCond:
minAvg=avgCond
minCode=(nucOrder,posOrder)
if returnMinCode:
return minAvg, minCode
return minAvg
def complementCode(freeCodons):
"""Take codons which dont encode aminoacids and
group they into blocks with diffrent last nucleotide."""
blocks={}
for codon in freeCodons:
if codon[:-1] not in blocks:
blocks[codon[:-1]]=set()
blocks[codon[:-1]].add(codon)
return blocks
def printCode(nucOrder, posOrder, k, file=sys.stdout, header=None):
codonsList,aaList=generateCodonsAndAA(nucOrder, posOrder)
codonsList=codonsList[:k]
aaList=aaList[:k]
nucleotides=['T','C','A','G']
print(r"\begin{tabular}{|l|l|l|l|}", file=file)
if header is not None:
print(r"\hline", file=file)
print(r"\multicolumn{4}{|c|}{\tiny{",header,r"}}\\", file=file)
print(r"\hline", file=file)
for n1 in nucleotides:
for n3 in nucleotides:
for n2 in nucleotides:
DNAcodon=n1+n2+n3
aa=''
if np.any(codonsList==DNAcodon):
aa=aaList[codonsList==DNAcodon].item()
n1p=n1
n2p=n2
n3p=n3
if n1=='T':
n1p='U'
if n2=='T':
n2p='U'
if n3=='T':
n3p='U'
if aa=='':
print(r"\textcolor{blue}{",n1p+n2p+n3p,"}", end="", file=file)
else:
print(r"\textcolor{red}{",n1p+n2p+n3p,"}",aa, end="", file=file)
if n2!='G':
print(" & ",end='', file=file)
else:
print(r"\\", file=file)
print(r"\hline", file=file)
print(r"\end{tabular}", file=file)