In [1]:
import import_ipynb
import SGC
import itertools
import numpy as np
importing Jupyter notebook from SGC.ipynb

From the documentation of `itertools.product`:

The nested loops cycle like an odometer with the rightmost element advancing on every iteration. This pattern creates a lexicographic ordering so that if the input’s iterables are sorted, the product tuples are emitted in sorted order.

In [2]:
def codonOrdering():
    '''Generate all codon listings obtained by ranking nucleotides and codon positions.

    Yields one tuple per (nucleotide order, position order) combination:
    - nucOrder: a permutation of ('A','T','G','C') used as the sort order,
    - positionOrder: a permutation of (1,2,3); entry k names the column of the
      lexicographic listing that is placed at codon position k,
    - a (64, 3) ndarray of single characters with the codons in that order.
    '''
    for nucOrder in itertools.permutations(['A','T','G','C']):
        # Odometer-style listing of all triplets: the last column changes fastest.
        baseTable=np.array(list(itertools.product(nucOrder,repeat=3)))
        for positionOrder in itertools.permutations([1,2,3]):
            # positionOrder is 1-based; convert to column indices of baseTable.
            columns=[p-1 for p in positionOrder]
            yield (nucOrder,positionOrder,baseTable[:,columns])
In [3]:
# Exhaustive search over every ordering produced by codonOrdering(): walk each
# codon listing and record how many codons are needed before the number of
# distinct SGC.codonToAmin values seen equals len(SGC.aminToCodons).
# Only the orderings with the shortest such prefix are kept.
bestlength=64        # upper bound: the whole 64-codon table
bestNucOrder=[]      # nucleotide orders that reach bestlength
bestPosOrder=[]      # position orders, parallel to bestNucOrder
licznik=0            # counter of examined orderings
for order in codonOrdering():
    licznik+=1
    nucOrder,posOrder,codonRows=order
    codonOrder=[''.join(row) for row in codonRows]
    foundedAA=set()

    for i,codon in enumerate(codonOrder,start=1):
        amino=SGC.codonToAmin[codon]
        if amino in foundedAA:
            continue
        foundedAA.add(amino)
        if len(foundedAA) != len(SGC.aminToCodons):
            continue
        # Everything has been seen after i codons: tie extends the lists,
        # a strictly shorter prefix replaces them.
        if bestlength==i:
            bestNucOrder.append(nucOrder)
            bestPosOrder.append(posOrder)
        elif bestlength>i:
            bestlength=i
            bestNucOrder=[nucOrder]
            bestPosOrder=[posOrder]
        break
print(bestlength)
print(bestNucOrder)
print(bestPosOrder)
28
[('T', 'G', 'A', 'C'), ('G', 'T', 'A', 'C')]
[(3, 2, 1), (3, 2, 1)]
In [4]:
# Number of orderings the search loop visited (incremented once per ordering).
licznik
Out[4]:
144
In [5]:
# Expected count: 6 permutations of the three positions times 24 permutations
# of the four nucleotides.
6*24
Out[5]:
144

TGAC

Generate dot file

In [6]:
def generateDot(codons, name):
    '''Write a Graphviz file linking codons that differ at exactly one position.

    codons -- iterable of codons; each item is a sequence of three
              single-character strings (tuple, ndarray row or plain string).
    name   -- path of the dot file to create (overwritten if it exists).

    Every adjacent pair is written twice, once from each endpoint; the graph
    is declared "strict" so duplicate edges collapse into one.
    '''
    codons=[''.join(c) for c in codons]
    known=set(codons)  # constant-time membership test instead of a list scan
    # The with-block closes (and therefore flushes) the file even if an
    # exception is raised; the handle was previously never closed.
    with open(name,"w") as out:
        print("strict graph {", file=out)
        for c in codons:
            for i in range(3):
                for n in ['A','T','C','G']:
                    # Candidate neighbour: c with position i replaced by n.
                    c1=c[:i]+n+c[i+1:]
                    if c1!=c and c1 in known:
                        print(c1,"--",c,file=out)
        print("}", file=out)
In [7]:
# All 64 triplets in odometer order for the nucleotide ranking T, G, A, C
# (third position changes fastest), as a (64, 3) array of characters.
orig=np.array([triplet for triplet in itertools.product("TGAC",repeat=3)])
In [8]:
# Apply position order (3, 2, 1): mirror the columns of `orig` so that the
# first codon position becomes the fastest-changing one.
order=orig[:,::-1].copy()
codons=[''.join(triplet) for triplet in order]
# Translate every codon, then keep the amino-acid symbols of the first 28.
aminoAll=[SGC.codonToAmin[cod] for cod in codons]
aa=np.array(aminoAll[:28])
print(aa)
# Number of distinct symbols within that 28-codon prefix.
len(np.unique(aa))
['F' 'V' 'I' 'L' 'C' 'G' 'S' 'R' 'Y' 'D' 'N' 'H' 'S' 'A' 'T' 'P' 'L' 'V'
 'M' 'L' 'W' 'G' 'R' 'R' 'X' 'E' 'K' 'Q']
Out[8]:
21
In [9]:
# Display the first 28 rows of `order`, i.e. the codons whose amino acids
# were printed in the previous cell.
order[:28]
Out[9]:
array([['T', 'T', 'T'],
       ['G', 'T', 'T'],
       ['A', 'T', 'T'],
       ['C', 'T', 'T'],
       ['T', 'G', 'T'],
       ['G', 'G', 'T'],
       ['A', 'G', 'T'],
       ['C', 'G', 'T'],
       ['T', 'A', 'T'],
       ['G', 'A', 'T'],
       ['A', 'A', 'T'],
       ['C', 'A', 'T'],
       ['T', 'C', 'T'],
       ['G', 'C', 'T'],
       ['A', 'C', 'T'],
       ['C', 'C', 'T'],
       ['T', 'T', 'G'],
       ['G', 'T', 'G'],
       ['A', 'T', 'G'],
       ['C', 'T', 'G'],
       ['T', 'G', 'G'],
       ['G', 'G', 'G'],
       ['A', 'G', 'G'],
       ['C', 'G', 'G'],
       ['T', 'A', 'G'],
       ['G', 'A', 'G'],
       ['A', 'A', 'G'],
       ['C', 'A', 'G']], dtype='<U1')
In [10]:
# Write the graph of single-position neighbours among these 28 codons to tgac.dot.
generateDot(order[:28], "tgac.dot")

GTAC

In [16]:
# All 64 triplets in odometer order for the nucleotide ranking G, T, A, C
# (third position changes fastest), as a (64, 3) array of characters.
orig=np.array([triplet for triplet in itertools.product("GTAC",repeat=3)])
In [17]:
# Apply position order (3, 2, 1): mirror the columns of `orig` so that the
# first codon position becomes the fastest-changing one.
order=orig[:,::-1].copy()
codons=[''.join(triplet) for triplet in order]
# Translate every codon, then keep the amino-acid symbols of the first 28.
aminoAll=[SGC.codonToAmin[cod] for cod in codons]
aa=np.array(aminoAll[:28])
print(aa)
# Number of distinct symbols within that 28-codon prefix.
len(np.unique(aa))
['G' 'W' 'R' 'R' 'V' 'L' 'M' 'L' 'E' 'X' 'K' 'Q' 'A' 'S' 'T' 'P' 'G' 'C'
 'S' 'R' 'V' 'F' 'I' 'L' 'D' 'Y' 'N' 'H']
Out[17]:
21
In [18]:
# Display the rows of `order` from index 28 onward, i.e. the codons that lie
# outside the 28-codon prefix.
order[28:]
Out[18]:
array([['G', 'C', 'T'],
       ['T', 'C', 'T'],
       ['A', 'C', 'T'],
       ['C', 'C', 'T'],
       ['G', 'G', 'A'],
       ['T', 'G', 'A'],
       ['A', 'G', 'A'],
       ['C', 'G', 'A'],
       ['G', 'T', 'A'],
       ['T', 'T', 'A'],
       ['A', 'T', 'A'],
       ['C', 'T', 'A'],
       ['G', 'A', 'A'],
       ['T', 'A', 'A'],
       ['A', 'A', 'A'],
       ['C', 'A', 'A'],
       ['G', 'C', 'A'],
       ['T', 'C', 'A'],
       ['A', 'C', 'A'],
       ['C', 'C', 'A'],
       ['G', 'G', 'C'],
       ['T', 'G', 'C'],
       ['A', 'G', 'C'],
       ['C', 'G', 'C'],
       ['G', 'T', 'C'],
       ['T', 'T', 'C'],
       ['A', 'T', 'C'],
       ['C', 'T', 'C'],
       ['G', 'A', 'C'],
       ['T', 'A', 'C'],
       ['A', 'A', 'C'],
       ['C', 'A', 'C'],
       ['G', 'C', 'C'],
       ['T', 'C', 'C'],
       ['A', 'C', 'C'],
       ['C', 'C', 'C']], dtype='<U1')
In [14]:
# Count the distinct amino-acid symbols among the first 27 entries of `aa`,
# for comparison with the count over all 28.
len(np.unique(aa[:27]))
Out[14]:
20
In [15]:
# Write the graph of single-position neighbours among the first 28 codons to gtac.dot.
generateDot(order[:28], "gtac.dot")
In [34]:
# Number of codon strings currently held in `codons`.
len(codons)
Out[34]:
64
In [37]:
# Map every codon outside the 28-codon prefix to its index (28..63) in `codons`.
C28prim={codons[idx]: idx for idx in range(28,64)}
In [48]:
# Print the body of a LaTeX codon table: one block of four rows per first
# nucleotide, the second nucleotide running across a row and the third down
# the rows. Codons present in C28prim are blue and tagged "A<k>", where k is
# their index in groups of four counted from index 28; all others are red.
tableOrder=["T","C","A","G"]
for first in tableOrder:
    for j in range(16):
        c=first+tableOrder[j%4]+tableOrder[j//4]
        inTail=c in C28prim
        cell=r"\textcolor{blue}{" if inTail else r"\textcolor{red}{"
        cell+=c+"} "
        if inTail:
            cell+="A"+str((C28prim[c]//4)-7)+" "
        # Fourth column closes the row, otherwise emit a column separator.
        cell+=r"\\"+"\n" if (j+1)%4==0 else "& "
        print(cell,end="")
    print(r"\hline")
    
\textcolor{red}{TTT} & \textcolor{blue}{TCT} A0 & \textcolor{red}{TAT} & \textcolor{red}{TGT} \\
\textcolor{blue}{TTC} A6 & \textcolor{blue}{TCC} A8 & \textcolor{blue}{TAC} A7 & \textcolor{blue}{TGC} A5 \\
\textcolor{blue}{TTA} A2 & \textcolor{blue}{TCA} A4 & \textcolor{blue}{TAA} A3 & \textcolor{blue}{TGA} A1 \\
\textcolor{red}{TTG} & \textcolor{red}{TCG} & \textcolor{red}{TAG} & \textcolor{red}{TGG} \\
\hline
\textcolor{red}{CTT} & \textcolor{blue}{CCT} A0 & \textcolor{red}{CAT} & \textcolor{red}{CGT} \\
\textcolor{blue}{CTC} A6 & \textcolor{blue}{CCC} A8 & \textcolor{blue}{CAC} A7 & \textcolor{blue}{CGC} A5 \\
\textcolor{blue}{CTA} A2 & \textcolor{blue}{CCA} A4 & \textcolor{blue}{CAA} A3 & \textcolor{blue}{CGA} A1 \\
\textcolor{red}{CTG} & \textcolor{red}{CCG} & \textcolor{red}{CAG} & \textcolor{red}{CGG} \\
\hline
\textcolor{red}{ATT} & \textcolor{blue}{ACT} A0 & \textcolor{red}{AAT} & \textcolor{red}{AGT} \\
\textcolor{blue}{ATC} A6 & \textcolor{blue}{ACC} A8 & \textcolor{blue}{AAC} A7 & \textcolor{blue}{AGC} A5 \\
\textcolor{blue}{ATA} A2 & \textcolor{blue}{ACA} A4 & \textcolor{blue}{AAA} A3 & \textcolor{blue}{AGA} A1 \\
\textcolor{red}{ATG} & \textcolor{red}{ACG} & \textcolor{red}{AAG} & \textcolor{red}{AGG} \\
\hline
\textcolor{red}{GTT} & \textcolor{blue}{GCT} A0 & \textcolor{red}{GAT} & \textcolor{red}{GGT} \\
\textcolor{blue}{GTC} A6 & \textcolor{blue}{GCC} A8 & \textcolor{blue}{GAC} A7 & \textcolor{blue}{GGC} A5 \\
\textcolor{blue}{GTA} A2 & \textcolor{blue}{GCA} A4 & \textcolor{blue}{GAA} A3 & \textcolor{blue}{GGA} A1 \\
\textcolor{red}{GTG} & \textcolor{red}{GCG} & \textcolor{red}{GAG} & \textcolor{red}{GGG} \\
\hline
In [ ]: