import import_ipynb
import common.functions as fun
import itertools
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from sklearn.decomposition import PCA
from sklearn.preprocessing import minmax_scale
import os
np.set_printoptions(precision=5, suppress=True)
#taken from: https://stackoverflow.com/questions/10035752/elegant-python-code-for-integer-partitioning
def partitions(n, I=1):
yield (n,)
for i in range(I, n//2 + 1):
for p in partitions(n-i, i):
yield (i,) + p
eksamplOrder=["".join(i) for i in itertools.product(['A','C','G','T'],repeat=3)]
#thanks to "1000000" we have indeksing from 1
kSizeCond=np.array([1000000]+[fun.calculateBlockConductance(set(eksamplOrder[:i])) for i in range(1,37)])
exampleOptimalPartition={}
for p in partitions(36):
sumOfCond=0
n=len(p)
for k in p:
sumOfCond+=kSizeCond[k]
avgCond=sumOfCond/n
if n in exampleOptimalPartition:
if exampleOptimalPartition[n][0]>avgCond:
exampleOptimalPartition[n]=(avgCond,p)
else:
exampleOptimalPartition[n]=(avgCond,p)
exampleOptimalPartition
plt.gcf().set_size_inches(10,10)
plt.title("Optimal avarange conductans of partitioning 36 codons into groups.")
plt.ylabel("Avarange conductance")
plt.xlabel("Number of groups")
plt.xticks(range(1,37))
plt.ylim((0.2,1))
plt.plot(range(1,37),[exampleOptimalPartition[i][0] for i in range(1,37)])
plt.plot([9,9],[0,1],"r--")
plt.savefig("../Obrazy/partitioningOf36Codons.pdf",dpi=1000)
for i in range(1,len(kSizeCond)//2+1):
print(i,"&","{: f}".format(kSizeCond[i]),"&",i+18,"&","{: f}".format(kSizeCond[i+18]),r"\\")
"{: f}".format(kSizeCond[i])
kSizeCond
$\phi$
plt.gcf().set_size_inches(10,5)
plt.ylabel("k-size-conductance")
plt.xlabel("k")
plt.xticks(np.arange(0,37,2))
plt.plot(np.arange(1,37),kSizeCond[1:])
plt.savefig("../Obrazy/k-size-conductance.pdf",dpi=1000)
allOptimalPartition={}
for p in partitions(36):
sumOfCond=0
n=len(p)
for k in p:
sumOfCond+=kSizeCond[k]
avgCond=sumOfCond/n
if n in allOptimalPartition:
if allOptimalPartition[n][0][0]>avgCond:
allOptimalPartition[n]=[(avgCond,p)]
elif allOptimalPartition[n][0][0]==avgCond:
allOptimalPartition[n].append((avgCond,p))
else:
allOptimalPartition[n]=[(avgCond,p)]
allOptimalPartition
for i in range(1,37):
print(r"\hline")
print(i,"&","{: f}".format(allOptimalPartition[i][0][0]),"&", r"\parbox[t]{10cm}{", end="")
for partition in allOptimalPartition[i]:
print(partition[1],r"\\")
print(r"}\\")
print(r"\hline")
plt.gcf().set_size_inches(32,32)
for k in range(28,64):
plt.subplot(6,6,k-27)
partition={}
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
if n in partition:
if partition[n][0]>avgCond:
partition[n]=(avgCond,p)
else:
partition[n]=(avgCond,p)
plt.title("C_"+str(k))
plt.grid(True)
plt.xticks(range(0,65-k,2))
plt.plot(range(1,65-k),[partition[i][0] for i in range(1,65-k)],'o--')
plt.show()
plt.savefig("../Obrazy/avgCondForAllCprim.pdf",dpi=1000)
plt.gcf().set_size_inches(32,32)
for k in range(28,64):
plt.subplot(6,6,k-27)
oldAvgCond=fun.minAvgCondForCodes(k)
partitionBal={}
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
if n in partitionBal:
if np.abs(partitionBal[n][0])>np.abs(balance):
partitionBal[n]=(balance,p)
else:
partitionBal[n]=(balance,p)
plt.title("C_"+str(k))
plt.grid(True)
plt.xticks(range(0,65-k,2))
plt.plot(range(1,65-k),[partitionBal[i][0] for i in range(1,65-k)],'o--')
plt.savefig("../Obrazy/balanceNearZeroForAllCprim.pdf",dpi=1000)
plt.show()
bestCodesGeneretedFromThisFunction={}
plt.gcf().set_size_inches(32,32)
for k in range(28,64):
plt.subplot(6,6,k-27)
oldAvgCond, order=fun.minAvgCondForCodes(k, True)
partitionBal={}
bestAvgs={}
bestCodesGeneretedFromThisFunction[k]={}
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
if n in partitionBal:
if bestAvgs[n]>avgCond:
partitionBal[n]=(balance,p)
bestAvgs[n]=avgCond
bestCodesGeneretedFromThisFunction[k][n]=(balance, order, p)
else:
partitionBal[n]=(balance,p)
bestAvgs[n]=avgCond
bestCodesGeneretedFromThisFunction[k][n]=(balance, order, p)
plt.title("C_"+str(k))
plt.grid(True)
plt.xticks(range(0,65-k,2))
plt.plot(range(1,65-k),[partitionBal[i][0] for i in range(1,65-k)],'o--')
plt.savefig("../Obrazy/balanceWithBestForAllCprim.pdf",dpi=1000)
plt.show()
postprocess={}
for k in bestCodesGeneretedFromThisFunction:
slownik=bestCodesGeneretedFromThisFunction[k]
mostBalanced=None
bestBlance=1.01
for n in slownik:
balance, order,p=slownik[n]
if abs(bestBlance-1)>abs(balance-1):
mostBalanced=(balance,order,p)
if mostBalanced is not None:
postprocess[k]=mostBalanced
if not os.path.isdir("optymalneKody"):
os.mkdir("optymalneKody")
for k in postprocess:
b,order,p=postprocess[k]
with open("optymalneKody/bestBalancedWithLowerBoundForAvgCond-"+str(k)+'-'+''.join(order[0])+'-'+\
''.join(str(i) for i in order[1])+".tex","w") as plik:
fun.printCode(order[0],order[1],k, file=plik, header=str((b,p)))
plt.gcf().set_size_inches(32,32)
for k in range(28,64):
plt.subplot(6,6,k-27)
oldAvgCond=fun.minAvgCondForCodes(k)
partitionBal={}
partition={}
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
if n in partition:
if partition[n][0]>avgCond:
partition[n]=(avgCond,p)
else:
partition[n]=(avgCond,p)
if n in partitionBal:
if np.abs(partitionBal[n][0])>np.abs(balance):
partitionBal[n]=(balance,p)
else:
partitionBal[n]=(balance,p)
plt.title("C_"+str(k))
plt.grid(True)
plt.xticks(range(0,65-k,2))
plt.plot(range(1,65-k),[partitionBal[i][0] for i in range(1,65-k)],'o--')
plt.plot(range(1,65-k),[partition[i][0] for i in range(1,65-k)],'o--')
plt.savefig("../Obrazy/joinNearForAllCprim.pdf",dpi=1000)
plt.show()
plt.gcf().set_size_inches(32,32)
for k in range(28,64):
plt.subplot(6,6,k-27)
oldAvgCond=fun.minAvgCondForCodes(k)
partitionBal={}
partition={}
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
if n in partition:
if partition[n][0]>avgCond:
partition[n]=(avgCond,p)
partitionBal[n]=(balance,p)
else:
partition[n]=(avgCond,p)
partitionBal[n]=(balance,p)
plt.title("C_"+str(k))
plt.grid(True)
plt.xticks(range(0,65-k,2))
plt.plot(range(1,65-k),[partitionBal[i][0] for i in range(1,65-k)],'o--')
plt.plot(range(1,65-k),[partition[i][0] for i in range(1,65-k)],'o--')
plt.savefig("../Obrazy/joinBestForAllCprim.pdf",dpi=1000)
plt.show()
fig = plt.figure()
fig.set_size_inches(10,10)
ax = fig.add_subplot(111, projection='3d')
def init_3dplot_avg_cond_x_balance_x_codons():
plotDataAA=[]
plotDataBal=[]
plotDataCond=[]
for k in range(28,64):
oldAvgCond=fun.minAvgCondForCodes(k)
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
plotDataAA.append(k)
plotDataBal.append(balance)
plotDataCond.append(avgCond)
ax.scatter(plotDataAA, plotDataCond, plotDataBal)
ax.set_xlabel("Number of codons in old part.")
ax.set_ylabel("Lower bound for avg. cond of new part.")
ax.set_zlabel("Balance between old and new part.")
return fig,
def animate_3dplot_avg_cond_x_balance_x_codons(i):
ax.view_init(elev=10., azim=(i%360))
return fig,
# Animate
anim = animation.FuncAnimation(fig, animate_3dplot_avg_cond_x_balance_x_codons, init_func=init_3dplot_avg_cond_x_balance_x_codons,
frames=720, interval=50, blit=True)
# Save
anim.save('../Obrazy/3dplot-avg_cond-x-balance-x-codons.mp4', fps=30, extra_args=['-vcodec', 'libx264'])
fig = plt.figure()
fig.set_size_inches(10,10)
ax = fig.add_subplot(111, projection='3d')
def init_3dplot_avg_cond_x_balance_x_aa():
plotDataAA=[]
plotDataBal=[]
plotDataCond=[]
for k in range(28,64):
oldAvgCond=fun.minAvgCondForCodes(k)
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
plotDataAA.append(n)
plotDataBal.append(balance)
plotDataCond.append(avgCond)
ax.scatter(plotDataAA, plotDataCond, plotDataBal)
ax.set_xlabel("Number of new aminoacids.")
ax.set_ylabel("Lower bound for avg. cond of new part.")
ax.set_zlabel("Balance between old and new part.")
return fig,
def animate_3dplot_avg_cond_x_balance_x_aa(i):
ax.view_init(elev=10., azim=(i%360))
return fig,
# Animate
anim = animation.FuncAnimation(fig, animate_3dplot_avg_cond_x_balance_x_aa, init_func=init_3dplot_avg_cond_x_balance_x_aa,
frames=720, interval=50, blit=True)
# Save
anim.save('../Obrazy/3dplot-avg_cond-x-balance-x-aa.mp4', fps=30, extra_args=['-vcodec', 'libx264'])
fig = plt.figure()
fig.set_size_inches(10,10)
plotDataAA=[]
plotDataCond=[]
for k in range(28,64):
oldAvgCond=fun.minAvgCondForCodes(k)
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
if abs(1-balance)<0.005:
plotDataAA.append(n)
plotDataCond.append(avgCond)
plt.scatter(plotDataAA, plotDataCond)
plt.xlabel("Number of new aminoacids.")
plt.ylabel("Avg. cond of partitions of new part.")
plt.xticks(range(0,31,2))
plt.title("Avg. cond. of partitions of new part in codons boxes were abs(1-balance)<0.005")
plt.grid()
plt.show()
plt.savefig("../Obrazy/avg-cond-of-new-part-partitions-with-abs(1-balance)<0,005-new-aa.pdf",dpi=1000)
fig = plt.figure()
fig.set_size_inches(10,10)
plotDataAA=[]
plotDataCond=[]
for k in range(28,64):
oldAvgCond=fun.minAvgCondForCodes(k)
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
if abs(1-balance)<0.001:
plotDataAA.append(k)
plotDataCond.append(avgCond)
plt.scatter(plotDataAA, plotDataCond)
plt.xlabel("Number of codons in old part.")
plt.ylabel("Avg. cond of partitions of new part.")
plt.xticks(range(28,65,2))
plt.title("Avg. cond. of partitions of new part in codons boxes were abs(1-balance)<0.001")
plt.grid()
plt.show()
plt.savefig("../Obrazy/avg-cond-of-new-part-partitions-with-abs(1-balance)<0,001-old-codons.pdf",dpi=1000)
def generate3Ddata(aa=True):
plotDataAA=[]
plotDataBal=[]
plotDataCond=[]
for k in range(28,64):
oldAvgCond=fun.minAvgCondForCodes(k)
for p in partitions(64-k):
sumOfCond=0
n=len(p)
for K in p:
sumOfCond+=kSizeCond[K]
avgCond=sumOfCond/n
balance=fun.balance(oldAvgCond, avgCond)
if aa:
plotDataAA.append(n)
else:
plotDataAA.append(k)
plotDataBal.append(balance)
plotDataCond.append(avgCond)
return (plotDataAA,plotDataBal, plotDataCond)
plotDataAA,plotDataBal, plotDataCond=generate3Ddata()
data_nrOld_Bal_AvgCond=np.array((plotDataAA,plotDataBal,plotDataCond))
pcaModel=PCA(n_components=2, whiten=True)
pcaModel.fit(data_nrOld_Bal_AvgCond.T)
pcaModel.components_
plotDataAA,plotDataBal, plotDataCond=generate3Ddata(aa=False)
data_nrOld_Bal_AvgCond=np.array((plotDataAA,plotDataBal,plotDataCond))
pcaModel=PCA(n_components=2, whiten=True)
pcaModel.fit(data_nrOld_Bal_AvgCond.T)
pcaModel.components_