Molecular dynamics application example¶
Contents
1 Pre-requirements
1.1 Import dependencies
1.2 Notebook configuration
1.3 Package configuration
2 MD showcase - Langerin
3 Space A
3.1 Clustering
3.2 Coreset MSM estimation
3.2.1 (MSM on train data dt = 10 ns)
4 Space B
4.1 Clustering
4.2 Coreset MSM estimation
4.2.1 (MSM on test data dt = 0.1 ns)
Pre-requirements¶
Import dependencies¶
[1]:
# Primary imports
import importlib # Only needed for module editing
import json
import pandas as pd # Optional dependency
from pathlib import Path
import pprint
import sys
import time
import warnings
warnings.simplefilter("always") # Suppress or enable warnings ("always" shows every warning each time it is raised)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn import datasets # For sklearn test data set creation
from sklearn.preprocessing import StandardScaler
# CNN clustering module
import cnnclustering.cnn as cnn # CNN clustering
import cnnclustering.cmsm as cmsm # Core-set MSM estimation
This notebook was created using Python 3.8.
[2]:
# Version information
# Print the version string of the Python interpreter running this kernel
print(sys.version)
3.8.3 (default, May 15 2020, 15:24:35)
[GCC 8.3.0]
Notebook configuration¶
We use matplotlib
to create plots. A "matplotlibrc"
file can be used to customise the appearance of the plots.
[2]:
# Matplotlib configuration
# The "matplotlibrc" file is optional: only load it when it is present in the
# working directory, so that a fresh "Restart & Run All" does not fail without it.
rc_path = Path("matplotlibrc")
if rc_path.is_file():
    mpl.rc_file(
        str(rc_path),
        use_default_template=False  # keep current settings, override only what the file lists
    )
else:
    warnings.warn(
        f"No '{rc_path}' file found; keeping the current matplotlib settings."
    )
[3]:
# Default axis properties handed to the plotting calls via `ax_props=`:
# no axis labels, no ticks, a fixed square view window.
ax_props = dict(
    xlabel=None,
    ylabel=None,
    xlim=(-2.5, 2.5),
    ylim=(-2.5, 2.5),
    xticks=(),
    yticks=(),
    aspect="equal",
)

# Default line properties: markers only, no connecting line.
line_props = dict(linewidth=0, marker=".")
Package configuration¶
[6]:
# Configuration file found?
# Shows the `cfgfile` attribute of the package settings as the cell output.
cnn.settings.cfgfile # If None, no file is provided
[7]:
# Display default settings
# Note that the values are reported as strings (e.g. 'default_cnn_cutoff': '1').
cnn.settings.defaults
[7]:
{'default_cnn_cutoff': '1',
'default_cnn_offset': '0',
'default_radius_cutoff': '1',
'default_member_cutoff': '2',
'float_precision': 'sp',
'int_precision': 'sp'}
MD showcase - Langerin¶
[57]:
# Build the cluster object from the showcase trajectories.
# Each loaded part is thinned to every 20th row and its first 6 columns.
# NOTE: allow_pickle=True unpickles stored objects; only load trusted files.
langerin = cnn.CNN(
    points=[
        part[::20, :6]
        for part in np.load(
            "../data/md_showcase_langerin.npy", allow_pickle=True
        )
    ]
)
[58]:
# Three panels side by side at half the default figure height;
# panel k shows the pair of dimensions (2k, 2k + 1) as a filled contour plot.
fig, Ax = plt.subplots(
    nrows=1, ncols=3,
    figsize=(
        mpl.rcParams['figure.figsize'][0],
        mpl.rcParams['figure.figsize'][1]*0.5,
    ),
)
for dim, panel in enumerate(Ax):
    first = dim*2
    _ = langerin.evaluate(
        ax=panel,
        plot="contourf",
        ax_props=ax_props,
        dim=(first, first + 1),
    )

[60]:
# Compute the distances first, then show the distance histogram
# (presumably pairwise point distances -- TODO confirm against the package docs).
langerin.calc_dist()
# The returned tuple (figure, axes, lines, ...) is echoed as the cell output.
langerin.dist_hist()
[60]:
(<Figure size 750x450 with 1 Axes>,
<matplotlib.axes._subplots.AxesSubplot at 0x7fac08cf7ac0>,
[<matplotlib.lines.Line2D at 0x7fac531bbeb0>],
None)

[62]:
# Cluster the data; the summary printed by fit reports these two positional
# arguments as R = 2.000 and N = 5.
langerin.fit(2, 5)
Execution time for call of fit: 0 hours, 0 minutes, 21.9658 seconds
--------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
13541 2.000 5 2 None 4 0.977 0.000
--------------------------------------------------------------------------------
[68]:
# Same three-panel layout as before, now drawing the individual points
# ("dots") for the dimension pairs (0, 1), (2, 3) and (4, 5).
fig, Ax = plt.subplots(
    nrows=1, ncols=3,
    figsize=(
        mpl.rcParams['figure.figsize'][0],
        mpl.rcParams['figure.figsize'][1]*0.5,
    ),
)
for dim, panel in enumerate(Ax):
    pair = (dim*2, dim*2 + 1)
    _ = langerin.evaluate(
        ax=panel, plot="dots", ax_props=ax_props, dim=pair
    )

[69]:
# Split off the clustering result into child objects; the cells below access
# them through `langerin._children[...]`.
langerin.isolate()
[76]:
# Re-cluster child 1 on its own: build the tree on its points, compute the
# neighbourhoods for radius r=1 from that tree, then fit.
child = langerin._children[1]
child.data.points.cKDtree()
child.calc_neighbours_from_cKDTree(r=1)
child.fit(1, 5)
Execution time for call of fit: 0 hours, 0 minutes, 0.9313 seconds
--------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
13233 1.000 5 2 None 3 0.725 0.001
--------------------------------------------------------------------------------
[77]:
# Dot plots of child 1 for the dimension pairs (0, 1), (2, 3) and (4, 5),
# three panels at half the default figure height.
fig, Ax = plt.subplots(
    nrows=1, ncols=3,
    figsize=(
        mpl.rcParams['figure.figsize'][0],
        mpl.rcParams['figure.figsize'][1]*0.5,
    ),
)
for dim, panel in enumerate(Ax):
    _ = langerin._children[1].evaluate(
        ax=panel,
        plot="dots",
        ax_props=ax_props,
        dim=(dim*2, dim*2 + 1),
    )

Space A¶
[177]:
# Load the data
# 130 replicas of holo-langerin;
# 100 ps time step;
# 5 dimensional TICA projection;
# 30 ns lag time, selected bb-dihedrals and H-bonds
# NOTE: 'p30_5.npy' is read from the current working directory; the recorded
# run of this cell failed with FileNotFoundError because the file was missing.
# allow_pickle=True unpickles stored objects -- only load trusted files.
proj = np.load('p30_5.npy', allow_pickle=True)
# Shape of the container and of its first part
print(np.shape(proj), np.shape(proj[0]))
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-177-e6deacd00740> in <module>
4 # 5 dimensional TICA projection;
5 # 30 ns lag time, selected bb-dihedrals and H-bonds
----> 6 proj = np.load('p30_5.npy', allow_pickle=True)
7 print(np.shape(proj), np.shape(proj[0]))
~/.local/share/virtualenvs/CNN-5gkgQAOT/lib/python3.8/site-packages/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding)
426 own_fid = False
427 else:
--> 428 fid = open(os_fspath(file), "rb")
429 own_fid = True
430
FileNotFoundError: [Errno 2] No such file or directory: 'p30_5.npy'
[25]:
# optionally reduce dimensionality
# for i in range(len(proj)):
# proj[i] = proj[i][:, :3]
[115]:
# initialise clustering
# The full projection is registered as the test data set.
langerin = cnn.CNN(test=proj)
# Derive the reduced (train) set with a stride of 100 per part; the summary
# printed below shows e.g. 4002 test points -> 41 train points.
langerin.cut(points=(None, None, 100))
# Print the overview of the cluster object
print(langerin)
Configuration file found in /home/janjoswig
cnn.CNN cluster object
--------------------------------------------------------------------------------
alias : root
hierachy level : 0
test data shape : Parts - 130
Points - [4002, 2501, 2201, 2201, 2201, '...']
Dimensions - 5
train data shape : Parts - 130
Points - [41, 26, 23, 23, 23, '...']
Dimensions - 5
distance matrix calculated (train) : False
distance matrix calculated (test) : False
clustered : False
children : False
--------------------------------------------------------------------------------
Clustering¶
[116]:
# Distance histogram
# maxima=True annotates the maxima of the histogram (the returned Text objects
# are listed in the cell output); "order" is forwarded via maxima_props
# (presumably the neighbourhood size of the maxima search -- TODO confirm).
# The train distance matrix is calculated on demand, see the message below.
langerin.dist_hist(maxima=True, maxima_props={"order": 5})
Train distance matrix not calculated. Calculating distance matrix.
[116]:
(<Figure size 975.192x602.669 with 1 Axes>,
<matplotlib.axes._subplots.AxesSubplot at 0x7fadde61d250>,
[<matplotlib.lines.Line2D at 0x7fae0c9f7490>],
[Text(0.35896362584191055, 0.7247888035798966, '0.36'),
Text(1.7948178889355477, 0.43464846777076316, '1.79'),
Text(3.2306721520291855, 0.2663797558984762, '3.23'),
Text(7.251064088691371, 0.050229576941857444, '7.25'),
Text(11.271456025353556, 0.018509623274021017, '11.27')])

[118]:
# Axis properties for the plots of the projected data:
# no labels and no ticks; limits and aspect are left to matplotlib.
ax_props2 = dict(
    xlabel=None,
    ylabel=None,
    xticks=(),
    yticks=(),
)
[119]:
# Have a look at the original (test) data before any clustering.
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
# First panel: default dimensions of evaluate; then dimension pairs (1, 2) and (3, 4)
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(
        ax=panel, ax_props=ax_props2, mode="test", original=True, **options
    )
fig.tight_layout(pad=0.1)

[121]:
# Alternatively plot this as free energy landscape
obj = langerin
fig = plt.figure(figsize=(6.50128*0.5, 6.50128*0.2*0.618*1.1))
Ax = []
# Grid of 22 columns: three panels of 7 columns each (0-6, 7-13, 14-20)
# and the last column reserved for the colour bar.
gspec = fig.add_gridspec(1, 22)
Ax.append(fig.add_subplot(gspec[0, :7]))
Ax.append(fig.add_subplot(gspec[0, 7:14]))
Ax.append(fig.add_subplot(gspec[0, 14:21]))
Ax.append(fig.add_subplot(gspec[0, -1]))
# Keep the third element returned by evaluate for the first panel; its first
# item serves as the mappable of the colour bar further down.
# NOTE(review): only this first call passes "extend": "max" -- confirm that
# the other two panels are meant to differ.
plotted = obj.evaluate(
ax=Ax[0], ax_props=ax_props2,
mode="test", original=True,
plot='contourf', contour_props={"levels": 40, "vmin": 0, "vmax": 9.5, "extend": "max"}
)[2]
# Second panel: dimensions (1, 2), same contour levels and colour range
obj.evaluate(
ax=Ax[1], ax_props=ax_props2,
dim=(1, 2),
mode="test", original=True,
plot='contourf', contour_props={"levels": 40, "vmin": 0, "vmax": 9.5}
)
# Third panel: dimensions (3, 4)
obj.evaluate(
ax=Ax[2], ax_props=ax_props2,
dim=(3, 4),
mode="test", original=True,
plot='contourf', contour_props={"levels": 40, "vmin": 0, "vmax": 9.5}
)
# cbarax = fig.add_subplot(1, 20, 20)
# Draw the colour bar into the dedicated last axes
cbar = fig.colorbar(mappable=plotted[0], cax=Ax[3], ticks=(2, 4, 6, 8))
cbar.set_label(r"$\Delta G$ / $kT$", fontsize=8)
# Leave the right 20 % of the figure free (room for the colour bar label)
fig.subplots_adjust(
left=0, right=0.8, bottom=0, top=1, wspace=0.5, hspace=1
)

[122]:
# ... and have a look at the reduced set, still before clustering.
# No `mode` is given here, in contrast to the mode="test" plots of the full data.
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, original=True, **options)
fig.tight_layout(pad=0.1)

[123]:
# Level 1: cluster the root object
r = 4
c = 5
obj = langerin
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
3141 4.0 5 1 None 3 0.97453 0.0
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 2.0126 seconds
recording: ...

[124]:
# Isolate the level 1 result and keep handles on the three child objects
# (children 1, 2 and 3) for the second hierarchy level.
langerin.isolate()
l1, l2, l3 = (langerin.train_children[label] for label in (1, 2, 3))
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[125]:
# Level 2, child 3
r = 1
c = 0
obj = l3
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c3_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
24 1.0 0 1 None 2 0.875 0.041667
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 0.0056 seconds
recording: ...

[126]:
# Level 2, child 2
r = 0.8
c = 5
obj = l2
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c2_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
56 0.8 5 1 None 2 0.553571 0.035714
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 0.0058 seconds
recording: ...

[127]:
# Level 2, child 1
r = 1.3
c = 0
obj = l1
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c1_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
3061 1.3 0 1 None 3 0.991833 0.0
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 1.2624 seconds
recording: ...

[128]:
# Isolate from l1 and prepare for level 3
l1.isolate()
# Only child 1 of l1 is clustered further in the following cells
l1_1 = l1.train_children[1]
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[129]:
# Level 3, child 1
# Unlike the other levels, this fit also sets member_cutoff (to 0).
r = 0.9
c = 0
obj = l1_1
obj.fit(radius_cutoff=r, cnn_cutoff=c, member_cutoff=0)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c1_1_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
3036 0.9 0 0 None 6 0.972661 0.0
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 1.1927 seconds
recording: ...

[130]:
# Isolate from l1_1 and prepare for level 4
l1_1.isolate()
# Only child 1 of l1_1 is clustered further in the following cells
l1_1_1 = l1_1.train_children[1]
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[131]:
# Level 4, child 1
r = 0.6
c = 10
obj = l1_1_1
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c1_1_1_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
2953 0.6 10 1 None 4 0.850999 0.013207
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 0.8132 seconds
recording: ...

[132]:
# Isolate from l1_1_1 and keep handles on its children 1 and 2 for level 5
l1_1_1.isolate()
l1_1_1_1, l1_1_1_2 = (l1_1_1.train_children[label] for label in (1, 2))
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[133]:
# Level 5, child 2
r = 0.4
c = 15
obj = l1_1_1_2
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c1_1_1_2_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
158 0.4 15 1 None 2 0.525316 0.113924
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 0.0100 seconds
recording: ...

[134]:
# Level 5, child 1
r = 0.4
c = 20
obj = l1_1_1_1
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions first, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure:
# fig.savefig(f"c1_1_1_1_{r}_{c}.png")
-------------------------------------------------------------------------------
#points R N M max #clusters %largest %noise
2513 0.4 20 1 None 2 0.653402 0.024274
-------------------------------------------------------------------------------
Execution time for call of fit(): 0 hours, 0 minutes, 0.5373 seconds
recording: ...

[135]:
# Finished?
# Visual overview of the clustering hierarchy built so far
# (presumably drawn as a pie diagram, judging by the method name -- TODO confirm).
langerin.pie()

[136]:
# Wrap up from child to parent
# The order matters: start at the deepest level that has children and work
# upwards, finishing with the root object.
l1_1_1.reel()
l1_1.reel()
l1.reel()
langerin.reel()
[137]:
# Plot the total clustering result of the root object
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
# Default dimensions first, then the pairs (1, 2) and (3, 4)
panel_options = ({}, {"dim": (1, 2)}, {"dim": (3, 4)})
for panel, options in zip(Ax, panel_options):
    obj.evaluate(ax=panel, ax_props=ax_props2, **options)
fig.tight_layout(pad=0.1)
# Uncomment to save the figure (choose a file name for the total result):
# fig.savefig(f"c1_1_1_2_{r}_{c}.png")

[138]:
# Cluster size overview: number of members per cluster label, printed as JSON
cluster_sizes = {
    label: len(members)
    for label, members in langerin.train_clusterdict.items()
}
print(json.dumps(cluster_sizes, indent=4))
{
"0": 121,
"1": 1642,
"2": 810,
"3": 147,
"4": 96,
"5": 83,
"6": 79,
"7": 57,
"8": 31,
"9": 23,
"10": 22,
"11": 21,
"12": 3,
"13": 2,
"14": 1,
"15": 1,
"16": 1,
"17": 1
}
[405]:
# store train set assignments
# np.save("train_labels_30_5.npy", langerin.train_labels)
[9]:
# load train set assignments
# Reads the labels stored by the (commented-out) np.save call in the previous cell.
# allow_pickle=True unpickles stored objects -- only load trusted files.
langerin.train_labels = np.load("train_labels_30_5.npy", allow_pickle=True)
# translate label information into clusterdict information
langerin.labels2dict()
[10]:
# Prepare distance matrices for lookup
# Both calls are needed before the predict(..., behaviour="lookup") cells below
# (presumably dist() computes the train distances and map() the test-to-train
# distances -- TODO confirm against the package docs).
langerin.dist()
langerin.map()
[ ]:
# Predict labels for the test set
# Going from small to large
# Clusters 14-17 hold a single train point each (see the size overview above).
langerin.predict(0.2, 0, clusters=[14, 15, 16, 17], behaviour="lookup")
[ ]:
# Clusters 12 and 13 (3 and 2 train points in the size overview above)
langerin.predict(0.5, 1, clusters=[12, 13], behaviour="lookup")
[ ]:
# Clusters 10 and 11 (22 and 21 train points in the size overview above)
langerin.predict(0.5, 5, clusters=[10, 11], behaviour="lookup")
[ ]:
# The two largest clusters 1 and 2 (1642 and 810 train points in the size overview above)
langerin.predict(0.4, 20, clusters=[1, 2], behaviour="lookup")
[ ]:
# The remaining medium-sized clusters 3-9 (147 down to 23 train points)
langerin.predict(0.4, 10, clusters=[3, 4, 5, 6, 7, 8, 9], behaviour="lookup")
[ ]:
[ ]:
# Plot the total clustering result for the test set (predicted labels)
# `ax_props2` is the axis-property dictionary used by all plots of this data
# set; the name `axprops` used here before is not defined anywhere in the
# notebook and raised a NameError.
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
# Default dimensions first, then the pairs (1, 2) and (3, 4)
obj.evaluate(ax=Ax[0], ax_props=ax_props2, mode="test")
obj.evaluate(ax=Ax[1], ax_props=ax_props2, dim=(1, 2), mode="test")
obj.evaluate(ax=Ax[2], ax_props=ax_props2, dim=(3, 4), mode="test")
fig.tight_layout(pad=0.1)
# Uncomment to save the figure (choose a file name for the total result):
# fig.savefig(f"c1_1_1_2_{r}_{c}.png")
Coreset MSM estimation¶
(MSM on train data dt = 10 ns)¶
[18]:
# Create an MSM object
# Built from the discrete trajectories of the train set; with unit="ns" and
# step=10 a lag of one step is reported as 10 ns in the output below.
M = cmsm.CMSM(langerin.get_dtraj(mode='train'), unit="ns", step=10)
[21]:
# Estimate csMSM for different lag times (given in steps)
# Trajectories shorter than lag*minlenfactor steps are left out of the
# estimate (see the messages printed for each lag time).
lags = [1, 2, 3, 4]
for i in lags:
M.cmsm(lag=i, minlenfactor=5)
M.get_its()
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 10 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 5)
and will not be used to compute the MSM.
Using 129 trajectories with 3126 steps over 17 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 20 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 10)
and will not be used to compute the MSM.
Using 129 trajectories with 3126 steps over 17 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 30 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 15)
and will not be used to compute the MSM.
Using 129 trajectories with 3126 steps over 17 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 40 ns
---------------------------------------------------------
Trajectories [38, 86, 89, 121]
are shorter then step threshold (lag*minlenfactor = 20)
and will not be used to compute the MSM.
Using 126 trajectories with 3073 steps over 17 coresets
---------------------------------------------------------
*********************************************************
[22]:
# Plot the time scales
# Only the figure and axes of the returned sequence are kept; the rest is discarded
fig, ax, *_ = M.plot_its()
fig.tight_layout(pad=0.1)

[ ]:
[ ]:
Space B¶
[139]:
# Load the data
# 130 replicas of holo-langerin;
# 100 ps time step;
# 9 dimensional TICA projection;
# 9 ns lag time, selected bb-dihedrals and H-bonds
# NOTE: this rebinds `proj`, which held the space A projection before.
# allow_pickle=True unpickles stored objects -- only load trusted files.
proj = np.load('p9_9.npy', allow_pickle=True)
# Shape of the container and of its first part, here (130,) and (4002, 9)
print(np.shape(proj), np.shape(proj[0]))
(130,) (4002, 9)
[140]:
# Reduce the dimensionality: keep only the first three columns of every part.
# This step is optional; skip the cell to keep all dimensions.
for i, part in enumerate(proj):
    proj[i] = part[:, :3]
[141]:
# initialise clustering
# The (dimension-reduced) projection is registered as the test data set.
langerin_b = cnn.CNN(test=proj)
# Derive the reduced (train) set with a stride of 100 per part; the summary
# printed below shows e.g. 4002 test points -> 41 train points.
langerin_b.cut(points=(None, None, 100))
# Print the overview of the cluster object
print(langerin_b)
Configuration file found in /home/janjoswig
cnn.CNN cluster object
--------------------------------------------------------------------------------
alias : root
hierachy level : 0
test data shape : Parts - 130
Points - [4002, 2501, 2201, 2201, 2201, '...']
Dimensions - 3
train data shape : Parts - 130
Points - [41, 26, 23, 23, 23, '...']
Dimensions - 3
distance matrix calculated (train) : False
distance matrix calculated (test) : False
clustered : False
children : False
--------------------------------------------------------------------------------
[ ]:
Clustering¶
[142]:
# You can also use this clustering and skip the prediction ...
# NOTE(review): `Image` is not imported in the import cell of this notebook;
# presumably `from IPython.display import Image` is required for a fresh
# kernel -- confirm and add it to the imports.
Image("c_predicted_9_3.png")
[142]:

[143]:
# load train and test set assignments
# Precomputed labels are read from disk instead of repeating the clustering
# and prediction. allow_pickle=True unpickles stored objects -- only load
# trusted files.
langerin_b.train_labels = np.load("train_labels_9_3.npy", allow_pickle=True)
langerin_b.test_labels = np.load("test_labels_9_3.npy", allow_pickle=True)
# translate label information into clusterdict information
# (first call without a mode, second call explicitly for the test set)
langerin_b.labels2dict()
langerin_b.labels2dict(mode="test")
Coreset MSM estimation¶
(MSM on test data dt = 0.1 ns)¶
[144]:
# Create an MSM object
# Built from the discrete trajectories of the test set; with unit="ns" and
# step=0.1 a lag of one step is reported as 0.1 ns in the output below.
MB = cmsm.CMSM(langerin_b.get_dtraj(mode='test'), unit="ns", step=0.1)
[145]:
# Estimate csMSM for different lag times (given in steps)
# With a step of 0.1 ns the lags below cover 0.1 ns to 30 ns.
# Trajectories shorter than lag*minlenfactor steps are left out of the
# estimate (see the messages printed for each lag time).
lags = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200, 250, 300]
for i in lags:
MB.cmsm(lag=i, minlenfactor=5)
MB.get_its()
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 0.1 ns
---------------------------------------------------------
Using 130 trajectories with 301170 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 1.0 ns
---------------------------------------------------------
Using 130 trajectories with 301170 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 2.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 100)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 3.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 150)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 4.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 200)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 5.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 250)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 6.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 300)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 7.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 350)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 8.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 400)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 9.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 450)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 10.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 500)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 12.5 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 625)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 15.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 750)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 17.5 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 875)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 20.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1000)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 25.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1250)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 30.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1500)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
[146]:
# Plot the implied time scales
# Only the figure and axes of the returned sequence are kept; the rest is discarded
fig, ax, *_ = MB.plot_its()
fig.tight_layout(pad=0.1)

[147]:
# A Model at lag time 10 ns might be a good choice
# lag=100 steps of 0.1 ns each. minlenfactor is left at its default here; the
# message below reports a threshold of lag*minlenfactor = 1000.
MB.cmsm(lag=100)
MB.get_its()
*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 10.0 ns
---------------------------------------------------------
Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1000)
and will not be used to compute the MSM.
Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************
[148]:
# Transition matrix
# 6 x 6, one row/column per core set of the model estimated in the previous cell.
# NOTE: the displayed matrix contains small negative entries (e.g. -2.9e-07).
MB.T
[148]:
array([[ 9.96803579e-01, 2.88007674e-04, -2.90934852e-07,
1.85104164e-03, 7.84927156e-04, 2.72735582e-04],
[ 2.67709646e-03, 9.68344445e-01, 2.16534956e-02,
7.32579034e-03, -4.98370471e-08, -7.77422813e-07],
[-7.50856852e-05, 3.43212694e-02, 9.52231599e-01,
1.35221937e-02, 1.39780127e-09, 2.18047147e-08],
[ 5.29790349e-02, 2.23999339e-02, 2.60014695e-02,
8.98635933e-01, -9.86262053e-07, -1.53849930e-05],
[ 3.47243911e-02, 1.40713811e-08, -1.42143962e-11,
-6.79784120e-06, 9.65292477e-01, -1.00838853e-05],
[ 2.44734782e-02, 9.91739893e-09, -1.00181948e-11,
-4.79106511e-06, -4.55600275e-07, 9.75531759e-01]])
[149]:
# You can plot the eigenvectors of the transition matrix (right)
# The compact figure size is applied through a temporary rc context, so that
# the global matplotlib default is not changed for every figure created later
# (the previous assignment to mpl.rcParams leaked into the whole session).
with mpl.rc_context(
    {"figure.figsize": (6.50128*0.5, 6.50128*0.5*0.618*1*0.25)}
):
    fig, Ax = MB.plot_eigenvectors()
# Tight margins and no vertical gap between the stacked panels
fig.subplots_adjust(
    left=0.08,
    bottom=0.15,
    right=0.99,
    top=0.99,
    wspace=None,
    hspace=0
)

[150]:
# You can plot the eigenvectors of the transition matrix (left)
# The compact figure size is applied through a temporary rc context, so that
# the global matplotlib default is not changed for every figure created later
# (the previous assignment to mpl.rcParams leaked into the whole session).
with mpl.rc_context(
    {"figure.figsize": (6.50128*0.5, 6.50128*0.5*0.618*1*0.25)}
):
    fig, Ax = MB.plot_eigenvectors(which="left", invert=True)
# Tight margins and no vertical gap between the stacked panels
fig.subplots_adjust(
    left=0.08,
    bottom=0.15,
    right=0.99,
    top=0.99,
    wspace=None,
    hspace=0
)

[ ]:
[ ]:
Benchmark framework¶
[ ]:
# Coming soon