Molecular dynamics application example

Contents

  • 1  Pre-requirements

    • 1.1  Import dependencies

    • 1.2  Notebook configuration

    • 1.3  Package configuration

  • 2  MD showcase - Langerin

  • 3  Space A

    • 3.1  Clustering

    • 3.2  Coreset MSM estimation

      • 3.2.1  (MSM on train data dt = 10 ns)

  • 4  Space B

    • 4.1  Clustering

    • 4.2  Coreset MSM estimation

      • 4.2.1  (MSM on test data dt = 0.1 ns)

Pre-requirements

Import dependencies

[1]:
# Primary imports
import importlib  # Only needed for module editing
import json
import pandas as pd  # Optional dependency
from pathlib import Path
import pprint
import sys
import time
import warnings
# "always" prints every warning each time it is raised; switch to "ignore" to suppress warnings
warnings.simplefilter("always")

import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn import datasets  # For sklearn test data set creation
from sklearn.preprocessing import StandardScaler

# CNN clustering module
import cnnclustering.cnn as cnn    # CNN clustering
import cnnclustering.cmsm as cmsm  # Core-set MSM estimation

This notebook was created using Python 3.8.

[2]:
# Version information: print the interpreter version and build string
# of the kernel that executes this notebook
print(sys.version)
3.8.3 (default, May 15 2020, 15:24:35)
[GCC 8.3.0]

Notebook configuration

We use matplotlib to create plots. A "matplotlibrc" file can be used to customise the appearance of the plots.

[2]:
# Apply the plot styling from the local "matplotlibrc" file.
# use_default_template=False updates only the parameters listed in that file
# instead of first resetting everything to matplotlib's defaults.
mpl.rc_file("matplotlibrc", use_default_template=False)
[3]:
# Default axis settings for the showcase plots: no labels or ticks,
# a fixed [-2.5, 2.5] window on both axes and an equal aspect ratio
ax_props = dict(
    xlabel=None,
    ylabel=None,
    xlim=(-2.5, 2.5),
    ylim=(-2.5, 2.5),
    xticks=(),
    yticks=(),
    aspect="equal",
)

# Default line settings: zero line width with a '.' marker,
# i.e. data drawn as individual points
line_props = dict(linewidth=0, marker='.')

Package configuration

[6]:
# Configuration file found?
# Displays the configuration file recorded in the package settings.
cnn.settings.cfgfile  # If None, no file is provided
[7]:
# Display default settings
# (the values are reported as strings, see the output below)
cnn.settings.defaults
[7]:
{'default_cnn_cutoff': '1',
 'default_cnn_offset': '0',
 'default_radius_cutoff': '1',
 'default_member_cutoff': '2',
 'float_precision': 'sp',
 'int_precision': 'sp'}

MD showcase - Langerin

[57]:
# Build the cluster object from the Langerin showcase data: for every array
# stored in the file keep each 20th row and the first 6 columns.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load trusted files.
langerin = cnn.CNN(
    points=[
        part[::20, :6]
        for part in np.load(
            "../data/md_showcase_langerin.npy", allow_pickle=True
        )
    ]
)
[58]:
# One row of three panels, half as tall as the default figure
width, height = mpl.rcParams['figure.figsize']
fig, Ax = plt.subplots(nrows=1, ncols=3, figsize=(width, height*0.5))

# Panel k shows the coordinate pair (2k, 2k + 1) as a filled contour plot
for dim, panel in enumerate(Ax):
    _ = langerin.evaluate(
        ax=panel,
        plot="contourf",
        ax_props=ax_props,
        dim=(dim*2, dim*2 + 1),
    )
../_images/tutorial_tutorial_16_0.png
[60]:
# Calculate the distances first, then plot their histogram.
# dist_hist() is the last expression of the cell, so its return value
# (figure, axes, list of lines, None) is echoed below the cell.
langerin.calc_dist()
langerin.dist_hist()
[60]:
(<Figure size 750x450 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fac08cf7ac0>,
 [<matplotlib.lines.Line2D at 0x7fac531bbeb0>],
 None)
../_images/tutorial_tutorial_17_1.png
[62]:
# Cluster with radius cutoff R = 2 and CNN cutoff N = 5
# (both values are echoed in the summary table printed by fit)
langerin.fit(2, 5)
Execution time for call of fit: 0 hours, 0 minutes, 21.9658 seconds
--------------------------------------------------------------------------------
#points   R         N         M         max       #clusters %largest  %noise
13541     2.000     5         2         None      4         0.977     0.000
--------------------------------------------------------------------------------
[68]:
# One row of three panels, half as tall as the default figure
width, height = mpl.rcParams['figure.figsize']
fig, Ax = plt.subplots(nrows=1, ncols=3, figsize=(width, height*0.5))

# Panel k shows the clustered points for the coordinate pair (2k, 2k + 1)
for dim, panel in enumerate(Ax):
    _ = langerin.evaluate(
        ax=panel,
        plot="dots",
        ax_props=ax_props,
        dim=(dim*2, dim*2 + 1),
    )
../_images/tutorial_tutorial_19_0.png
[69]:
# Isolate the clusters found above; the following cell works on the
# resulting child object `langerin._children[1]`
langerin.isolate()
[76]:
# Re-cluster child 1 with a smaller radius (R = 1, N = 5), using
# neighbour lists computed from a cKDTree at r=1.
# NOTE(review): this reaches into the private `_children` attribute, and the two
# method names differ in capitalisation (`cKDtree` vs `..._cKDTree`) — confirm against the API.
langerin._children[1].data.points.cKDtree()
langerin._children[1].calc_neighbours_from_cKDTree(r=1)
langerin._children[1].fit(1, 5)
Execution time for call of fit: 0 hours, 0 minutes, 0.9313 seconds
--------------------------------------------------------------------------------
#points   R         N         M         max       #clusters %largest  %noise
13233     1.000     5         2         None      3         0.725     0.001
--------------------------------------------------------------------------------
[77]:
# One row of three panels, half as tall as the default figure
width, height = mpl.rcParams['figure.figsize']
fig, Ax = plt.subplots(nrows=1, ncols=3, figsize=(width, height*0.5))

# Panel k shows the re-clustered child 1 for the coordinate pair (2k, 2k + 1)
for dim, panel in enumerate(Ax):
    _ = langerin._children[1].evaluate(
        ax=panel,
        plot="dots",
        ax_props=ax_props,
        dim=(dim*2, dim*2 + 1),
    )
../_images/tutorial_tutorial_22_0.png

Space A

[177]:
# Load the data
# 130 replicas of holo-langerin;
# 100 ps time step;
# 5 dimensional TICA projection;
# 30 ns lag time, selected bb-dihedrals and H-bonds
# NOTE(review): the path is relative to the working directory and the recorded
# output of this cell is a FileNotFoundError — 'p30_5.npy' must be present to run it.
# allow_pickle=True unpickles arbitrary objects; only load trusted files.
proj = np.load('p30_5.npy', allow_pickle=True)
# Shape of the container and of its first part
print(np.shape(proj), np.shape(proj[0]))
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-177-e6deacd00740> in <module>
      4 # 5 dimensional TICA projection;
      5 # 30 ns lag time, selected bb-dihedrals and H-bonds
----> 6 proj = np.load('p30_5.npy', allow_pickle=True)
      7 print(np.shape(proj), np.shape(proj[0]))

~/.local/share/virtualenvs/CNN-5gkgQAOT/lib/python3.8/site-packages/numpy/lib/npyio.py in load(file, mmap_mode, allow_pickle, fix_imports, encoding)
    426         own_fid = False
    427     else:
--> 428         fid = open(os_fspath(file), "rb")
    429         own_fid = True
    430

FileNotFoundError: [Errno 2] No such file or directory: 'p30_5.npy'
[25]:
# Optionally reduce dimensionality by keeping only the first 3 columns of
# every part (disabled here: uncomment the loop to apply it)
# for i in range(len(proj)):
#     proj[i] = proj[i][:, :3]
[115]:
# Initialise clustering with the full projection as test data.
# NOTE: this rebinds `langerin`, replacing the showcase object created earlier.
langerin = cnn.CNN(test=proj)
# Derive the reduced train set from the test data; (None, None, 100) is
# presumably a (start, stop, step) selection, i.e. every 100th point per part
# (the printed summary shows 4002 test points vs. 41 train points for part 0).
langerin.cut(points=(None, None, 100))
print(langerin)
Configuration file found in /home/janjoswig
cnn.CNN cluster object
--------------------------------------------------------------------------------
alias :                                  root
hierachy level :                         0

test data shape :                        Parts      - 130
                                         Points     - [4002, 2501, 2201, 2201, 2201, '...']
                                         Dimensions - 5

train data shape :                       Parts      - 130
                                         Points     - [41, 26, 23, 23, 23, '...']
                                         Dimensions - 5

distance matrix calculated (train) :     False
distance matrix calculated (test) :      False
clustered :                              False
children :                               False
--------------------------------------------------------------------------------

Clustering

[116]:
# Distance histogram of the train set with its maxima annotated
# (the annotations come back as Text objects in the returned tuple).
# NOTE(review): "order" presumably sets the neighbourhood used for the maxima search — confirm.
langerin.dist_hist(maxima=True, maxima_props={"order": 5})
Train distance matrix not calculated. Calculating distance matrix.
[116]:
(<Figure size 975.192x602.669 with 1 Axes>,
 <matplotlib.axes._subplots.AxesSubplot at 0x7fadde61d250>,
 [<matplotlib.lines.Line2D at 0x7fae0c9f7490>],
 [Text(0.35896362584191055, 0.7247888035798966, '0.36'),
  Text(1.7948178889355477, 0.43464846777076316, '1.79'),
  Text(3.2306721520291855, 0.2663797558984762, '3.23'),
  Text(7.251064088691371, 0.050229576941857444, '7.25'),
  Text(11.271456025353556, 0.018509623274021017, '11.27')])
../_images/tutorial_tutorial_28_2.png
[118]:
# Axis settings for the following plots: no labels and no ticks,
# axis limits and aspect ratio are left untouched
ax_props2 = dict(xlabel=None, ylabel=None, xticks=(), yticks=())
[119]:
# Have a look at the original data (test set) ...
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))

# First panel: default dimensions; then the pairs (1, 2) and (3, 4)
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, mode="test", original=True, **dims)
fig.tight_layout(pad=0.1)
../_images/tutorial_tutorial_30_0.png
[121]:
# Alternatively plot this as free energy landscape
obj = langerin
fig = plt.figure(figsize=(6.50128*0.5, 6.50128*0.2*0.618*1.1))

# 22-column grid: three 7-column panels plus one narrow column for the colour bar
gspec = fig.add_gridspec(1, 22)
Ax = [
    fig.add_subplot(gspec[0, columns])
    for columns in (slice(None, 7), slice(7, 14), slice(14, 21), -1)
]

# Contour settings shared by all three panels
shared_contours = {"levels": 40, "vmin": 0, "vmax": 9.5}

# Element [2] of the first panel's return value is kept: its first entry
# is the mappable for the colour bar. Only this panel sets "extend": "max".
plotted = obj.evaluate(
    ax=Ax[0], ax_props=ax_props2,
    mode="test", original=True,
    plot='contourf', contour_props={**shared_contours, "extend": "max"}
    )[2]

# Remaining panels: dimension pairs (1, 2) and (3, 4)
for panel, dims in zip(Ax[1:3], ((1, 2), (3, 4))):
    obj.evaluate(
        ax=panel, ax_props=ax_props2,
        dim=dims,
        mode="test", original=True,
        plot='contourf', contour_props=dict(shared_contours)
        )

# Colour bar in the last grid column
cbar = fig.colorbar(mappable=plotted[0], cax=Ax[3], ticks=(2, 4, 6, 8))
cbar.set_label(r"$\Delta G$ / $kT$", fontsize=8)
fig.subplots_adjust(left=0, right=0.8, bottom=0, top=1, wspace=0.5, hspace=1)
../_images/tutorial_tutorial_31_0.png
[122]:
# ... and the reduced set: same three panels as for the test set,
# but without mode="test" (presumably the train set is the default — confirm)
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))

# First panel: default dimensions; then the pairs (1, 2) and (3, 4)
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, original=True, **dims)
fig.tight_layout(pad=0.1)
../_images/tutorial_tutorial_32_0.png
[123]:
# Level 1: cluster the root object with R = 4, N = 5
obj = langerin
r, c = 4, 5
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
    3141      4.0    5     1    None         3         0.97453       0.0
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 2.0126 seconds
recording: ...
../_images/tutorial_tutorial_33_1.png
[124]:
# Isolate the result and prepare for level 2:
# one child object per cluster found at level 1 (labels 1, 2 and 3)
langerin.isolate()
l1, l2, l3 = (langerin.train_children[label] for label in (1, 2, 3))
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[125]:
# Level 2, child 3: re-cluster with R = 1, N = 0
obj = l3
r, c = 1, 0
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c3_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
     24       1.0    0     1    None         2          0.875     0.041667
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 0.0056 seconds
recording: ...
../_images/tutorial_tutorial_35_1.png
[126]:
# Level 2, child 2: re-cluster with R = 0.8, N = 5
obj = l2
r, c = 0.8, 5
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c2_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
     56       0.8    5     1    None         2        0.553571    0.035714
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 0.0058 seconds
recording: ...
../_images/tutorial_tutorial_36_1.png
[127]:
# Level 2, child 1: re-cluster with R = 1.3, N = 0
obj = l1
r, c = 1.3, 0
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c1_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
    3061      1.3    0     1    None         3        0.991833       0.0
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 1.2624 seconds
recording: ...
../_images/tutorial_tutorial_37_1.png
[128]:
# Isolate from l1 and prepare for level 3.
# Only child 1 is picked up for further refinement; the level-2 fit of l1
# reports 3061 points with 99.2 % of them in the largest cluster.
l1.isolate()
l1_1 = l1.train_children[1]
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[129]:
# Level 3, child 1: re-cluster with R = 0.9, N = 0.
# member_cutoff=0 is passed explicitly here; the summary reports it as M = 0,
# whereas the other Space A fits report M = 1.
obj = l1_1
r, c = 0.9, 0
obj.fit(radius_cutoff=r, cnn_cutoff=c, member_cutoff=0)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c1_1_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
    3036      0.9    0     0    None         6        0.972661       0.0
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 1.1927 seconds
recording: ...
../_images/tutorial_tutorial_39_1.png
[130]:
# Isolate from l1_1 and prepare for level 4.
# Again only child 1 is refined further; the level-3 fit reports
# 6 clusters with 97.3 % of the points in the largest one.
l1_1.isolate()
l1_1_1 = l1_1.train_children[1]
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[131]:
# Level 4, child 1: re-cluster with R = 0.6, N = 10
obj = l1_1_1
r, c = 0.6, 10
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c1_1_1_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
    2953      0.6   10     1    None         4        0.850999    0.013207
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 0.8132 seconds
recording: ...
../_images/tutorial_tutorial_41_1.png
[132]:
# Isolate from l1_1_1 and prepare for level 5:
# children 1 and 2 are both refined further
l1_1_1.isolate()
l1_1_1_1, l1_1_1_2 = (l1_1_1.train_children[label] for label in (1, 2))
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
Configuration file found in /home/janjoswig
[133]:
# Level 5, child 2: re-cluster with R = 0.4, N = 15
obj = l1_1_1_2
r, c = 0.4, 15
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c1_1_1_2_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
     158      0.4   15     1    None         2        0.525316    0.113924
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 0.0100 seconds
recording: ...
../_images/tutorial_tutorial_43_1.png
[134]:
# Level 5, child 1: re-cluster with R = 0.4, N = 20
obj = l1_1_1_1
r, c = 0.4, 20
obj.fit(radius_cutoff=r, cnn_cutoff=c)

# Show the result: default dimensions, then the pairs (1, 2) and (3, 4)
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
# fig.savefig(f"c1_1_1_1_{r}_{c}.png")  # optional: keep a copy of the figure

-------------------------------------------------------------------------------
  #points     R     N     M     max     #clusters     %largest     %noise
    2513      0.4   20     1    None         2        0.653402    0.024274
-------------------------------------------------------------------------------
Execution time for call of fit():     0 hours, 0 minutes, 0.5373 seconds
recording: ...
../_images/tutorial_tutorial_44_1.png
[135]:
# Finished? Draw the pie() overview of the root object to judge whether the
# hierarchical clustering is detailed enough
# (presumably a pie chart of the cluster hierarchy — confirm).
langerin.pie()
../_images/tutorial_tutorial_45_0.png
[136]:
# Wrap up from child to parent: reel() is called on the deepest parent first
# and on the root last, so that (presumably) each level's child results are
# merged into its parent before the parent itself is wrapped up.
l1_1_1.reel()
l1_1.reel()
l1.reel()
langerin.reel()
[137]:
# Plot the total clustering result of the root object after wrapping up
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))

# First panel: default dimensions; then the pairs (1, 2) and (3, 4)
for panel, dims in zip(Ax, ({}, {"dim": (1, 2)}, {"dim": (3, 4)})):
    obj.evaluate(ax=panel, ax_props=ax_props2, **dims)
fig.tight_layout(pad=0.1)
../_images/tutorial_tutorial_47_0.png
[138]:
# Cluster size overview: number of train points per cluster label,
# printed as indented JSON
cluster_sizes = {
    label: len(members)
    for label, members in langerin.train_clusterdict.items()
}
print(json.dumps(cluster_sizes, indent=4))
{
    "0": 121,
    "1": 1642,
    "2": 810,
    "3": 147,
    "4": 96,
    "5": 83,
    "6": 79,
    "7": 57,
    "8": 31,
    "9": 23,
    "10": 22,
    "11": 21,
    "12": 3,
    "13": 2,
    "14": 1,
    "15": 1,
    "16": 1,
    "17": 1
}
[405]:
# Store train set assignments (disabled: uncomment to write the labels to
# "train_labels_30_5.npy", the file that the next cell loads)
# np.save("train_labels_30_5.npy", langerin.train_labels)
[9]:
# Load previously stored train set assignments instead of re-running the clustering.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load trusted files.
langerin.train_labels = np.load("train_labels_30_5.npy", allow_pickle=True)
# Translate label information into clusterdict information
langerin.labels2dict()
[10]:
# Prepare distance matrices for lookup
# NOTE(review): presumably dist() provides the train distances and map() the
# test-to-train distances needed by predict(..., behaviour="lookup") — confirm.
# The showcase section above calls `calc_dist()` instead of `dist()`; check which
# name the installed package version provides.
langerin.dist()
langerin.map()
[ ]:
# Predict labels for the test set, one group of clusters at a time,
# going from small to large clusters.
# Clusters 14-17 each hold a single train point in the cluster size overview.
# NOTE(review): the two positional arguments are presumably radius cutoff and
# CNN cutoff, as in fit() — confirm against the predict() signature.
langerin.predict(0.2, 0, clusters=[14, 15, 16, 17], behaviour="lookup")
[ ]:
# Clusters 12 and 13 (3 and 2 train points in the cluster size overview)
langerin.predict(0.5, 1, clusters=[12, 13], behaviour="lookup")
[ ]:
# Clusters 10 and 11 (22 and 21 train points in the cluster size overview)
langerin.predict(0.5, 5, clusters=[10, 11], behaviour="lookup")
[ ]:
# Clusters 1 and 2, the two largest (1642 and 810 train points).
# NOTE(review): these are predicted before the smaller clusters 3-9, which
# departs from a strict small-to-large order — confirm this is intended.
langerin.predict(0.4, 20, clusters=[1, 2], behaviour="lookup")
[ ]:
# Clusters 3 to 9 (between 147 and 23 train points in the cluster size overview)
langerin.predict(0.4, 10, clusters=[3, 4, 5, 6, 7, 8, 9], behaviour="lookup")
[ ]:

[ ]:
# Plot the total clustering result for the test set (mode="test")
obj = langerin
fig, Ax = plt.subplots(1, 3, figsize=(6.50128*0.5, 6.50128*0.25*0.618*1.1))
# Use `ax_props2`, the axis settings shared by the other Space A plots;
# the previously referenced name `axprops` is not defined anywhere in the
# visible notebook and would raise a NameError.
obj.evaluate(ax=Ax[0], ax_props=ax_props2, mode="test")
obj.evaluate(ax=Ax[1], ax_props=ax_props2, dim=(1, 2), mode="test")
obj.evaluate(ax=Ax[2], ax_props=ax_props2, dim=(3, 4), mode="test")
fig.tight_layout(pad=0.1)

Coreset MSM estimation

(MSM on train data dt = 10 ns)

[18]:
# Create an MSM object from the train set trajectories returned by get_dtraj().
# step=10 with unit="ns": the train set keeps every 100th frame of the
# 100 ps data, i.e. one frame per 10 ns.
M = cmsm.CMSM(langerin.get_dtraj(mode='train'), unit="ns", step=10)
[21]:
# Estimate csMSM for different lag times (given in steps).
# With a step of 10 ns the lags below correspond to 10, 20, 30 and 40 ns.
# Trajectories shorter than lag*minlenfactor steps are excluded from the
# estimation (see the log printed by each call).
lags = [1, 2, 3, 4]
for i in lags:
    M.cmsm(lag=i, minlenfactor=5)
    # presumably records the implied time scales of this model for plot_its() — confirm
    M.get_its()

*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 10 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 5)
and will not be used to compute the MSM.

Using 129 trajectories with 3126 steps over 17 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 20 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 10)
and will not be used to compute the MSM.

Using 129 trajectories with 3126 steps over 17 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 30 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 15)
and will not be used to compute the MSM.

Using 129 trajectories with 3126 steps over 17 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 40 ns
---------------------------------------------------------

Trajectories [38, 86, 89, 121]
are shorter then step threshold (lag*minlenfactor = 20)
and will not be used to compute the MSM.

Using 126 trajectories with 3073 steps over 17 coresets
---------------------------------------------------------
*********************************************************

[22]:
# Plot the implied time scales collected for the lag times above.
# plot_its() returns the figure and axes first; any further return
# values are discarded via *_.
fig, ax, *_ = M.plot_its()
fig.tight_layout(pad=0.1)
../_images/tutorial_tutorial_63_0.png
[ ]:

[ ]:

Space B

[139]:
# Load the data
# 130 replicas of holo-langerin;
# 100 ps time step;
# 9 dimensional TICA projection;
# 9 ns lag time, selected bb-dihedrals and H-bonds
# NOTE(review): the path is relative to the working directory; `proj` is rebound
# here and replaces the Space A projection. allow_pickle=True unpickles
# arbitrary objects — only load trusted files.
proj = np.load('p9_9.npy', allow_pickle=True)
# Shape of the container and of its first part
print(np.shape(proj), np.shape(proj[0]))
(130,) (4002, 9)
[140]:
# Optionally reduce dimensionality: keep only the first 3 columns of every
# part (the parts are replaced in place inside `proj`)
for i, part in enumerate(proj):
    proj[i] = part[:, :3]
[141]:
# Initialise clustering for Space B with the (reduced) projection as test data
langerin_b = cnn.CNN(test=proj)
# Derive the reduced train set from the test data; (None, None, 100) is
# presumably a (start, stop, step) selection, i.e. every 100th point per part
# (the printed summary shows 4002 test points vs. 41 train points for part 0).
langerin_b.cut(points=(None, None, 100))
print(langerin_b)
Configuration file found in /home/janjoswig
cnn.CNN cluster object
--------------------------------------------------------------------------------
alias :                                  root
hierachy level :                         0

test data shape :                        Parts      - 130
                                         Points     - [4002, 2501, 2201, 2201, 2201, '...']
                                         Dimensions - 3

train data shape :                       Parts      - 130
                                         Points     - [41, 26, 23, 23, 23, '...']
                                         Dimensions - 3

distance matrix calculated (train) :     False
distance matrix calculated (test) :      False
clustered :                              False
children :                               False
--------------------------------------------------------------------------------

[…]

[ ]:

[142]:
# You can also use this clustering and skip the prediction ...
# NOTE(review): `Image` is not imported in any visible cell — presumably
# `IPython.display.Image`; without that import this cell raises a NameError.
Image("c_predicted_9_3.png")
[142]:
../_images/tutorial_tutorial_73_0.png
[143]:
# Load stored train and test set assignments instead of clustering and predicting again.
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load trusted files.
langerin_b.train_labels = np.load("train_labels_9_3.npy", allow_pickle=True)
langerin_b.test_labels = np.load("test_labels_9_3.npy", allow_pickle=True)
# Translate label information into clusterdict information,
# first without a mode argument, then explicitly for the test set
langerin_b.labels2dict()
langerin_b.labels2dict(mode="test")
[144]:
# Create an MSM object from the test set trajectories returned by get_dtraj().
# step=0.1 with unit="ns" matches the 100 ps time step of the full data.
MB = cmsm.CMSM(langerin_b.get_dtraj(mode='test'), unit="ns", step=0.1)
[145]:
# Estimate csMSM for different lag times (given in steps).
# With a step of 0.1 ns the lags below span 0.1 ns to 30 ns.
# Trajectories shorter than lag*minlenfactor steps are excluded from the
# estimation (see the log printed by each call).
lags = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 125, 150, 175, 200, 250, 300]
for i in lags:
    MB.cmsm(lag=i, minlenfactor=5)
    # presumably records the implied time scales of this model for plot_its() — confirm
    MB.get_its()

*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 0.1 ns
---------------------------------------------------------

Using 130 trajectories with 301170 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 1.0 ns
---------------------------------------------------------

Using 130 trajectories with 301170 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 2.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 100)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 3.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 150)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 4.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 200)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 5.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 250)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 6.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 300)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 7.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 350)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 8.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 400)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 9.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 450)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 10.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 500)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 12.5 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 625)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 15.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 750)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 17.5 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 875)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 20.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1000)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 25.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1250)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************


*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 30.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1500)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************

[146]:
# Plot the implied time scales collected for the lag times above.
# plot_its() returns the figure and axes first; any further return
# values are discarded via *_.
fig, ax, *_ = MB.plot_its()
fig.tight_layout(pad=0.1)
../_images/tutorial_tutorial_79_0.png
[147]:
# A model at lag time 10 ns might be a good choice:
# lag=100 steps of 0.1 ns each equal 10 ns.
# minlenfactor is not passed here; the log reports a threshold of
# lag*minlenfactor = 1000, i.e. a default factor of 10 rather than the 5 used above.
MB.cmsm(lag=100)
MB.get_its()

*********************************************************
---------------------------------------------------------
Computing coreset MSM at lagtime 10.0 ns
---------------------------------------------------------

Trajectories [38]
are shorter then step threshold (lag*minlenfactor = 1000)
and will not be used to compute the MSM.

Using 129 trajectories with 301089 steps over 6 coresets
---------------------------------------------------------
*********************************************************

[148]:
# Transition matrix of the model estimated last (lag time 10 ns, 6 core sets).
# NOTE(review): the displayed matrix contains a few small negative entries, so it
# is not strictly a row-stochastic matrix — presumably an artefact of the
# core-set estimation; confirm before interpreting entries as probabilities.
MB.T
[148]:
array([[ 9.96803579e-01,  2.88007674e-04, -2.90934852e-07,
         1.85104164e-03,  7.84927156e-04,  2.72735582e-04],
       [ 2.67709646e-03,  9.68344445e-01,  2.16534956e-02,
         7.32579034e-03, -4.98370471e-08, -7.77422813e-07],
       [-7.50856852e-05,  3.43212694e-02,  9.52231599e-01,
         1.35221937e-02,  1.39780127e-09,  2.18047147e-08],
       [ 5.29790349e-02,  2.23999339e-02,  2.60014695e-02,
         8.98635933e-01, -9.86262053e-07, -1.53849930e-05],
       [ 3.47243911e-02,  1.40713811e-08, -1.42143962e-11,
        -6.79784120e-06,  9.65292477e-01, -1.00838853e-05],
       [ 2.44734782e-02,  9.91739893e-09, -1.00181948e-11,
        -4.79106511e-06, -4.55600275e-07,  9.75531759e-01]])
[149]:
# You can plot the eigenvectors of the transition matrix (right).
# NOTE: the figure size is set through the global rcParams and therefore
# stays in effect for every figure created afterwards.
mpl.rcParams['figure.figsize'] = 6.50128*0.5, 6.50128*0.5*0.618*1*0.25
fig, Ax = MB.plot_eigenvectors()

# Tight margins and no vertical gap between the stacked panels
margins = dict(left=0.08, bottom=0.15, right=0.99, top=0.99, wspace=None, hspace=0)
fig.subplots_adjust(**margins)
../_images/tutorial_tutorial_82_0.png
[150]:
# You can plot the eigenvectors of the transition matrix (left),
# here with invert=True.
# NOTE: the figure size is set through the global rcParams and therefore
# stays in effect for every figure created afterwards.
mpl.rcParams['figure.figsize'] = 6.50128*0.5, 6.50128*0.5*0.618*1*0.25
fig, Ax = MB.plot_eigenvectors(which="left", invert=True)

# Tight margins and no vertical gap between the stacked panels
margins = dict(left=0.08, bottom=0.15, right=0.99, top=0.99, wspace=None, hspace=0)
fig.subplots_adjust(**margins)
../_images/tutorial_tutorial_83_0.png
[ ]:

[ ]:

Benchmark framework

[ ]:
# Coming soon