Implied timescales notebook¶
The purpose of this notebook is to develop a user-friendly API for computing, interpreting (and perhaps plotting) implied timescales
##########################################
# IMPORT ALL REQUIRED PACKAGES
##########################################
# numerics
import numpy as np
# matplotlib
%pylab inline
import matplotlib.pyplot as plt
# emma imports
import pyemma
import pyemma.msm.analysis as msmana
import pyemma.msm.io as msmio
import pyemma.msm.generation as msmgen
# its imports
from pyemma.msm.ui.timescales import ImpliedTimescales
from pyemma.msm.estimation import number_of_states
from pyemma.msm.estimation.dense.bootstrapping import determine_lengths
Populating the interactive namespace from numpy and matplotlib
Helper functions¶
def discretize(_trajs, _bounds):
"""
discretizes the trajectories into two states split at point p
"""
_dtrajs = []
for _traj in _trajs:
_dtraj = np.digitize(_traj,_bounds)
_dtrajs.append(_dtraj)
return _dtrajs
def determine_lengths(dtrajs):
"""
Determines the lengths of all trajectories
"""
if (isinstance(dtrajs[0],(int,int64))):
return len(dtrajs)*np.ones((1))
lengths = np.zeros((len(dtrajs)))
for i in xrange(len(dtrajs)):
lengths[i] = len(dtrajs[i])
return lengths
Example: Double-well potential¶
Next we probe a more complex quasi-continuous example: a metastable double-well potential with 100 microstates, out of which about 60 are populated with significant probability. We coarse-grain the double-well to a few (2 or more) sets. In the coarse-grained state space is usually connected.
# load T matrix
T_doublewell = msmio.read_matrix('./resources/2well.T',mode='sparse')
pi_doublewell = msmana.statdist(T_doublewell.toarray())
U_doublewell = -np.log(pi_doublewell)
trajs = []
T_dense=T_doublewell.toarray()
# start 500 trajectories with length 50 in left well
trajs += msmgen.generate_trajs(T_dense, 500, N=50, start=random.randint(0, 50), dt=10)
# start 100 trajectories with length 50 in right well
trajs += msmgen.generate_trajs(T_dense, 100, N=50, start=random.randint(50, 100), dt=10)
figure(figsize=(8, 4))
subplot2grid((1, 4), (0, 0),colspan=3)
for traj in trajs:
plot(xrange(len(traj)), traj)
plt.xlabel('t')
plt.ylabel('x')
subplot2grid((1, 4), (0, 3))
plot(U_doublewell, xrange(len(U_doublewell)), linewidth=2, color='black')
xlim([2, 10])
plt.xlabel('Energy / kT')
<matplotlib.text.Text at 0x7fa906bf9fd0>
# DECENT DISCRETIZATION
dtrajs_1 = discretize(trajs,[50])
# GOOD DISCRETIZATION
dtrajs_2 = discretize(trajs,[20,30,40,45,50,55,60,70,80])
Test¶
lags = [1, 2, 5, 10, 15, 20, 25, 30, 35, 40]
# compute implied timescales
ITS = ImpliedTimescales(dtrajs_1, lags=lags, nits=5, connected=True, reversible=True)
# bootstrap
ITS.bootstrap(50)
# plot
pyemma.plots.plot_implied_timescales(ITS)
ylim(10, 1000)
(10, 1000)
# compute implied timescales
ITS2 = ImpliedTimescales(dtrajs_2, lags=lags, nits=6, connected=True, reversible=True)
# bootstrap
ITS2.bootstrap(50)
pyemma.plots.plot_implied_timescales(ITS2, refs = 0.1 * msmana.timescales(T_doublewell, k=9)[1:])
xlim(1, 10)
ylim(1, 1000)
(1, 1000)