Source code for aquaduct.apps.valvecore

# -*- coding: utf-8 -*-

# Aqua-Duct, a tool facilitating analysis of the flow of solvent molecules in molecular dynamics simulations
# Copyright (C) 2016-2018  Tomasz Magdziarz, Alicja Płuciennik, Michał Stolarczyk <info@aquaduct.pl>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


#import ipdb as pdb


import ConfigParser
import copy
import multiprocessing as mp
import numpy as np
import operator
import os
import re
import shlex
import sys
from collections import namedtuple, OrderedDict
from functools import wraps, partial
from itertools import izip_longest, izip, imap
from keyword import iskeyword

import array

from scipy.spatial.distance import cdist
from scipy.stats import ttest_ind

from aquaduct.utils.clui import roman

from aquaduct import greetings as greetings_aquaduct
from aquaduct import logger
from aquaduct import version_nice as aquaduct_version_nice
from aquaduct.apps.data import get_vda_reader, GCS, CRIC, save_cric
from aquaduct.geom import traces
from aquaduct.geom.cluster import AVAILABLE_METHODS as available_clusterization_methods
from aquaduct.geom.cluster import PerformClustering, DBSCAN, AffinityPropagation, MeanShift, KMeans, Birch, \
    BarberCluster, get_required_params
from aquaduct.geom.master import CTypeSpathsCollection
from aquaduct.geom.smooth import WindowSmooth, MaxStepSmooth, WindowOverMaxStepSmooth, ActiveWindowSmooth, \
    ActiveWindowOverMaxStepSmooth, DistanceWindowSmooth, DistanceWindowOverMaxStepSmooth, SavgolSmooth
from aquaduct.traj.barber import WhereToCut
from aquaduct.traj.dumps import TmpDumpWriterOfMDA
from aquaduct.traj.inlets import InletClusterGenericType
from aquaduct.traj.inlets import Inlets, InletTypeCodes
from aquaduct.traj.paths import GenericPaths, yield_single_paths, PassingPath, SinglePath
from aquaduct.traj.paths import union_full, yield_generic_paths
# from aquaduct.traj.reader import ReadViaMDA
# from aquaduct.traj.selections import CompactSelectionMDA
from aquaduct.utils import clui
from aquaduct.utils.helpers import range2int, Auto, what2what, lind, is_number, robust_and, robust_or
from aquaduct.utils.multip import optimal_threads
from aquaduct.traj.sandwich import ResidueSelection, Reader
from aquaduct.utils.helpers import SmartRange, iterate_or_die

__mail__ = 'info@aquaduct.pl'
__version__ = aquaduct_version_nice()


###############################################################################
# configuration file helpers

class ConfigSpecialNames:
    special_names_dict = {'none': None,
                          'null': None,
                          'true': True,
                          'false': False,
                          'yes': True,
                          'no': False,
                          'auto': Auto}

    def special_name(self, name):
        if isinstance(name, (str, unicode)):
            if name.lower() in self.special_names_dict:
                return self.special_names_dict[name.lower()]
        return name


# TODO: add parser for initial checking of configuration file
class ValveConfig(object, ConfigSpecialNames):
    def __init__(self):
        self.config = self.get_default_config()
        self.config_filename = ''

    def __make_options_nt(self, input_options):
        # options = {opt: self.special_name(input_options[opt]) for opt in input_options}
        options = list()
        for opt in input_options:
            if iskeyword(opt):
                logger.warning('Invalid keyword <%s> in config file skipped. Check configuration file.' % opt)
                continue
            if opt.replace('_', '').isalnum():
                options.append((opt, self.special_name(input_options[opt])))
            else:
                logger.debug('Keyword <%s> in config file skipped.' % opt)
        options = OrderedDict(options)
        options_nt = namedtuple('Options', options.keys())
        return options_nt(**options)

    @staticmethod
    def common_config_names():
        # execute - what to do: skip, run, runonce
        # dump - name of the file to load previous results from / save results to
        return 'execute dump'.split()

    @staticmethod
    def common_traj_data_config_names():
        # scope - scope definition
        # scope_convexhull - take convex hull of scope, true or false
        # scope_everyframe - evaluate scope in every frame, true or false
        # object - object definition
        return 'scope scope_convexhull scope_everyframe object'.split()

    @staticmethod
    def global_name():
        return 'global'

    @staticmethod
    def cluster_name():
        return 'clusterization'

    @staticmethod
    def recluster_name():
        return 'reclusterization'

    @staticmethod
    def recursive_clusterization_name():
        return 'recursive_clusterization'

    @staticmethod
    def recursive_threshold_name():
        return 'recursive_threshold'

    @staticmethod
    def smooth_name():
        return 'smooth'
    def stage_names(self, nr=None):
        if nr is None:
            # all six stages, 0 through 5
            return [self.stage_names(nr) for nr in range(6)]
        else:
            if nr == 0:
                return 'traceable_residues'
            elif nr == 1:
                return 'raw_paths'
            elif nr == 2:
                return 'separate_paths'
            elif nr == 3:
                return 'inlets_clusterization'
            elif nr == 4:
                return 'analysis'
            elif nr == 5:
                return 'visualize'
        raise NotImplementedError('Stage %r is not implemented.' % nr)
    def get_common_traj_data(self, stage):
        assert isinstance(stage, int)
        # options = {name: None for name in self.common_traj_data_config_names()}
        options = OrderedDict(((name, None) for name in self.common_traj_data_config_names()))
        for nr in range(stage + 1)[::-1]:
            section = self.stage_names(nr)
            for name in self.common_traj_data_config_names():
                if self.config.has_option(section, name):
                    value = self.special_name(self.config.get(section, name))
                    if (value is not None) and (options[name] is None):
                        options.update({name: value})
        return self.__make_options_nt(options)

    def get_global_options(self):
        section = self.global_name()
        names = self.config.options(section)
        # options = {name: self.config.get(section, name) for name in names}
        options = OrderedDict(((name, self.config.get(section, name)) for name in names))
        return self.__make_options_nt(options)

    def get_stage_options(self, stage):
        assert isinstance(stage, int)
        stage_name = self.stage_names(stage)
        names = self.config.options(stage_name)
        # options = {name: self.config.get(stage_name, name) for name in names}
        options = OrderedDict(((name, self.config.get(stage_name, name)) for name in names))
        if stage in [0, 1]:
            options.update(self.get_common_traj_data(stage)._asdict())
        return self.__make_options_nt(options)

    def get_cluster_options(self, section_name=None):
        if section_name is None:
            section = self.cluster_name()
        else:
            section = section_name
        names = self.config.options(section)
        # options = {name: self.config.get(section, name) for name in names}
        options = OrderedDict(((name, self.config.get(section, name)) for name in names))
        # special recursive_clusterization option
        if self.recursive_clusterization_name() not in options:
            options.update({self.recursive_clusterization_name(): None})
        if self.recursive_threshold_name() not in options:
            options.update({self.recursive_threshold_name(): None})
        return self.__make_options_nt(options)

    def get_recluster_options(self):
        return self.get_cluster_options(section_name=self.recluster_name())

    def get_smooth_options(self):
        section = self.smooth_name()
        names = self.config.options(section)
        # options = {name: self.config.get(section, name) for name in names}
        options = OrderedDict(((name, self.config.get(section, name)) for name in names))
        return self.__make_options_nt(options)
    def get_default_config(self):
        # snr = 0  # stage number
        config = ConfigParser.RawConfigParser()

        def common(section):
            for setting in self.common_config_names():
                value = None
                if setting == 'execute':
                    value = 'runonce'
                elif setting == 'dump':
                    value = '%d_%s_data.dump' % (snr + 1, section)
                if value is None:
                    config.set(section, setting)
                else:
                    config.set(section, setting, value=value)

        def common_traj_data(section, commented=False):
            for setting in self.common_traj_data_config_names():
                if commented:
                    setting = '#' + setting
                config.set(section, setting, 'None')

        ################
        # global settings
        section = self.global_name()
        config.add_section(section)
        # top - top file name
        # nc - netcdf file name
        config.set(section, 'top')  # topology
        config.set(section, 'trj')  # trajectory
        # config.set(section, 'pbar', 'simple')

        ################
        snr = 0  # stage number
        # stage I
        # find traceable residues
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        common_traj_data(section)
        config.set(section, 'scope_everyframe', 'False')

        ################
        snr += 1
        # stage II
        # find raw paths
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        common_traj_data(section)
        config.set(section, 'clear_in_object_info', 'False')

        ################
        snr += 1
        # stage III
        # create separate paths
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        config.set(section, 'allow_passing_paths', 'False')
        config.set(section, 'auto_barber_tovdw', 'True')
        config.set(section, 'auto_barber_maxcut', '2.8')
        config.set(section, 'auto_barber_mincut', 'None')
        config.set(section, 'auto_barber_maxcut_level', 'True')
        config.set(section, 'auto_barber_mincut_level', 'True')
        config.set(section, 'auto_barber', 'False')
        config.set(section, 'discard_empty_paths', 'True')
        config.set(section, 'sort_by_id', 'True')
        config.set(section, 'apply_smoothing', 'False')
        config.set(section, 'apply_soft_smoothing', 'True')
        config.set(section, 'discard_short_paths', '20')
        config.set(section, 'discard_short_object', '2.0')
        config.set(section, 'discard_short_logic', 'or')

        ################
        snr += 1
        # stage IV
        # inlets clusterization
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        config.set(section, 'max_level', '0')
        config.set(section, 'recluster_outliers', 'False')
        config.set(section, 'detect_outliers', 'False')
        config.set(section, 'singletons_outliers', 'False')
        config.set(section, 'create_master_paths', 'False')
        config.set(section, 'exclude_passing_in_clusterization', 'True')
        config.set(section, 'add_passing_to_clusters', 'None')

        ################
        # smooth
        section = self.smooth_name()
        config.add_section(section)
        config.set(section, 'method', 'window')

        ################
        # clusterization
        section = self.cluster_name()
        config.add_section(section)
        config.set(section, 'method', 'barber')
        config.set(section, self.recursive_clusterization_name(), self.cluster_name())
        config.set(section, self.recursive_threshold_name(), 'False')

        ################
        # reclusterization
        section = self.recluster_name()
        config.add_section(section)
        config.set(section, 'method', 'dbscan')
        config.set(section, self.recursive_clusterization_name(), 'False')
        config.set(section, self.recursive_threshold_name(), 'False')

        ################
        snr += 1
        # stage V
        # analysis
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        config.remove_option(section, 'dump')
        config.set(section, 'save', '%d_%s_results.txt' % (snr + 1, section))
        config.set(section, 'calculate_scope_object_size', 'False')
        config.set(section, 'scope_chull', 'None')
        config.set(section, 'object_chull', 'None')
        config.set(section, 'dump_config', 'True')

        ################
        snr += 1
        # stage VI
        # visualize
        section = self.stage_names(snr)
        config.add_section(section)
        common(section)
        config.remove_option(section, 'dump')
        config.set(section, 'save', '%d_%s_results.py' % (snr + 1, section))
        config.set(section, 'simply_smooths', 'RecursiveVector')
        # visualize spaths, all paths in one object
        config.set(section, 'all_paths_raw', 'False')
        config.set(section, 'all_paths_smooth', 'False')
        config.set(section, 'all_paths_split', 'False')  # split by in obj out
        config.set(section, 'all_paths_raw_io', 'False')
        config.set(section, 'all_paths_smooth_io', 'False')
        # visualize spaths, separate objects
        config.set(section, 'paths_raw', 'False')
        config.set(section, 'paths_smooth', 'False')
        config.set(section, 'paths_states', 'False')
        config.set(section, 'paths_raw_io', 'False')
        config.set(section, 'paths_smooth_io', 'False')
        config.set(section, 'ctypes_raw', 'False')
        config.set(section, 'ctypes_smooth', 'False')
        # visualize clusters
        config.set(section, 'inlets_clusters', 'False')
        # show protein
        config.set(section, 'show_molecule', 'None')
        config.set(section, 'show_molecule_frames', '0')
        config.set(section, 'show_scope_chull', 'None')
        config.set(section, 'show_scope_chull_frames', '0')
        config.set(section, 'show_object_chull', 'None')
        config.set(section, 'show_object_chull_frames', '0')

        return config
    def load_config(self, filename):
        self.config.read(filename)
        self.config_filename = filename

    def save_config_stream(self, fs):
        self.config.write(fs)

    def save_config(self, filename):
        with open(filename, 'w') as fs:
            self.save_config_stream(fs)
    def get_general_comment(self, section):
        if section == self.global_name():
            return ['Global settings.']
        if section == self.cluster_name():
            out = ['Possible clusterization methods:']
            # get default clusterization method
            defmet = self.config.get(section, 'method')
            for method in available_clusterization_methods:
                if method == defmet:
                    continue
                out.append('method = %s' % method)
                params = get_required_params(method)
                if params:
                    sss = ''
                    if len(params) > 1:
                        sss = 's'
                    out.append('Required parameter%s for %s method:' % (sss, method))
                    for param in params:
                        out.append('%s = None' % param)
                # out.append('')
            return out
    def dump_config(self, dump_template=False):
        skip_list = '''simply_smooths'''.split()  # these options are very optional!
        concise = True
        if dump_template:
            concise = False

        output = []
        options = [self.get_global_options()] + \
                  [self.get_stage_options(stage) for stage in range(6)] + \
                  [self.get_cluster_options(), self.get_recluster_options(),
                   self.get_smooth_options()]
        names = [self.global_name()] + \
                [self.stage_names(stage) for stage in range(6)] + \
                [self.cluster_name(), self.recluster_name(),
                 self.smooth_name()]

        def value2str(val):
            if val is Auto:
                val = "Auto"
            else:
                val = str(val)
            return val

        # this loop ensures sections are dumped in a particular order
        for opts, name in zip(options, names):
            output.append('[%s]' % name)  # section name
            # general comments
            if dump_template:
                comment = self.get_general_comment(name)
                if comment:
                    output.extend(['# %s' % line for line in comment])
            for key, value in opts._asdict().iteritems():  # loop over options
                if key in skip_list:
                    continue
                # comment out scope etc. in stage II if dump_template
                if dump_template and name == self.stage_names(1):
                    if key in self.common_traj_data_config_names():
                        key = '#' + key
                output.append('%s = %s' % (key, value2str(value)))
            if not concise:
                output.append('')

        # is something missing? another loop over all additional sections
        for miss in self.config.sections():
            if miss in names:
                continue  # skip sections already dumped in the loop above
            output.append('[%s]' % miss)
            for key in self.config.options(miss):  # loop over options in section miss
                if key in skip_list:
                    continue
                value = self.config.get(miss, key)
                output.append('%s = %s' % (key, value2str(value)))
            if not concise:
                output.append('')

        while not len(output[-1].strip()):
            output.pop()

        return output
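
# Illustrative example (not part of the original module): dumping a commented
# configuration template with ValveConfig; the output file name is hypothetical.
def _example_dump_template():
    vc = ValveConfig()
    with open('valve_template.txt', 'w') as fh:  # hypothetical file name
        fh.write('\n'.join(vc.dump_config(dump_template=True)))
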
################################################################################

def get_res_in_scope(is_res_in_scope, res):
    res_new = None
    for iris, r in zip(is_res_in_scope, res.iterate_over_residues()):
        if iris:
            if res_new is None:
                res_new = r
            else:
                res_new += r
    return res_new


################################################################################
# separators - logging

def sep():
    return clui.gsep(sep='-', times=48)


def asep():
    return clui.gsep(sep='=', times=72)


################################################################################

def get_smooth_method(soptions):
    assert soptions.method in ['window', 'mss', 'window_mss', 'awin',
                               'awin_mss', 'dwin', 'dwin_mss', 'savgol'], \
        'Unknown smoothing method %s.' % soptions.method

    opts = {}
    if 'recursive' in soptions._asdict():
        opts.update({'recursive': int(soptions.recursive)})

    def window_opts():
        if 'window' in soptions._asdict():
            opts.update({'window': int(float(soptions.window))})
        function_opts()

    def awin_dwin_opts():
        if 'window' in soptions._asdict():
            opts.update({'window': float(soptions.window)})
        function_opts()

    def function_opts():
        if 'function' in soptions._asdict():
            assert soptions.function in ['mean', 'median'], 'Unknown smoothing function %s.' % soptions.function
            if soptions.function == 'mean':
                opts.update({'function': np.mean})
            if soptions.function == 'median':
                opts.update({'function': np.median})

    def mss_opts():
        if 'step' in soptions._asdict():
            opts.update({'step': float(soptions.step)})

    def savgol_opts():
        window_length = 5  # TODO: magic constant (default value)
        if 'window' in soptions._asdict():
            window_length = int(float(soptions.window))
            assert window_length % 2 == 1, \
                'Window in Savgol method must be a positive odd number, %d given instead.' % window_length
        opts.update({'window_length': window_length})
        polyorder = 2  # TODO: magic constant (default value)
        if 'polyorder' in soptions._asdict():
            polyorder = int(float(soptions.polyorder))
            assert polyorder > 0, 'Polynomial order should be greater than 0, %d given instead.' % polyorder
        opts.update({'polyorder': polyorder})
        assert polyorder < window_length, 'Polynomial order (%d) should be less than window (%d).' % (
            polyorder, window_length)
        '''
        if 'deriv' in soptions._asdict():
            deriv = int(float(soptions.deriv))
            assert deriv >= 0 and deriv <= polyorder, \
                'Order of derivative should be an integer greater or equal 0 and less than or equal to polynomial order (%d), %d given instead.' % (polyorder, deriv)
            opts.update({'deriv': deriv})
        if 'delta' in soptions._asdict():
            delta = float(soptions.delta)
            assert delta >= 0, 'Delta should be greater or equal 0, %f given instead.' % delta
            if 'deriv' in soptions._asdict():
                if deriv == 0:
                    logger.warning('Delta %f makes no sense if deriv is 0.' % delta)
            opts.update({'delta': delta})
        mode = 'interp'  # TODO: magic constant (default value)
        if 'mode' in soptions._asdict():
            mode = str(soptions.mode)
            assert mode in ['mirror', 'constant', 'nearest', 'wrap', 'interp'], 'Unknown mode %s.' % mode
        opts.update({'mode': mode})
        if 'cval' in soptions._asdict():
            cval = float(soptions.cval)
            if mode != 'constant':
                logger.warning('Cval makes no sense if mode is %s.' % mode)
            opts.update({'cval': cval})
        '''

    if soptions.method == 'window':
        window_opts()
        smooth = WindowSmooth(**opts)
    elif soptions.method == 'awin':
        awin_dwin_opts()
        smooth = ActiveWindowSmooth(**opts)
    elif soptions.method == 'dwin':
        awin_dwin_opts()
        smooth = DistanceWindowSmooth(**opts)
    elif soptions.method == 'mss':
        mss_opts()
        smooth = MaxStepSmooth(**opts)
    elif soptions.method == 'window_mss':
        window_opts()
        mss_opts()
        smooth = WindowOverMaxStepSmooth(**opts)
    elif soptions.method == 'awin_mss':
        awin_dwin_opts()
        mss_opts()
        smooth = ActiveWindowOverMaxStepSmooth(**opts)
    elif soptions.method == 'dwin_mss':
        awin_dwin_opts()
        mss_opts()
        smooth = DistanceWindowOverMaxStepSmooth(**opts)
    elif soptions.method == 'savgol':
        savgol_opts()
        smooth = SavgolSmooth(**opts)

    return smooth


def get_auto_barber_options(abo):
    opts = {}
    if 'auto_barber' in abo._asdict():
        opts.update({'selection': str(abo.auto_barber)})
    if 'auto_barber_maxcut' in abo._asdict():
        if is_number(abo.auto_barber_maxcut):
            opts.update({'maxcut': float(abo.auto_barber_maxcut)})
        else:
            opts.update({'maxcut': None})
    if 'auto_barber_mincut' in abo._asdict():
        if is_number(abo.auto_barber_mincut):
            opts.update({'mincut': float(abo.auto_barber_mincut)})
        else:
            opts.update({'mincut': None})
    if 'auto_barber_mincut_level' in abo._asdict():
        opts.update({'mincut_level': bool(abo.auto_barber_mincut_level)})
    if 'auto_barber_maxcut_level' in abo._asdict():
        opts.update({'maxcut_level': bool(abo.auto_barber_maxcut_level)})
    if 'auto_barber_tovdw' in abo._asdict():
        opts.update({'tovdw': bool(abo.auto_barber_tovdw)})
    return opts


def get_clustering_method(coptions, config):
    assert coptions.method in available_clusterization_methods, \
        'Unknown clusterization method %s.' % coptions.method

    opts = {}

    def dbscan_opts():
        if 'eps' in coptions._asdict():
            opts.update({'eps': float(coptions.eps)})
        if 'min_samples' in coptions._asdict():
            opts.update({'min_samples': int(coptions.min_samples)})
        if 'metric' in coptions._asdict():
            assert coptions.metric in ['cityblock', 'cosine', 'euclidean', 'manhattan'], \
                "Unknown metric <%s>." % coptions.metric
            opts.update({'metric': str(coptions.metric)})
        if 'algorithm' in coptions._asdict():
            assert coptions.algorithm in ['auto', 'ball_tree', 'kd_tree', 'brute'], \
                "Unknown NN algorithm <%s>." % coptions.algorithm
            opts.update({'algorithm': str(coptions.algorithm)})
        if 'leaf_size' in coptions._asdict():
            opts.update({'leaf_size': int(coptions.leaf_size)})

    def affprop_opts():
        if 'damping' in coptions._asdict():
            opts.update({'damping': float(coptions.damping)})
        if 'convergence_iter' in coptions._asdict():
            opts.update({'convergence_iter': int(coptions.convergence_iter)})
        if 'max_iter' in coptions._asdict():
            opts.update({'max_iter': int(coptions.max_iter)})
        if 'preference' in coptions._asdict():
            opts.update({'preference': float(coptions.preference)})

    def kmeans_opts():
        if 'n_clusters' in coptions._asdict():
            opts.update({'n_clusters': int(coptions.n_clusters)})
        if 'max_iter' in coptions._asdict():
            opts.update({'max_iter': int(coptions.max_iter)})
        if 'n_init' in coptions._asdict():
            opts.update({'n_init': int(coptions.n_init)})
        if 'init' in coptions._asdict():
            assert coptions.init in ['k-means++', 'random'], \
                "Unknown initialization method <%s>." % coptions.init
            opts.update({'init': str(coptions.init)})
        if 'tol' in coptions._asdict():
            opts.update({'tol': float(coptions.tol)})

    def meanshift_opts():
        if 'cluster_all' in coptions._asdict():
            opts.update({'cluster_all': bool(coptions.cluster_all)})
        if 'bandwidth' in coptions._asdict():
            if coptions.bandwidth in (Auto, None):
                opts.update({'bandwidth': coptions.bandwidth})
            else:
                opts.update({'bandwidth': float(coptions.bandwidth)})
        if 'bin_seeding' in coptions._asdict():
            opts.update({'bin_seeding': bool(coptions.bin_seeding)})
        if 'min_bin_freq' in coptions._asdict():
            opts.update({'min_bin_freq': int(coptions.min_bin_freq)})

    def birch_opts():
        if 'threshold' in coptions._asdict():
            opts.update({'threshold': float(coptions.threshold)})
        if 'branching_factor' in coptions._asdict():
            opts.update({'branching_factor': int(coptions.branching_factor)})
        if 'n_clusters' in coptions._asdict():
            opts.update({'n_clusters': int(coptions.n_clusters)})

    def barber_opts():
        abo = config.get_stage_options(2)
        opts.update(get_auto_barber_options(abo))
        opts.update(get_auto_barber_options(coptions))

    if coptions.method == 'dbscan':
        dbscan_opts()
        method = DBSCAN
    elif coptions.method == 'affprop':
        affprop_opts()
        method = AffinityPropagation
    elif coptions.method == 'kmeans':
        kmeans_opts()
        method = KMeans
    elif coptions.method == 'meanshift':
        meanshift_opts()
        method = MeanShift
    elif coptions.method == 'birch':
        birch_opts()
        method = Birch
    elif coptions.method == 'barber':
        barber_opts()
        method = BarberCluster

    return PerformClustering(method, **opts)


def get_linearize_method(loption):
    if loption:
        assert isinstance(loption, (str, unicode)), "Wrong Linearize method definition: %r" % loption
        possible_formats = [
            re.compile('^(recursive|oneway|hobbit)(triangle|vector)[(][+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[)]$'),
            re.compile('^(recursive|oneway|hobbit)(triangle|vector)[(][)]$'),
            re.compile('^(recursive|oneway|hobbit)(triangle|vector)$')]
        assert True in [pf.match(loption.lower()) is not None for pf in possible_formats], \
            "Wrong Linearize method definition: %s" % loption
        # http://stackoverflow.com/questions/12929308/python-regular-expression-that-matches-floating-point-numbers#12929311
        way = [w for w in ['recursive', 'oneway', 'hobbit'] if w in loption.lower()][0]
        crit = [c for c in ['triangle', 'vector'] if c in loption.lower()][0]
        threshold = re.compile('[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?').findall(loption)
        if len(threshold):
            threshold = float(threshold[0][0])
        else:
            threshold = None
        # get method
        if way == 'recursive':
            if crit == 'triangle':
                met = traces.LinearizeRecursiveTriangle
            else:
                met = traces.LinearizeRecursiveVector
        elif way == 'oneway':
            if crit == 'triangle':
                met = traces.LinearizeOneWayTriangle
            else:
                met = traces.LinearizeOneWayVector
        elif way == 'hobbit':
            if crit == 'triangle':
                met = traces.LinearizeHobbitTriangle
            else:
                met = traces.LinearizeHobbitVector
        if threshold is None:
            return met()
        return met(threshold)


################################################################################
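
# Illustrative sketch (not part of the original module): get_smooth_method expects a
# namedtuple-like object such as the one returned by ValveConfig.get_smooth_options();
# here one is built by hand, mirroring the default 'window' method.
def _example_get_smooth_method():
    SOpts = namedtuple('SOpts', 'method window function')
    soptions = SOpts(method='window', window='5', function='mean')
    return get_smooth_method(soptions)  # a WindowSmooth(window=5, function=np.mean)
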
def valve_begin():
    clui.message(greetings_aquaduct())  # nice greetings
    clui.message('Aqua-Duct version %s' % aquaduct_version_nice())
    # clui.message('Valve driver version %s' % version_nice())
    clui.message(sep())


def valve_end():
    clui.message(sep())
    clui.message('Let the Valve be always open!')
    clui.message('Goodbye!')


def valve_load_config(filename, config):
    assert filename is not None, "No config file provided."
    assert os.path.isfile(filename), "Config file %s does not exist." % filename
    with clui.fbm('Load configuration file'):
        config.load_config(filename)
'''
def valve_read_trajectory(top, traj, frames_window=None):
    with clui.fbm('Read trajectory'):
        return TrajectoryReader(top, traj, frames_window=frames_window)  # read trajectory
        # traj_list = shlex.split(traj)
        # return ReadAmberNetCDFviaMDA(top, traj_list)
        # reader = ReadDCDviaMDA(topology, trajectory)
'''


def valve_begin_stage(stage, config):
    clui.message(sep())
    clui.message('Starting Stage %s: %s' % (roman.toRoman(stage + 1), config.stage_names(stage)))
    options = config.get_stage_options(stage)
    clui.message('Execute mode: %s' % options.execute)
    return options
def valve_exec_stage(stage, config, stage_run, no_io=False, run_status=None, **kwargs):
    with clui.tictoc('Stage %s (%s)' % (roman.toRoman(stage + 1), config.stage_names(stage))):
        # This function runs stages in a smart way, checks execution logic, and loads/saves dumps if required.
        options = valve_begin_stage(stage, config)

        run_status.update({stage: False})

        # TODO: Consider creating the traj_reader object here instead of doing it in stage_run or in load...
        # execute?
        can_be_loaded = False
        if (not no_io) and options.dump:
            if os.path.isfile(options.dump) or os.path.islink(options.dump):
                can_be_loaded = True
        # has to be run?
        if options.execute in ['runonce'] and can_be_loaded and stage > 0:
            if run_status[stage - 1]:
                can_be_loaded = False

        if options.execute in ['run'] or (options.execute in ['runonce'] and not can_be_loaded):
            result = stage_run(config, options, **kwargs)
            save_cric()
            run_status.update({stage: True})
            if not no_io:
                ###########
                # S A V E #
                ###########
                with clui.fbm('Saving data dump in %s file' % options.dump):
                    vda = get_vda_reader(options.dump)
                    vda(mode='w', data_file_name=options.dump).dump(**result)
                # save_stage_dump(options.dump, **result)
        elif options.execute in ['skip'] or (options.execute in ['runonce'] and can_be_loaded):
            if not no_io:
                ###########
                # L O A D #
                ###########
                if options.dump:
                    with clui.fbm('Loading data dump from %s file' % options.dump):
                        vda = get_vda_reader(options.dump)
                        result = vda(mode='r', data_file_name=options.dump).load()
                    # result = load_stage_dump(options.dump, reader=reader)
        else:
            raise NotImplementedError('exec mode %s not implemented' % options.execute)
        # remove options stuff
        if not no_io:
            if result is not None:
                return dict(((key, val) for key, val in result.iteritems() if 'options' not in key))
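
# Illustrative sketch (not part of the original module): how a driver might chain
# stages with valve_exec_stage. Each stage function receives the previous stage's
# results as keyword arguments; run_status records which stages were recomputed.
def _example_stage_loop(config):
    run_status = {}
    result = {}
    for stage, stage_run in enumerate((stage_I_run, stage_II_run, stage_III_run)):
        out = valve_exec_stage(stage, config, stage_run, run_status=run_status, **result)
        if out is not None:
            result = out
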
################################################################################
# stages run

# traceable_residues
def stage_I_run(config, options, **kwargs):
    # create pool of workers - mapping function
    map_fun = map
    if optimal_threads.threads_count > 1:
        pool = mp.Pool(optimal_threads.threads_count)
        map_fun = pool.map

    clui.message("Loop over frames - search of residues in object:")
    pbar = clui.pbar(Reader.number_of_frames())

    # create some containers
    number_frame_rid_in_object = []
    # res_ids_in_scope_over_frames = {}  # not used
    all_res = None

    # scope will be used to derive center of system
    center_of_system = np.array([0., 0., 0.])

    # loop over possible layers of sandwich
    for number, traj_reader in Reader.iterate(number=True):
        # scope is evaluated only once before the loop over frames starts
        if not options.scope_everyframe:
            scope = traj_reader.parse_selection(options.scope)
        # all_res_this_layer = []  # list of all new res in this layer found in order of appearance
        frame_rid_in_object = []
        # the loop over frames
        for frame in traj_reader.iterate_over_frames():
            if options.scope_everyframe:
                scope = traj_reader.parse_selection(options.scope)
            # center of system
            center_of_system += scope.center_of_mass()
            # current res selection
            res = traj_reader.parse_selection(options.object).residues()
            # find matching residues, ie those which are in the scope:
            res_new = scope.containing_residues(res, convex_hull=options.scope_convexhull, map_fun=map_fun)
            res_new.uniquify()
            # here is a list of residues in this layer that are in the object and in the scope
            # add them to all_res
            if all_res:
                all_res.add(res_new)
                all_res.uniquify()
            else:
                all_res = res_new
            # remember ids of res in object in current frame
            if res_new is not None:
                frame_rid_in_object.append([rid[-1] for rid in res_new.ids()])
            else:
                frame_rid_in_object.append([])
            pbar.next()
        number_frame_rid_in_object.append(frame_rid_in_object)

    # destroy pool of workers
    if optimal_threads.threads_count > 1:
        pool.close()
        pool.join()
        del pool

    # res_ids_in_object_frames_list = [res_ids_in_object_frames_list[number][rid] for number, rid in all_res.ids()]
    pbar.finish()

    center_of_system /= (Reader.number_of_frames())
    logger.info('Center of system is %0.2f, %0.2f, %0.2f' % tuple(center_of_system))

    if all_res is None:
        raise ValueError("No traceable residues were found.")

    clui.message("Number of residues to trace: %d" % all_res.len())

    # 'res_ids_in_object_over_frames': IdsOverIds.dict2arrays(res_ids_in_object_over_frames),
    return {'all_res': all_res,
            # 'res_ids_in_object_over_frames': res_ids_in_object_over_frames,
            'number_frame_rid_in_object': number_frame_rid_in_object,
            'center_of_system': center_of_system,
            'options': options._asdict()}
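
# Illustrative note (not part of the original module): number_frame_rid_in_object is
# nested per sandwich layer, then per frame, holding the ids of residues found in the
# object in that frame; the ids below are hypothetical.
def _example_number_frame_rid_in_object():
    one_layer = [[1001, 1002],  # frame 0: two residues inside the object
                 [1002],        # frame 1: one residue
                 []]            # frame 2: none
    return [one_layer]          # one entry per layer
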
################################################################################

# raw_paths
def stage_II_run(config, options,
                 all_res=None,
                 number_frame_rid_in_object=None,
                 # res_ids_in_object_over_frames=None,
                 **kwargs):
    ####################################################################################################################
    # FIXME: temporary solution, remove it later
    if 'res_ids_in_object_over_frames' in kwargs:
        res_ids_in_object_over_frames = kwargs['res_ids_in_object_over_frames']
    else:
        res_ids_in_object_over_frames = None
    if number_frame_rid_in_object is None and res_ids_in_object_over_frames is not None:
        with clui.fbm("Get number_frame_rid_in_object if possible"):
            number_frame_rid_in_object = []
            for number in xrange(max(res_ids_in_object_over_frames.keys())):
                frame_rid_in_object = []
                for layer in xrange(max(res_ids_in_object_over_frames[number].keys())):
                    frame_rid_in_object.append(res_ids_in_object_over_frames[number][layer])
                number_frame_rid_in_object.append(frame_rid_in_object)
    ####################################################################################################################

    # create pool of workers - mapping function
    map_fun = map
    if optimal_threads.threads_count > 1:
        pool = mp.Pool(optimal_threads.threads_count)
        chunk_size = all_res.len() / Reader.number_of_layers() / (optimal_threads.threads_count ** 1) - 1
        if chunk_size <= 0:
            chunk_size = 1
        logger.debug("Chunk size %d.", chunk_size)
        map_fun = partial(pool.imap, chunksize=chunk_size)
        # map_fun = pool.map

    # clear in object info if required
    if options.clear_in_object_info:
        clui.message('Clear data on residues in object over frames.')
        clui.message('This will be recalculated on demand.')
        number_frame_rid_in_object = None

    is_number_frame_rid_in_object = bool(number_frame_rid_in_object)

    '''
    with clui.fbm("Init paths container"):
        number_of_frames = Reader.window.len() - 1
        paths = dict(((resid, GenericPaths(resid,
                                           name_of_res=resname,
                                           single_res_selection=sressel,
                                           min_pf=0, max_pf=number_of_frames))
                      for resid, resname, sressel in zip(all_res.ids(), all_res.names(),
                                                         all_res.single_residues())))
    '''
    number_of_frames = Reader.number_of_frames(onelayer=True) - 1

    clui.message("Trajectory scan:")
    pbar = clui.pbar(Reader.number_of_frames())

    # loop over possible layers of sandwich
    paths = []
    for frame_rid_in_object, (number, traj_reader) in izip(
            iterate_or_die(number_frame_rid_in_object,
                           times=Reader.number_of_layers()),
            Reader.iterate(number=True)):

        # scope is evaluated only once before the loop over frames so it cannot be frame dependent
        if not options.scope_everyframe:
            scope = traj_reader.parse_selection(options.scope)
            logger.debug("Scope definition evaluated only once for given layer")
        else:
            logger.debug("Scope definition evaluated in every frame, this might be very slow.")

        # speed up!
        all_res_this_layer = all_res.layer(number)
        all_res_this_ids = list(all_res_this_layer.ids())

        paths_this_layer = (GenericPaths(resid,
                                         name_of_res=resname,
                                         single_res_selection=sressel,
                                         min_pf=0, max_pf=number_of_frames)
                            for resid, resname, sressel in izip(all_res_this_ids,
                                                                all_res_this_layer.names(),
                                                                all_res_this_layer.single_residues()))

        # big container for 012 path data
        number_frame_object_scope = np.zeros((Reader.number_of_frames(onelayer=True), all_res_this_layer.len()),
                                             dtype=np.int8)

        # the loop over frames, use izip otherwise iteration over frames does not work
        for rid_in_object, frame in izip(
                iterate_or_die(frame_rid_in_object,
                               times=Reader.number_of_frames(onelayer=True)),
                traj_reader.iterate_over_frames()):
            # do we have object data?
            if not is_number_frame_rid_in_object:
                rid_in_object = [rid[-1] for rid in traj_reader.parse_selection(options.object).residues().ids()]
            # assert rid_in_object is not None
            is_res_in_object = (rid[-1] in rid_in_object for rid in all_res_this_ids)

            if options.scope_everyframe:
                scope = traj_reader.parse_selection(options.scope)
            # check if all_res are in the scope, reuse res_ids_in_object_over_frames
            is_res_in_scope = scope.contains_residues(all_res_this_layer, convex_hull=options.scope_convexhull,
                                                      map_fun=map_fun,
                                                      known_true=None)  # known_true could be rid_in_object

            number_frame_object_scope[frame, :] = np.array(map(sum, izip(is_res_in_object, is_res_in_scope)),
                                                           dtype=np.int8)
            pbar.next()

        # number_frame_object_scope = np.array(number_frame_object_scope, dtype=np.int8).T
        # another loop over the columns
        for pat, nfos in izip(paths_this_layer, number_frame_object_scope.T):
            pat.add_012(nfos)
            paths.append(pat)
        del number_frame_object_scope
        # paths.extend(paths_this_layer)

    # destroy pool of workers
    if optimal_threads.threads_count > 1:
        pool.close()
        pool.join()
        del pool

    pbar.finish()

    clui.message("Number of paths: %d" % len(paths))

    return {'all_res': all_res, 'paths': paths, 'options': options._asdict()}
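
# Illustrative example (not part of the original module): per frame, each residue's
# state is the sum of two boolean tests, giving 0 (outside scope), 1 (in scope only)
# or 2 (in scope and in object); GenericPaths.add_012 consumes one such column per
# residue.
def _example_012_column():
    is_in_object = [0, 1, 1, 0]
    is_in_scope = [1, 1, 1, 0]
    return np.array(map(sum, izip(is_in_object, is_in_scope)), dtype=np.int8)  # [1, 2, 2, 0]
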
################################################################################

class ABSphere(namedtuple('ABSphere', 'center radius')):
    pass


# separate_paths
def stage_III_run(config, options, paths=None, **kwargs):
    soptions = config.get_smooth_options()

    if options.allow_passing_paths:
        logger.warning("Passing paths is a highly experimental feature. Please, analyze results with care.")

    if options.discard_empty_paths:
        with clui.fbm("Discard residues with empty paths"):
            paths = [pat for pat in paths if len(pat.frames) > 0]

    clui.message("Create separate paths:")
    pbar = clui.pbar(len(paths))
    # yield_single_paths requires a list of paths, not a dictionary
    spaths = [sp for sp, nr in yield_single_paths(paths, progress=True,
                                                  passing=options.allow_passing_paths)
              if pbar.update(nr + 1) is None]
    pbar.finish()
    clui.message("Created %d separate paths out of %d raw paths" % (len(spaths), len(paths)))

    pbar = clui.pbar(len(spaths), "Removing unused parts of paths:")
    paths = yield_generic_paths(spaths, progress=pbar)
    pbar.finish()

    if options.discard_short_paths or options.discard_short_object:
        if is_number(options.discard_short_paths):
            short_paths = int(options.discard_short_paths)
        else:
            short_paths = None
        if is_number(options.discard_short_object):
            short_object = float(options.discard_short_object)
        else:
            short_object = None
        if options.discard_short_logic == 'and':
            short_logic = robust_or  # for and use or, this is intentional
            short_logic_name = "AND"
        else:
            short_logic = robust_and
            short_logic_name = "OR"
            if options.discard_short_logic != 'or':
                logger.warning("Invalid discard_short_logic '%s', using %s by default." % (
                    options.discard_short_logic, short_logic_name))
        # make message
        if short_paths is not None and short_object is not None:
            discard_message = "Discard paths shorter than %d %s object shorter than %0.2f" % (
                short_paths, short_logic_name, short_object)
        elif short_paths is None and short_object is not None:
            discard_message = "Discard paths object shorter than %0.2f" % short_object
        elif short_paths is not None and short_object is None:
            discard_message = "Discard paths shorter than %d" % short_paths
        if short_paths is not None or short_object is not None:
            with clui.fbm(discard_message):
                spaths_nr = len(spaths)
                # TODO: if short_object is not used there is no sense in calling object_len as it is very expensive
                if short_object is not None:
                    spaths = [sp for sp in spaths
                              if short_logic(sp.size > short_paths, sp.object_len > short_object)]
                else:
                    spaths = [sp for sp in spaths if sp.size > short_paths]
                spaths_nr_new = len(spaths)
                if spaths_nr == spaths_nr_new:
                    clui.message("No paths were discarded.")
                else:
                    clui.message("%d paths were discarded." % (spaths_nr - spaths_nr_new))
        else:
            clui.message("No paths were discarded - no values were set.")

    if options.auto_barber:
        wtc = WhereToCut(spaths=spaths,
                         selection=options.auto_barber,
                         mincut=options.auto_barber_mincut,
                         mincut_level=options.auto_barber_mincut_level,
                         maxcut=options.auto_barber_maxcut,
                         maxcut_level=options.auto_barber_maxcut_level,
                         tovdw=options.auto_barber_tovdw)
        # cut thyself!
        wtc.cut_thyself()

        clui.message("Auto Barber in action:")
        pbar = clui.pbar(len(paths))
        for p in paths:
            p.barber_with_spheres(wtc.spheres)
            pbar.next()
        pbar.finish()

        # now, it might be that some of the paths are empty
        # paths = {k: v for k, v in paths.iteritems() if len(v.coords) > 0}
        # paths = dict((k, v) for k, v in paths.iteritems() if
        #              len(v.frames) > 0)  # more universal as dict comprehension may not work in <2.7
        paths = [pat for pat in paths if len(pat.frames) > 0]

        clui.message("Recreate separate paths:")
        pbar = clui.pbar(len(paths))
        # yield_single_paths requires a list of paths, not a dictionary
        spaths = [sp for sp, nr in yield_single_paths(paths, progress=True,
                                                      passing=options.allow_passing_paths)
                  if pbar.update(nr + 1) is None]
        pbar.finish()

        if options.discard_short_paths or options.discard_short_object:
            if short_paths is not None or short_object is not None:
                with clui.fbm(discard_message):
                    spaths_nr = len(spaths)
                    spaths = [sp for sp in spaths
                              if short_logic(sp.size > short_paths, sp.object_len > short_object)]
                    spaths_nr_new = len(spaths)
                    if spaths_nr == spaths_nr_new:
                        clui.message("No paths were discarded.")
                    else:
                        clui.message("%d paths were discarded." % (spaths_nr - spaths_nr_new))
            else:
                clui.message("No paths were discarded - no values were set.")

    if options.sort_by_id:
        with clui.fbm("Sort separate paths by resid"):
            spaths = sorted(spaths, key=lambda sp: (sp.id.id, sp.id.nr))

    # apply smoothing?
    # it is no longer necessary
    if options.apply_smoothing:
        logger.warning("Hard smoothing is not available in the current version but may be available in the future. Stay tuned!")
    if options.apply_soft_smoothing:
        logger.warning("Soft smoothing option is not available any more. Soft smoothing is enabled by default if --cache-dir or --cache-mem options are used.")

    clui.message("Number of paths: %d" % len(paths))
    clui.message("Number of spaths: %d" % len(spaths))

    return {'paths': paths, 'spaths': spaths, 'options': options._asdict(), 'soptions': soptions._asdict()}
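
# Illustrative note (not part of the original module): discard_short_logic names the
# logic of discarding, while the filters above express keeping. By De Morgan,
# "discard if (too short AND object too short)" is "keep if (long enough OR object
# long enough)", hence 'and' maps to robust_or and 'or' to robust_and (assuming
# robust_and/robust_or act as None-tolerant and/or).
def _example_discard_logic():
    size_ok, object_ok = False, True  # path too short, but object part long enough
    keep_and = robust_or(size_ok, object_ok)   # 'and' discard logic: kept
    keep_or = robust_and(size_ok, object_ok)   # 'or' discard logic: discarded
    return keep_and, keep_or
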
################################################################################

def get_allow_size_function(rt=None):
    if not isinstance(rt, str):
        return None
    assert re.compile('^[<>=]+[0-9.]+$').match(rt) is not None, "Wrong threshold definition: %s" % rt
    op = re.compile('[<>=]+')
    op = ''.join(sorted(op.findall(rt)[0]))
    vl = re.compile('[0-9.]+')
    vl = float(vl.findall(rt)[0])
    operator_dict = {'>': operator.gt,
                     '=>': operator.ge,
                     '<=': operator.le,
                     '<': operator.lt}
    assert op in operator_dict.keys(), "Unsupported operator %s in threshold %s" % (op, rt)
    return lambda size_of_cluster: operator_dict[op](size_of_cluster, vl)


class SkipSizeFunction(object):
    def __init__(self, ths_def):
        self.thresholds = []
        if isinstance(ths_def, (str, unicode)):
            for thd in ths_def.split():
                self.thresholds.append(get_allow_size_function(thd))

    def __call__(self, size_of_cluster):
        for thd in self.thresholds:
            if not thd(size_of_cluster):
                return True
        return False


def potentially_recursive_clusterization(config=None,
                                         clusterization_name=None,
                                         inlets_object=None,
                                         spaths=None,
                                         message='clusterization',
                                         deep=0,
                                         max_level=5):
    with clui.fbm("Performing %s, level %d of %d" % (message, deep, max_level), cont=False):
        logger.debug('Clustering options section: %s' % clusterization_name)
        cluster_options = config.get_cluster_options(section_name=clusterization_name)
        clui.message('Clustering options:')
        for k, v in cluster_options._asdict().iteritems():
            clui.message("%s = %s" % (str(k), str(v)))
        # TODO: Print clusterization options in a nice way!
        clustering_function = get_clustering_method(cluster_options, config)

        # special case of barber!!!
        if cluster_options.method == 'barber':
            logger.debug('Getting inlets refs...')
            inlets_refs = inlets_object.get_inlets_references()
            logger.debug('Starting wtc...')
            wtc = WhereToCut(spaths=(sp for sp in spaths if sp.id in inlets_refs),
                             expected_nr_of_spaths=len(inlets_refs),
                             forceempty=True,
                             **clustering_function.method_kwargs)
            # clouds = wtc.cloud_groups(progress=True)
            logger.debug('Getting spheres...')
            inlets_object.add_spheres(wtc.spheres)

        logger.debug('Proceed with clusterization, skip size...')
        # get skip_size function according to recursive_threshold
        skip_size = SkipSizeFunction(cluster_options.recursive_threshold)
        logger.debug('Proceed with clusterization, call method...')
        inlets_object.perform_reclustering(clustering_function, skip_outliers=True, skip_size=skip_size)

    clui.message('Number of clusters detected so far: %d' % len(inlets_object.clusters_list))

    if cluster_options.recursive_clusterization:
        deep += 1
        if deep > max_level:
            return
        return potentially_recursive_clusterization(config=config,
                                                    clusterization_name=cluster_options.recursive_clusterization,
                                                    inlets_object=inlets_object,
                                                    spaths=spaths,
                                                    deep=deep,
                                                    max_level=max_level)
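
# Illustrative example (not part of the original module): recursive_threshold strings
# are parsed by get_allow_size_function; SkipSizeFunction returns True (skip) when a
# cluster size fails any of the space-separated thresholds.
def _example_skip_size():
    allow = get_allow_size_function('>=10')
    assert allow(15) and not allow(5)
    skip = SkipSizeFunction('>=10 <=100')
    assert skip(5) is True      # too small: skipped
    assert skip(50) is False    # passes both thresholds: clusterized
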
# inlets_clusterization
def stage_IV_run(config, options, spaths=None, center_of_system=None, **kwargs):
    coptions = config.get_cluster_options()
    rcoptions = config.get_recluster_options()
    soptions = config.get_smooth_options()

    max_level = int(options.max_level)
    assert max_level >= 0

    # new style clustering
    # with clui.fbm("Create inlets"):
    # here we can check center of system
    pbar = clui.SimpleProgressBar(maxval=len(spaths), mess="Create inlets")
    inls = Inlets(spaths, center_of_system=center_of_system,
                  passing=not options.exclude_passing_in_clusterization,
                  pbar=pbar)
    pbar.finish()
    clui.message("Number of inlets: %d" % inls.size)

    def noo():
        # returns number of outliers
        if 0 in inls.clusters_list:
            return inls.clusters.count(0)
        return 0

    if inls.size > 0:
        # ***** CLUSTERIZATION *****
        with clui.fbm("Performing clusterization", cont=False):
            potentially_recursive_clusterization(config=config,
                                                 clusterization_name=config.cluster_name(),
                                                 inlets_object=inls,
                                                 spaths=spaths,
                                                 message='clusterization',
                                                 max_level=max_level)
        # with log.fbm("Performing clusterization"):
        #     clustering_function = get_clustering_method(coptions)
        #     inls.perform_clustering(clustering_function)
        clui.message('Number of outliers: %d' % noo())

        # ***** OUTLIERS DETECTION *****
        if options.detect_outliers:
            with clui.fbm("Detecting outliers", cont=False):
                if options.detect_outliers is not Auto:
                    threshold = float(options.detect_outliers)
                clusters = list(inls.clusters)
                no_out_detected = 0
                for cluster, center, std in zip(inls.clusters_list,
                                                inls.clusters_centers,
                                                inls.clusters_std):
                    if cluster == 0:
                        continue
                    inls_lim = inls.lim2clusters(cluster)
                    dmat = cdist(np.matrix(center), inls_lim.coords, metric='euclidean').flatten()
                    # Auto procedure
                    if options.detect_outliers is Auto:
                        threshold = std * 4  # FIXME: magic constant!
                    # defined threshold procedure
                    for nr, (d, ids) in enumerate(zip(dmat, inls_lim.inlets_ids)):
                        # print d, threshold
                        if d > threshold:
                            clusters[ids] = 0
                            no_out_detected += 1
                clui.message('Detected %d outliers.' % no_out_detected)
                inls.add_outliers_annotations(clusters)
            clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
            clui.message('Number of outliers: %d' % noo())

        # ***** RECLUSTERIZATION *****
        if options.recluster_outliers:
            with clui.fbm("Performing reclusterization of outliers", cont=False):
                '''
                potentially_recursive_clusterization(config=config,
                                                     clusterization_name=config.recluster_name(),
                                                     inlets_object=inls,
                                                     spaths=spaths,
                                                     traj_reader=traj_reader,
                                                     message='reclusterization',
                                                     max_level=max_level)
                '''
                clustering_function = get_clustering_method(rcoptions, config)
                # perform reclusterization
                # inls.recluster_outliers(clustering_function)
                inls.recluster_cluster(clustering_function, 0)
            clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
            clui.message('Number of outliers: %d' % noo())

        # ***** SINGLETONS REMOVAL *****
        if options.singletons_outliers:
            with clui.fbm("Removing clusters of size %d" % int(options.singletons_outliers)):
                inls.small_clusters_to_outliers(int(options.singletons_outliers))
            clui.message('Number of clusters detected so far: %d' % len(inls.clusters_list))
            clui.message('Number of outliers: %d' % noo())

        # TODO: Move it after master paths!
        # ***** ADD PASSING PATHS TO CLUSTERS *****
        if options.exclude_passing_in_clusterization and options.add_passing_to_clusters:
            with clui.fbm("Adding passing paths inlets to clusters", cont=False):
                # passing paths were excluded and they are meant to be added
                # one needs to loop over clusters and then all passing paths have to be checked
                # it is assumed that the adding method is barber
                abo = config.get_stage_options(2)  # ab from stage III
                ab_options = get_auto_barber_options(abo)
                ab_options.update(get_auto_barber_options(options))
                # get single paths only (no passing paths)
                spaths_single = [sp for sp in spaths if sp.is_single()]
                # ids of passing paths
                spaths_passing_ids = [nr for nr in xrange(len(spaths)) if spaths[nr].is_passing()]
                if len(spaths_passing_ids) == 0:
                    clui.message("No passing paths to add.")
                else:
                    # loop over passing paths, add to inlets
                    inls.passing = True
                    passing_inlets_ids = []
                    for passing_id in spaths_passing_ids:
                        passing_inlets_ids.extend(inls.extend_inlets(spaths[passing_id]))
                    # loop over clusters
                    for cluster in inls.clusters_list:
                        added_to_cluster = 0
                        if cluster == 0:
                            continue
                        clui.message("Current cluster: %d." % cluster)
                        # sps = inls.lim2clusters(cluster).limspaths2(spaths_single)
                        # chull = inls.lim2clusters(cluster).get_chull()
                        wtc = WhereToCut(inlets=inls.lim2clusters(cluster), **ab_options)
                        wtc.cut_thyself()
                        pbar = clui.SimpleProgressBar(len(passing_inlets_ids),
                                                      "Loop over available passing paths inlets:")
                        for passing_inlet_nr in range(len(passing_inlets_ids))[::-1]:
                            inlet = inls.inlets_list[passing_inlets_ids[passing_inlet_nr]]
                            sphere = wtc.inlet2sphere(inlet)
                            if sphere is not None:
                                # if True:
                                if wtc.is_overlaping_with_cloud(sphere):
                                    # if chull.point_within(inlet.coords):
                                    # add this inlet to cluster!
                                    inls.clusters[passing_inlets_ids[passing_inlet_nr]] = cluster
                                    added_to_cluster += 1
                                    passing_inlets_ids.pop(passing_inlet_nr)
                            pbar.next()
                        if added_to_cluster:
                            inls.add_message_wrapper(message='+%d passing' % added_to_cluster,
                                                     toleaf=cluster)
                        pbar.finish()
                    if len(passing_inlets_ids):
                        inls.add_message_wrapper(message='+%d passing' % len(passing_inlets_ids),
                                                 toleaf=0)

        clui.message('Clustering history:')
        clui.message(clui.print_simple_tree(inls.tree, prefix='').rstrip())

        with clui.fbm("Calculating cluster types"):
            ctypes = inls.spaths2ctypes(spaths)

        # now, there is something to do with ctypes!
        # we can create master paths!
        # but only if user wants this
        master_paths = {}
        master_paths_smooth = {}
        if options.create_master_paths:
            with clui.fbm("Master paths calculations", cont=False):
                smooth = get_smooth_method(soptions)  # this has to precede GCS
                if GCS.cachedir or GCS.cachemem:
                    pbar = clui.pbar(len(spaths) * 2, mess='Building coords cache')
                    [sp.get_coords(smooth=None) for sp in spaths
                     if pbar.next() is None and not isinstance(sp, PassingPath)]
                    [sp.get_coords(smooth=smooth) for sp in spaths
                     if pbar.next() is None and not isinstance(sp, PassingPath)]
                    pbar.finish()
                    use_threads = optimal_threads.threads_count
                else:
                    logger.warning(
                        "Master paths calculation without cache-dir or cache-mem option can be EXTREMELY slow.")
                    use_threads = 1
                with clui.fbm("Creating master paths for cluster types", cont=False):
                    ctypes_generic = [ct.generic for ct in ctypes]
                    ctypes_generic_list = sorted(list(set(ctypes_generic)))

                    pbar = clui.pbar(len([None for sp in spaths if not isinstance(sp, PassingPath)]) * 2)
                    for nr, ct in enumerate(ctypes_generic_list):
                        logger.debug('CType %s (%d)' % (str(ct), nr))
                        sps = lind(spaths, what2what(ctypes_generic, [ct]))
                        # no passing paths are allowed
                        sps = [sp for sp in sps if not isinstance(sp, PassingPath)]  # no PassingPaths!
                        if not len(sps):
                            logger.debug(
                                'CType %s (%d), no single paths found, MasterPath calculation skipped.' % (
                                    str(ct), nr,))
                            continue
                        logger.debug('CType %s (%d), number of spaths %d' % (str(ct), nr, len(sps)))
                        # print len(sps), ct
                        ctspc = CTypeSpathsCollection(spaths=sps, ctype=ct, pbar=pbar,
                                                      threads=use_threads)
                        master_paths.update({ct: ctspc.get_master_path(resid=(0, nr))})
                        master_paths_smooth.update({ct: ctspc.get_master_path(resid=(0, nr), smooth=smooth)})
                        del ctspc
                    pbar.finish()
    else:
        clui.message("No inlets found. Clusterization skipped.")
        # make empty results
        ctypes = inls.spaths2ctypes(spaths)
        master_paths = {}
        master_paths_smooth = {}

    return {'inls': inls,
            'ctypes': ctypes,
            'master_paths': master_paths,
            'master_paths_smooth': master_paths_smooth}
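
# Illustrative example (not part of the original module): the outlier test above marks
# an inlet as an outlier (cluster 0) when its distance from the cluster center exceeds
# the threshold (a fixed value, or 4 * cluster std when detect_outliers is Auto).
def _example_outlier_test():
    center = np.array([[0., 0., 0.]])
    coords = np.array([[1., 0., 0.], [10., 0., 0.]])
    dmat = cdist(center, coords, metric='euclidean').flatten()
    threshold = 4.0
    return [d > threshold for d in dmat]  # [False, True]: the second inlet is an outlier
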
################################################################################

def make_line(template, line):
    if len(template) != len(line):
        pass
    # detect nan and int problem
    for nr, (t, l) in enumerate(zip(template, line)):
        if is_number(l):
            if np.isnan(l):
                if 'd' in t:
                    template[nr] = t.replace('d', 's')
                    line[nr] = 'nan'
    return (' '.join(template)) % tuple(line)


def make_header_template(line_template):
    header_template = []
    col_re = re.compile('[0-9]+')
    for l in line_template:
        header_template.append('%{0}s'.format(col_re.findall(l)[0]))
    return header_template


def nr_header():
    return ['Nr'], ['%7d']


def get_header_line_and_line_template(header_line_and_line_template, head_nr=False):
    header, line_template = header_line_and_line_template

    header_template = make_header_template(line_template)
    # head_nr? only to header
    if head_nr:
        nrh, nrlt = nr_header()
        header_template = make_header_template(nrlt + line_template)
        header = nrh + header
    header_line = make_line(header_template, header)

    return header_line, line_template


def spath_id_header():
    return ['ID'], ['%9s']


def spath_name_header():
    return ['RES'], ['%4s']


def add_path_id_head(gen):
    sph, splt = zip(spath_id_header(), spath_name_header())
    sph = [e[0] for e in sph]
    splt = [e[0] for e in splt]

    @wraps(gen)
    def patched(*args, **kwargs):
        add_id = True
        if 'add_id' in kwargs:
            add_id = kwargs.pop('add_id')
        h, lt = gen(*args, **kwargs)
        if add_id:
            return sph + h, splt + lt
        return h, lt

    return patched


def add_path_id(gen):
    @wraps(gen)
    def patched(spath, *args, **kwargs):
        add_id = True
        if 'add_id' in kwargs:
            add_id = kwargs.pop('add_id')
        line = gen(spath, *args, **kwargs)
        if add_id:
            line = [spath.id, spath.id.name] + line
        return line

    return patched


def size_header():
    return ['Size', 'Size%'], ['%7d', '%6.2f']


def add_size_head(gen):
    sph, splt = size_header()

    @wraps(gen)
    def patched(*args, **kwargs):
        h, lt = gen(*args, **kwargs)
        return sph + h, splt + lt

    return patched


def add_size(gen):
    @wraps(gen)
    def patched(spaths, add_size=True, add_size_p100=None, *args, **kwargs):
        line = gen(spaths, *args, **kwargs)
        if add_size_p100 is not None:
            line = [len(spaths) / float(add_size_p100) * 100] + line
        if add_size:
            line = [len(spaths)] + line
        return line

    return patched


def cluster_id_header():
    return ['Cluster'], ['%7d']


def add_cluster_id_head(gen):
    sph, splt = cluster_id_header()

    @wraps(gen)
    def patched(*args, **kwargs):
        h, lt = gen(*args, **kwargs)
        return sph + h, splt + lt

    return patched


def add_cluster_id(gen):
    @wraps(gen)
    def patched(cluster, something, add_id=True, *args, **kwargs):
        line = gen(cluster, something, *args, **kwargs)
        if add_id:
            line = [int(cluster)] + line
        return line

    return patched


def ctype_id_header():
    return ['CType'], ['%7s']


def add_ctype_id_head(gen):
    sph, splt = ctype_id_header()

    @wraps(gen)
    def patched(*args, **kwargs):
        h, lt = gen(*args, **kwargs)
        return sph + h, splt + lt

    return patched


def add_ctype_id(gen):
    @wraps(gen)
    def patched(ctype, something, add_id=True, *args, **kwargs):
        line = gen(ctype, something, *args, **kwargs)
        if add_id:
            line = [str(ctype)] + line
        return line

    return patched
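
# Illustrative example (not part of the original module): how the header/line helpers
# compose. make_header_template() derives '%7s'-style header fields from the numeric
# line templates, and make_line() renders one row.
def _example_table_row():
    header, line_template = size_header()  # (['Size', 'Size%'], ['%7d', '%6.2f'])
    header_line, _ = get_header_line_and_line_template((header, line_template))
    row = make_line(list(line_template), [42, 3.14159])
    return header_line, row  # '   Size  Size%', '     42   3.14'
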
def __init__(self, fileoption, line_nr=False): self.output2stderr = False if fileoption: self.filehandle = open(fileoption, 'w') # self.output2stderr = True else: self.filehandle = sys.stdout self.line_nr = line_nr def __call__(self, info2print, nr=None): if self.line_nr and nr is not None: info2print = (self.nr_template % nr) + info2print if self.output2stderr: clui.message(info2print) print >> self.filehandle, info2print def sep(self): self(asep()) def thead(self, info2print): self(clui.thead(info2print)) def tend(self, info2print): self(clui.tsep(info2print)) def under(self, info2print): self(clui.underline(info2print)) ################################################################################ @add_path_id_head def spath_basic_info_header(): header = 'BeginF InpF ObjF OutF EndF'.split() line_template = ['%7d'] * len(header) return header, line_template @add_path_id def spath_basic_info(spath): line = [] line.append(spath.begins) if not isinstance(spath, PassingPath): line.extend(map(len, (spath.path_in, spath.path_object, spath.path_out))) else: line += [float('nan')] * 3 line.append(spath.ends) return line ################ @add_path_id_head def spath_lenght_total_info_header(total=None): header = 'InpL ObjL OutL'.split() if total: header = ['TotL'] + header line_template = ['%9.1f'] * len(header) return header, line_template @add_path_id def spath_lenght_total_info(spath, totalonly=False, total=False): # total and totalonly are internal flags # to calculate: total len, in len, obj len, out len call: # total=True (this will calculate total) # to skip calculation of total call: # total=False, totalonly=False (this will calculate in, obj, and out lens) # to calculate total len call: # total=False, totalonly=True line = [] if not total: if not totalonly: for t in traces.midpoints(spath.coords): if len(t) > 1: # traces.length_step_std requires at least 2 points line.append(traces.length_step_std(t)[0]) else: line.append(float('nan')) return line else: for t in traces.midpoints((spath.coords_cont,)): if len(t) > 1: # traces.length_step_std requires at least 2 points line.append(traces.length_step_std(t)[0]) else: line.append(float('nan')) return line line += spath_lenght_total_info(spath, add_id=False, total=False, totalonly=True) if not isinstance(spath, PassingPath): line += spath_lenght_total_info(spath, add_id=False, total=False, totalonly=False) else: line += [float('nan')] * 3 return line @add_path_id def spath_frames_total_info(spath, totalonly=False, total=False): # total and totalonly are internal flags # to calculate: total len, in len, obj len, out len call: # total=True (this will calculate total) # to skip calculation of total call: # total=False, totalonly=False (this will calculate in, obj, and out lens) # to calculate total len call: # total=False, totalonly=True line = [] if not total: if not totalonly: for t in spath.coords: line.append(len(t)) return line else: for t in (spath.coords_cont,): line.append(len(t)) return line line += spath_frames_total_info(spath, add_id=False, total=False, totalonly=True) if not isinstance(spath, PassingPath): line += spath_frames_total_info(spath, add_id=False, total=False, totalonly=False) else: line += [float('nan')] * 3 return line ################ @add_path_id_head def spath_steps_info_header(total=None): header = 'InpS InpStdS ObjS ObjStdS OutS OutStdS'.split() if total: header = ['TotS', 'TotStdS'] + header line_template = ['%8.2f', '%8.3f'] * (len(header) / 2) return header, line_template @add_path_id def 
################################################################################

@add_path_id_head
def spath_basic_info_header():
    header = 'BeginF InpF ObjF OutF EndF'.split()
    line_template = ['%7d'] * len(header)
    return header, line_template


@add_path_id
def spath_basic_info(spath):
    line = []
    line.append(spath.begins)
    if not isinstance(spath, PassingPath):
        line.extend(map(len, (spath.path_in, spath.path_object, spath.path_out)))
    else:
        line += [float('nan')] * 3
    line.append(spath.ends)
    return line


################

@add_path_id_head
def spath_lenght_total_info_header(total=None):
    header = 'InpL ObjL OutL'.split()
    if total:
        header = ['TotL'] + header
    line_template = ['%9.1f'] * len(header)
    return header, line_template


@add_path_id
def spath_lenght_total_info(spath, totalonly=False, total=False):
    # total and totalonly are internal flags
    # to calculate: total len, in len, obj len, out len call:
    #     total=True (this will calculate total)
    # to skip calculation of total call:
    #     total=False, totalonly=False (this will calculate in, obj, and out lens)
    # to calculate total len call:
    #     total=False, totalonly=True
    line = []
    if not total:
        if not totalonly:
            for t in traces.midpoints(spath.coords):
                if len(t) > 1:  # traces.length_step_std requires at least 2 points
                    line.append(traces.length_step_std(t)[0])
                else:
                    line.append(float('nan'))
            return line
        else:
            for t in traces.midpoints((spath.coords_cont,)):
                if len(t) > 1:  # traces.length_step_std requires at least 2 points
                    line.append(traces.length_step_std(t)[0])
                else:
                    line.append(float('nan'))
            return line
    line += spath_lenght_total_info(spath, add_id=False, total=False, totalonly=True)
    if not isinstance(spath, PassingPath):
        line += spath_lenght_total_info(spath, add_id=False, total=False, totalonly=False)
    else:
        line += [float('nan')] * 3
    return line


@add_path_id
def spath_frames_total_info(spath, totalonly=False, total=False):
    # total and totalonly are internal flags; they work as in
    # spath_lenght_total_info, except that frame counts are reported
    # instead of lengths
    line = []
    if not total:
        if not totalonly:
            for t in spath.coords:
                line.append(len(t))
            return line
        else:
            for t in (spath.coords_cont,):
                line.append(len(t))
            return line
    line += spath_frames_total_info(spath, add_id=False, total=False, totalonly=True)
    if not isinstance(spath, PassingPath):
        line += spath_frames_total_info(spath, add_id=False, total=False, totalonly=False)
    else:
        line += [float('nan')] * 3
    return line


################

@add_path_id_head
def spath_steps_info_header(total=None):
    header = 'InpS InpStdS ObjS ObjStdS OutS OutStdS'.split()
    if total:
        header = ['TotS', 'TotStdS'] + header
    line_template = ['%8.2f', '%8.3f'] * (len(header) / 2)
    return header, line_template


@add_path_id
def spath_steps_info(spath, total=None):
    line = []
    if not total:
        for t in traces.midpoints(spath.coords):
            if len(t) > 0:
                line.extend(traces.length_step_std(t)[1:])
            else:
                line.extend([float('nan'), float('nan')])
        return line
    line += spath_steps_info(spath, add_id=False, total=False)
    if not isinstance(spath, PassingPath):
        t = traces.midpoints((spath.coords_cont,)).next()
        if len(t) > 0:
            line = list(traces.length_step_std(t)[1:]) + line
        else:
            line = [float('nan'), float('nan')] + line
    else:
        line += [float('nan'), float('nan')] * 3
    return line


@add_path_id
def spath_frames_info(spath, total=None):
    line = []
    if not total:
        for t in spath.coords:
            line.append(len(t))
        return line
    line += spath_frames_info(spath, add_id=False, total=False)
    if not isinstance(spath, PassingPath):
        t = spath.coords_cont
        line = [len(t)] + line
    else:
        line = [float('nan')] + line
    return line


################

@add_path_id_head
def spath_ctype_header():
    header, line_template = ctype_id_header()
    return header, line_template


@add_path_id
def spath_ctype(spath, ctype=None):
    line = [str(ctype)]
    return line


################

@add_path_id_head
def spath_full_info_header(total=None):
    header = []
    line_template = []
    for h, lt in (spath_basic_info_header(add_id=False),
                  spath_lenght_total_info_header(add_id=False, total=total),
                  spath_steps_info_header(add_id=False, total=total),
                  spath_ctype_header(add_id=False)):
        header += h
        line_template += lt
    return header, line_template


@add_path_id
def spath_full_info(spath, ctype=None, total=None):
    line = []
    for l in (spath_basic_info(spath, add_id=False),
              spath_lenght_total_info(spath, add_id=False, total=total),
              spath_steps_info(spath, add_id=False, total=total),
              spath_ctype(spath, ctype=ctype, add_id=False)):
        line += l
    return line
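# Row layout of the full-info table, for reference (a summary, not generated
# output): ID and RES come from the add_path_id decorator, then BeginF..EndF
# from spath_basic_info, TotL..OutL from spath_lenght_total_info, TotS..OutStdS
# from spath_steps_info, and finally CType. PassingPath objects fill the
# length and step columns with nan.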
################################################################################

@add_size_head
def spaths_lenght_total_header():
    header = 'Tot TotStd Inp InpStd Obj ObjStd Out OutStd'.split()
    line_template = ['%9.1f', '%9.2f'] * (len(header) / 2)
    return header, line_template


@add_size
def spaths_length_total(spaths):
    line = []
    d4s = []
    for sp in spaths:
        if not isinstance(sp, PassingPath):
            d4s.append(spath_lenght_total_info(sp, add_id=False))
    d4s = np.array(d4s)
    if d4s.size:
        line.extend(np.mean(d4s, 0))
        line.extend(np.std(d4s, 0))
    else:
        line = [float('nan')] * 6
    # total
    d4s = []
    for sp in spaths:
        d4s.append(spath_lenght_total_info(sp, totalonly=True, add_id=False))
    d4s = np.array(d4s)
    if d4s.size:
        line.extend(np.mean(d4s, 0))
        line.extend(np.std(d4s, 0))
    else:
        line += [float('nan')] * 4
    # reorder to: Tot TotStd Inp InpStd Obj ObjStd Out OutStd
    return [line[6], line[7], line[0], line[3], line[1], line[4], line[2], line[5]]


@add_size
def spaths_frames_total(spaths):
    line = []
    d4s = []
    for sp in spaths:
        if not isinstance(sp, PassingPath):
            d4s.append(spath_frames_total_info(sp, add_id=False))
    d4s = np.array(d4s)
    if d4s.size:
        line.extend(np.mean(d4s, 0))
        line.extend(np.std(d4s, 0))
    else:
        line = [float('nan')] * 6
    # total
    d4s = []
    for sp in spaths:
        d4s.append(spath_frames_total_info(sp, totalonly=True, add_id=False))
    d4s = np.array(d4s)
    if d4s.size:
        line.extend(np.mean(d4s, 0))
        line.extend(np.std(d4s, 0))
    else:
        line += [float('nan')] * 4
    # reorder to: Tot TotStd Inp InpStd Obj ObjStd Out OutStd
    return [line[6], line[7], line[0], line[3], line[1], line[4], line[2], line[5]]


################################################################################

@add_cluster_id_head
def clusters_inlets_header():
    # header = 'Size SInp IInp IOut SOut'.split()
    header = 'Size INCOMING OUTGOING'.split()
    line_template = ['%7d'] + ['%8d'] * (len(header) - 1)
    return header, line_template


@add_cluster_id
def clusters_inlets(cluster, inlets):
    line = [inlets.size]
    line.append(inlets.lim2types([InletTypeCodes.surface_incoming]).size)
    # line.append(inlets.lim2types([InletTypeCodes.internal_incoming]).size)
    # line.append(inlets.lim2types([InletTypeCodes.internal_outgoing]).size)
    line.append(inlets.lim2types([InletTypeCodes.surface_outgoing]).size)
    return line


################################################################################

@add_ctype_id_head
def ctypes_spaths_info_header():
    header, line_template = spaths_lenght_total_header()
    return header, line_template


@add_ctype_id
def ctypes_spaths_info(ctype, spaths, show='len', add_size_p100=None):
    # show could be 'len' or 'frames'
    line = []
    if show == 'len':
        line += spaths_length_total(spaths, add_size_p100=add_size_p100)
    if show == 'frames':
        line += spaths_frames_total(spaths, add_size_p100=add_size_p100)
    return line


################################################################################

@add_cluster_id_head
def clusters_stats_prob_header():
    header = 'IN-OUT diff N IN-OUT_prob diff_prob N_prob'.split()
    line_template = ['%8d'] * (len(header) / 2) + ['%12.2f'] * (len(header) / 2)
    # header += 'IN_len OUT_len Both_len'.split()
    # line_template += ['%9.1f'] * 3
    return header, line_template


@add_cluster_id
def clusters_stats_prob(cluster, sp_ct):
    # calculates probabilities of some events for cluster
    # X:X transition - io
    # X:? and ?:X transition - d
    # X:N and N:X transition - N
    line = []
    io, d, N = 0, 0, 0
    in_len, out_len, tot_len = 0., 0., 0.
    in_n, out_n, tot_n = 0, 0, 0
    for sp, ct in sp_ct:
        ct = ct.generic.clusters
        assert cluster in ct
        if cluster == ct[0] and cluster == ct[1]:
            io += 1
        elif None in ct:
            N += 1
        else:
            d += 1
    line += [io, d, N]
    summa = float(sum([io, d, N]))
    line += map(lambda x: x / summa if summa else float('nan'), [io, d, N])
    return line
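# Worked example for clusters_stats_prob (made-up counts): for cluster X with
# transitions X:X, X:Y, and X:None occurring 6, 3, and 1 times respectively,
# io, d, N = 6, 3, 1 and the probability columns are 0.6, 0.3, 0.1
# (each count divided by summa = 10.0).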
@add_cluster_id_head
def clusters_stats_len_header():
    header = 'X->Obj Obj->X p-value X->ObjMin X->ObjMinID Obj->XMin Obj->XMinID'.split()
    line_template = (['%9.1f'] * 2) + ['%9.4f'] + ['%9.1f', '%11s'] * 2
    return header, line_template


@add_cluster_id
def clusters_stats_len(cluster, sp_ct):
    line = []
    in_len, out_len = [], []
    in_len_min, out_len_min = float('inf'), float('inf')
    in_len_min_id, out_len_min_id = None, None
    for sp, ct in sp_ct:
        ct = ct.generic.clusters
        assert cluster in ct
        lens = spath_lenght_total_info(sp, add_id=False, total=False, totalonly=False)  # in, obj, out
        if cluster == ct[0]:
            if not np.isnan(lens[0]):
                in_len.append(lens[0])
                if lens[0] < in_len_min:
                    in_len_min = lens[0]
                    in_len_min_id = str(sp.id)
        if cluster == ct[1]:
            if not np.isnan(lens[-1]):
                out_len.append(lens[-1])
                if lens[-1] < out_len_min:
                    out_len_min = lens[-1]
                    out_len_min_id = str(sp.id)
    if len(in_len):
        line.append(np.mean(in_len))
    else:
        line.append(float('nan'))
    if len(out_len):
        line.append(np.mean(out_len))
    else:
        line.append(float('nan'))
    if len(in_len) > 1 and len(out_len) > 1:
        line.append(ttest_ind(in_len, out_len)[-1])  # this is supposed to return p-value
    else:
        line.append(float('nan'))
    line += [in_len_min, in_len_min_id, out_len_min, out_len_min_id]
    return line


@add_cluster_id_head
def clusters_stats_steps_header():
    header = 'X->Obj Obj->X p-value X->ObjMin X->ObjMinID Obj->XMin Obj->XMinID'.split()
    line_template = (['%9.1f'] * 2) + ['%9.4f'] + ['%9.1f', '%11s'] * 2
    return header, line_template


@add_cluster_id
def clusters_stats_steps(cluster, sp_ct):
    line = []
    in_len, out_len = [], []
    in_len_min, out_len_min = float('inf'), float('inf')
    in_len_min_id, out_len_min_id = None, None
    for sp, ct in sp_ct:
        ct = ct.generic.clusters
        assert cluster in ct
        lens = spath_frames_info(sp, add_id=False, total=False)  # in, obj, out
        if cluster == ct[0]:
            if not np.isnan(lens[0]):
                in_len.append(lens[0])
                if lens[0] < in_len_min:
                    in_len_min = lens[0]
                    in_len_min_id = str(sp.id)
        if cluster == ct[1]:
            if not np.isnan(lens[-1]):
                out_len.append(lens[-1])
                if lens[-1] < out_len_min:
                    out_len_min = lens[-1]
                    out_len_min_id = str(sp.id)
    if len(in_len):
        line.append(np.mean(in_len))
    else:
        line.append(float('nan'))
    if len(out_len):
        line.append(np.mean(out_len))
    else:
        line.append(float('nan'))
    if len(in_len) > 1 and len(out_len) > 1:
        line.append(ttest_ind(in_len, out_len)[-1])  # this is supposed to return p-value
    else:
        line.append(float('nan'))
    line += [in_len_min, in_len_min_id, out_len_min, out_len_min_id]
    return line
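# Note on the p-value column above: scipy.stats.ttest_ind returns a
# (statistic, pvalue) pair, hence the [-1] indexing. A quick sanity check
# (illustrative, with made-up samples):
#
#     from scipy.stats import ttest_ind
#     ttest_ind([10., 11., 9.], [20., 21., 19.])[-1]  # small p-value: means differ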
################################################################################
# analysis


def stage_V_run(config, options, spaths=None, paths=None, inls=None, ctypes=None, reader=None, **kwargs):
    # file handle?
    head_nr = False
    line_nr = head_nr
    pa = PrintAnalysis(options.save, line_nr=line_nr)
    if options.save:
        clui.message('Using user provided file (%s), and' % options.save)
        clui.message('for histograms data file (%s).' % (options.save + '.csv'))
    else:
        clui.message('Using standard output.')
        clui.message(sep())
        clui.message('')

    ############
    pa.sep()
    pa('Aqua-Duct analysis')
    pa(clui.get_str_timestamp())

    ############
    if options.dump_config:
        pa.sep()
        pa.under('Configuration file name: %s' % config.config_filename)
        pa(os.linesep.join(config.dump_config()))

    ############
    pa.sep()
    pa("Frames window: %d:%d step %d" % (Reader.window.start,
                                         Reader.window.stop,
                                         Reader.window.step))

    ############
    traced_names = tuple(sorted(list(set([sp.id.name for sp in spaths]))))
    # traced_names = ['all'] + traced_names
    pa.sep()
    pa("Names of traced molecules: %s" % (' '.join(traced_names)))

    # spaths_types = ['all']
    spaths_types = []
    for sp in spaths:
        if isinstance(sp, PassingPath):
            if not PassingPath in spaths_types:
                spaths_types.append(PassingPath)
        else:
            if not SinglePath in spaths_types:
                spaths_types.append(SinglePath)
    spaths_types = tuple(spaths_types)

    ############

    def iter_over_tn():
        yield traced_names, ''
        if len(traced_names) > 1:
            for _tname in traced_names:
                yield (_tname,), " of %s" % _tname

    def iter_over_tnspt():
        for _tname, _message in iter_over_tn():
            yield _tname, spaths_types, _message
            if len(spaths_types) > 1:
                for _sptype in spaths_types:
                    if _sptype == PassingPath:
                        _sptype_name = 'passing paths'
                    elif _sptype == SinglePath:
                        _sptype_name = 'object paths'
                    yield _tname, (_sptype,), _message + (" of %s" % _sptype_name)

    ############
    pa.sep()
    for tname, message in iter_over_tn():
        pa("Number of traceable residues%s: %d" %
           (message, len([None for p in paths if p.name in tname])))
    for tname, sptype, message in iter_over_tnspt():
        pa("Number of separate paths%s: %d" %
           (message, len([None for sp in spaths if (sp.id.name in tname) and (isinstance(sp, sptype))])))

    ############
    pa.sep()
    for tname, sptype, message in iter_over_tnspt():
        pa("Number of inlets%s: %d" %
           (message, inls.lim2spaths([sp for sp in spaths if isinstance(sp, sptype)]).lim2rnames(tname).size))
    no_of_clusters = len(inls.clusters_list) - {True: 1, False: 0}[0 in inls.clusters_list]  # minus outliers, if any
    pa("Number of clusters: %d" % no_of_clusters)
    pa("Outliers: %s" % ({True: 'yes', False: 'no'}[0 in inls.clusters_list]))

    ############
    pa.sep()
    pa('Clustering history:')
    pa(clui.print_simple_tree(inls.tree, prefix='').rstrip())

    ############
    pa.sep()
    header_line, line_template = get_header_line_and_line_template(clusters_inlets_header(), head_nr=head_nr)
    for tname, sptype, message in iter_over_tnspt():
        pa("Clusters summary - inlets%s" % message)
        pa.thead(header_line)
        for nr, cl in enumerate(inls.clusters_list):
            inls_lim = inls.lim2spaths([sp for sp in spaths
                                        if isinstance(sp, sptype)]).lim2rnames(tname).lim2clusters(cl)
            pa(make_line(line_template, clusters_inlets(cl, inls_lim)), nr=nr)
        pa.tend(header_line)
    ############
    ############
    ############
    pa.sep()
    for tname, sptype, message in iter_over_tnspt():
        header_line, line_template = get_header_line_and_line_template(clusters_stats_prob_header(), head_nr=head_nr)
        pa("Clusters statistics (of paths%s) probabilities of transfers" % message)
        pa.thead(header_line)
        for nr, cl in enumerate(inls.clusters_list):
            sp_ct_lim = ((sp, ct) for sp, ct in zip(spaths, ctypes)
                         if cl in ct.clusters and isinstance(sp, sptype) and sp.id.name in tname)
            pa(make_line(line_template, clusters_stats_prob(cl, sp_ct_lim)), nr=nr)
        pa.tend(header_line)

        header_line, line_template = get_header_line_and_line_template(clusters_stats_len_header(), head_nr=head_nr)
        pa("Clusters statistics (of paths%s) mean lengths of transfers" % message)
        pa.thead(header_line)
        for nr, cl in enumerate(inls.clusters_list):
            sp_ct_lim = ((sp, ct) for sp, ct in zip(spaths, ctypes)
                         if cl in ct.clusters and isinstance(sp, sptype) and sp.id.name in tname)
            pa(make_line(line_template, clusters_stats_len(cl, sp_ct_lim)), nr=nr)
        pa.tend(header_line)

        header_line, line_template = get_header_line_and_line_template(clusters_stats_steps_header(), head_nr=head_nr)
        pa("Clusters statistics (of paths%s) mean frames numbers of transfers" % message)
        pa.thead(header_line)
        for nr, cl in enumerate(inls.clusters_list):
            sp_ct_lim = ((sp, ct) for sp, ct in zip(spaths, ctypes)
                         if cl in ct.clusters and isinstance(sp, sptype) and sp.id.name in tname)
            pa(make_line(line_template, clusters_stats_steps(cl, sp_ct_lim)), nr=nr)
        pa.tend(header_line)

    ############
    pa.sep()
    header_line, line_template = get_header_line_and_line_template(ctypes_spaths_info_header(), head_nr=head_nr)
    ctypes_generic = [ct.generic for ct in ctypes]
    ctypes_generic_list = sorted(list(set(ctypes_generic)))  # sorted by ctype
    ctypes_size = []
    for nr, ct in enumerate(ctypes_generic_list):
        sps = lind(spaths, what2what(ctypes_generic, [ct]))
        ctypes_size.append(len(sps))
    ctypes_generic_list = [ctypes_generic_list[i] for i in np.argsort(ctypes_size)[::-1]]
    for tname, sptype, message in iter_over_tnspt():
        pa("Separate paths clusters types summary - mean lengths of paths%s" % message)
        pa.thead(header_line)
        total_size = len([sp for sp in spaths if sp.id.name in tname and isinstance(sp, sptype)])
        for nr, ct in enumerate(ctypes_generic_list):
            sps = lind(spaths, what2what(ctypes_generic, [ct]))
            sps = [sp for sp in sps if sp.id.name in tname and isinstance(sp, sptype)]
            # ctypes_size.append(len(sps))
            if len(sps) > 0:
                pa(make_line(line_template, ctypes_spaths_info(ct, sps, add_size_p100=total_size, show="len")),
                   nr=nr)
        pa.tend(header_line)

        pa("Separate paths clusters types summary - mean number of frames of paths%s" % message)
        pa.thead(header_line)
        for nr, ct in enumerate(ctypes_generic_list):
            sps = lind(spaths, what2what(ctypes_generic, [ct]))
            sps = [sp for sp in sps if sp.id.name in tname and isinstance(sp, sptype)]
            # ctypes_size.append(len(sps))
            if len(sps) > 0:
                pa(make_line(line_template, ctypes_spaths_info(ct, sps, add_size_p100=total_size, show="frames")),
                   nr=nr)
        pa.tend(header_line)

    ############
    pa.sep()
    pa("List of separate paths and properties")
    header_line, line_template = get_header_line_and_line_template(spath_full_info_header(total=True),
                                                                   head_nr=head_nr)
    pa.thead(header_line)
    for nr, (sp, ctype) in enumerate(izip_longest(spaths, ctypes, fillvalue=None)):
        if ctype is not None:
            ctype = ctype.generic
        pa(make_line(line_template, spath_full_info(sp, ctype=ctype, total=True)), nr=nr)
    pa.tend(header_line)

    ############
    # additional analysis

    def iter_over_tn():
        yield traced_names, 'amol'
        if len(traced_names) > 1:
            for _tname in traced_names:
                yield (_tname,), "%s" % _tname

    def iter_over_part():
        for _part in 'walk in object out'.split() + ['in out'.split()]:
            if isinstance(_part, list):
                _message = '_'.join(_part)
            else:
                _message = _part
                _part = [_part]
            yield _part, _message

    def iter_over_spt():
        yield spaths_types, 'apaths'
        if len(spaths_types) > 1:
            for _sptype in spaths_types:
                if _sptype == PassingPath:
                    _sptype_name = 'passing'
                elif _sptype == SinglePath:
                    _sptype_name = 'object'
                yield (_sptype,), _sptype_name

    def iter_over_c():
        yield inls.clusters_list + [None], 'aclusts'
        if len(inls.clusters_list + [None]) > 1:
            for _cluster in inls.clusters_list + [None]:
                yield [_cluster], str(_cluster)

    def iter_over_ct():
        yield ctypes_generic_list, 'actypes'
        if len(ctypes_generic_list) > 1:
            for _cluster in ctypes_generic_list:
                yield (_cluster,), str(_cluster)

    def iter_over_all():
        for _tname, _m_tname in iter_over_tn():
            for _sptype, _m_sptype in iter_over_spt():
                for _cluster, _m_cluster in iter_over_c():
                    for _part, _m_part in iter_over_part():
                        _message = '_'.join((_m_tname, _m_sptype, _m_cluster, _m_part))
                        yield _tname, _sptype, _cluster, _part, _message
                for _ctype, _m_ctype in iter_over_ct():
                    for _part, _m_part in iter_over_part():
                        _message = '_'.join((_m_tname, _m_sptype, _m_ctype, _m_part))
                        yield _tname, _sptype, _ctype, _part, _message
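    # Column naming sketch: iter_over_all() joins the iterator labels with '_',
    # so the first (fully aggregated) histogram column is
    # 'amol_apaths_aclusts_walk'; more specific columns follow, e.g. a
    # per-cluster 'amol_apaths_1_object' or a per-part 'amol_apaths_aclusts_in_out'.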
    # histograms
    # calculate old max_frame
    max_frame = Reader.number_of_frames(onelayer=True)

    header = [column[-1] for column in iter_over_all()]
    fmt = ['%u'] * len(header)
    h = np.zeros((max_frame, len(header)))

    # loop over spaths
    pbar = clui.pbar(maxval=len(spaths), mess='Calculating histograms')
    # loop over paths and ctypes
    for sp, ct in zip(spaths, ctypes):
        # loop over columns
        # traced names, paths types, clusters cluster types, part of paths, column name
        for tname, sptype, c_ct, part, col_name in iter_over_all():
            # check if column fits to the requirements
            if not sp.id.name in tname:
                continue
            if not isinstance(sp, sptype):
                continue
            # c or ct
            it_is_ct = False
            if isinstance(c_ct[0], InletClusterGenericType):
                it_is_ct = True
                if not ct.generic in c_ct:
                    continue
            else:
                if len(union_full(ct.generic.clusters, c_ct)) == 0:
                    continue
            # this is more than that...
            # if c_ct is not InletClusterGenericType then:
            # if 'in' in part only incoming paths in ct are used
            # if 'out' in part only outgoing paths in ct are used
            col_index = header.index(col_name)
            if 'walk' in part:
                h[sp.paths_cont, col_index] += 1
            if isinstance(sp, PassingPath):
                continue
            if 'in' in part:
                if not it_is_ct:
                    if not ct.input in c_ct:
                        continue
                h[sp.path_in, col_index] += 1
            if 'object' in part:
                h[sp.path_object, col_index] += 1
            if 'out' in part:
                if not it_is_ct:
                    if not ct.output in c_ct:
                        continue
                h[sp.path_out, col_index] += 1
        pbar.next()
    pbar.finish()

    # scope/object size?
    if options.calculate_scope_object_size:
        scope_size = []
        object_size = []
        # now, the problem is in the scope and object definition
        pbar = clui.pbar(maxval=Reader.number_of_frames(), mess='Calculating scope and object sizes')
        for number, traj_reader in Reader.iterate(number=True):
            scope_size.append([])
            object_size.append([])
            for frame in traj_reader.iterate_over_frames():
                scope = traj_reader.parse_selection(options.scope_chull)
                ch = scope.chull()
                scope_size[-1].append((ch.area, ch.volume))
                res = traj_reader.parse_selection(options.object_chull)
                ch = res.chull()
                object_size[-1].append((ch.area, ch.volume))
                pbar.next()
            header += map(lambda s: '%s_%d' % (s, number),
                          ['scope_area', 'scope_volume', 'object_area', 'object_volume'])
            fmt += ['%0.3f', '%0.2f'] * 2
        for s_s, o_s in zip(scope_size, object_size):
            h = np.hstack((h, s_s, o_s))
        pbar.finish()

    # add frame column?
    frame_col = np.array([range(max_frame)]).T
    h = np.hstack((frame_col, h))
    header = ['frame'] + header
    fmt = ['%u'] + fmt
    # save???
    if options.save:
        h_fname = options.save + '.csv'
    else:
        import cStringIO as StringIO
        h_fname = StringIO.StringIO()
    np.savetxt(h_fname, h, fmt=fmt, delimiter=',', header=','.join(header))
    if not options.save:
        print h_fname.getvalue()

    return {'hist': h, 'header': header}
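# Reading the histograms file back, as a sketch ('out.csv' is a made-up name;
# np.genfromtxt with names=True can pick up the '# '-prefixed header line that
# np.savetxt writes):
#
#     data = np.genfromtxt('out.csv', delimiter=',', names=True)
#     data['frame']  # frame indices; remaining columns follow the header names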
################################################################################
# visualize


def plot_spaths_traces(spaths, spp=None, name=None, split=False, states=False, separate=False, smooth=None):
    if states or separate:
        spaths_iter = spaths
    else:
        spaths_iter = [spaths]
    state = None
    name_separate = ''
    for nr, sp in enumerate(spaths_iter):
        if states:
            state = nr + 1
        if separate:
            name_separate = '_%d' % (nr + 1)
        if states or separate:
            sp = [sp]
        if split:
            # FIXME: if path(s) is empty this will probably produce CGO_END without CGO_BEGIN
            spp.paths_trace(sp, name=name + '_in' + name_separate,
                            plot_walk=False, plot_object=False, plot_out=False, state=state, smooth=smooth)
            spp.paths_trace(sp, name=name + '_obj' + name_separate,
                            plot_walk=False, plot_in=False, plot_out=False, state=state, smooth=smooth)
            spp.paths_trace(sp, name=name + '_out' + name_separate,
                            plot_walk=False, plot_in=False, plot_object=False, state=state, smooth=smooth)
            spp.paths_trace(sp, name=name + '_walk' + name_separate,
                            plot_in=False, plot_object=False, plot_out=False, state=state, smooth=smooth)
        else:
            spp.paths_trace(sp, name=name + name_separate, state=state, smooth=smooth)


def plot_spaths_inlets(spaths, spp=None, name=None, states=False, separate=False, smooth=None):
    if states or separate:
        spaths_iter = spaths
    else:
        spaths_iter = [spaths]
    state = None
    name_separate = ''
    for nr, sp in enumerate(spaths_iter):
        if states:
            state = nr + 1
        if separate:
            name_separate = '_%d' % (nr + 1)
        if states or separate:
            sp = [sp]
        spp.paths_inlets(sp, name=name + name_separate, state=state, smooth=smooth)


def is_pymol_connector_session(filename):
    session_ext = re.compile('[.][pP][sS][eE]')
    if filename:
        ext = os.path.splitext(filename)[-1]
        if session_ext.match(ext):
            return True
    return False


def is_pymol_connector_script(filename):
    script = re.compile('.*[.]([pP][yY]|[pP][yY][.][gG][zZ])$')
    if filename:
        if script.match(filename):
            return True
    return False
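# For reference (illustrative filenames): is_pymol_connector_session matches
# PyMOL session files such as 'out.pse', while is_pymol_connector_script
# matches scripts such as 'out.py' or 'out.py.gz'; anything else, including
# None, yields False in both.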
def stage_VI_run(config, options, spaths=None, inls=None, ctypes=None, master_paths=None, master_paths_smooth=None,
                 **kwargs):
    from aquaduct.visual.pymol_connector import ConnectToPymol, SinglePathPlotter
    # from aquaduct.visual.pymol_connector import cmd as pymol_cmd
    from aquaduct.visual.helpers import ColorMapDistMap

    soptions = config.get_smooth_options()
    smooth = get_smooth_method(soptions)

    # start pymol
    with clui.fbm("Starting PyMOL connection", cont=False):
        pymol_connector = ConnectToPymol()
        if is_pymol_connector_script(options.save):
            pymol_connector.init_script(options.save)
        else:
            pymol_connector.init_pymol()
        if options.simply_smooths:
            spp = SinglePathPlotter(pymol_connector, linearize=get_linearize_method(options.simply_smooths))
        else:
            spp = SinglePathPlotter(pymol_connector, linearize=None)

    ctypes_generic = [ct.generic for ct in ctypes]
    ctypes_generic_list = sorted(list(set(ctypes_generic)))

    if options.show_molecule:
        molecule_name = ''
        with clui.fbm("Molecule"):
            for nr, traj_reader in enumerate(Reader.iterate()):
                # mda_ppr = mda.core.flags["permissive_pdb_reader"]
                # mda.core.flags["permissive_pdb_reader"] = False  # mda16: it is probably always True
                frames_to_show = range2int(options.show_molecule_frames)
                pdbfile = traj_reader.dump_frames(frames_to_show, selection=options.show_molecule)
                pymol_connector.load_pdb('molecule%d' % nr, pdbfile)
                if len(molecule_name) == 0:
                    molecule_name = 'molecule%d' % nr
                os.unlink(pdbfile)

    # it would be nice to plot convexhull
    if options.show_scope_chull:
        with clui.fbm("Convexhull"):
            for nr, traj_reader in enumerate(Reader.iterate()):
                frames_to_show = range2int(options.show_scope_chull_frames)
                for frame in frames_to_show:
                    traj_reader.set_frame(frame)
                    scope = traj_reader.parse_selection(options.show_scope_chull)
                    chull = scope.chull()
                    spp.convexhull(chull, name='scope_shape%d' % nr, state=frame + 1)

    if options.show_object_chull:
        with clui.fbm("Object shape"):
            for nr, traj_reader in enumerate(Reader.iterate()):
                frames_to_show = range2int(options.show_scope_chull_frames)
                for frame in frames_to_show:
                    traj_reader.set_frame(frame)
                    object_shape = traj_reader.parse_selection(options.show_object_chull)
                    chull = object_shape.chull()
                    spp.convexhull(chull, name='object_shape%d' % nr,
                                   color=np.array([255, 153, 0]) / 255., state=frame + 1)  # orange

    if options.inlets_clusters:
        with clui.fbm("Clusters"):
            # TODO: require stage V for that?
            no_of_clusters = len(inls.clusters_list)  # total, including outliers
            cmap = ColorMapDistMap()
            for c in inls.clusters_list:
                # coords for current cluster
                ics = inls.lim2clusters(c).coords
                if c == 0:
                    c_name = 'out'
                else:
                    c_name = str(int(c))
                spp.scatter(ics, color=cmap(c), name="cluster_%s" % c_name)
                if False:
                    # TODO: This does not work any more in that way. Rewrite it or remove it.
                    radii = inls.lim2clusters(c).radii
                    if len(radii) > 0:
                        spp.scatter(ics, color=cmap(c), radius=radii, name="cluster_radii_%s" % c_name)

    if options.ctypes_raw:
        with clui.fbm("CTypes raw"):
            for nr, ct in enumerate(ctypes_generic_list):
                clui.message(str(ct), cont=True)
                sps = lind(spaths, what2what(ctypes_generic, [ct]))
                plot_spaths_traces(sps, name=str(ct) + '_raw', split=False, spp=spp)
                if ct in master_paths:
                    if master_paths[ct] is not None:
                        plot_spaths_traces([master_paths[ct]], name=str(ct) + '_raw_master', split=False, spp=spp)

    if options.ctypes_smooth:
        with clui.fbm("CTypes smooth"):
            for nr, ct in enumerate(ctypes_generic_list):
                clui.message(str(ct), cont=True)
                sps = lind(spaths, what2what(ctypes_generic, [ct]))
                plot_spaths_traces(sps, name=str(ct) + '_smooth', split=False, spp=spp, smooth=smooth)
                if ct in master_paths_smooth:
                    if master_paths_smooth[ct] is None:
                        continue
                    plot_spaths_traces([master_paths_smooth[ct]], name=str(ct) + '_smooth_master', split=False,
                                       spp=spp, smooth=lambda anything: anything)
                if ct in master_paths:
                    if master_paths[ct] is None:
                        continue
                    plot_spaths_traces([master_paths[ct]], name=str(ct) + '_raw_master_smooth', split=False,
                                       spp=spp, smooth=smooth)

    if options.all_paths_raw:
        with clui.fbm("All raw paths"):
            plot_spaths_traces(spaths, name='all_raw', split=options.all_paths_split, spp=spp)
    if options.all_paths_raw_io:
        with clui.fbm("All raw paths io"):
            plot_spaths_inlets(spaths, name='all_raw_paths_io', spp=spp)

    if options.all_paths_smooth:
        with clui.fbm("All smooth paths"):
            plot_spaths_traces(spaths, name='all_smooth', split=options.all_paths_split, spp=spp, smooth=smooth)
    if options.all_paths_smooth_io:
        with clui.fbm("All smooth paths io"):
            plot_spaths_inlets(spaths, name='all_smooth_paths_io', spp=spp)

    with clui.fbm("Paths as states"):
        if options.paths_raw:
            clui.message("raw", cont=True)
            plot_spaths_traces(spaths, name='raw_paths', states=options.paths_states,
                               separate=not options.paths_states, spp=spp)
        if options.paths_smooth:
            clui.message("smooth", cont=True)
            plot_spaths_traces(spaths, name='smooth_paths', states=options.paths_states,
                               separate=not options.paths_states, smooth=smooth, spp=spp)
        if options.paths_raw_io:
            clui.message("raw_io", cont=True)
            plot_spaths_inlets(spaths, name='raw_paths_io', states=options.paths_states,
                               separate=not options.paths_states, spp=spp)
        if options.paths_smooth_io:
            clui.message("smooth_io", cont=True)
            plot_spaths_inlets(spaths, name='smooth_paths_io', states=options.paths_states,
                               separate=not options.paths_states, smooth=smooth, spp=spp)

    if options.show_molecule:
        pymol_connector.orient_on(molecule_name)

    if is_pymol_connector_session(options.save):
        with clui.fbm("Saving session (%s)" % options.save):
            clui.message("")  # new line
            pbar = clui.pbar(len(spaths))
            # FIXME: Loop over states is not required if there is no object with many states.
            import time
            for state in range(len(spaths)):
                pymol_connector.cmd.set_frame(state + 1)
                pbar.update(state)
                time.sleep(0.1)
            pbar.finish()
            clui.message("Finalizing session saving...", cont=True)  # new line
            pymol_connector.cmd.set_frame(1)
            pymol_connector.cmd.save(options.save, state=0)
            pymol_connector.cmd.quit()
################################################################################

__all__ = '''clui
optimal_threads
aquaduct_version_nice
ValveConfig
valve_begin
valve_load_config
valve_exec_stage
stage_I_run
stage_II_run
stage_III_run
stage_IV_run
stage_V_run
stage_VI_run
valve_end'''.split()