# Source code for stateinterpreter.utils.io

import pandas as pd
import mdtraj as md

# Names exported by a star-import of this module. is_plumed_file is defined
# below but is not listed here.
__all__ = ["load_dataframe", "load_trajectory", "plumed_to_pandas"]

def is_plumed_file(filename):
    """
    Tell whether a file begins with a PLUMED ``#! FIELDS`` header.

    Only the header row is parsed (``nrows=0``); no data rows are read.

    Parameters
    ----------
    filename : string
        File to inspect (handed to ``pandas.read_csv``)

    Returns
    -------
    bool
        True when the first two space-separated header tokens, joined by
        a single space, equal ``#! FIELDS``; False otherwise
    """
    header_only = pd.read_csv(filename, sep=" ", skipinitialspace=True, nrows=0)
    leading_tokens = header_only.columns[:2]
    return " ".join(leading_tokens) == "#! FIELDS"


def plumed_to_pandas(filename="./COLVAR"):
    """
    Read a PLUMED output file into a pandas DataFrame.

    The file is parsed twice: once for the header row only, to recover the
    column names, and once for the data. Column names are the header
    tokens that follow the leading ``#! FIELDS`` marker.

    Parameters
    ----------
    filename : string, optional
        PLUMED output file, by default "./COLVAR"

    Returns
    -------
    df : DataFrame
        Collective variables dataframe
    """
    n_header_lines = 1
    # Tokenizer settings shared by the header pass and the data pass
    parse_opts = dict(sep=" ", skipinitialspace=True)

    # Header pass: nrows=0 parses the first line only
    header_tokens = pd.read_csv(filename, nrows=0, **parse_opts).columns
    # Drop the first two tokens (the "#! FIELDS" marker)
    column_names = header_tokens[2:]

    # Data pass: skip the header line, ignore anything after a '#', and
    # label the columns with the names recovered above
    return pd.read_csv(
        filename,
        header=None,
        skiprows=range(n_header_lines),
        names=column_names,
        comment="#",
        **parse_opts,
    )


def _read_plumed_or_csv(filename, **kwargs):
    """Read one file with the PLUMED parser if it has a PLUMED header, else with ``pd.read_csv``."""
    if is_plumed_file(filename):
        return plumed_to_pandas(filename)
    return pd.read_csv(filename, **kwargs)


def load_dataframe(data, start=0, stop=None, stride=1, **kwargs):
    """Load dataframe from object or from file.

    Every input table is restricted to the rows ``start:stop:stride`` and
    the index of the returned dataframe is reset to ``0..n-1``. Input
    dataframes are not modified.

    Parameters
    ----------
    data : str, pandas.DataFrame, or list
        input data: a filename, a dataframe, a list of filenames, or a
        list of dataframes. For a list of filenames a ``walker`` column
        holding the position of the file in the list is added to each
        table before concatenation.
    start : int, optional
        first row to keep (per input table), by default 0
    stop : int, optional
        row at which to stop, exclusive (per input table), by default None
    stride : int, optional
        step between kept rows, by default 1
    **kwargs
        forwarded to ``pandas.read_csv`` for files that are not in
        PLUMED format

    Returns
    -------
    pandas.DataFrame
        Dataframe

    Raises
    ------
    TypeError
        if data is not a valid type, or if a list does not contain only
        filenames or only dataframes
    ValueError
        if data is an empty list
    """
    rows = slice(start, stop, stride)

    if isinstance(data, pd.DataFrame):
        df = data.iloc[rows, :]

    elif isinstance(data, str):
        df = _read_plumed_or_csv(data, **kwargs).iloc[rows, :]

    elif isinstance(data, list):
        if not data:
            raise ValueError("load_dataframe: received an empty list")

        if all(isinstance(item, str) for item in data):
            frames = []
            for i, filename in enumerate(data):
                df_tmp = _read_plumed_or_csv(filename, **kwargs)
                # Tag rows with the file position before slicing, so the
                # assignment is made on the full table and not on a slice
                df_tmp["walker"] = i
                frames.append(df_tmp.iloc[rows, :])
        elif all(isinstance(item, pd.DataFrame) for item in data):
            frames = [item.iloc[rows, :] for item in data]
        else:
            raise TypeError(
                "list elements must be all 'str' (filenames) or all 'pandas.DataFrame'"
            )
        df = pd.concat(frames)

    else:
        raise TypeError(f"{data}: Accepted types are 'pandas.Dataframe', 'str', or list")

    # reset_index returns a new object, so slices of caller-owned
    # dataframes are never modified in place
    return df.reset_index(drop=True)
def _load_frames(traj_path, topo_file, start, stop, stride):
    """Load frames ``start:stop:stride`` (indices in the file on disk) of one trajectory."""
    strided_read_ok = (
        stride > 0
        and start >= 0
        and start % stride == 0
        and (stop is None or stop >= 0)
    )
    if strided_read_ok:
        # A strided read keeps frames 0, stride, 2*stride, ... so loaded
        # frame k is on-disk frame k * stride. The window [start, stop)
        # therefore maps to [start // stride, ceil(stop / stride)).
        traj = md.load(traj_path, top=topo_file, stride=stride)
        first = start // stride
        last = None if stop is None else -(-stop // stride)
        return traj[first:last]

    # A start that is not a multiple of stride (or negative bounds) cannot
    # be expressed on a strided read: load every frame, then slice.
    # NOTE(review): this path holds the whole trajectory in memory.
    return md.load(traj_path, top=topo_file)[start:stop:stride]


def load_trajectory(traj_dict, start=0, stop=None, stride=1):
    """Load trajectory with mdtraj.

    Frames are selected as ``start:stop:stride`` in units of frames in the
    file on disk, i.e. the same rows ``load_dataframe`` keeps for the same
    arguments. With several trajectory files the selection is applied to
    each file and the results are joined in list order.

    Parameters
    ----------
    traj_dict : dict
        dictionary containing trajectory and topology (optional) file.
        ``traj_dict["trajectory"]`` is a filename or a list of filenames;
        ``traj_dict["topology"]``, if present, is passed to mdtraj as
        ``top``.
    start : int, optional
        first frame to keep, by default 0
    stop : int, optional
        frame at which to stop, exclusive, by default None
    stride : int, optional
        step between kept frames, by default 1

    Returns
    -------
    mdtraj.Trajectory
        the selected frames
    """
    traj_file = traj_dict["trajectory"]
    topo_file = traj_dict.get("topology")

    if isinstance(traj_file, (list, tuple)):
        pieces = [
            _load_frames(path, topo_file, start, stop, stride)
            for path in traj_file
        ]
        return md.join(pieces)

    return _load_frames(traj_file, topo_file, start, stop, stride)