Source code for md.calculators.orca_calculator

from __future__ import annotations
import torch
import os
import subprocess
from ase import Atoms
from ase.data import chemical_symbols
import numpy as np

from typing import List, Union, Dict, Optional, Tuple, TYPE_CHECKING

if TYPE_CHECKING:
    from schnetpack.md import System

from schnetpack.md.calculators.base_calculator import QMCalculatorError, QMCalculator
from schnetpack.md.parsers.orca_parser import OrcaMainFileParser
from schnetpack import units as spk_units

__all__ = ["OrcaCalculator"]


[docs]class OrcaCalculator(QMCalculator): """ Calculator for interfacing the ORCA code package [#orca1]_ with SchNetPack molecular dynamics. Requires ORCA to be installed and an input file template. This template is a standard ORCA input file, with everything past the specification of coordinate format, charge and multiplicity removed (coordinates and final `*`). If desired, a Queuer can be give, which will attempt to send all jobs to a grid engine queue. In general, the calculator will take the current System to generate inputs, perform the calculation with ORCA, extract data from the ouput file (using the OrcaParser class) and update the System. Args: required_properties (list): List of the property names which will be passed to the simulator force_key (str): Name of the property corresponding to the forces. compdir (str): Directory in which computations are performed. qm_executable (str): Path to the ORCA executable. orca_template (str): Path to an ORCA template which will be used to generate input files. This should be a full ORCA input, where the geometry section between *xyz and * is replaced by the string `{geometry}`. energy_unit (str, float): Energy units returned by the internal computation model. position_unit (str, float): Unit conversion for the length used in the model computing all properties. E.g. if the model needs Angstrom, one has to provide the conversion factor converting from the atomic units used internally (Bohr) to Angstrom: 0.529177. Is used together with `energy_unit` to determine units of force and stress. energy_key (str, optional): Name of the property corresponding to the energy. stress_key (str, optional): Name of the property corresponding to the stress. property_conversion (dict, optional): Convert properties to requested units. If left empty, no conversion is performed. overwrite (bool): Overwrite previous computation results. Default is true. adaptive (bool, optional): Specify, whether the calculator should be used for adaptive sampling. basename (str, optional): Basename of the generated input files. orca_parser (schnetpack.md.parsers.OrcaParser, optional): Parser used to extract data from output files. References ---------- .. [#orca1] Neese: The ORCA program system. WIREs Comput Mol Sci, 2 (1), 73-78. 2012. """ def __init__( self, required_properties: List, force_key: str, compdir: str, qm_executable: str, orca_template: str, energy_unit: Union[str, float], position_unit: Union[str, float], energy_key: Optional[str] = None, stress_key: Optional[str] = None, property_conversion: Dict[str, Union[str, float]] = {}, overwrite: bool = True, adaptive: bool = False, basename: str = "input", orca_parser=OrcaMainFileParser, ): super(OrcaCalculator, self).__init__( required_properties=required_properties, force_key=force_key, compdir=compdir, qm_executable=qm_executable, energy_unit=energy_unit, position_unit=position_unit, energy_key=energy_key, stress_key=stress_key, property_conversion=property_conversion, overwrite=overwrite, adaptive=adaptive, ) self.orca_template = open(orca_template, "r").read() self.basename = basename self.orca_parser = orca_parser(target_properties=required_properties) def _generate_orca_inputs( self, molecules: List[Tuple[np.array, np.array]], current_compdir: str ): """ Generate input files for all molecules in the current System. Args: molecules (list((np.array, np.array)): List of tuples of numpy arrays containing the atom types and structures of all molecules to be computed. current_compdir (str): Path to the computation directory in which the input files should be generated. Returns: list: List of all generated input files. """ input_files = [] for idx, molecule in enumerate(molecules): # Convert data and generate input files atom_types, positions = molecule # Convert inputs to Angstrom for input file, since this is the default length unit there positions *= spk_units.convert_units( self.position_conversion * spk_units.length, "Angstrom" ) input_file_name = os.path.join( current_compdir, "{:s}_{:06d}.oinp".format(self.basename, idx + 1) ) self._write_orca_input(input_file_name, atom_types, positions) input_files.append(input_file_name) return input_files def _write_orca_input( self, input_file_name: str, atom_types: np.array, positions: np.array ): """ Write the ORCA input file using the provided template. Args: input_file_name (str): Name of the input file. atom_types (numpy.array): Integer array holding the atomic numbers of each species. positions (numpy.array): Array of all atom positions in Angstrom. """ input_file = open(input_file_name, "w") geometry_block = [] for idx in range(len(atom_types)): geometry_block.append( "{:2s} {:15.8f} {:15.8f} {:15.8f}".format( chemical_symbols[atom_types[idx]], *positions[idx] ) ) input_file.write(self.orca_template.format(geometry="\n".join(geometry_block))) input_file.close() def _run_computation( self, molecules: List[Tuple[np.array, np.array]], current_compdir: str ): """ Perform the actual computation. First, inputs are generated, then computations are performed, finally the data is extracted from the output files and returned. Args: molecules (list): List of atom_types, position tuples current_compdir (str): Path to computation directory. Returns: dict: Dictionary holding the requested properties which were computed. """ # Generate input files input_files = self._generate_orca_inputs(molecules, current_compdir) # Perform computations for input_file in input_files: command = "{:s} {:s}".format(self.qm_executable, input_file, input_file) with open("{:s}.log".format(input_file), "wb") as out: computation = subprocess.Popen(command.split(), stdout=out) computation.wait() # Extract the results outputs = [] for input_file in input_files: self.orca_parser.parse_file("{:s}.log".format(input_file)) orca_outputs = self.orca_parser.get_parsed() for p in self.required_properties: if orca_outputs[p] is None: raise QMCalculatorError( "Requested property {:s} was not computed in {:s}".format( p, input_file ) ) outputs.append(orca_outputs) return outputs def _format_calc(self, outputs: Dict[str, np.array], system: System): """ Format the extracted properties into the form used by the schnetpack.md.System class (zero padding, reshaping, etc.). Args: outputs (dict(str, np.array): Dictionary of ouput arrays. system (schnetpack.md.System): System from the molecular dynamics simulation. Returns: dict: Dictionary of all extracted computation results formatted and padded to the right dimensions used in the system class. """ results = {p: [] for p in self.required_properties} for output in outputs: for p in self.required_properties: # Check for convergence if output[p] is None: raise QMCalculatorError("Errors encountered during computation.") results[p].append(torch.from_numpy(output[p])) for p in self.required_properties: results[p] = torch.stack(results[p]).to(system.device, system.dtype) return results def _format_ase(self, molecules, outputs): """ Format the ouput for storage in an ASE database (for adaptive sampling). Args: molecules (list): List of tuples containing the atom types (integer numpy.array) and positions (float numpy.array). outputs (list): Paths to output files. """ atom_buffer = [] property_buffer = [] for idx, molecule in enumerate(molecules): atom_types, positions = molecule atoms = Atoms(atom_types.cpu(), positions.cpu()) props = outputs[idx] atom_buffer.append(atoms) property_buffer.append(props) return atom_buffer, property_buffer