Source code for burnman.utils.chemistry

# This file is part of BurnMan - a thermoelastic and thermodynamic toolkit
# for the Earth and Planetary Sciences
# Copyright (C) 2012 - 2021 by the BurnMan team, released under the GNU
# GPL v2 or later.

# This module provides the functions required to process the
# standard burnman formula formats.
# tools.chemistry returns the number of atoms and molar mass of a compound
# given its unit formula as an argument.
# process_solution_chemistry returns information required to calculate
# solution properties from a set of endmember formulae

from __future__ import absolute_import
import re
import numpy as np
from fractions import Fraction
from collections import Counter
import pkgutil
from string import ascii_uppercase as ucase
from sympy import nsimplify


[docs]def read_masses(): """ A simple function to read a file with a two column list of elements and their masses into a dictionary """ datastream = pkgutil.get_data("burnman", "data/input_masses/atomic_masses.dat") datalines = [ line.strip() for line in datastream.decode("ascii").split("\n") if line.strip() ] lookup = dict() for line in datalines: data = "%".join(line.split("%")[:1]).split() if data != []: lookup[data[0]] = float(data[1]) return lookup
""" atomic_masses is a dictionary of atomic masses """ atomic_masses = read_masses() """ IUPAC_element_order provides a list of all the elements. Element order is based loosely on electronegativity, following the scheme suggested by IUPAC, except that H comes after the Group 16 elements, not before them. """ IUPAC_element_order = [ "v", "Og", "Rn", "Xe", "Kr", "Ar", "Ne", "He", # Group 18 "Fr", "Cs", "Rb", "K", "Na", "Li", # Group 1 (not H) "Ra", "Ba", "Sr", "Ca", "Mg", "Be", # Group 2 "Lr", "No", "Md", "Fm", "Es", "Cf", "Bk", "Cm", "Am", "Pu", "Np", "U", "Pa", "Th", "Ac", # Actinides "Lu", "Yb", "Tm", "Er", "Ho", "Dy", "Tb", "Gd", "Eu", "Sm", "Pm", "Nd", "Pr", "Ce", "La", # Lanthanides "Y", "Sc", # Group 3 "Rf", "Hf", "Zr", "Ti", # Group 4 "Db", "Ta", "Nb", "V", # Group 5 "Sg", "W", "Mo", "Cr", # Group 6 "Bh", "Re", "Tc", "Mn", # Group 7 "Hs", "Os", "Ru", "Fe", # Group 8 "Mt", "Ir", "Rh", "Co", # Group 9 "Ds", "Pt", "Pd", "Ni", # Group 10 "Rg", "Au", "Ag", "Cu", # Group 11 "Cn", "Hg", "Cd", "Zn", # Group 12 "Nh", "Tl", "In", "Ga", "Al", "B", # Group 13 "Fl", "Pb", "Sn", "Ge", "Si", "C", # Group 14 "Mc", "Bi", "Sb", "As", "P", "N", # Group 15 "Lv", "Po", "Te", "Se", "S", "O", # Group 16 "H", # hydrogen "Ts", "At", "I", "Br", "Cl", "F", ] # Group 17
[docs]def dictionarize_formula(formula): """ A function to read a chemical formula string and convert it into a dictionary :param formula: Chemical formula, written in the XnYm format, where the formula has n atoms of element X and m atoms of element Y :type formula: str :returns: The same chemical formula, but expressed as a dictionary. :rtype: dict """ f = dict() elements = re.findall("[A-Z][^A-Z]*", formula) for element in elements: element_name = re.split("[0-9][^A-Z]*", element)[0] element_atoms = re.findall("[0-9][^A-Z]*", element) if len(element_atoms) == 0: element_atoms = Fraction(1.0) else: element_atoms = Fraction(element_atoms[0]) f[element_name] = f.get(element_name, 0.0) + element_atoms return f
[docs]def sum_formulae(formulae, amounts=None): """ Adds together a set of formulae. :param formulae: List of chemical formulae. :type formulae: list of dictionary or counter objects :param amounts: List of amounts of each formula. :type amounts: list of floats :returns: The sum of the user-provided formulae :rtype: Counter object """ if amounts is None: amounts = [1.0 for formula in formulae] else: assert len(formulae) == len(amounts) summed_formula = Counter() for i, formula in enumerate(formulae): summed_formula.update( Counter( { element: amounts[i] * n_atoms for (element, n_atoms) in formula.items() } ) ) return summed_formula
[docs]def formula_mass(formula): """ A function to take a chemical formula and compute the formula mass. :param formula: A chemical formula :type formula: dict or Counter object :returns: The mass per mole of formula [kg] :rtype: float """ mass = sum(formula[element] * atomic_masses[element] for element in formula) return mass
[docs]def convert_formula(formula, to_type="mass", normalize=False): """ Converts a chemical formula from one type (mass or molar) into the other. Renormalises amounts if normalize=True. :param formula: A chemical formula. :type formula: dict or Counter object :param to_type: Conversion type, one of 'mass' or 'molar'. :type to_type: str :param normalize: Whether or not to normalize the converted formula to 1. :type normalize: bool :returns: The converted formula. :rtype: dict """ if to_type == "mass": f = { element: n_atoms * atomic_masses[element] for (element, n_atoms) in formula.items() } elif to_type == "molar": f = { element: n_atoms / atomic_masses[element] for (element, n_atoms) in formula.items() } else: raise Exception( "Value of parameter to_type not recognised. " 'Should be either "mass" or "molar".' ) if normalize: s = np.sum([n for (element, n) in f.items()]) f = {element: n / s for (element, n) in f.items()} return f
[docs]def process_solution_chemistry(solution_model): """ This function parses a class instance with a "formulas" attribute containing site information, e.g. [ '[Mg]3[Al]2Si3O12', '[Mg]3[Mg1/2Si1/2]2Si3O12' ] It outputs the bulk composition of each endmember (removing the site information), and also a set of variables and arrays which contain the site information. These are output in a format that can easily be used to calculate activities and gibbs free energies, given molar fractions of the phases and pressure and temperature where necessary. :param solution_model: Class must have a "formulas" attribute, containing a list of chemical formulae with site information :type solution model: instance of class :rtype: None .. note:: Nothing is returned from this function, but the solution_model object gains the following attributes: * solution_formulae [list of dictionaries] List of endmember formulae in dictionary form. * empty_formula [string] Abbreviated chemical formula with sites denoted by empty square brackets. * general_formula [string] General chemical formula with sites denoted by square brackets filled with a comma-separated list of species * n_sites [integer] Number of sites in the solution. Should be the same for all endmembers. * sites [list of lists of strings] A list of species for each site in the solution. * site_names [list of strings] A list of species_site pairs in the solution, where each distinct site is given by a unique uppercase letter e.g. ['Mg_A', 'Fe_A', 'Al_A', 'Al_B', 'Si_B']. * n_occupancies [integer] Sum of the number of possible species on each of the sites in the solution. Example: A binary solution [[A][B],[B][C1/2D1/2]] would have n_occupancies = 5, with two possible species on Site 1 and three on Site 2. * site_multiplicities [2D array of floats] A 1D array for each endmember in the solution, containing the multiplicities of each site per formula unit. To simplify computations later, the multiplicities are repeated for each species on each site, so the shape of this attribute is (n_endmembers, n_site_species). * endmember_occupancies [2d array of floats] A 1D array for each endmember in the solution, containing the fraction of atoms of each species on each site. * endmember_noccupancies [2d array of floats] A 1D array for each endmember in the solution, containing the number of atoms of each species on each site per mole of endmember. """ formulae = solution_model.formulas n_sites = formulae[0].count("[") n_endmembers = len(formulae) # Check the number of sites is the same for all endmembers if not np.all(np.array([f.count("[") for f in formulae]) == n_sites): raise Exception("All formulae must have the same " "number of distinct sites.") solution_formulae = [{} for i in range(n_endmembers)] sites = [[] for i in range(n_sites)] list_occupancies = [] list_multiplicities = np.empty(shape=(n_endmembers, n_sites)) n_occupancies = 0 # Number of unique site occupancies (e.g.. Mg on X etc.) for i_mbr in range(n_endmembers): list_occupancies.append([[0] * len(sites[site]) for site in range(n_sites)]) s = re.split(r"\[", formulae[i_mbr])[1:] for i_site, site_string in enumerate(s): site_split = re.split(r"\]", site_string) site_occupancy = site_split[0] mult = re.split("[A-Z][^A-Z]*", site_split[1])[0] if mult == "": list_multiplicities[i_mbr][i_site] = Fraction(1.0) else: list_multiplicities[i_mbr][i_site] = Fraction(mult) # Loop over species on a site species = re.findall("[A-Z][^A-Z]*", site_occupancy) for sp in species: # Find the species and its proportion on the site species_split = re.split("([0-9][^A-Z]*)", sp) name_of_species = species_split[0] if len(species_split) == 1: proportion_species_on_site = Fraction(1.0) else: proportion_species_on_site = Fraction(species_split[1]) solution_formulae[i_mbr][name_of_species] = solution_formulae[ i_mbr ].get(name_of_species, 0.0) + ( list_multiplicities[i_mbr][i_site] * proportion_species_on_site ) if name_of_species not in sites[i_site]: n_occupancies += 1 sites[i_site].append(name_of_species) i_el = sites[i_site].index(name_of_species) for parsed_mbr in range(len(list_occupancies)): list_occupancies[parsed_mbr][i_site].append(0) else: i_el = sites[i_site].index(name_of_species) list_occupancies[i_mbr][i_site][i_el] = proportion_species_on_site # Loop over species after site if len(site_split) != 1: not_in_site = str(filter(None, site_split[1])) not_in_site = not_in_site.replace(mult, "", 1) for enamenumber in re.findall("[A-Z][^A-Z]*", not_in_site): sp = list(filter(None, re.split(r"(\d+)", enamenumber))) # Look up number of atoms of element if len(sp) == 1: nel = 1.0 else: nel = float(float(sp[1])) solution_formulae[i_mbr][sp[0]] = ( solution_formulae[i_mbr].get(sp[0], 0.0) + nel ) # Site occupancies and multiplicities endmember_occupancies = np.empty(shape=(n_endmembers, n_occupancies)) site_multiplicities = np.empty(shape=(n_endmembers, n_occupancies)) for i_mbr in range(n_endmembers): n_species = 0 for i_site in range(n_sites): for i_el in range(len(list_occupancies[i_mbr][i_site])): endmember_occupancies[i_mbr][n_species] = list_occupancies[i_mbr][ i_site ][i_el] site_multiplicities[i_mbr][n_species] = list_multiplicities[i_mbr][ i_site ] n_species += 1 # Site names solution_model.site_names = [] for i, species in enumerate(sites): for sp in species: solution_model.site_names.append("{0}_{1}".format(sp, ucase[i])) # Finally, make attributes for solution model instance: solution_model.solution_formulae = solution_formulae solution_model.n_sites = n_sites solution_model.sites = sites solution_model.site_multiplicities = site_multiplicities solution_model.n_occupancies = n_occupancies solution_model.endmember_occupancies = endmember_occupancies solution_model.endmember_noccupancies = np.einsum( "ij, ij->ij", endmember_occupancies, site_multiplicities ) solution_model.empty_formula = re.sub( "([\[]).*?([\]])", "\g<1>\g<2>", solution_model.formulas[0] ) split_empty = solution_model.empty_formula.split("[") solution_model.general_formula = split_empty[0] for i in range(n_sites): solution_model.general_formula += f"[{','.join(sites[i])}{split_empty[i+1]}"
[docs]def site_occupancies_to_strings( site_species_names, site_multiplicities, endmember_occupancies ): """ Converts a list of endmember site occupancies into a list of string representations of those occupancies. :param site_species_names: A list of list of strings, giving the names of the species which reside on each site. List of sites, each of which contains a list of the species occupying each site. :type site_species_names: 2D list of strings :param site_multiplicities: List of floats giving the multiplicity of each site. If 2D, must have the same shape as endmember_occupancies. If 1D, must be either the same length as the number of sites, or the same length as site_species_names (with an implied repetition of the same number for each species on a given site). :type site_multiplicities: 1D or 2D numpy array of floats :param endmember_occupancies: A list of site-species occupancies for each endmember. The first dimension loops over the endmembers, and the second dimension loops over the site-species occupancies for that endmember. The total number and order of occupancies must be the same as the strings in site_species_names. :type endmember_occupancies: 2D numpy array of floats :returns: A list of strings in standard burnman format. For example, [Mg]3[Al]2 would correspond to the classic two-site pyrope garnet. :rtype: list of strings """ site_multiplicities = np.array(site_multiplicities) endmember_occupancies = np.array(endmember_occupancies) n_endmembers = endmember_occupancies.shape[0] if len(site_multiplicities.shape) == 1: # Site multiplicities should either be given on a per-site basis, # or a per-species basis if len(site_species_names) == len(site_multiplicities): site_mults = [] for i, site in enumerate(site_species_names): for species in site: site_mults.append(site_multiplicities[i]) site_multiplicities = np.array(site_mults) elif len(endmember_occupancies[0]) != len(site_multiplicities): raise Exception( "Site multiplicities should either be given " "on a per-site basis or a per-species basis" ) site_multiplicities = np.einsum( "i, j->ij", np.ones(n_endmembers), site_multiplicities ) elif len(site_multiplicities.shape) == 2: if site_multiplicities.shape != endmember_occupancies.shape: raise Exception( "If site_multiplicities is 2D, it should have " "the same shape as endmember_occupancies. " "They currently have shapes " f"{site_multiplicities.shape} and " f"{endmember_occupancies.shape}." ) else: raise Exception("Site multiplicities should either be 1D or 2D.") site_formulae = [] for i_mbr, mbr_occupancies in enumerate(endmember_occupancies): i = 0 site_formulae.append("") for site in site_species_names: amounts = mbr_occupancies[i : i + len(site)] mult = site_multiplicities[i_mbr, i] if np.abs(mult - 1.0) < 1.0e-12: mult = "" else: mult = str(nsimplify(mult)) amounts /= sum(amounts) site_occupancy = formula_to_string(dict(zip(site, amounts))) site_formulae[-1] += "[{0}]{1}".format(site_occupancy, mult) i += len(site) return site_formulae
[docs]def compositional_array(formulae): """ :param formulae: List of chemical formulae :type formulae: list of dicts :returns: Array of endmember formulae and a list of elements. :rtype: 2D numpy.array of floats and a list of strs """ elements = [] for formula in formulae: for element in formula: if element not in elements: elements.append(element) formula_array = ordered_compositional_array(formulae, elements) return formula_array, elements
[docs]def ordered_compositional_array(formulae, elements): """ :param formulae: List of chemical formulae :type formulae: list of dicts :param elements : List of elements :type elements: list of strings :returns: Array of endmember formulae :rtype: 2D array of floats """ formula_array = np.zeros(shape=(len(formulae), len(elements))) for idx, formula in enumerate(formulae): for element in formula: assert element in elements formula_array[idx][elements.index(element)] = formula[element] return formula_array
[docs]def formula_to_string(formula): """ :param formula: Chemical formula :type formula: dict or Counter :returns: A formula string, with element order as given in the list IUPAC_element_order. If one or more keys in the dictionary are not one of the elements in the periodic table, then they are added at the end of the string. :rtype: str """ formula_string = "" for e in IUPAC_element_order: if e in formula and np.abs(formula[e]) > 1.0e-12: if np.abs(formula[e] - 1.0) < 1.0e-12: formula_string += e else: formula_string += e + str(nsimplify(formula[e])) for e in formula: if e not in IUPAC_element_order: if e in formula and np.abs(formula[e]) > 1.0e-12: if np.abs(formula[e] - 1.0) < 1.0e-12: formula_string += e else: formula_string += e + str(nsimplify(formula[e])) return formula_string
[docs]def sort_element_list_to_IUPAC_order(element_list): """ :param element_list : List of elements. :type element_list: list :returns: List of elements sorted into IUPAC order :rtype: list """ sorted_list = [e for e in IUPAC_element_order if e in element_list] assert len(sorted_list) == len(element_list) return sorted_list
[docs]def convert_fractions(composite, phase_fractions, input_type, output_type): """ Takes a composite with a set of user defined molar, volume or mass fractions (which do not have to be the fractions currently associated with the composite) and converts the fractions to molar, mass or volume. Conversions to and from mass require a molar mass to be defined for all phases. Conversions to and from volume require set_state to have been called for the composite. :param composite: Composite for which fractions are to be defined. :type composite: :class:`~burnman.Composite` :param phase_fractions: List of input phase fractions (of type input_type). :type phase_fractions: list of floats :param input_type: Input fraction type. One of 'molar', 'mass' or 'volume'. :type input_type: str :param output_type: Output fraction type. One of 'molar', 'mass' or 'volume'. :type output_type: str :returns: List of output phase fractions (of type output_type) :rtype: list of floats """ if input_type == "volume" or output_type == "volume": if composite.temperature is None: raise Exception( composite.to_string() + ".set_state(P, T) has not been called, so volume fractions are currently undefined. Exiting." ) if input_type == "molar": molar_fractions = phase_fractions if input_type == "volume": total_moles = sum( volume_fraction / phase.molar_volume for volume_fraction, phase in zip(phase_fractions, composite.phases) ) molar_fractions = [ volume_fraction / (phase.molar_volume * total_moles) for volume_fraction, phase in zip(phase_fractions, composite.phases) ] if input_type == "mass": total_moles = sum( mass_fraction / phase.molar_mass for mass_fraction, phase in zip(phase_fractions, composite.phases) ) molar_fractions = [ mass_fraction / (phase.molar_mass * total_moles) for mass_fraction, phase in zip(phase_fractions, composite.phases) ] if output_type == "volume": total_volume = sum( molar_fraction * phase.molar_volume for molar_fraction, phase in zip(molar_fractions, composite.phases) ) output_fractions = [ molar_fraction * phase.molar_volume / total_volume for molar_fraction, phase in zip(molar_fractions, composite.phases) ] elif output_type == "mass": total_mass = sum( molar_fraction * phase.molar_mass for molar_fraction, phase in zip(molar_fractions, composite.phases) ) output_fractions = [ molar_fraction * phase.molar_mass / total_mass for molar_fraction, phase in zip(molar_fractions, composite.phases) ] elif output_type == "molar": output_fractions = molar_fractions return output_fractions
[docs]def reaction_matrix_as_strings(reaction_matrix, compound_names): """ Returns a list of string representations of all the reactions in reaction_matrix. :param reaction_matrix: Matrix of stoichiometric amounts of each compound j in reaction i. :type reaction_matrix: 2D numpy array :param compound_names: List of compound names. :type compound_names: list of strings :returns: List of strings corresponding to each reaction. :rtype: list of strings """ reaction_strings = [] for reaction in reaction_matrix: lhs, rhs = ("", "") for i, coefficient in enumerate(reaction): if coefficient < -1.0e-10: if len(lhs) > 0: lhs += " + " lhs += f"{-coefficient} {compound_names[i]}" if coefficient > 1.0e-10: if len(rhs) > 0: rhs += " + " rhs += f"{coefficient} {compound_names[i]}" reaction_strings.append(f"{lhs} = {rhs}") return reaction_strings