Source code for burnman.processchemistry

# This file is part of BurnMan - a thermoelastic and thermodynamic toolkit
# for the Earth and Planetary Sciences
# Copyright (C) 2012 - 2021 by the BurnMan team, released under the GNU
# GPL v2 or later.

# This module provides the functions required to process the
# standard burnman formula formats.
# ProcessChemistry returns the number of atoms and molar mass of a compound
# given its unit formula as an argument.
# process_solution_chemistry returns information required to calculate
# solid solution properties from a set of endmember formulae

from __future__ import absolute_import
import re
import numpy as np
from fractions import Fraction
from collections import Counter
import pkgutil
from string import ascii_uppercase as ucase
from sympy import nsimplify


[docs]def read_masses(): """ A simple function to read a file with a two column list of elements and their masses into a dictionary """ datastream = pkgutil.get_data( 'burnman', 'data/input_masses/atomic_masses.dat') datalines = [line.strip() for line in datastream.decode('ascii').split('\n') if line.strip()] lookup = dict() for line in datalines: data = "%".join(line.split("%")[:1]).split() if data != []: lookup[data[0]] = float(data[1]) return lookup
""" atomic_masses is a dictionary of atomic masses """ atomic_masses = read_masses() """ IUPAC_element_order provides a list of all the elements. Element order is based loosely on electronegativity, following the scheme suggested by IUPAC, except that H comes after the Group 16 elements, not before them. """ IUPAC_element_order = ['v', 'Og', 'Rn', 'Xe', 'Kr', 'Ar', 'Ne', 'He', # Group 18 'Fr', 'Cs', 'Rb', 'K', 'Na', 'Li', # Group 1 (not H) 'Ra', 'Ba', 'Sr', 'Ca', 'Mg', 'Be', # Group 2 'Lr', 'No', 'Md', 'Fm', 'Es', 'Cf', 'Bk', 'Cm', 'Am', 'Pu', 'Np', 'U', 'Pa', 'Th', 'Ac', # Actinides 'Lu', 'Yb', 'Tm', 'Er', 'Ho', 'Dy', 'Tb', 'Gd', 'Eu', 'Sm', 'Pm', 'Nd', 'Pr', 'Ce', 'La', # Lanthanides 'Y', 'Sc', # Group 3 'Rf', 'Hf', 'Zr', 'Ti', # Group 4 'Db', 'Ta', 'Nb', 'V', # Group 5 'Sg', 'W', 'Mo', 'Cr', # Group 6 'Bh', 'Re', 'Tc', 'Mn', # Group 7 'Hs', 'Os', 'Ru', 'Fe', # Group 8 'Mt', 'Ir', 'Rh', 'Co', # Group 9 'Ds', 'Pt', 'Pd', 'Ni', # Group 10 'Rg', 'Au', 'Ag', 'Cu', # Group 11 'Cn', 'Hg', 'Cd', 'Zn', # Group 12 'Nh', 'Tl', 'In', 'Ga', 'Al', 'B', # Group 13 'Fl', 'Pb', 'Sn', 'Ge', 'Si', 'C', # Group 14 'Mc', 'Bi', 'Sb', 'As', 'P', 'N', # Group 15 'Lv', 'Po', 'Te', 'Se', 'S', 'O', # Group 16 'H', # hydrogen 'Ts', 'At', 'I', 'Br', 'Cl', 'F'] # Group 17
[docs]def dictionarize_formula(formula): """ A function to read a chemical formula string and convert it into a dictionary Parameters ---------- formula : string object Chemical formula, written in the XnYm format, where the formula has n atoms of element X and m atoms of element Y Returns ------- f : dictionary object The same chemical formula, but expressed as a dictionary. """ f = dict() elements = re.findall('[A-Z][^A-Z]*', formula) for element in elements: element_name = re.split('[0-9][^A-Z]*', element)[0] element_atoms = re.findall('[0-9][^A-Z]*', element) if len(element_atoms) == 0: element_atoms = Fraction(1.0) else: element_atoms = Fraction(element_atoms[0]) f[element_name] = f.get(element_name, 0.0) + element_atoms return f
[docs]def sum_formulae(formulae, amounts=None): """ Adds together a set of formulae. Parameters ---------- formulae : list of dictionary or counter objects List of chemical formulae amounts : list of floats List of amounts of each formula Returns ------- summed_formula : Counter object The sum of the user-provided formulae """ if amounts is None: amounts = [1. for formula in formulae] else: assert (len(formulae) == len(amounts)) summed_formula = Counter() for i, formula in enumerate(formulae): summed_formula.update(Counter({element: amounts[i] * n_atoms for (element, n_atoms) in formula.items()})) return summed_formula
[docs]def formula_mass(formula): """ A function to take a chemical formula and compute the formula mass. Parameters ---------- formula : dictionary or counter object A chemical formula Returns ------- mass : float The mass per mole of formula """ mass = sum(formula[element] * atomic_masses[element] for element in formula) return mass
[docs]def convert_formula(formula, to_type='mass', normalize=False): """ Converts a chemical formula from one type (mass or molar) into the other. Renormalises amounts if normalize=True Parameters ---------- formula : dictionary or counter object A chemical formula to_type : string, one of 'mass' or 'molar' Conversion type normalize : boolean Whether or not to normalize the converted formula to 1 Returns ------- f : dictionary The converted formula """ if to_type == 'mass': f = {element: n_atoms * atomic_masses[element] for (element, n_atoms) in formula.items()} elif to_type == 'molar': f = {element: n_atoms / atomic_masses[element] for (element, n_atoms) in formula.items()} else: raise Exception('Value of parameter to_type not recognised. ' 'Should be either "mass" or "molar".') if normalize: s = np.sum([n for (element, n) in f.items()]) f = {element: n/s for (element, n) in f.items()} return f
[docs]def process_solution_chemistry(solution_model): """ This function parses a class instance with a "formulas" attribute containing site information, e.g. [ '[Mg]3[Al]2Si3O12', '[Mg]3[Mg1/2Si1/2]2Si3O12' ] It outputs the bulk composition of each endmember (removing the site information), and also a set of variables and arrays which contain the site information. These are output in a format that can easily be used to calculate activities and gibbs free energies, given molar fractions of the phases and pressure and temperature where necessary. Parameters ---------- solution_model : instance of class Class must have a "formulas" attribute, containing a list of chemical formulae with site information Creates attributes ------------------ solution_formulae : list of dictionaries List of endmember formulae is output from site formula strings n_sites : integer Number of sites in the solid solution. Should be the same for all endmembers. sites : list of lists of strings A list of elements for each site in the solid solution site_names : list of strings A list of elements_site pairs in the solid solution, where each distinct site is given by a unique uppercase letter e.g. ['Mg_A', 'Fe_A', 'Al_A', 'Al_B', 'Si_B'] n_occupancies : integer Sum of the number of possible elements on each of the sites in the solid solution. Example: A binary solution [[A][B],[B][C1/2D1/2]] would have n_occupancies = 5, with two possible elements on Site 1 and three on Site 2 site_multiplicities : array of floats The number of each site per formula unit To simplify computations later, the multiplicities are repeated for each element on each site endmember_occupancies : 2d array of floats A 1D array for each endmember in the solid solution, containing the fraction of atoms of each element on each site. endmember_noccupancies : 2d array of floats A 1D array for each endmember in the solid solution, containing the number of atoms of each element on each site per mole of endmember. """ formulae = solution_model.formulas n_sites = formulae[0].count('[') n_endmembers = len(formulae) # Check the number of sites is the same for all endmembers for i in range(n_endmembers): assert(formulae[i].count('[') == n_sites) solution_formulae = [] sites = [[] for i in range(n_sites)] list_occupancies = [] list_multiplicity = np.empty(shape=(n_sites)) n_occupancies = 0 # Number of unique site occupancies (e.g.. Mg on X etc.) for i_mbr in range(n_endmembers): solution_formula = dict() list_occupancies.append([[0] * len(sites[site]) for site in range(n_sites)]) s = re.split(r'\[', formulae[i_mbr])[1:] for i_site, site_string in enumerate(s): site_split = re.split(r'\]', site_string) site_occupancy = site_split[0] mult = re.split('[A-Z][^A-Z]*', site_split[1])[0] if mult == '': list_multiplicity[i_site] = Fraction(1.0) else: list_multiplicity[i_site] = Fraction(mult) # Loop over elements on a site elements = re.findall('[A-Z][^A-Z]*', site_occupancy) for element in elements: # Find the element and proportion on the site element_split = re.split('([0-9][^A-Z]*)', element) element_on_site = element_split[0] if len(element_split) == 1: proportion_element_on_site = Fraction(1.0) else: proportion_element_on_site = Fraction(element_split[1]) solution_formula[element_on_site] = solution_formula.get( element_on_site, 0.0) + (list_multiplicity[i_site] * proportion_element_on_site) if element_on_site not in sites[i_site]: n_occupancies += 1 sites[i_site].append(element_on_site) i_el = sites[i_site].index(element_on_site) for parsed_mbr in range(len(list_occupancies)): list_occupancies[parsed_mbr][i_site].append(0) else: i_el = sites[i_site].index(element_on_site) list_occupancies[i_mbr][i_site][i_el] = proportion_element_on_site # Loop over elements after site if len(site_split) != 1: not_in_site = str(filter(None, site_split[1])) not_in_site = not_in_site.replace(mult, '', 1) for enamenumber in re.findall('[A-Z][^A-Z]*', not_in_site): element = list( filter(None, re.split(r'(\d+)', enamenumber))) # Look up number of atoms of element if len(element) == 1: nel = 1. else: nel = float(float(element[1])) solution_formula[element[0]] = solution_formula.get( element[0], 0.0) + nel solution_formulae.append(solution_formula) # Site occupancies and multiplicities endmember_occupancies = np.empty(shape=(n_endmembers, n_occupancies)) site_multiplicities = np.empty(shape=(n_occupancies)) for i_mbr in range(n_endmembers): n_element = 0 for i_site in range(n_sites): for i_el in range(len(list_occupancies[i_mbr][i_site])): endmember_occupancies[i_mbr][ n_element] = list_occupancies[i_mbr][i_site][i_el] site_multiplicities[n_element] = list_multiplicity[i_site] n_element += 1 # Site names solution_model.site_names = [] for i, elements in enumerate(sites): for element in elements: solution_model.site_names.append('{0}_{1}'.format(element, ucase[i])) # Finally, make attributes for solution model instance: solution_model.solution_formulae = solution_formulae solution_model.n_sites = n_sites solution_model.sites = sites solution_model.n_occupancies = n_occupancies solution_model.site_multiplicities = site_multiplicities solution_model.endmember_occupancies = endmember_occupancies solution_model.endmember_noccupancies = np.einsum('ij, j->ij', endmember_occupancies, site_multiplicities)
[docs]def site_occupancies_to_strings(site_species_names, site_multiplicities, endmember_occupancies): """ Converts a list of endmember site occupancies into a list of string representations of those occupancies. Parameters ---------- site_species_names : 2D list of strings A list of list of strings, giving the names of the species which reside on each site. List of sites, each of which contains a list of the species occupying each site. site_multiplicities : numpy array of floats List of floats giving the multiplicity of each site Must be either the same length as the number of sites, or the same length as site_species_names (with an implied repetition of the same number for each species on a given site). endmember_occupancies : 2D numpy array of floats A list of site-species occupancies for each endmember. The first dimension loops over the endmembers, and the second dimension loops over the site-species occupancies for that endmember. The total number and order of occupancies must be the same as the strings in site_species_names. Returns ------- site_formulae : list of strings A list of strings in standard burnman format. For example, [Mg]3[Al]2 would correspond to the classic two-site pyrope garnet. """ # Site multiplicities should either be given on a per-site basis, # or a per-species basis if len(site_species_names) == len(site_multiplicities): site_mults = [] for i, site in enumerate(site_species_names): for species in site: site_mults.append(site_multiplicities[i]) site_multiplicities = site_mults elif len(endmember_occupancies[0]) != len(site_multiplicities): raise Exception( 'Site multiplicities should either be given on a per-site basis or a per-species basis') site_formulae = [] for mbr_occupancies in endmember_occupancies: i = 0 site_formulae.append('') for site in site_species_names: amounts = mbr_occupancies[i:i+len(site)] mult = site_multiplicities[i] if np.abs(mult - 1.) < 1.e-12: mult = '' else: mult = str(nsimplify(mult)) amounts /= sum(amounts) site_occupancy = formula_to_string(dict(zip(site, amounts))) site_formulae[-1] += '[{0}]{1}'.format(site_occupancy, mult) i += len(site) return site_formulae
[docs]def compositional_array(formulae): """ Parameters ---------- formulae : list of dictionaries List of chemical formulae Returns ------- formula_array : 2D array of floats Array of endmember formulae elements : List of strings List of elements """ elements = [] for formula in formulae: for element in formula: if element not in elements: elements.append(element) formula_array = ordered_compositional_array(formulae, elements) return formula_array, elements
[docs]def ordered_compositional_array(formulae, elements): """ Parameters ---------- formulae : list of dictionaries List of chemical formulae elements : List of strings List of elements Returns ------- formula_array : 2D array of floats Array of endmember formulae """ formula_array = np.zeros(shape=(len(formulae), len(elements))) for idx, formula in enumerate(formulae): for element in formula: assert(element in elements) formula_array[idx][elements.index(element)] = formula[element] return formula_array
[docs]def formula_to_string(formula): """ Parameters ---------- formula : dictionary or counter Chemical formula Returns ------- formula_string : string A formula string, with element order as given in the list IUPAC_element_order. If one or more keys in the dictionary are not one of the elements in the periodic table, then they are added at the end of the string. """ formula_string = '' for e in IUPAC_element_order: if e in formula and np.abs(formula[e]) > 1.e-12: if np.abs(formula[e] - 1.) < 1.e-12: formula_string += e else: formula_string += e + str(nsimplify(formula[e])) for e in formula: if e not in IUPAC_element_order: if e in formula and np.abs(formula[e]) > 1.e-12: if np.abs(formula[e] - 1.) < 1.e-12: formula_string += e else: formula_string += e + str(nsimplify(formula[e])) return formula_string