# This file is part of BurnMan - a thermoelastic and thermodynamic toolkit
# for the Earth and Planetary Sciences
# Copyright (C) 2012 - 2021 by the BurnMan team, released under the GNU
# GPL v2 or later.
# This module provides the functions required to process the
# standard burnman formula formats.
# ProcessChemistry returns the number of atoms and molar mass of a compound
# given its unit formula as an argument.
# process_solution_chemistry returns information required to calculate
# solid solution properties from a set of endmember formulae
from __future__ import absolute_import
import re
import numpy as np
from fractions import Fraction
from collections import Counter
import pkgutil
from string import ascii_uppercase as ucase
from sympy import nsimplify
[docs]def read_masses():
"""
A simple function to read a file with a two column list of
elements and their masses into a dictionary
"""
datastream = pkgutil.get_data(
'burnman', 'data/input_masses/atomic_masses.dat')
datalines = [line.strip()
for line in datastream.decode('ascii').split('\n')
if line.strip()]
lookup = dict()
for line in datalines:
data = "%".join(line.split("%")[:1]).split()
if data != []:
lookup[data[0]] = float(data[1])
return lookup
"""
atomic_masses is a dictionary of atomic masses
"""
atomic_masses = read_masses()
"""
IUPAC_element_order provides a list of all the elements.
Element order is based loosely on electronegativity,
following the scheme suggested by IUPAC, except that H
comes after the Group 16 elements, not before them.
"""
IUPAC_element_order = ['v', 'Og', 'Rn', 'Xe', 'Kr', 'Ar', 'Ne', 'He', # Group 18
'Fr', 'Cs', 'Rb', 'K', 'Na', 'Li', # Group 1 (not H)
'Ra', 'Ba', 'Sr', 'Ca', 'Mg', 'Be', # Group 2
'Lr', 'No', 'Md', 'Fm', 'Es', 'Cf', 'Bk', 'Cm',
'Am', 'Pu', 'Np', 'U', 'Pa', 'Th', 'Ac', # Actinides
'Lu', 'Yb', 'Tm', 'Er', 'Ho', 'Dy', 'Tb', 'Gd',
'Eu', 'Sm', 'Pm', 'Nd', 'Pr', 'Ce', 'La', # Lanthanides
'Y', 'Sc', # Group 3
'Rf', 'Hf', 'Zr', 'Ti', # Group 4
'Db', 'Ta', 'Nb', 'V', # Group 5
'Sg', 'W', 'Mo', 'Cr', # Group 6
'Bh', 'Re', 'Tc', 'Mn', # Group 7
'Hs', 'Os', 'Ru', 'Fe', # Group 8
'Mt', 'Ir', 'Rh', 'Co', # Group 9
'Ds', 'Pt', 'Pd', 'Ni', # Group 10
'Rg', 'Au', 'Ag', 'Cu', # Group 11
'Cn', 'Hg', 'Cd', 'Zn', # Group 12
'Nh', 'Tl', 'In', 'Ga', 'Al', 'B', # Group 13
'Fl', 'Pb', 'Sn', 'Ge', 'Si', 'C', # Group 14
'Mc', 'Bi', 'Sb', 'As', 'P', 'N', # Group 15
'Lv', 'Po', 'Te', 'Se', 'S', 'O', # Group 16
'H', # hydrogen
'Ts', 'At', 'I', 'Br', 'Cl', 'F'] # Group 17
[docs]def process_solution_chemistry(solution_model):
"""
This function parses a class instance with a "formulas"
attribute containing site information, e.g.
[ '[Mg]3[Al]2Si3O12', '[Mg]3[Mg1/2Si1/2]2Si3O12' ]
It outputs the bulk composition of each endmember
(removing the site information), and also a set of
variables and arrays which contain the site information.
These are output in a format that can easily be used to
calculate activities and gibbs free energies, given
molar fractions of the phases and pressure
and temperature where necessary.
Parameters
----------
solution_model : instance of class
Class must have a "formulas" attribute, containing a
list of chemical formulae with site information
Creates attributes
------------------
solution_formulae : list of dictionaries
List of endmember formulae is output from site formula strings
n_sites : integer
Number of sites in the solid solution.
Should be the same for all endmembers.
sites : list of lists of strings
A list of elements for each site in the solid solution
site_names : list of strings
A list of elements_site pairs in the solid solution, where
each distinct site is given by a unique uppercase letter
e.g. ['Mg_A', 'Fe_A', 'Al_A', 'Al_B', 'Si_B']
n_occupancies : integer
Sum of the number of possible elements on each of the sites
in the solid solution.
Example: A binary solution [[A][B],[B][C1/2D1/2]] would have
n_occupancies = 5, with two possible elements on
Site 1 and three on Site 2
site_multiplicities : array of floats
The number of each site per formula unit
To simplify computations later, the multiplicities
are repeated for each element on each site
endmember_occupancies : 2d array of floats
A 1D array for each endmember in the solid solution,
containing the fraction of atoms of each element on each site.
endmember_noccupancies : 2d array of floats
A 1D array for each endmember in the solid solution,
containing the number of atoms of each element on each site
per mole of endmember.
"""
formulae = solution_model.formulas
n_sites = formulae[0].count('[')
n_endmembers = len(formulae)
# Check the number of sites is the same for all endmembers
for i in range(n_endmembers):
assert(formulae[i].count('[') == n_sites)
solution_formulae = []
sites = [[] for i in range(n_sites)]
list_occupancies = []
list_multiplicity = np.empty(shape=(n_sites))
n_occupancies = 0
# Number of unique site occupancies (e.g.. Mg on X etc.)
for i_mbr in range(n_endmembers):
solution_formula = dict()
list_occupancies.append([[0] * len(sites[site])
for site in range(n_sites)])
s = re.split(r'\[', formulae[i_mbr])[1:]
for i_site, site_string in enumerate(s):
site_split = re.split(r'\]', site_string)
site_occupancy = site_split[0]
mult = re.split('[A-Z][^A-Z]*', site_split[1])[0]
if mult == '':
list_multiplicity[i_site] = Fraction(1.0)
else:
list_multiplicity[i_site] = Fraction(mult)
# Loop over elements on a site
elements = re.findall('[A-Z][^A-Z]*', site_occupancy)
for element in elements:
# Find the element and proportion on the site
element_split = re.split('([0-9][^A-Z]*)', element)
element_on_site = element_split[0]
if len(element_split) == 1:
proportion_element_on_site = Fraction(1.0)
else:
proportion_element_on_site = Fraction(element_split[1])
solution_formula[element_on_site] = solution_formula.get(
element_on_site, 0.0) + (list_multiplicity[i_site]
* proportion_element_on_site)
if element_on_site not in sites[i_site]:
n_occupancies += 1
sites[i_site].append(element_on_site)
i_el = sites[i_site].index(element_on_site)
for parsed_mbr in range(len(list_occupancies)):
list_occupancies[parsed_mbr][i_site].append(0)
else:
i_el = sites[i_site].index(element_on_site)
list_occupancies[i_mbr][i_site][i_el] = proportion_element_on_site
# Loop over elements after site
if len(site_split) != 1:
not_in_site = str(filter(None, site_split[1]))
not_in_site = not_in_site.replace(mult, '', 1)
for enamenumber in re.findall('[A-Z][^A-Z]*', not_in_site):
element = list(
filter(None, re.split(r'(\d+)', enamenumber)))
# Look up number of atoms of element
if len(element) == 1:
nel = 1.
else:
nel = float(float(element[1]))
solution_formula[element[0]] = solution_formula.get(
element[0], 0.0) + nel
solution_formulae.append(solution_formula)
# Site occupancies and multiplicities
endmember_occupancies = np.empty(shape=(n_endmembers, n_occupancies))
site_multiplicities = np.empty(shape=(n_occupancies))
for i_mbr in range(n_endmembers):
n_element = 0
for i_site in range(n_sites):
for i_el in range(len(list_occupancies[i_mbr][i_site])):
endmember_occupancies[i_mbr][
n_element] = list_occupancies[i_mbr][i_site][i_el]
site_multiplicities[n_element] = list_multiplicity[i_site]
n_element += 1
# Site names
solution_model.site_names = []
for i, elements in enumerate(sites):
for element in elements:
solution_model.site_names.append('{0}_{1}'.format(element,
ucase[i]))
# Finally, make attributes for solution model instance:
solution_model.solution_formulae = solution_formulae
solution_model.n_sites = n_sites
solution_model.sites = sites
solution_model.n_occupancies = n_occupancies
solution_model.site_multiplicities = site_multiplicities
solution_model.endmember_occupancies = endmember_occupancies
solution_model.endmember_noccupancies = np.einsum('ij, j->ij',
endmember_occupancies,
site_multiplicities)
[docs]def site_occupancies_to_strings(site_species_names, site_multiplicities,
endmember_occupancies):
"""
Converts a list of endmember site occupancies into a list
of string representations of those occupancies.
Parameters
----------
site_species_names : 2D list of strings
A list of list of strings, giving the names of the species
which reside on each site.
List of sites, each of which contains a list of the species
occupying each site.
site_multiplicities : numpy array of floats
List of floats giving the multiplicity of each site
Must be either the same length as the number of sites, or
the same length as site_species_names
(with an implied repetition of the same
number for each species on a given site).
endmember_occupancies : 2D numpy array of floats
A list of site-species occupancies for each endmember.
The first dimension loops over the endmembers, and the
second dimension loops over the site-species occupancies for that endmember.
The total number and order of occupancies must
be the same as the strings in site_species_names.
Returns
-------
site_formulae : list of strings
A list of strings in standard burnman format.
For example, [Mg]3[Al]2 would correspond to the
classic two-site pyrope garnet.
"""
# Site multiplicities should either be given on a per-site basis,
# or a per-species basis
if len(site_species_names) == len(site_multiplicities):
site_mults = []
for i, site in enumerate(site_species_names):
for species in site:
site_mults.append(site_multiplicities[i])
site_multiplicities = site_mults
elif len(endmember_occupancies[0]) != len(site_multiplicities):
raise Exception(
'Site multiplicities should either be given on a per-site basis or a per-species basis')
site_formulae = []
for mbr_occupancies in endmember_occupancies:
i = 0
site_formulae.append('')
for site in site_species_names:
amounts = mbr_occupancies[i:i+len(site)]
mult = site_multiplicities[i]
if np.abs(mult - 1.) < 1.e-12:
mult = ''
else:
mult = str(nsimplify(mult))
amounts /= sum(amounts)
site_occupancy = formula_to_string(dict(zip(site, amounts)))
site_formulae[-1] += '[{0}]{1}'.format(site_occupancy, mult)
i += len(site)
return site_formulae
[docs]def compositional_array(formulae):
"""
Parameters
----------
formulae : list of dictionaries
List of chemical formulae
Returns
-------
formula_array : 2D array of floats
Array of endmember formulae
elements : List of strings
List of elements
"""
elements = []
for formula in formulae:
for element in formula:
if element not in elements:
elements.append(element)
formula_array = ordered_compositional_array(formulae, elements)
return formula_array, elements
[docs]def ordered_compositional_array(formulae, elements):
"""
Parameters
----------
formulae : list of dictionaries
List of chemical formulae
elements : List of strings
List of elements
Returns
-------
formula_array : 2D array of floats
Array of endmember formulae
"""
formula_array = np.zeros(shape=(len(formulae), len(elements)))
for idx, formula in enumerate(formulae):
for element in formula:
assert(element in elements)
formula_array[idx][elements.index(element)] = formula[element]
return formula_array