libatomprobe
Library for Atom Probe Tomography (APT) computation
deconvolution.cpp
Go to the documentation of this file.
1 /* deconvolution.cpp : Mass overlap deconvolution algorithm
2  * Copyright (C) 2017 Daniel Haley
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 #include "atomprobe/atomprobe.h"
20 #include "helper/helpFuncs.h"
21 
22 #include <utility>
23 #include <vector>
24 #include <map>
25 
26 using std::pair;
27 using std::map;
28 using std::vector;
29 
30 
31 #ifdef DEBUG
32 //DEBUG
33 using std::endl;
34 using std::cout;
35 #include <string>
36 #endif
37 
38 namespace AtomProbe
39 {
40 
41 float nullBackground(float rangeStart,float rangeEnd)
42 {
43  return 0;
44 }
45 
46 
48  const std::vector<IonHit> &hits, float (*backgroundEstimator)(float,float),
49  std::vector<std::pair<unsigned int,float> > &decomposedHits, float rangeTolerance)
50 {
51 
52  if(!rangeData.getNumIons() ||
53  !rangeData.getNumRanges() || hits.empty() || rangeTolerance < 0)
54  return false;
55 
56 
57  vector<MultiRange> decomposedRanges;
58 
59  //FIXME: Currently mass tolerance not taken into account
60  // when splitting overlapping ranges
61  rangeData.splitOverlapping(decomposedRanges,rangeTolerance);
62 
63  if(decomposedRanges.empty())
64  return false;
65 
66  vector<vector<float> > solutions;
67  vector<vector<unsigned int> > solvedIonIds;
68  vector<float> residuals;
69  //Loop through each disconnected overlap problem
70  for(const auto &mRng : decomposedRanges)
71  {
72  //"Flatten" the problem onto the mass axis
73  std::vector<FLATTENED_RANGE> flatRange;
74  mRng.flattenToMassAxis(flatRange);
75 
76  //A unique set of ionIDs in this problem
77  vector<unsigned int> containedIonIds;
78  containedIonIds.clear();
79  {
80  std::set<unsigned int> idSet;
81  for(const auto &e : flatRange)
82  {
83  //Add all ionIDs from this range set
84  for(const auto id : e.containedIonIDs)
85  idSet.insert(id);
86  }
87 
88  //Move unique identifiers into vector
89  containedIonIds.assign(idSet.begin(),idSet.end());
90 
91  }
92 
93 
94 
95  gsl_matrix *m;
96  m=gsl_matrix_alloc(flatRange.size(),containedIonIds.size());
97  gsl_matrix_set_zero(m);
98 
99  //Construct the abundance matrix from the flattened range data
100  // for this problem
101  for(auto ui=0u; ui< containedIonIds.size();ui++)
102  {
103  std::set<SIMPLE_SPECIES> ss;
104  ss = mRng.getMolecule(containedIonIds[ui]);
105  vector<size_t> elemIdx,frequency;
106 
107  elemIdx.clear(); frequency.clear();
108  for(const auto &mol : ss)
109  {
110  frequency.push_back(mol.count);
111  elemIdx.push_back(abundance.symbolIdxFromAtomicNumber(
112  mol.atomicNumber));
113 
114  //Check we were able to identify the atomic number
115  if(elemIdx.back() == -1)
116  return false;
117  }
118 
119 
120  for(auto uj=0u; uj< flatRange.size();uj++)
121  {
122  float thisAbundance;
123  thisAbundance=abundance.abundanceBetweenLimits(elemIdx,frequency,
124  flatRange[uj].startMass,flatRange[uj].endMass);
125  gsl_matrix_set(m,uj,ui,thisAbundance);
126  }
127  }
128 
129  //TODO: We could speed this up, as we actually are performing the SVD twice.
130  // once is to estimate rank, and the other is to perform the least-squares.
131  // this is unnecessary
132  unsigned int rank;
133  rank = estimateRank(m);
134 
135  //FIXME: Warn if exactly constrained (invalid residual)
136  if(rank < mRng.getNumIons())
137  return false;
138 
139  gsl_vector *b;
140  b=gsl_vector_alloc(flatRange.size());
141 
142  vector<unsigned int> counts;
143  counts.resize(flatRange.size());
144  for(auto &h : hits)
145  {
146  for(auto uj=0;uj<flatRange.size();uj++)
147  {
148  if(flatRange[uj].startMass < h.getMassToCharge()
149  && flatRange[uj].endMass> h.getMassToCharge())
150  {
151  counts[uj]++;
152  break;
153  }
154  }
155 
156  }
157 
158  if(backgroundEstimator)
159  {
160  vector<float> background;
161  background.resize(counts.size(),0);
162  for(auto ui=0;ui<flatRange.size();ui++)
163  {
164  background[ui]=(*backgroundEstimator)(
165  flatRange[ui].startMass,flatRange[ui].endMass);
166  }
167 
168  //Preform background subtraction
169  for(auto ui=0;ui<counts.size();ui++)
170  gsl_vector_set(b,ui,counts[ui]-background[ui]);
171  }
172  else
173  {
174  for(auto ui=0;ui<counts.size();ui++)
175  gsl_vector_set(b,ui,counts[ui]);
176  }
177 
178 
179  //Now we have the matrix and vector,
180  // perform the actual solve step
181  gsl_vector *x;
182  x=gsl_vector_alloc(mRng.getNumIons());
183 
184  solveLeastSquares(m,b,x);
185 
186  //Copy out data from GSL structures to vectors
187  vector<float> soln;
188  soln.resize(x->size);
189  for(auto ui=0;ui<x->size;ui++)
190  soln[ui]=x->data[ui];
191 
192  solutions.push_back(soln);
193  //FIXME: These are the ionIDs of the child multirange. We need to
194  // track them back to their parents. Use the mapping from MultiRange::splitOverlapping to solve this
195  solvedIonIds.push_back(containedIonIds);
196 
197  //FIXME: residual not tracked
198 
199  gsl_matrix_free(m);
200  gsl_vector_free(b);
201  gsl_vector_free(x);
202  }
203 
204  //Squish each sub-problem solution back into a global solution
205  map<unsigned int,float> decomposeMap;
206  ASSERT(solutions.size() == solvedIonIds.size());
207  for(auto ui=0;ui<solvedIonIds.size();ui++)
208  {
209  ASSERT(solutions[ui].size() == solvedIonIds[ui].size());
210  for(auto uj=0;uj<solvedIonIds[ui].size();uj++)
211  {
212  auto it =decomposeMap.find(solvedIonIds[ui][uj]);
213 
214  if( it == decomposeMap.end())
215  decomposeMap[solvedIonIds[ui][uj]] = solutions[ui][uj];
216  else
217  it->second+=solutions[ui][uj];
218  }
219  }
220 
221  for(auto &v : decomposeMap)
222  {
223  decomposedHits.push_back(std::make_pair(v.first,v.second));
224  }
225 
226  return true;
227 }
228 
229 #ifdef DEBUG
230 
231 void runFe_Ni_Sim(MultiRange &mRng,const AbundanceData &natTable,
232  const unsigned int numIron,
233  const unsigned int numNickel,
234  vector<ISOTOPE_ENTRY> entriesFe, vector<ISOTOPE_ENTRY> &entriesNi,
235  vector<float> ironCDF, vector<float> &nickelCDF,
236  vector<float> &feCounts, vector<float> &niCounts)
237 {
238  gsl_rng *rng = AtomProbe::randGen.getRng();
239 
240  vector<IonHit> ions;
241 
242  //Add iron atoms, randomly from spectra
243  unsigned int actualFeCount=0,actualNiCount=0;
244  for(auto ui=0;ui<numIron;ui++)
245  {
246  float r;
247  r=gsl_rng_uniform(rng);
248 
249  unsigned int atom=-1;
250  for(auto uj=0;uj < ironCDF.size();uj++)
251  {
252  if(r <=ironCDF[uj])
253  {
254  atom=uj;
255  break;
256  }
257  }
258 
259  if(atom == (unsigned int) -1)
260  continue;
261  ions.push_back(IonHit(0,0,0,entriesFe[atom].mass));
262  actualFeCount++;
263  }
264 
265  //Add Nickel atoms, randomly from spectra
266  for(auto ui=0;ui<numNickel;ui++)
267  {
268  float r;
269  r=gsl_rng_uniform(rng);
270 
271  unsigned int atom=-1;
272  for(auto uj=0;uj < nickelCDF.size();uj++)
273  {
274  if(r <=nickelCDF[uj])
275  {
276  atom=uj;
277  break;
278  }
279  }
280 
281  if(atom == (unsigned int) -1)
282  continue;
283  ions.push_back(IonHit(0,0,0,entriesNi[atom].mass));
284  actualNiCount++;
285  }
286 
287  //So we now have overlap, pass to solver
288  vector<pair<unsigned int,float> > decomposedResults;
289  if(!leastSquaresOverlapSolve(mRng, natTable,
290  ions,nullptr,decomposedResults,0.05))
291  {
292  ASSERT(false);
293  }
294 
295  ASSERT(decomposedResults.size() ==2);
296 
297  for(auto &v : decomposedResults)
298  {
299  std::string name;
300  name=mRng.getIonName(v.first);
301  if( name == "Fe")
302  feCounts.push_back(v.second);
303  else if(name == "Ni")
304  niCounts.push_back(v.second);
305  else
306  {
307  ASSERT(false);
308  }
309  }
310 }
311 
312 bool testDeconvolve()
313 {
314 
315  //Construct an Fe-Ni overlap
316  MultiRange mRng;
317 
318  RGBf col;
319  col.red=col.green=col.blue=1.0f;
320 
321 
322  SIMPLE_SPECIES ss;
323  ss.atomicNumber=26;
324  ss.count=1;
325  auto ionIdFe=mRng.addIon(ss,"Fe",col);
326 
327 
328  ss.atomicNumber=28;
329  ss.count=1;
330  auto ionIdNi=mRng.addIon(ss,"Ni",col);
331 
332 
333  AbundanceData natTable;
334  auto errCode=natTable.open("../data/naturalAbundance.xml");
335 
336  if(errCode)
337  {
338  WARN(false,"Unable to read abundance table. Skipping test");
339  return true;
340  }
341 
342  //synthesise some ions
343  const unsigned int NUM_IRON =100;
344  const unsigned int NUM_NICKEL=200;
345  vector<float> ironCDF,nickelCDF;
346 
347  unsigned int ironIdx,nickelIdx;
348  ironIdx=natTable.symbolIndex("Fe");
349  nickelIdx=natTable.symbolIndex("Ni");
350 
351  ASSERT(ironIdx != (unsigned int) -1);
352  ASSERT(nickelIdx != (unsigned int) -1);
353 
354  vector<ISOTOPE_ENTRY> entriesFe = natTable.isotopes(ironIdx);
355  vector<ISOTOPE_ENTRY> entriesNi = natTable.isotopes(nickelIdx);
356 
357  vector<unsigned int> rangeGrouping;
358 
359  //Fe+
360  const float MASSTOL =0.1;
361  float accum=0;
362  for(auto v: entriesFe)
363  {
364  accum+=v.abundance;
365  ironCDF.push_back(accum);
366  mRng.addRange(v.mass-MASSTOL,v.mass+MASSTOL,ionIdFe);
367 
368  rangeGrouping.push_back(0);
369  }
370 
371  //Ni+
372  accum=0;
373  for(auto v: entriesNi)
374  {
375  accum+=v.abundance;
376  nickelCDF.push_back(accum);
377  mRng.addRange(v.mass-MASSTOL,v.mass+MASSTOL,ionIdNi);
378  rangeGrouping.push_back(1);
379  }
380 
381  ASSERT( (ironCDF.back() <= 1.0f) );
382  ASSERT( (nickelCDF.back() <= 1.0f) );
383 
384 
385  mRng.setRangeGroups(rangeGrouping);
386 
387  //Fe and Ni count vectors.
388 
389  vector<float> counts[2];
390 
391  //Accumulate a series of different trials, and statistically check results
392  const unsigned int NTRIALS=100;
393  for(auto ui=0;ui<NTRIALS;ui++)
394  {
395  runFe_Ni_Sim(mRng,natTable,NUM_IRON,NUM_NICKEL,
396  entriesFe,entriesNi,ironCDF, nickelCDF, counts[0],counts[1]);
397  }
398 
399  float meanFe,stdFe;
400  float meanNi,stdNi;
401  meanAndStdev(counts[0],meanFe,stdFe);
402  meanAndStdev(counts[1],meanNi,stdNi);
403 
404  float errFe=fabs(meanFe-NUM_IRON)/(float)NUM_IRON ;
405  float errNi=fabs(meanNi-NUM_NICKEL)/(float)NUM_NICKEL;
406 
407  //Check for percentage error. At most 20% error, averaged over all trials
408  // if NUM_IRON is low, this will be bad per-trial
409  // this is set conservatively, so that a poor user doesn't trip it randomly
410  ASSERT(errFe< 0.20);
411  ASSERT(errNi< 0.20);
412 
413  return true;
414 }
415 
416 #endif
417 
418 }
419 
420 
unsigned int atomicNumber
Number of protons (element number)
Definition: multiRange.h:37
size_t symbolIndex(const char *symbol, bool caseSensitive=true) const
Return the element&#39;s position in table, starting from 0.
Definition: abundance.cpp:214
Data holder for colour as float.
Definition: misc.h:26
float green
Definition: misc.h:30
bool solveLeastSquares(gsl_matrix *m, gsl_vector *b, gsl_vector *&x)
Use an SVD based least-squares solver to solve Mx=b (for x).
Definition: misc.cpp:94
unsigned int addRange(float start, float end, unsigned int ionID)
Add a range to the rangefile. Returns ID number of added range if adding successful, (unsigned int)-1 otherwise.
Definition: multiRange.cpp:303
std::string getIonName(unsigned int ionID) const
Get the name of a specified ionID.
Definition: multiRange.cpp:733
#define WARN(f, g)
Definition: aptAssert.h:48
unsigned int addIon(const std::set< SIMPLE_SPECIES > &molecule, const std::string &name, const RGBf &ionCol)
Add the ion to the database returns ion ID if successful, -1 otherwise.
Definition: multiRange.cpp:271
void meanAndStdev(const std::vector< T > &f, float &meanVal, float &stdevVal, bool normalCorrection=true)
Definition: misc.h:76
float abundanceBetweenLimits(const std::vector< size_t > &elemIdx, const std::vector< size_t > &frequency, float massStart, float massEnd) const
Obtain the fractional abundance between two limits for the given species [start,end) ...
Definition: abundance.cpp:463
Data storage and retrieval class for "ranging" a spectra, where overlapping ranges are permitted...
Definition: multiRange.h:63
float blue
Definition: misc.h:31
float nullBackground(float rangeStart, float rangeEnd)
unsigned int getNumRanges() const
Get the number of unique ranges.
Definition: multiRange.cpp:720
void setRangeGroups(const std::vector< unsigned int > &groups)
Set the groupings for the ranges : this is used when splitting overlapping ranges.
Definition: multiRange.cpp:321
void splitOverlapping(std::vector< MultiRange > &decomposedRanges, float massTolerance=0) const
Definition: multiRange.cpp:844
size_t open(const char *file, bool strict=false)
Attempt to open the abundance data file, return 0 on success, nonzero on failure. ...
Definition: abundance.cpp:79
Class to load abundance information for natural isotopes.
Definition: abundance.h:54
unsigned int getNumIons() const
Get the number of unique ions.
Definition: multiRange.cpp:747
const std::vector< ISOTOPE_ENTRY > & isotopes(size_t offset) const
Return the isotope at a given position in the table. Offset must exist.
Definition: abundance.cpp:621
This is a data holding class for POS file ions, from.
Definition: ionHit.h:36
#define ASSERT(f)
unsigned int count
Number of copies of this isotope.
Definition: multiRange.h:40
unsigned int estimateRank(const gsl_matrix *m, float tolerance=sqrt(std::numeric_limits< float >::epsilon()))
Estimate the rank of the given matrix.
Definition: misc.cpp:58
gsl_rng * getRng() const
Obtain a GSL random number generator.
Definition: atomprobe.h:115
RandNumGen randGen
Definition: atomprobe.cpp:29
float red
Definition: misc.h:29
size_t symbolIdxFromAtomicNumber(size_t atomicNumber) const
Return the symbol index (offset in vector) for the given atomic number, -1 if it cannot be found...
Definition: abundance.cpp:609
bool leastSquaresOverlapSolve(const AtomProbe::MultiRange &rangeData, const AtomProbe::AbundanceData &abundance, const std::vector< IonHit > &hits, float(*backgroundEstimator)(float, float), std::vector< std::pair< unsigned int, float > > &decomposedHits, float rangeTolerance=0)
Solve overlaps using least squares for given multi-range.