libatomprobe
Library for Atom Probe Tomography (APT) computation
componentAnalysis.cpp
Go to the documentation of this file.
1 /*
2  * componentAnalysis.cpp : Rangefile sanity checking routines
3  * Copyright (C) 2018 D Haley
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 
22 #include "helper/helpFuncs.h"
23 
24 #include <string>
25 #include <utility>
26 #include <vector>
27 #include <set>
28 #include <algorithm>
29 
30 using std::set;
31 using std::vector;
32 using std::pair;
33 using std::tuple;
34 
35 namespace AtomProbe
36 {
37 
38 
39 //This builds a matrix that says which parts of the mass distribution are within a given distance of one another
40 void computeIonDistAdjacency(const MultiRange &mrf, const AbundanceData &abundance,
41  const OVERLAP_PROBLEM_SETTINGS &settings, gsl_matrix* &m, gsl_vector* &vOwner, gsl_vector* &vMass)
42 {
43  ASSERT(settings.massTolerance > 0);
44 
45  vector<vector<pair<float, float> > > massDistributions;
46  vector<unsigned int> sourceIonID;
47  //Loop over each ion and work out the effective mass distribution
48  for(auto ui=0;ui<mrf.getNumIons();ui++)
49  {
50  //Obtain the molecule from the multiRange
51  set<SIMPLE_SPECIES> molecule;
52  molecule = mrf.getMolecule(ui);
53 
54  vector<size_t> elementIdx, frequency;
55  for(auto &v : molecule)
56  {
57  elementIdx.push_back(
58  abundance.symbolIdxFromAtomicNumber(v.atomicNumber));
59  frequency.push_back(v.count);
60  }
61  sourceIonID.push_back(ui);
62 
63 
64  //First of pair is mass, second of pair is ionic fraction
65  vector<pair<float,float> > massDist;
66  abundance.generateIsotopeDist(elementIdx,frequency,
67  massDist);
68 
69  //Cut any mass distribution values we meet
70  // that are below our tolerance value
71 
72  vector<bool> killVec;
73  killVec.resize(massDist.size(),false);
74  for(auto uj=0; uj<massDist.size();uj++)
75  {
76  //If the ionic fraction is below threshold, don't count
77  // it.
78  if(massDist[uj].second < settings.intensityTolerance)
79  killVec[uj]=true;
80  }
81 
82  //Remove entries that are below tolerance
83  vectorMultiErase(massDist,killVec);
84 
85  //FIXME: This doesn't know that there is a maximum limit to charge states, e.g. H^(2+) cannot occur
86  //Fold the mass distribution by each charge state, up to some maximum
87  for(auto uj=0; uj<settings.maxDefaultCharge; uj++)
88  {
89  vector<pair<float,float> > tmpDist;
90  tmpDist=massDist;
91  for(auto v : tmpDist)
92  v.first/=uj;
93  massDistributions.push_back(tmpDist);
94  }
95  }
96 
97  //Now we have each mass distribution. We now have to work out the linkage for this.
98 
99  //First entry in pair is the source ionID, the second is the mass
100  vector<pair<unsigned int, float> > massPositions;
101  for(auto ui=0;ui<massDistributions.size();ui++)
102  {
103  for(auto v : massDistributions[ui] )
104  {
105  massPositions.push_back(
106  std::make_pair(sourceIonID[ui],v.first));
107  }
108  }
109 
110  //Sort by mass
111  ComparePairSecond cmp;
112  std::sort(massPositions.begin(),massPositions.end(),cmp);
113 
114  vector<unsigned int> currentPositions;
115  gsl_matrix *massDistAdjacency = gsl_matrix_alloc(massPositions.size(),
116  massPositions.size());
117  vMass = gsl_vector_alloc(massPositions.size());
118  vOwner= gsl_vector_alloc(massPositions.size());
119 
120  //Construct the adjacency matrix
121  for(unsigned int ui=0;ui<massPositions.size();ui++)
122  {
123  float m1;
124  m1=massPositions[ui].second;
125 
126 
127  for(unsigned int uj=0;uj<massPositions.size();uj++)
128  {
129  float m2;
130 
131  m2 = massPositions[uj].second;
132  if(fabs(m2-m1) < settings.massTolerance)
133  gsl_matrix_set(massDistAdjacency,ui,uj,1);
134  else
135  gsl_matrix_set(massDistAdjacency,ui,uj,0);
136  }
137  }
138 
139  for(unsigned int ui=0;ui<massPositions.size();ui++)
140  {
141  //Set the
142  gsl_vector_set(vMass,ui,massPositions[ui].second);
143  gsl_vector_set(vOwner,ui,massPositions[ui].first);
144  }
145 
146  m=massDistAdjacency;
147 }
148 
149 
150 bool rangeOverlaps(const pair<float,float> &r1,
151  const pair<float,float> &r2)
152 {
153  //first entry inside r2
154  if(r1.first >=r2.first && r1.first <=r2.second)
155  return true;
156 
157  //second entry inside r2
158  if(r1.second <= r2.second && r1.second >= r2.first)
159  return true;
160 
161  //Outer straddle
162  if(r1.first <=r2.first && r1.second >=r2.second)
163  return true;
164 
165  //inner straddle
166  if(r2.first <=r1.first && r2.second >=r1.second)
167  return true;
168 
169  return false;
170 }
171 
172 void computeRangeAdjacency(const MultiRange &mrf, const OVERLAP_PROBLEM_SETTINGS &settings, gsl_matrix* &m)
173 {
174 
175  ASSERT(settings.massTolerance >=0);
176  m = gsl_matrix_alloc(mrf.getNumRanges(),mrf.getNumRanges());
177 
178  for(auto ui=0;ui<mrf.getNumRanges(); ui++)
179  {
180  pair<float,float> rng1;
181  rng1 = mrf.getRange(ui);
182 
183  //Expand the range a little
184  rng1.first-=settings.massTolerance;
185  rng1.second+=settings.massTolerance;
186  for(auto uj=0;uj<mrf.getNumRanges(); uj++)
187  {
188  pair<float,float> rng2;
189  rng2 = mrf.getRange(uj);
190  //Expand the range a little
191  rng2.first-=settings.massTolerance;
192  rng2.second+=settings.massTolerance;
193  if(rangeOverlaps(rng1,rng2))
194  gsl_matrix_set(m,ui,uj,1);
195  else
196  gsl_matrix_set(m,ui,uj,0);
197 
198  }
199  }
200 
201 }
202 
203 
204 #ifdef DEBUG
205 bool testIonDistAdjacency()
206 {
207  AbundanceData abundance;
208  if(abundance.open("naturalAbundance.xml"))
209  {
210  WARN(false,"Unable to test overlap adjacency matrix generation - no abundance data found");
211  return true;
212  }
213 
214  OVERLAP_PROBLEM_SETTINGS settings;
215  settings.maxDefaultCharge=1;
216  settings.massTolerance=0.05; //Da
217  settings.intensityTolerance=1e-6; //atomic fraction
218 
219  //We only need the ion information to
220  // be able to fully describe the mass distributions
221  RGBf col;
222  col.red=col.green=col.blue=1;
223 
224  SIMPLE_SPECIES s;
225  s.atomicNumber= abundance.getAtomicNumber(
226  abundance.symbolIndex("Fe"));
227  s.count=1;
228 
229  //Add iron
230  MultiRange mrf;
231  mrf.addIon(s,"Fe",col);
232 
233  //add Nickel
234  s.atomicNumber= abundance.getAtomicNumber(
235  abundance.symbolIndex("Ni"));
236  mrf.addIon(s,"Ni",col);
237 
238  gsl_vector *vOwner,*vMass;
239 
240  gsl_matrix *m;
241  computeIonDistAdjacency(mrf,abundance,settings,m,vOwner,vMass);
242 
243  //Output should be an adjacency matrix
244  TEST(m->size1 == m->size2,"Matrix should be square");
245  TEST(gsl_matrix_max(m) ==1,"matrix should be max 1");
246  TEST(gsl_matrix_min(m) >=0,"matrix should be min 0");
247 
248  for(auto ui=0;ui<m->size1; ui++)
249  {
250  //Diagonal of matrix should be unitary
251  TEST(fabs(gsl_matrix_get(m,ui,ui)-1) < 0.01,"Unitary diagonal");
252 
253  for(auto uj=ui;uj<m->size1; uj++)
254  {
255  //Should be symmetric
256  TEST(gsl_matrix_get(m,ui,uj) == gsl_matrix_get(m,uj,ui),"Symmetric test");
257 
258  }
259  }
260 
261 
262  gsl_matrix_free(m);
263 
264 
265  return true;
266 }
267 
268 bool testRangeAdjancency()
269 {
270  AbundanceData abundance;
271  if(abundance.open("naturalAbundance.xml"))
272  {
273  WARN(false,"Unable to test overlap adjacency matrix generation - no abundance data found");
274  return true;
275  }
276 
277  RGBf col;
278  col.red=col.green=col.blue=1;
279  //Add iron
280  MultiRange mrf;
281 
282  SIMPLE_SPECIES s;
283  s.atomicNumber= abundance.getAtomicNumber(
284  abundance.symbolIndex("H"));
285  s.count=1;
286  auto hIdx=mrf.addIon(s,"H",col);
287 
288  mrf.addRange(0.1,1.0f,hIdx);
289  mrf.addRange(1.5,2.5f,hIdx);
290  mrf.addRange(8.5,12.5f,hIdx);
291 
292  OVERLAP_PROBLEM_SETTINGS settings;
293  settings.maxDefaultCharge=2;
294  settings.massTolerance=0.5;
295 
296  gsl_matrix *m;
297  computeRangeAdjacency(mrf,settings,m);
298 
299  gsl_print_matrix(m);
300 
301  TEST(m->size1 == m->size2,"Matrix should be square");
302  TEST(gsl_matrix_max(m) ==1,"matrix should be max 1");
303  TEST(gsl_matrix_min(m) >=0,"matrix should be min 0");
304 
305 
306  //Check trace
307 
308  for(auto ui=0;ui<m->size1; ui++)
309  {
310  //Diagonal of matrix should be unitary
311  TEST(fabs(gsl_matrix_get(m,ui,ui)-1) < 0.01,"Unitary diagonal");
312 
313  for(auto uj=ui;uj<m->size1; uj++)
314  {
315  //Should be symmetric
316  TEST(gsl_matrix_get(m,ui,uj) == gsl_matrix_get(m,uj,ui),"Symmetric test");
317 
318  }
319  }
320 
321  gsl_matrix_free(m);
322  return true;
323 }
324 
325 bool testComponentAnalysis()
326 {
327  TEST(testIonDistAdjacency(),"Ion adjacency matrix generation");
328  TEST(testRangeAdjancency(),"Range adjacency matrix generation");
329 
330  return true;
331 }
332 #endif
333 
334 }
unsigned int atomicNumber
Number of protons (element number)
Definition: multiRange.h:37
std::set< SIMPLE_SPECIES > getMolecule(unsigned int ionID) const
Return the molecule that is associated with this ion.
Definition: multiRange.cpp:727
size_t symbolIndex(const char *symbol, bool caseSensitive=true) const
Return the element&#39;s position in table, starting from 0.
Definition: abundance.cpp:214
float massTolerance
Tolerance in Da, above which we consider problems to be.
void computeIonDistAdjacency(const MultiRange &mrf, const AbundanceData &abundance, const OVERLAP_PROBLEM_SETTINGS &settings, gsl_matrix *&m, gsl_vector *&vOwnership, gsl_vector *&vMass)
Calculate the adjacency matrix for the ions in a multirange.
bool rangeOverlaps(const pair< float, float > &r1, const pair< float, float > &r2)
Data holder for colour as float.
Definition: misc.h:26
void gsl_print_matrix(const gsl_matrix *m)
Definition: helpFuncs.cpp:122
float green
Definition: misc.h:30
void generateIsotopeDist(const std::vector< size_t > &elementIdx, const std::vector< size_t > &frequency, std::vector< std::pair< float, float > > &massDist) const
Compute the mass-probability distribution for the grouped set of ions.
Definition: abundance.cpp:277
unsigned int addRange(float start, float end, unsigned int ionID)
Add a range to the rangefile. Returns ID number of added range if adding successful, (unsigned int)-1 otherwise.
Definition: multiRange.cpp:303
unsigned int maxDefaultCharge
Maximum charge state to consider.
void vectorMultiErase(std::vector< T > &vec, const std::vector< bool > &wantKill)
Remove elements from the vector, without preserving order.
Definition: misc.h:112
#define TEST(f, g)
Definition: aptAssert.h:49
#define WARN(f, g)
Definition: aptAssert.h:48
unsigned int addIon(const std::set< SIMPLE_SPECIES > &molecule, const std::string &name, const RGBf &ionCol)
Add the ion to the database returns ion ID if successful, -1 otherwise.
Definition: multiRange.cpp:271
Data storage and retrieval class for "ranging" a spectra, where overlapping ranges are permitted...
Definition: multiRange.h:63
float blue
Definition: misc.h:31
unsigned int getNumRanges() const
Get the number of unique ranges.
Definition: multiRange.cpp:720
size_t open(const char *file, bool strict=false)
Attempt to open the abundance data file, return 0 on success, nonzero on failure. ...
Definition: abundance.cpp:79
Class to load abundance information for natural isotopes.
Definition: abundance.h:54
unsigned int getNumIons() const
Get the number of unique ions.
Definition: multiRange.cpp:747
std::pair< float, float > getRange(unsigned int rangeID) const
Retrieve the start and end of a given range as a pair(start,end)
Definition: multiRange.cpp:759
float intensityTolerance
The minimal natural abundance for which we consider the ions.
#define ASSERT(f)
unsigned int count
Number of copies of this isotope.
Definition: multiRange.h:40
float red
Definition: misc.h:29
unsigned int getAtomicNumber(size_t elemIdx) const
Obtain the atomic number for the given element, by element index.
Definition: abundance.cpp:503
size_t symbolIdxFromAtomicNumber(size_t atomicNumber) const
Return the symbol index (offset in vector) for the given atomic number, -1 if it cannot be found...
Definition: abundance.cpp:609
void computeRangeAdjacency(const MultiRange &mrf, const OVERLAP_PROBLEM_SETTINGS &settings, gsl_matrix *&m)