libatomprobe
Library for Atom Probe Tomography (APT) computation
overlaps.cpp
Go to the documentation of this file.
1 /* overlaps.cpp : Mass spectrum Overlap finding algorithms
2  * Copyright (C) 2020 Daniel Haley
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
18 #include "atomprobe/helper/misc.h"
19 #include "../helper/helpFuncs.h"
20 
21 #include <vector>
22 #include <map>
23 #include <utility>
24 #include <string>
25 #include <algorithm>
26 #include <set>
27 
28 #define EQ_TOLV(f,g,h) (fabs( (f) - (g)) < (h))
29 
30 
31 namespace AtomProbe
32 {
33 
34 using std::vector;
35 using std::pair;
36 using std::map;
37 using std::set;
38 using std::string;
39 
40 //DEBUG ONLY
41 using std::cerr;
42 using std::endl;
43 
44 
45 //Find the masses that overlap with one another
46 void findOverlapsFromSpectra(const vector<vector<pair<float,float> > > &massDistributions,
47  float massDelta,const set<unsigned int> &skipIDs,
48  vector<pair<size_t ,size_t> > &overlapIdx,
49  vector<pair<float,float> > &overlapMasses)
50 
51 {
52 
53  //Loop through each "fingerprint" spectrum
54  for(unsigned int ui=0;ui<massDistributions.size();ui++)
55  {
56  //Skip any pair where we dont understand one ion
57  if(skipIDs.find(ui) !=skipIDs.end())
58  continue;
59  //Loop through each peak
60  for(unsigned int uj=0;uj<massDistributions[ui].size();uj++)
61  {
62  if(skipIDs.find(uj) !=skipIDs.end())
63  continue;
64  float m1;
65  m1=massDistributions[ui][uj].first;
66 
67  //Loop over other fingerprints
68  for(unsigned int uk=ui+1;uk<massDistributions.size();uk++)
69  {
70  //Loop over other fingerprint's peaks
71  for(unsigned int ul=0;ul<massDistributions[uk].size();ul++)
72  {
73  //If within tolerance, then record
74  float m2;
75  m2=massDistributions[uk][ul].first;
76  if(EQ_TOLV(m1,m2,massDelta))
77  {
78  //Mass distributions ui and uk overlap
79  overlapIdx.push_back(std::make_pair(ui,uk));
80  overlapMasses.push_back(std::make_pair(m1,m2));
81  }
82 
83  }
84  }
85 
86  }
87 
88  }
89 }
90 
91 //Find all overlaps from the ions listed in the given rangefile
92 // using the mass delta, and assuming they occupy charge states from
93 // 1->maxCharge
94 void findOverlaps(const AbundanceData &natData,
95  const RangeFile &rng, float massDelta, unsigned int maxCharge,
96  vector<pair< size_t,size_t> > &overlapIdx, vector<pair<float,float> > &overlapMasses )
97 {
98  ASSERT(maxCharge > 0);
99  ASSERT(massDelta >0.0f);
100  //Minimum acceptable probability before we consider a given mass peak to not be observable
101  const float PEAK_MIN_PROBABILITY=10.0/1e6; //10ppm
102  const float PEAK_GROUP_DIST = 0.01; // Small m/z value in Da to group isotopes, e.g. those in Mo2
103 
104  vector<pair<unsigned int ,vector<size_t> > > overlaps;
105 
106  //Create a list of the ions in the rangefile,
107  // decomposed so that the natural abundance table
108  // can look up their "fingerprint" theoretical intensities
109  vector<vector<pair<float,float> > > massDistributions;
110  set<unsigned int> skipIDs;
111 
112  for(unsigned int ui=0;ui<rng.getNumIons(); ui++)
113  {
114 
115  //FIXME: Better error handling?
116  //Obtain the fragments for this ion.
117  // abort routine if we can't do this
118  vector<pair<string,size_t> > fragments;
119  vector<pair<float,float> > nullDist;
120  if(!rng.decomposeIonById(ui,fragments))
121  {
122  skipIDs.insert(ui);
123  //FIXME: Hack, insert one distribution for each charge
124  for(unsigned int uj=0;uj<maxCharge;uj++)
125  massDistributions.push_back(nullDist);
126  continue;
127  }
128 
129 
130  //Convert to element idx (in table)/frequency pairing
131  vector<pair<size_t,unsigned int> > symbolIdxFreq;
132  natData.getSymbolIndices(fragments,symbolIdxFreq);
133 
134  //check that each fragment is a real element
135  bool skip;
136  skip=false;
137  for(unsigned int uj=0;uj<symbolIdxFreq.size();uj++)
138  {
139  if(symbolIdxFreq[uj].first == (size_t)-1)
140  {
141  skip=true;
142  break;
143  }
144 
145  }
146 
147  //If any fragment is not a real element, skip this
148  if(skip)
149  {
150  skipIDs.insert(ui);
151  //FIXME: Hack, insert one empty distribution for each charge
152  for(auto uj=0u;uj<maxCharge;uj++)
153  massDistributions.push_back(nullDist);
154  continue;
155  }
156 
157  vector<size_t> elementIdx,freq;
158  elementIdx.resize(symbolIdxFreq.size());
159  freq.resize(symbolIdxFreq.size());
160  for(unsigned int uj=0;uj<symbolIdxFreq.size();uj++)
161  {
162  elementIdx[uj]=symbolIdxFreq[uj].first;
163  freq[uj] = symbolIdxFreq[uj].second;
164  }
165 
166  //Generate the isotope
167  vector<pair<float,float> > massDist;
168  natData.generateGroupedIsotopeDist(elementIdx,freq,massDist,PEAK_GROUP_DIST);
169 
170 
171  //Kill any pair with a probability below our threshold
172  vector<bool> killVec(massDist.size(),false);
173  for(unsigned int uj=0;uj<massDist.size();uj++)
174  killVec[uj]=(massDist[uj].second < PEAK_MIN_PROBABILITY);
175 
176 
177  vectorMultiErase(massDist,killVec);
178 
179  //Sum number of electrons
180  unsigned int nElectrons=0;
181  for(auto uj=0;uj<elementIdx.size(); uj++)
182  nElectrons+=natData.getAtomicNumber(elementIdx[uj]);
183 
184  //Generate one mass distribution per charge state,
185  // up to the maximum atomic number (number of electrons)
186  unsigned int maxMultiplicity=std::min(maxCharge,nElectrons);
187 
188 
189  for(unsigned int uj=1;uj<=maxMultiplicity;uj++)
190  {
191  vector<pair<float,float> > tmpDist;
192  tmpDist.clear();
193 
194  if(uj <=nElectrons)
195  {
196  tmpDist=massDist;
197  for(unsigned int uk=0;uk<tmpDist.size();uk++)
198  tmpDist[uk].first/=uj;
199  }
200 
201  massDistributions.push_back(tmpDist);
202  }
203 
204  }
205 
206 
207  //Now, we have the mass distributions, check to see if any of them
208  // are within tolerance of one another
209  ASSERT(massDistributions.size() == rng.getNumIons()*maxCharge);
210 
211  overlapMasses.clear();
212  overlapIdx.clear();
213 
214  findOverlapsFromSpectra(massDistributions,massDelta,skipIDs, overlapIdx,overlapMasses);
215 }
216 
217 
218 void findOverlaps(const AbundanceData &natData,
219  const vector<string> &ionNames, float massDelta, unsigned int maxCharge,
220  vector<pair< size_t,size_t> > &overlapIdx, vector<pair<float,float> > &overlapMasses )
221 {
222  RangeFile rng;
223  RGBf rgb;
224 
225  //Fake a rangefile
226  for(unsigned int ui=0;ui<ionNames.size();ui++)
227  {
228  rng.addIon(ionNames[ui],ionNames[ui],rgb);
229  }
230 
231  //call find overlaps
232  findOverlaps(natData, rng, massDelta,maxCharge,overlapIdx,overlapMasses);
233 
234 }
235 #ifdef DEBUG
236 bool testOverlapDetect()
237 {
238  RangeFile rng;
239  RGBf c;
240  c.red=c.green=c.blue=1.0f;
241 
242  unsigned int ionIdTi,ionIdO;
243  ionIdTi =rng.addIon("Ti","Ti",c);
244  ionIdO =rng.addIon("O","O",c);
245  rng.addRange(23.5,24.5,ionIdTi);
246  rng.addRange(13.5,14.5,ionIdO);
247  AbundanceData natData;
248  if(natData.open("naturalAbundance.xml"))
249  return false;
250 
251  vector<pair< size_t,size_t> > overlapIdx;
252  vector<pair<float,float> > overlapMasses;
253 
254  //Ti overlaps with O
255  const unsigned int MAX_CHARGESTATE=3;
256  findOverlaps(natData,rng, 0.1,MAX_CHARGESTATE,overlapIdx,overlapMasses);
257 
258  TEST(overlapIdx.size() >=1,"Overlaps exist");
259 
260  return true;
261 
262 }
263 #endif
264 }
unsigned int addIon(const std::string &shortName, const std::string &longName, const RGBf &ionCol)
Add the ion to the database returns ion ID if successful, -1 otherwise.
Definition: ranges.cpp:3274
Data holder for colour as float.
Definition: misc.h:26
float green
Definition: misc.h:30
void vectorMultiErase(std::vector< T > &vec, const std::vector< bool > &wantKill)
Remove elements from the vector, without preserving order.
Definition: misc.h:112
void findOverlaps(const AbundanceData &natData, const RangeFile &rng, float massDelta, unsigned int maxCharge, std::vector< std::pair< size_t, size_t > > &overlapIdx, std::vector< std::pair< float, float > > &overlapMasses)
Find the overlaps stemming from a given rangefile.
#define TEST(f, g)
Definition: aptAssert.h:49
void getSymbolIndices(const std::vector< std::string > &symbols, std::vector< size_t > &indices) const
Return a vector of symbol indices.
Definition: abundance.cpp:559
float blue
Definition: misc.h:31
void generateGroupedIsotopeDist(const std::vector< size_t > &elementIdx, const std::vector< size_t > &frequency, std::vector< std::pair< float, float > > &massDist, float massTolerance) const
As per generateIsotopeDist, however, this convenience groups the distribution to limit the effect of ...
Definition: abundance.cpp:395
size_t open(const char *file, bool strict=false)
Attempt to open the abundance data file, return 0 on success, nonzero on failure. ...
Definition: abundance.cpp:79
Class to load abundance information for natural isotopes.
Definition: abundance.h:54
bool decomposeIonById(unsigned int ionId, std::vector< std::pair< std::string, size_t > > &fragments) const
Perform decomposeIonNames(...) for a given ionID.
Definition: ranges.cpp:252
void findOverlapsFromSpectra(const std::vector< std::vector< std::pair< float, float > > > &massDistributions, float massDelta, const std::set< unsigned int > &skipIDs, std::vector< std::pair< size_t, size_t > > &overlapIdx, std::vector< std::pair< float, float > > &overlapMasses)
Find overlaps in the given mass distributions, as per findOverlaps(...Rangefile...)
#define ASSERT(f)
#define EQ_TOLV(f, g, h)
Definition: overlaps.cpp:28
Data storage and retrieval class for various range files.
Definition: ranges.h:95
unsigned int getNumIons() const
Get the number of unique ions.
Definition: ranges.cpp:2863
float red
Definition: misc.h:29
unsigned int getAtomicNumber(size_t elemIdx) const
Obtain the atomic number for the given element, by element index.
Definition: abundance.cpp:503
unsigned int addRange(float start, float end, unsigned int ionID)
Add a range to the rangefile. Returns ID number of added range.
Definition: ranges.cpp:3235