39 #pragma omp parallel for reduction(+:n) 40 for(
unsigned int ui=0;ui<data.size();ui++)
42 if(data[ui].first >=minV && data[ui].first < maxV)
51 vector<size_t> &solutionElements, vector<size_t> &solutionFrequency)
53 ASSERT(solutionVec.size());
56 map<size_t,size_t> solutionCount;
57 solutionCount.clear();
58 for(
const auto & soln : solutionVec)
60 map<size_t,size_t>::iterator it;
61 it=solutionCount.find(soln.atomicNumber-1);
65 if(it==solutionCount.end())
66 solutionCount[soln.atomicNumber-1] =1;
73 solutionFrequency.clear(); solutionElements.clear();
74 for(map<size_t,size_t>::const_iterator it = solutionCount.begin(); it!=solutionCount.end(); ++it)
76 solutionElements.push_back(it->first);
77 solutionFrequency.push_back(it->second);
85 vector<float> solutionProbabilities;
86 solutionProbabilities.resize(solutions.size());
89 killVec.resize(solutions.size(),
false);
91 for(
size_t ui=0;ui<solutions.size();ui++)
97 for(
auto & soln : solutions[ui])
100 pair<size_t,size_t> tmp;
101 size_t elementIdx,isotopeIdx;
102 elementIdx= soln.massNumber-1;
103 isotopeIdx= soln.atomicNumber-1;
105 solutionComp*=massTable.
isotope(elementIdx,
111 killVec[ui] = solutionComp*1e6 < minPpm;
120 const vector<float > &peakPosition,
float tolerance,
121 size_t solutionCharge,vector<vector<ISOTOPE_ENTRY> > &solutions)
127 vector<bool> cullSolutions;
128 cullSolutions.resize(solutions.size(),
false);
130 for(
size_t ui=0;ui<solutions.size();ui++)
133 vector<pair<float,float> > massDist;
139 vector<size_t> solutionElements;
140 vector<size_t> solutionFreq;
142 solutionElements,solutionFreq);
148 solutionFreq,massDist);
150 for(
unsigned int uj=0;uj<massDist.size();uj++)
151 massDist[uj].first/=solutionCharge;
162 for(
size_t uj=0;uj<solutions[ui].size();uj++)
163 solutionMass+=solutions[ui][uj].mass;
166 float solutionNaturalAmp=-1;
167 for(
size_t uj=0; uj<massDist.size();uj++)
169 if(fabs(massDist[uj].first - solutionMass) < sqrt(std::numeric_limits<float>::epsilon()))
171 solutionNaturalAmp=massDist[uj].second;
177 ASSERT(solutionNaturalAmp>=0.0f);
179 vector<bool> killVec;
180 killVec.resize(massDist.size());
181 for(
size_t uj=0;uj<massDist.size();uj++)
182 killVec[uj] = (massDist[uj].second <= solutionNaturalAmp);
190 for(
size_t uj=0;uj<massDist.size();uj++)
194 for(
size_t uk=0; uk<peakPosition.size();uk++)
196 if(fabs(peakPosition[uk]-massDist[uj].first) < tolerance)
208 cullSolutions[ui]=
true;
221 vector<float>
maxExplainedFraction(
const vector<pair<float,float> > &intensityData,
float peakMass,
float massWidth,
222 const vector<vector<ISOTOPE_ENTRY> > &solutions,
const AbundanceData &massTable,
223 float massDistTol,
unsigned int solutionCharge)
227 vector<pair<float,float> > massDist;
229 vector<float> explainedFractions;
230 explainedFractions.resize(solutions.size());
232 for(
size_t ui=0;ui<solutions.size();ui++)
236 vector<size_t> solutionElements;
237 vector<size_t> solutionFreq;
239 solutionElements,solutionFreq);
247 solutionFreq,massDist,massDistTol);
250 for(
unsigned int uj=0;uj<massDist.size();uj++)
251 massDist[uj].first/=solutionCharge;
256 unsigned int closestPeak;
258 massErr = std::numeric_limits<float>::max();
259 closestPeak=(
unsigned int)-1;
261 vector<float> observedCounts;
262 observedCounts.resize(massDist.size());
263 for(
unsigned int uj=0;uj<massDist.size();uj++)
265 observedCounts[uj] =
countIntensityEvents(intensityData,massDist[uj].first-massWidth, massDist[uj].first+massWidth);
270 massErrTmp = fabs(massDist[uj].first - peakMass);
271 if(massErrTmp < massErr)
273 massErr = massErrTmp;
282 ASSERT(closestPeak !=(
unsigned int)-1);
287 scaleFactor=observedCounts[closestPeak]/massDist[closestPeak].second;
290 vector<float> scaledSolution;
291 scaledSolution.resize(massDist.size());
292 for(
unsigned int uj=0;uj<massDist.size();uj++)
293 scaledSolution[uj] = massDist[uj].second*scaleFactor;
299 float minTheoreticalRatio=std::numeric_limits<float>::max();
300 const unsigned int OBSERVED_COUNT_THRESHOLD = 10;
301 const unsigned int MASS_DIST_MIN_THRESHOLD= 10;
302 for(
unsigned int uj=0;uj<scaledSolution.size();uj++)
305 if(observedCounts[uj] < OBSERVED_COUNT_THRESHOLD &&
306 scaledSolution[uj] < MASS_DIST_MIN_THRESHOLD)
309 float theoreticalRatio;
312 theoreticalRatio=observedCounts[uj]/scaledSolution[uj];
313 minTheoreticalRatio=std::min(minTheoreticalRatio,theoreticalRatio);
317 for(
unsigned int uj=0;uj<scaledSolution.size();uj++)
318 scaledSolution[uj]*=minTheoreticalRatio;
320 if(minTheoreticalRatio!=std::numeric_limits<float>::max())
325 float minDelta=std::numeric_limits<float>::max();
326 unsigned int minIdx=(
unsigned int)-1;
327 for(
unsigned int uj=0;uj<massDist.size();uj++)
330 delta=fabs(massDist[uj].first-peakMass);
338 if(minIdx != (
unsigned int)-1)
340 explainedFractions[ui]=scaledSolution[minIdx]/observedCounts[minIdx];
341 ASSERT(explainedFractions[ui] >=0.0f && explainedFractions[ui] <=1.1f);
344 explainedFractions[ui]=-1.0f;
348 explainedFractions[ui]=-1.0f;
351 return explainedFractions;
363 bool testMaxExplainedFraction()
366 if(massTable.
open(
"naturalAbundance.xml"))
368 cerr <<
"WARN : Error opening abundance table, skipping" << endl;
376 vector<pair<float,float> > massData;
380 unsigned int elemIdx,isotopeIdx;
383 mnWeight= massTable.
isotope(elemIdx,isotopeIdx);
387 const unsigned int NUM_PTS =100;
389 massData[0].first=mnWeight.
mass;
390 massData[0].second=NUM_PTS;
395 vector<ISOTOPE_ENTRY> soln;
396 soln.push_back(mnWeight);
399 vector<vector<ISOTOPE_ENTRY> > solutions;
400 solutions.push_back(soln);
402 vector<float> explainedFractions;
405 ASSERT(explainedFractions.size() ==1);
407 float delta = fabs(explainedFractions[0] -1.0f);
410 TEST(delta <0.05,
"Mn isotope test");
419 vector<pair<float,float> > massData;
427 gaWeight[0]=massTable.
isotope(gaIdx,0);
428 gaWeight[1]=massTable.
isotope(gaIdx,1);
431 const unsigned int NUM_PTS =6010;
433 massData[0].first = gaWeight[0].
mass;
434 massData[1].first = gaWeight[1].
mass;
435 massData[0].second = NUM_PTS;
436 massData[1].second = NUM_PTS;
439 vector<ISOTOPE_ENTRY> soln;
440 soln.push_back(gaWeight[0]);
443 vector<vector<ISOTOPE_ENTRY> > solutions;
444 solutions.push_back(soln);
447 vector<float> explainedFractions;
449 0.15, solutions,massTable,0.01, 1);
451 ASSERT(explainedFractions.size() ==1);
453 float delta = fabs(explainedFractions[0] -1.0f);
456 TEST(delta <0.05,
"Ga large isotope test");
461 vector<float> explainedFractions;
463 0.15, solutions,massTable,0.01,1);
465 ASSERT(explainedFractions.size() ==1);
468 float delta = fabs(explainedFractions[0] -0.70925f);
471 TEST(delta <0.05,
"Ga small isotope test");
480 vector<pair<float,float> > massData;
484 unsigned int elemIdx;
486 clWeight[0]=massTable.
isotope(elemIdx,0);
487 clWeight[1]=massTable.
isotope(elemIdx,1);
492 alWeight=massTable.
isotope(elemIdx,0);
495 bWeight=massTable.
isotope(elemIdx,0);
499 const unsigned int NUM_35_CLPTS =7577;
500 const unsigned int NUM_37_CLPTS =2423;
502 massData[0].first = clWeight[0].
mass;
503 massData[1].first = clWeight[1].
mass;
504 massData[0].second = NUM_35_CLPTS;
505 massData[1].second = NUM_37_CLPTS;
511 vector<ISOTOPE_ENTRY> solnCl, solnAlB;
512 solnCl.push_back(clWeight[1]);
514 solnAlB.push_back(alWeight);
515 solnAlB.push_back(bWeight);
518 vector<vector<ISOTOPE_ENTRY> > solutions;
519 solutions.push_back(solnCl);
520 solutions.push_back(solnAlB);
524 vector<float> explainedFractions;
525 float meanMass = 0.5*(clWeight[1].
mass + (alWeight.
mass + bWeight.
mass));
527 0.15, solutions,massTable,0.01,1);
529 ASSERT(explainedFractions.size() ==2);
533 TEST(explainedFractions[0] > 0.95,
"Cl fully explains 37 peak");
534 TEST(explainedFractions[1] < 0.05,
"AlB does not explain 37 peak");
540 bool isotopeFilterTests()
542 return testMaxExplainedFraction();
size_t symbolIndex(const char *symbol, bool caseSensitive=true) const
Return the element's position in table, starting from 0.
void buildFrequencyTable(const vector< ISOTOPE_ENTRY > &solutionVec, vector< size_t > &solutionElements, vector< size_t > &solutionFrequency)
void generateIsotopeDist(const std::vector< size_t > &elementIdx, const std::vector< size_t > &frequency, std::vector< std::pair< float, float > > &massDist) const
Compute the mass-probability distribution for the grouped set of ions.
void vectorMultiErase(std::vector< T > &vec, const std::vector< bool > &wantKill)
Remove elements from the vector, without preserving order.
void filterPeakNeedBiggerObs(const AtomProbe::AbundanceData &massTable, const std::vector< float > &peakData, float tolerance, size_t solutionCharge, std::vector< std::vector< AtomProbe::ISOTOPE_ENTRY > > &solutions)
void generateGroupedIsotopeDist(const std::vector< size_t > &elementIdx, const std::vector< size_t > &frequency, std::vector< std::pair< float, float > > &massDist, float massTolerance) const
As per generateIsotopeDist, however, this convenience groups the distribution to limit the effect of ...
void filterBySolutionPPM(const AtomProbe::AbundanceData &massTable, float minPpm, std::vector< std::vector< AtomProbe::ISOTOPE_ENTRY > > &solutions)
Use the maximum possible PPM for each isotopic combination to filter possible solutions.
size_t open(const char *file, bool strict=false)
Attempt to open the abundance data file, return 0 on success, nonzero on failure. ...
Class to load abundance information for natural isotopes.
std::vector< float > maxExplainedFraction(const std::vector< std::pair< float, float > > &massData, float peakMass, float massWidth, const std::vector< std::vector< AtomProbe::ISOTOPE_ENTRY > > &solutions, const AtomProbe::AbundanceData &massTable, float massDistTol, unsigned int solutionCharge)
Compute the fraction of the data that has been explained, using the natural abundance information...
size_t getMajorIsotopeFromElemIdx(size_t elementIdx) const
Obtain the most prominent isotope's index from the element index.
const ISOTOPE_ENTRY & isotope(size_t elementIdx, size_t isotopeIdx) const
Obtain a reference to a particular isotope, using the element's index in the table, and the isotope index.
unsigned int countIntensityEvents(const vector< pair< float, float > > &data, float minV, float maxV)