50 "Unable to create XML reader.",
51 "Bad property found in XML file",
52 "XML document did not match expected layout (DTD validation)",
53 "Unable to find required node during parse",
54 "Root node missing, expect <atomic-mass-table>!",
55 "Found incorrect root node. Expected <atomic-mass-table>" 61 ASSERT(errorCode < ABUNDANCE_ERROR_ENUM_END);
62 return ABUNDANCE_ERROR[errorCode];
68 for(
size_t ui=0;ui<isotopeData.size();ui++)
69 v+=isotopeData[ui].size();
76 return isotopeData.size();
83 xmlParserCtxtPtr context;
85 context =xmlNewParserCtxt();
88 return ABUNDANCE_ERR_NO_CONTEXT;
91 doc = xmlCtxtReadFile(context, file, NULL, XML_PARSE_DTDVALID| XML_PARSE_NOENT | XML_PARSE_NONET);
95 xmlFreeParserCtxt(context);
96 return ABUNDANCE_ERR_BAD_DOC;
106 xmlFreeParserCtxt(context);
107 return ABUNDANCE_ERROR_FAILED_VALIDATION;
116 std::stack<xmlNodePtr> nodeStack;
117 xmlNodePtr nodePtr = xmlDocGetRootElement(doc);
120 throw ABUNDANCE_ERROR_MISSING_ROOT_NODE;
123 if(xmlStrcmp(nodePtr->name, (
const xmlChar *)
"atomic-mass-table"))
124 throw ABUNDANCE_ERROR_WRONG_ROOT_NODE;
126 nodeStack.push(nodePtr);
128 nodePtr=nodePtr->xmlChildrenNode;
134 throw ABUNDANCE_ERROR_BAD_VALUE;
137 throw ABUNDANCE_ERROR_BAD_VALUE;
140 nodeStack.push(nodePtr);
141 nodePtr=nodePtr->xmlChildrenNode;
145 throw ABUNDANCE_ERROR_MISSING_NODE;
147 nodePtr=nodePtr->xmlChildrenNode;
149 vector<ISOTOPE_ENTRY> curIsotopes;
155 throw ABUNDANCE_ERROR_MISSING_NODE;
157 nodeStack.push(nodePtr);
158 nodePtr=nodePtr->xmlChildrenNode;
162 throw ABUNDANCE_ERROR_MISSING_NODE;
165 throw ABUNDANCE_ERROR_BAD_VALUE;
168 throw ABUNDANCE_ERROR_BAD_VALUE;
175 throw ABUNDANCE_ERROR_MISSING_NODE;
178 throw ABUNDANCE_ERROR_BAD_VALUE;
181 throw ABUNDANCE_ERROR_BAD_VALUE;
185 curIsotopes.push_back(curIsoData);
187 nodePtr=nodeStack.top();
188 nodePtr=nodePtr->next;
192 isotopeData.push_back(curIsotopes);
195 nodePtr=nodeStack.top();
197 nodePtr=nodePtr->next;
204 xmlFreeParserCtxt(context);
209 xmlFreeParserCtxt(context);
216 ASSERT(isotopeData.size());
219 for(
size_t ui=0;ui<isotopeData.size();ui++)
221 if(isotopeData[ui].size() &&
lowercase(isotopeData[ui][0].symbol) ==
lowercase(symbol) )
227 for(
size_t ui=0;ui<isotopeData.size();ui++)
229 if(isotopeData[ui].size() && isotopeData[ui][0].symbol == symbol)
240 ASSERT(isotopeData.size());
241 for(
size_t uj=0;uj<isotopeData[elemIn].size();uj++)
243 if(isotopeData[elemIn][uj].mass == mass)
256 ASSERT(isotopeData.size());
257 for(
size_t uj=0;uj<isotopeData[elem].size();uj++)
259 if(fabs(isotopeData[elem][uj].mass - mass) < tolerance)
273 ASSERT(isotopeData.size());
274 return isotopeData[elemIdx][isotopeIdx];
278 const vector<size_t> &frequency,
279 vector<pair<float,float> > &massDist)
const 281 ASSERT(isotopeData.size());
282 ASSERT(frequency.size() == elementIdx.size());
287 map<unsigned int, vector< pair<float,float> > > isotopeMassDist;
292 for(
size_t ui=0;ui<elementIdx.size();ui++)
295 curIso = elementIdx[ui];
298 if(isotopeMassDist.find(curIso) != isotopeMassDist.end())
301 vector<pair<float,float> > thisIsotope;
302 for(
size_t uj=0;uj<isotopeData[curIso].size();uj++)
306 thisIsotope.push_back(make_pair(isotopeData[curIso][uj].mass,
307 isotopeData[curIso][uj].abundance));
310 isotopeMassDist[curIso] = thisIsotope;
318 vector<pair<float,float> > peakProbs;
319 for(
size_t ui=0;ui<elementIdx.size();ui++)
322 for(
size_t repeat=0;repeat<frequency[ui];repeat++)
324 vector< pair<float,float> >::const_iterator isoBegin,isoEnd;
325 isoBegin = isotopeMassDist[elementIdx[ui]].begin();
326 isoEnd = isotopeMassDist[elementIdx[ui]].end();
328 vector<pair<float,float> > newProbs;
332 if(peakProbs.empty())
335 for(vector<pair<float,float> >::const_iterator it=isoBegin;
337 peakProbs.push_back(*it);
341 const unsigned int peakProbSize = peakProbs.size();
342 newProbs.resize(peakProbSize*isotopeMassDist[elementIdx[ui]].size());
343 unsigned int offset=0;
345 for(
size_t uj=0;uj<peakProbSize;uj++)
350 for(vector<pair<float,float> >::const_iterator it=isoBegin;
353 pair<float,float> newMass;
355 newMass.first=peakProbs[uj].first+it->first;
356 newMass.second=peakProbs[uj].second*it->second;
357 newProbs[offset]=newMass;
363 peakProbs.swap(newProbs);
370 float tolerance=sqrt(std::numeric_limits<float>::epsilon());
372 vector<bool> killPeaks(peakProbs.size(),
false);
373 const unsigned int peakProbSize=peakProbs.size();
374 #pragma omp parallel for 376 for(
size_t ui=0;ui<peakProbSize;ui++)
378 for(
size_t uj=ui+1;uj<peakProbSize;uj++)
380 if(fabs(peakProbs[ui].first-peakProbs[uj].first) <tolerance &&
381 peakProbs[uj].second > 0.0f)
384 peakProbs[ui].second+=peakProbs[uj].second;
385 peakProbs[uj].second=0.0f;
392 massDist.swap(peakProbs);
396 const vector<size_t> &frequency, vector<pair<float,float> > &massDist,
397 float massTolerance)
const 403 if(massDist.size() < 2)
408 std::sort(massDist.begin(),massDist.end(),cmp);
412 vector<vector<pair<float,float> > > overlappingEntries;
414 vector<pair<float,float> > curGroup;
415 curGroup.push_back(massDist[0]);
417 float threshPos=massDist[0].first + massTolerance;
418 for(
unsigned int ui=1;ui<massDist.size(); ui++)
420 if(massDist[ui].first > threshPos)
423 overlappingEntries.push_back(curGroup);
428 curGroup.push_back(massDist[ui]);
429 threshPos=massDist[ui].first + massTolerance;
432 overlappingEntries.push_back(curGroup);
437 for(
unsigned int ui=0;ui<overlappingEntries.size(); ui++)
441 for(
unsigned int uj=0;uj<overlappingEntries[ui].size();uj++)
443 x.push_back(overlappingEntries[ui][uj].first);
444 y.push_back(overlappingEntries[ui][uj].second);
450 float summedAbundance;
451 summedAbundance=std::accumulate(y.begin(),y.end(),0.0f);
453 massDist.push_back(make_pair(wMean,summedAbundance));
464 const std::vector<size_t> &frequency,
465 float massStart,
float massEnd)
const 467 vector<pair<float,float> > massDist;
470 for(
const auto &
isotope : massDist)
478 ASSERT(abundance >=0 && abundance <=1);
484 ASSERT(elementIdx < isotopeData.size());
485 const std::vector<ISOTOPE_ENTRY> &curIsotopes= isotopeData[elementIdx];
487 ASSERT(isotopeData.size());
490 float maxV=curIsotopes[0].abundance;
491 for(
size_t ui=1; ui<curIsotopes.size();ui++)
493 if(curIsotopes[ui].abundance > maxV )
496 maxV=curIsotopes[ui].abundance;
505 ASSERT(elementIdx < isotopeData.size());
506 const std::vector<ISOTOPE_ENTRY> &curIsotopes= isotopeData[elementIdx];
509 for(
size_t ui=1;ui<curIsotopes.size();ui++)
511 ASSERT(curIsotopes[ui].atomicNumber == curIsotopes[0].atomicNumber);
514 return curIsotopes[0].atomicNumber;
518 std::vector<std::pair<float,float> > &massDist)
const 523 vector<pair<float,float> > singleDist;
524 const std::vector<ISOTOPE_ENTRY> &curIso=isotopeData[atomIdx];
525 for(
size_t ui=0;ui<curIso.size();ui++)
527 singleDist.push_back(make_pair(curIso[ui].mass,curIso[ui].abundance));
536 for(
size_t ui=1;ui<repeatCount;ui++)
538 vector<pair<float,float> > newDist;
540 for(
size_t uj=0;uj<singleDist.size();uj++)
542 for(
size_t uk=0;uk<massDist.size();uk++)
544 pair<float,float> newMass;
545 newMass.first = massDist[uk].first + singleDist[uj].first;
546 newMass.second = massDist[uk].second*singleDist[uj].second;
547 newDist.push_back(newMass);
550 massDist.swap(newDist);
561 indices.resize(symbols.size());
562 for(
size_t ui=0;ui<symbols.size(); ui++)
568 vector<pair<size_t,unsigned int >> &indices)
const 573 sV.resize(symbols.size()); iV.resize(symbols.size());
574 for(
unsigned int ui=0;ui<symbols.size(); ui++)
576 sV[ui]= symbols[ui].first;
577 iV[ui]= symbols[ui].second;
580 vector<size_t> indicesV;
584 indices.resize(sV.size());
585 for(
unsigned int ui=0; ui<indices.size();ui++)
587 indices[ui].first = indicesV[ui];
588 indices[ui].second = iV[ui];
595 ASSERT(start < end && isotopeData.size());
596 std::string retStr,sep;
598 for(
size_t ui=start; ui<end; ui++)
602 if(symbolIdx !=(
size_t)-1)
611 for(
size_t ui=0;ui<isotopeData.size(); ui++)
613 if(isotopeData[ui].size() &&
614 isotopeData[ui][0].atomicNumber == atomicNumber)
623 ASSERT(offset < isotopeData.size());
return isotopeData[offset];
629 const vector<ISOTOPE_ENTRY> &isoDat=isotopeData[elementIdx];
632 for(
size_t ui=0;ui<isoDat.size();ui++)
634 total+=isoDat[ui].abundance*isoDat[ui].mass;
645 vector<pair<float,float> > isotopeDist;
647 vector<size_t> elemIdx, elemFreq;
648 elemIdx.resize(element.size());
649 elemFreq.resize(element.size());
651 for(
size_t ui=0;ui<element.size();ui++)
653 elemIdx[ui] = element[ui].first;
654 elemFreq[ui] = element[ui].second;
659 float minDelta=std::numeric_limits<float>::max();
662 for(
size_t curCharge=1;curCharge<maxCharge; curCharge++)
664 for(
size_t ui=0;ui<isotopeDist.size();ui++)
667 curDelta=fabs(isotopeDist[ui].first/curCharge-targetMass);
668 if( curDelta< minDelta)
671 bestCharge=curCharge;
681 void AbundanceData::checkErrors()
const 685 for(
size_t ui=0;ui<isotopeData.size();ui++)
687 if(!isotopeData[ui].size())
692 for(
size_t uj=0; uj<isotopeData[ui].size();uj++)
694 sum+=isotopeData[ui][uj].abundance;
697 ASSERT(fabs(sum -1.0f) < 0.000001);
706 bool AbundanceData::runUnitTests()
709 TEST(massTable.
open(
"../data/naturalAbundance.xml") == 0,
"load table");
713 TEST(ironIndex != (
size_t)-1,
"symbol lookup");
716 vector<size_t> elements;
717 vector<size_t> concentrations;
718 elements.push_back(ironIndex);
719 concentrations.push_back(1);
721 std::vector<std::pair<float,float> > massDist;
724 TEST(massDist.size() == 4,
"Iron has 4 isotopes");
733 concentrations.clear();
735 elements.push_back(molyIdx);
736 concentrations.push_back(2);
742 const float MO2_ISOTOPES[15][2] ={
761 std::sort(massDist.begin(),massDist.end(),cmp);
763 const unsigned int NUM_ISOTOPES_MO2=15;
764 TEST(massDist.size() == NUM_ISOTOPES_MO2,
"Mo2 dist test");
766 for(
unsigned int ui=0;ui<NUM_ISOTOPES_MO2; ui++)
769 m=MO2_ISOTOPES[ui][0];
770 i=MO2_ISOTOPES[ui][1];
773 dm = fabs(m-massDist[ui].first);
774 di = fabs(i-massDist[ui].second);
775 TEST(dm < 0.01,
"Mo2 Isotope position");
776 TEST(di < 0.01,
"Mo2 Isotope intensity");
void isotopeIndex(size_t elem, float mass, size_t &isotopeIdx) const
Obtain the atom ID (first) and the isotope value(second).
size_t symbolIndex(const char *symbol, bool caseSensitive=true) const
Return the element's position in table, starting from 0.
unsigned int XMLHelpFwdToElem(xmlNodePtr &node, const char *nodeName)
std::string elementNames(size_t start, size_t end, char separator=',') const
Obtain a delimited list of element names, using the (optional) supplied separator, over the specified [start,end) range.
void generateSingleAtomDist(size_t atomIdx, unsigned int repeatCount, std::vector< std::pair< float, float > > &massDist) const
Obtain the mass distribution from a single isotope that repeats. Output is not grouped.
void generateIsotopeDist(const std::vector< size_t > &elementIdx, const std::vector< size_t > &frequency, std::vector< std::pair< float, float > > &massDist) const
Compute the mass-probability distribution for the grouped set of ions.
void vectorMultiErase(std::vector< T > &vec, const std::vector< bool > &wantKill)
Remove elements from the vector, without preserving order.
float getNominalMass(size_t elementIdx) const
Obtain the isotope weighted mass for the given element.
void getSymbolIndices(const std::vector< std::string > &symbols, std::vector< size_t > &indices) const
Return a vector of symbol indices.
const char * ABUNDANCE_ERROR[]
float abundanceBetweenLimits(const std::vector< size_t > &elemIdx, const std::vector< size_t > &frequency, float massStart, float massEnd) const
Obtain the fractional abundance between two limits for the given species [start,end) ...
std::string elementName(size_t elemIdx) const
Obtain the name of an element from its index.
T weightedMean(const std::vector< T > &values, const std::vector< T > &weight)
void generateGroupedIsotopeDist(const std::vector< size_t > &elementIdx, const std::vector< size_t > &frequency, std::vector< std::pair< float, float > > &massDist, float massTolerance) const
As per generateIsotopeDist, however, this convenience groups the distribution to limit the effect of ...
size_t open(const char *file, bool strict=false)
Attempt to open the abundance data file, return 0 on success, nonzero on failure. ...
unsigned int XMLHelpGetProp(T &prop, xmlNodePtr node, std::string propName)
Class to load abundance information for natural isotopes.
std::string lowercase(std::string s)
Return a lowercase version for a given string.
const std::vector< ISOTOPE_ENTRY > & isotopes(size_t offset) const
Return the isotope at a given position in the table. Offset must exist.
size_t getMajorIsotopeFromElemIdx(size_t elementIdx) const
Obtain the most prominent isotope's index from the element index.
const ISOTOPE_ENTRY & isotope(size_t elementIdx, size_t isotopeIdx) const
Obtain a reference to a particular isotope, using the element's index in the table, and the isotope index.
size_t numElements() const
Return the number of elements in the loaded table.
size_t numIsotopes() const
Return the number of isotopes in the loaded table.
unsigned int getAtomicNumber(size_t elemIdx) const
Obtain the atomic number for the given element, by element index.
size_t getNearestCharge(const std::vector< std::pair< size_t, size_t > > &molecule, float targetMass, size_t maxCharge) const
Obtain the closest charge state for the given mass and species group.
size_t symbolIdxFromAtomicNumber(size_t atomicNumber) const
Return the symbol index (offset in vector) for the given atomic number, -1 if it cannot be found...
static const char * getErrorText(size_t errorCode)
Obtain a human-readable error message from the open( ) call.