From d53aecc3c2dec62a736ed7f320848b0fe7116ee2 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Wed, 3 Jun 2015 13:27:21 +0200 Subject: [PATCH 01/16] first implementation CifLoader, todo test --- Biopool/Sources/CifLoader.cc | 668 ++++++++++++++++++++++++++++++++ Biopool/Sources/CifLoader.h | 190 +++++++++ Biopool/Sources/CifStructure.cc | 215 ++++++++++ Biopool/Sources/CifStructure.h | 143 +++++++ Biopool/Sources/Makefile | 4 +- Biopool/Sources/PdbLoader.h | 2 +- Makefile.global | 2 +- tools/String2Number.cc | 23 +- 8 files changed, 1231 insertions(+), 16 deletions(-) create mode 100644 Biopool/Sources/CifLoader.cc create mode 100644 Biopool/Sources/CifLoader.h create mode 100644 Biopool/Sources/CifStructure.cc create mode 100644 Biopool/Sources/CifStructure.h diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc new file mode 100644 index 0000000..4f2d539 --- /dev/null +++ b/Biopool/Sources/CifLoader.cc @@ -0,0 +1,668 @@ + /* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + + +// Includes: +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Global constants, typedefs, etc. 
(to avoid): + +using namespace Victor; +using namespace Victor::Biopool; +using namespace std; + +// CONSTRUCTORS/DESTRUCTOR: + +CifLoader::CifLoader(istream& _input = cin, bool _permissive = false, + bool _noHAtoms = false, bool _noHetAtoms = false, bool _noSecondary = false, + bool _noConnection = false, bool _noWater = true, bool _verb = true, + bool _allChains = false, string _NULL = "", bool _onlyMetal = false, + bool _noNucleotideChains = true) : + input(_input), permissive(_permissive), valid(true), noHAtoms(_noHAtoms), + noHetAtoms(_noHetAtoms), noSecondary(_noSecondary), noConnection(_noConnection), + noWater(_noWater), verbose(_verb), allChains(_allChains), chain(' '), + model(999), altAtom('A'), helixCode(_NULL), + //sheetCode(_NULL), helixData(), sheetData(), onlyMetalHetAtoms(_onlyMetal), + sheetCode(_NULL), onlyMetalHetAtoms(_onlyMetal), noNucleotideChains(_noNucleotideChains) { + cif = new CifStructure(input); +} + +CifLoader::~CifLoader() { + PRINT_NAME; +} + +// PREDICATES: + +/** + * Reads in the maximum allowed number of NMR models, zero otherwise. + * @param void + */ +unsigned int CifLoader::getMaxModels() { + input.clear(); // reset file error flags + input.seekg(0); + + string atomLine = readLine(input); + + unsigned int max = 0; + + // search column's number of the model field in the atom group + cif.parseGroup("atom", atomLine); + int col = cif.getGroupColumnNumber("atom", "model"); + + if (col != 0) { + while (input) { + if (atomLine.substr(0,4) == "ATOM") { + max = stoiDEF(cif.getGroupField("atom", atomLine, col)); + } + atomLine = readLine(input); + } + } + return max; +} + +/** + * If user selected a chain, it check validity of this choice, + * otherwise it select first available chain. 
+ * @param void + * @return void + */ +void CifLoader::checkAndSetChain() { + vector chainList = getAllChains(); + + if (chain != ' ') { + bool validChain = false; + for (unsigned int i = 0; i < chainList.size(); i++) + if (chain == chainList[i]) { + validChain = true; + break; + } + if (validChain == false) { + ERROR("Please check chain id. This is not valid", exception); + } + } else { + chain = chainList[0]; //the first valid chain is default choice + } +} + +/** + * If user selected a Model, it check validity of this choice, + * otherwise it select first available chain. + * @param void + * @return void + */ +void CifLoader::checkModel() { + if ((model != 999) && (model > getMaxModels())) { + ERROR("Please check model number", exception); + } +} + +/** + * Returns all available chain IDs for a PDB file. + * + * @param void + * @return vector of chars + */ +vector CifLoader::getAllChains() { + vector res; + char lastChain = ' '; + + input.clear(); // reset file error flags + input.seekg(0); + + string atomLine = readLine(input); + + unsigned int modelNum = 0; + + cif.parseGroup("atom", atomLine); + int modelCol = cif.getGroupColumnNumber("atom", "model"); + int chainCol = cif.getGroupColumnNumber("atom", "chain"); + + while (input) { + if (atomLine.substr(0, 4) == "ATOM") { + modelNum = stoiDEF(cif.getGroupField("atom", atomLine, modelCol)); + // only consider first model: others duplicate chain IDs + if (modelNum > 1) { + break; + } + // check for new chains containing amino acids + char id = (cif.getGroupField("atom", atomLine, chainCol).c_str())[0]; + if (id != lastChain) { + lastChain = id; + res.push_back(id); + } + } + atomLine = readLine(input); + } + return res; +} + +// HELPERS + +/** + * Private helper function to set bond structure after loading the spacer. 
 + * Bonds are set on the first residue, then on every following residue together with its predecessor. + * @param sp spacer whose residues are to be bonded + * @return false as soon as setBondsFromPdbCode() fails for a residue, true otherwise + */ +bool CifLoader::setBonds(Spacer& sp) { + //cout << sp.getAmino(0).getType1L() << "\n"; + sp.getAmino(0).setBondsFromPdbCode(true); + for (unsigned int i = 1; i < sp.size(); i++) { + //cout << sp.getAmino(i).getType1L() << "\n"; + if (!sp.getAmino(i).setBondsFromPdbCode(true, &(sp.getAmino(i - 1)))) + return false; + } + return true; +} + +/** + * Private helper function to determine if atom is backbone or sidechain. + * @param aa amino acid the atom is being added to + * @param at atom to classify + * @return false for backbone atoms and for the hydrogen types listed below, true otherwise + */ +bool CifLoader::inSideChain(const AminoAcid& aa, const Atom& at) { + if (isBackboneAtom(at.getCode())) + return false; + if ((at.getType() == "H") || (at.getType() == "HN") + || ((at.getType() == "HA") && (!aa.isMember(HA))) + || (at.getType() == "1HA") || (at.getType() == "1H") + || (at.getType() == "2H") || (at.getType() == "3H")) + return false; // special case for GLY H (code HA) + return true; // rest of aminoacid is its sidechain +} + + +/** + * Tries to assign the secondary structure from the helix and sheet records read from the file. If not present + * uses Spacer's setStateFromTorsionAngles(). 
+ * @param Spacer reference + */ +void CifLoader::assignSecondary(Spacer& sp) { + if (helixData.size() + sheetData.size() == 0) { + sp.setStateFromTorsionAngles(); + return; + } + + for (unsigned int i = 0; i < helixData.size(); i++) { + if (helixCode[i] == chain) { + for (int j = helixData[i].first; j <= const_cast (helixData[i].second); j++) { + // important: keep ifs separated to avoid errors + if (j < sp.maxPdbNumber()) + if (!sp.isGap(sp.getIndexFromPdbNumber(j))) + sp.getAmino(sp.getIndexFromPdbNumber(j)).setState(HELIX); + } + } + } + + for (unsigned int i = 0; i < sheetData.size(); i++) + if (sheetCode[i] == chain) + for (int j = sheetData[i].first; j <= const_cast (sheetData[i].second); j++) { + // important: keep ifs separated to avoid errors + if (j < sp.maxPdbNumber()) + if (!sp.isGap(sp.getIndexFromPdbNumber(j))) + sp.getAmino(sp.getIndexFromPdbNumber(j)).setState(STRAND); + } +} + +//setOnlyMetalHetAtoms +void CifLoader::setOnlyMetalHetAtoms() { + if (noHetAtoms) { + ERROR("can't load metal ions if hetAtoms option is disabled", exception); + } + onlyMetalHetAtoms = true; + noWater = true; +} + +//setWater +void CifLoader::setWater() { + if (noHetAtoms || onlyMetalHetAtoms) { + ERROR("can't load water if hetAtoms option is disabled\nor onlyMetalHetAtoms is enabled", exception); + } + noWater = false; +} + +/* +void +CifLoader::loadSpacer(Spacer& sp){ + Protein prot; + CifLoader::loadProtein(prot); + sp = prot.getSpacer(0); +} + */ + +/** + * Core function for PDB file parsing. 
+ * @param prot (Protein&) + */ +void CifLoader::loadProtein(Protein& prot) { + PRINT_NAME; + + vector chainList = getAllChains(); + + if (chainList.size() == 0) { + if (verbose) + cout << "Warning: Missing chain ID in the CIF, assuming the same chain for the entire file.\n"; + chainList.push_back(char(' ')); + } + + unsigned int readingModel = model; + bool loadChain = false; + + helixCode = ""; + sheetCode = ""; + + string path = "data/AminoAcidHydrogenData.txt"; + const char* inputFile = getenv("VICTOR_ROOT"); + if (inputFile == NULL) + ERROR("Environment variable VICTOR_ROOT was not found.", exception); + + AminoAcidHydrogen::loadParam(((string) inputFile + path).c_str()); + + for (unsigned int i = 0; i < chainList.size(); i++) { + loadChain = false; + // Load all chains + if (allChains) { + loadChain = true; + } else { + // Load only first chain + if (chain == ' ') { + loadChain = true; + chain = '#'; + } + // Load only selected chain + else if (chainList[i] == chain) { + loadChain = true; + chain = '#'; + } + } + + if (loadChain) { + if (verbose) { + cout << "\nLoading chain: ->" << chainList[i] << "<-\n"; + } + setChain(chainList[i]); + + input.clear(); // reset file error flags + input.seekg(0, ios::beg); + + Spacer* sp = new Spacer(); + LigandSet* ls = new LigandSet(); + + string atomLine; + atomLine = readLine(input); + + int aaNum = -100000; // infinite negative + int oldAaNum = -100000; + //int lastAa = -10000; + + AminoAcid* aa = new AminoAcid(); + Ligand* lig = new Ligand(); + + int start, end; + + string name = ""; + string tag = ""; + + // read all lines + do { + // read header entry + if (regex_search(atomLine, cif.getTag("header")) && (name == "")) { + name = atomLine; + sp->setType(name); + } + // read helix entry + else if (regex_search(atomLine, cif.getTag("helix"))) { + cif.parseGroup("helix", atomLine); + int colS = cif.getGroupColumnNumber("helix", "helix start"); + int colE = cif.getGroupColumnNumber("helix", "helix end"); + + start = 
stoiDEF(cif.getGroupField("helix", atomLine, colS)); + end = stoiDEF(cif.getGroupField("helix", atomLine, colE)); + + helixData.push_back(make_pair(start, end)); + int colC = cif.getGroupColumnNumber("helix", "helix chain"); + helixCode += cif.getGroupField("helix", atomLine, colC); + } + // read sheet entry + else if (regex_search(atomLine, cif.getTag("sheet"))) { + cif.parseGroup("sheet range", atomLine); + int colS = cif.getGroupColumnNumber("sheet range", "sheet start"); + // the end of the strand lives in its own column: looking up + // "sheet start" twice made every strand one residue long + int colE = cif.getGroupColumnNumber("sheet range", "sheet end"); + + start = stoiDEF(cif.getGroupField("sheet range", atomLine, colS)); + end = stoiDEF(cif.getGroupField("sheet range", atomLine, colE)); + + sheetData.push_back(make_pair(start, end)); + int colC = cif.getGroupColumnNumber("sheet range", "sheet chain"); + sheetCode += cif.getGroupField("sheet range", atomLine, colC); + } + // Parse one line of the "ATOM" and "HETATM" fields + // (the record name is compared on 6 characters: "ATOM" is padded with two blanks) + else if (atomLine.substr(0, 6) == "ATOM  " || + atomLine.substr(0, 6) == "HETATM") { + tag = atomLine.substr(0, 6); + cif.parseGroup("atom", atomLine); + + // Control model number + int colM = cif.getGroupColumnNumber("atom", "model"); + readingModel = stouiDEF(cif.getGroupField("atom", atomLine, colM)); + if (readingModel > model) + break; + // Get only the first model if not specified + if (model == 999) { + model = readingModel; + } + + int colC = cif.getGroupColumnNumber("atom", "chain"); + char chainID = cif.getGroupField("atom", atomLine, colC).c_str()[0]; + + if (chainList[i] == chainID) { + if ((model == 999) || (model == readingModel)) { + int colAa = cif.getGroupColumnNumber("atom", "residue num"); + aaNum = stoiDEF(cif.getGroupField("atom", atomLine, colAa)); + + // Insert the Ligand object into LigandSet + if (aaNum != oldAaNum) { + // Print some indexes for the debug + /* + cout << aa->getType1L() << " offset:" << sp->getStartOffset() << " gaps:" + << sp->sizeGaps() << " sizeAmino:" << sp->sizeAmino() << " maxPdbNum:" + << sp->maxPdbNumber() << " 
aaNum:" << aaNum + << " oldAaNum:" << oldAaNum << " lastAa:" << lastAa << "\n"; + */ + // Skip the first empty AminoAcid + if ((aa->size() > 0) && (aa->getType1L() != 'X')) { + if (sp->sizeAmino() == 0) { + sp->setStartOffset(oldAaNum - 1); + } else { + // Add gaps + //for (int i = lastAa+1; i < oldAaNum; i++){ + for (int i = sp->maxPdbNumber() + 1; i < oldAaNum; i++) { + sp->addGap(i); + } + } + sp->insertComponent(aa); + } + // Ligand + if (lig->size() > 0) { + if (onlyMetalHetAtoms) { + if (lig->isSimpleMetalIon()) { // skip not metal ions + ls->insertComponent(lig); + } + } else { + ls->insertComponent(lig); + } + } + aa = new AminoAcid(); + lig = new Ligand(); + } + oldAaNum = parseCIFline(atomLine, tag, lig, aa); + } // end model check + } // end chain check + } + atomLine = readLine(input); + } while (input); + + /* + // Print some indexes for the debug + cout << aa->getType1L() << " offset:" << sp->getStartOffset() << " gaps:" + << sp->sizeGaps() << " sizeAmino:" << sp->sizeAmino() << " maxPdbNum:" + << sp->maxPdbNumber() << " aaNum:" << aaNum + << " oldAaNum:" << oldAaNum << " lastAa:" << lastAa << "\n"; + */ + + // last residue/ligand + // AminoAcid + if ((aa->size() > 0) && (aa->getType1L() != 'X')) { + if (sp->sizeAmino() == 0) { + sp->setStartOffset(oldAaNum - 1); + } else { + // Add gaps + //for (int i = lastAa+1; i < oldAaNum; i++){ + for (int i = sp->maxPdbNumber() + 1; i < oldAaNum; i++) { + sp->addGap(i); + } + } + sp->insertComponent(aa); + } + // Ligand + if (lig->size() > 0) { + if (onlyMetalHetAtoms) { + if (lig->isSimpleMetalIon()) { // skip not metal ions + ls->insertComponent(lig); + } + } else { + ls->insertComponent(lig); + } + } + if (verbose) { + cout << "Parsing done\n"; + } + //////////////////////////////////////////////////////////////////// + // Spacer processing + if (sp->sizeAmino() > 0) { + // correct ''fuzzy'' (i.e. 
incomplete) residues + for (unsigned int j = 0; j < sp->sizeAmino(); j++) { + if ((!sp->getAmino(j).isMember(O)) || + (!sp->getAmino(j).isMember(C)) || + (!sp->getAmino(j).isMember(CA)) || + (!sp->getAmino(j).isMember(N))) { + + // remove residue + sp->deleteComponent(&(sp->getAmino(j))); + + // Add a gap for removed residues + sp->addGap(sp->getStartOffset() + j + 1); + + if (verbose) { + cout << "Warning: Residue number " + << sp->getPdbNumberFromIndex(j) + << " is incomplete and had to be removed.\n"; + } + } + } + if (verbose) + cout << "Removed incomplete residues\n"; + // connect aminoacids + if (!noConnection) { + if (!setBonds(*sp)) { // connect atoms... + valid = false; + if (verbose) { + cout << "Warning: Fail to connect residues in chain: " + << chainList[i] << ".\n"; + } + } + if (verbose) { + cout << "Connected residues\n"; + } + } + + // correct position of leading N atom + sp->setTrans(sp->getAmino(0)[N].getTrans()); + vgVector3 tmp(0.0, 0.0, 0.0); + sp->getAmino(0)[N].setTrans(tmp); + sp->getAmino(0).adjustLeadingN(); + if (verbose) + cout << "Fixed leading N atom\n"; + + // Add H atoms + if (!noHAtoms) { + for (unsigned int j = 0; j < sp->sizeAmino(); j++) { + AminoAcidHydrogen::setHydrogen(&(sp->getAmino(j)), false); // second argument is VERBOSE + } + if (verbose) { + cout << "H assigned\n"; + } + if (!noSecondary) { + sp->setDSSP(false); // argument is VERBOSE + if (verbose) { + cout << "DSSP assigned\n"; + } + } + } + + // assign secondary structure from torsion angles + if (!noSecondary) { + assignSecondary(*sp); + if (verbose) { + cout << "Torsional SS assigned\n"; + } + } + } else { + if (verbose) { + cout << "Warning: No residues in chain: " << chainList[i] << ".\n"; + } + } + + //////////////////////////////////////////////////////////////////// + // Load data into protein object + Polymer* pol = new Polymer(); + pol->insertComponent(sp); + if (verbose) { + cout << "Loaded AminoAcids: " << sp->size() << "\n"; + } + if (!(noHetAtoms)) { + 
if (ls->sizeLigand() > 0) { //insertion only if LigandSet is not empty + pol->insertComponent(ls); + if (verbose) { + cout << "Loaded Ligands: " << ls->size() << "\n"; + } + } else { + if (verbose) { + cout << "Warning: No ligands in chain: " << chainList[i] << ".\n"; + } + } + } + + prot.addChain(chainList[i]); + prot.insertComponent(pol); + + } // end loadChain + } // chains iteration + +} + +/** + * Parse a single line of a CIF file. + * @param atomLine the whole CIF line as it is + * @param tag the first field (keyword) in a PDB line + * @param lig pointer to a ligan + * @param aa pointer to an amino acid + * @return Residue number read from the PDB line. + */ +int +CifLoader::parseCIFline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { + // get atom id + int atNum = stoiDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "atom id"))); + // get residue number + int aaNum = stoiDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "residue num"))); + char altAaID = cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "alt id")).c_str()[0]; // "Code for insertion of residues" + + // get x, y, z coordinates + vgVector3 coord; + coord.x = stodDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "x"))); + coord.y = stodDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "y"))); + coord.z = stodDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "z"))); + + // get b-factor + double bfac = 0.0; + int colBfac = cif.getGroupColumnNumber("atom", "bfac"); + if (colBfac != -1) { + string sbfac = cif.getGroupField("atom", atomLine, colBfac); + if (sbfac != "?" 
|| sbfac != ".") { + bfac = stodDEF(sbfac); + } + } + + // get atom name + string atType = cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "atom name")); + + // get residue name + string aaType = cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "residue name")); + + // take care of deuterium atoms + if (atType == "D") { + cerr << "--> " << atType << "\n"; + atType = "H"; + } + + // Initialize the Atom object + Atom* at = new Atom(); + at->setNumber(atNum); + at->setType(atType); + at->setCoords(coord); + at->setBFac(bfac); + + // Ligand object (includes DNA/RNA in "ATOM" field) + if ((tag == "HETATM") || + isKnownNucleotide(nucleotideThreeLetterTranslator(aaType))) { + if (noWater) { + if (!(aaType == "HOH")) { + lig->addAtom(*at); + lig->setType(aaType); + } + } else { + lig->addAtom(*at); + lig->setType(aaType); + } + } + // AminoAcid + else if ((tag == "ATOM ")) { + // skip N-terminal ACE groups + if (aaType != "ACE") { + // DEBUG: it would be nice to load also alternative atoms + // skip alternative atoms, + if (altAaID != ' ') { + if (verbose) + cout << "Warning: Skipping extraneous amino acid entry " + << aaNum << " " << atNum << " " << altAaID << ".\n"; + } else { + aa->setType(aaType); + aa->getSideChain().setType(aaType); + + if (!noHAtoms || isHeavyAtom(at->getCode())) { + if (!inSideChain(*aa, *at)) + aa->addAtom(*at); + else { + aa->getSideChain().addAtom(*at); + } + } + } + } else { + if (verbose) + cout << "Warning: Skipping N-terminal ACE group " + << aaNum << " " << atNum << ".\n"; + } + } + delete at; + return aaNum; +} diff --git a/Biopool/Sources/CifLoader.h b/Biopool/Sources/CifLoader.h new file mode 100644 index 0000000..1bd8d25 --- /dev/null +++ b/Biopool/Sources/CifLoader.h @@ -0,0 +1,190 @@ +/* This file is part of Victor. 
+ + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + +#ifndef _CIF_LOADER_H_ +#define _CIF_LOADER_H_ + +// Includes: +#include +#include +#include +#include +#include +#include +#include +#include "CifStructure.h" + + +// Global constants, typedefs, etc. (to avoid): + +namespace Victor { + namespace Biopool { + + /** + * @brief Loads components (Atoms, Groups, Spacer, etc.) in standard CIF format. + * */ + class CifLoader : public Loader { + public: + + // CONSTRUCTORS/DESTRUCTOR: + + /** + * Constructor. 
+ * @param _input = the CIF file object + * @param _permissive = if true, allows loading residues with missing atoms + * @param _noHAtoms = if true, doesn't load Hydrogens + * @param _noHetAtoms = if true, doesn't load het atoms + * @param _noSecondary = if true, doesn't load secondary structure (neither the one calculated from torsional angles nor the DSSP) + * @param _noConnection = if true, doesn't connect residues + * @param _noWater = if true, doesn't load water atoms + * @param _verb = if true, verbose mode + * @param _allChains = if true, loads all chains + * @param _NULL = the name of the chain to be loaded, if not provided only loads the first chain + * @param _onlyMetal = if true, load only metals as ligands + * @param _noNucleotideChains = if true, doesn't load DNA/RNA chains + */ + CifLoader(istream& _input = cin, bool _permissive = false, + bool _noHAtoms = false, bool _noHetAtoms = false, + bool _noSecondary = false, bool _noConnection = false, + bool _noWater = true, bool _verb = true, bool _allChains = false, + string _NULL = "", bool _onlyMetal = false, + bool _noNucleotideChains = true); + + // this class uses the implicit copy operator. 
+ + virtual ~CifLoader(); + + // PREDICATES: + + bool isValid() { + return valid; + } + void checkModel(); //to check input values ​​ + void checkAndSetChain(); //chosen by the user + unsigned int getMaxModels(); + unsigned int getMaxModelsFast(); + vector getAllChains(); + + + // MODIFIERS: + + void setPermissive() { + permissive = true; + } + + void setNonPermissive() { + permissive = false; + } + + void setVerbose() { + verbose = true; + } + + void setNoVerbose() { + verbose = false; + } + + void setChain(char _ch) { + chain = _ch; + } + + void setModel(unsigned int _mod) { + model = _mod; + } + + void setAltAtom(char _a) { + altAtom = _a; + } + + void setNoHAtoms() { + noHAtoms = true; + } + + void setNoHetAtoms() { + noHetAtoms = true; + } + void setOnlyMetalHetAtoms(); + + void setNoSecondary() { + noSecondary = true; + } + + void setWithSecondary() { + noSecondary = false; + } + + void setNoConnection() { + noConnection = true; + } + + void setWithConnection() { + noConnection = false; + } + + void setWater(); + + void setAllChains() { + allChains = true; + } + + //virtual void loadSpacer(Spacer& sp); + //virtual void loadLigandSet(LigandSet& l); + virtual void loadProtein(Protein& prot); + + + + //virtual void loadNucleotideChainSet(NucleotideChainSet& ns); //new class, new code by Damiano + + protected: + // HELPERS: + bool setBonds(Spacer& sp); + bool inSideChain(const AminoAcid& aa, const Atom& at); + void loadSecondary(); + void assignSecondary(Spacer& sp); + int parseCIFline(string atomLine, Ligand* lig, AminoAcid* aa); + + // ATTRIBUTES + private: + istream& input; // input stream + bool permissive; // + bool valid; // + bool noHAtoms; // + bool noHetAtoms; // hetatms contain water, simpleMetalIons and cofactors + bool onlyMetalHetAtoms; // with this flag we select only 2nd cathegory + bool noSecondary; + bool noConnection; // skip connecting aminoacids + bool noWater; // + bool verbose; + bool allChains; // + char chain; // chain ID to be loaded + 
unsigned int model; // model number to be loaded + char altAtom; // ID of alternate atoms to be loaded + + bool noNucleotideChains; // does not load nucleotide atoms + + string helixCode; // parallel vector of helix data, chain name for each helixData element + string sheetCode; // chain name for each sheetData element + + vector<pair<int, int> > helixData; // begin and end of the helix + vector<pair<int, int> > sheetData; // begin and end of the strand + + CifStructure cif; // column layout of the CIF groups found in input + }; + + } +} //namespace +#endif //_CIF_LOADER_H_ + diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc new file mode 100644 index 0000000..bfe433c --- /dev/null +++ b/Biopool/Sources/CifStructure.cc @@ -0,0 +1,215 @@ +/* + * File: CifStructure.cc + * Author: marco + * + * Created on 1 giugno 2015, 11.36 + */ + +#include +#include +#include "CifStructure.h" + +using namespace Victor; +using namespace Victor::Biopool; +using namespace std; + +CifStructure::CifStructure(istream& input) : input(input) { +} + +CifStructure::~CifStructure() { +} + +/** + * returns the correct collection by group name + * @param name name of the CIF group + * @return reference to the collection; an empty collection for an unknown name + */ +vector<string>& CifStructure::getGroup(string name) { + if (name == "atom") { + return atomGroup; + } else if (name == "helix") { + return helixGroup; + } else if (name == "sheet") { + return sheetGroup; + } else if (name == "sheet order") { + return sheetOrderGroup; + } else if (name == "sheet range") { + return sheetRangeGroup; + } else if (name == "sheet hbond") { + return sheetHbondGroup; + } + // unknown name: flowing off the end of a function that returns a reference + // is undefined behaviour, so a dedicated (emptied) collection is handed back + static vector<string> unknownGroup; + unknownGroup.clear(); + return unknownGroup; +} + +/** + * returns the tag by name + * @param name name of tag + * @return CIF tag + */ +string CifStructure::getTag(string name) { + if (name == "header") { + return header; + } else if (name == "atom") { + return atom; + } else if (name == "residue num") { + return residueNum; + } else if (name == "atom id") { + return atomId; + } else if (name == "alt id") { + return atomAltId; + } else if (name == "x") { + return x; + } else if (name == "y") { + return y; + } else if (name == "z") { + return z; + } 
else if (name == "bfac") { + return tempFactor; + } else if (name == "atom name") { + // this branch used to repeat "bfac", leaving "atom name" (requested by + // CifLoader::parseCIFline) without any tag + return atomName; + } else if (name == "residue name") { + return residueName; + } else if (name == "helix") { + return helix; + } else if (name == "helix start") { + return helixStart; + } else if (name == "helix end") { + return helixEnd; + } else if (name == "helix chain") { + return helixChainId; + } else if (name == "model") { + return model; + } else if (name == "chain") { + return chain; + } else if (name == "sheet") { + return sheet; + } else if (name == "sheet order") { + return sheetOrder; + } else if (name == "sheet range") { + return sheetRange; + } else if (name == "sheet hbond") { + return sheetHbond; + } else if (name == "sheet start") { + return sheetStart; + } else if (name == "sheet end") { + return sheetEnd; + } else if (name == "sheet chain") { + return sheetChainId; + } + // unknown name: return an empty tag instead of flowing off the end + return string(); +} + +/** + * returns the column number of the field + * @param name name of the group + * @param field name of the field + * @return field column number, -1 if the group does not contain the field + */ +int CifStructure::getGroupColumnNumber(string name, string field) { + int col = -1; + // bind by reference: the collection is only searched, there is no need to copy it + vector<string>& group = getGroup(name); + vector<string>::iterator it; + it = find(group.begin(), group.end(), getTag(field)); + if (it != group.end()) { + col = it - group.begin(); + } + return col; +} + +/** + * returns the field of the line at the columnNum column + * @param name name of the group + * @param line line of the CIF + * @param columnNum number of column + * @return field at columnNum column, an empty string if columnNum is not a column of the group + */ +string CifStructure::getGroupField(string name, string line, int columnNum) { + istringstream iss(line); + vector<string>& group = getGroup(name); + vector<string> fields; + string field; + for (vector<string>::size_type i = 0; i < group.size(); i++) { + iss >> field; + fields.push_back(field); + } + // -1 (field not found) or an index past the group size must not reach operator[] + if (columnNum < 0 || columnNum >= (int) fields.size()) { + return string(); + } + return fields[columnNum]; +} + +/** + * parses group of CIF fields and creates a vector with columns positions + * @param name name of the group + */ +void CifStructure::parseGroup(string name, string line) 
{ + bool found = false; + vector<string>& group = getGroup(name); + + // exit the function if the group name is already parsed + // ("break" is only valid inside a loop or a switch) + if (isGroupParsed(name)) { + return; + } + + // the pattern does not change while scanning: build it once + regex groupName(getTag(name)); + + while (input) { + smatch match; + if (regex_search(line, match, groupName)) { + // what follows the group tag on the line is stored as the field name + group.push_back(match.suffix().str()); + found = true; + } else { + found = false; + } + + // exit the loop when the research of the fields is completed + if (!found && group.size() > 1) { + setParsedFlag(name); + break; + } + + line = readLine(input); + } +} + +/** + * Sets flag of the parsed group + * @param name name of the group; unknown names are ignored + */ +void CifStructure::setParsedFlag(string name) { + if (name == "atom") { + atomGroupParsed = true; + } else if (name == "helix") { + helixGroupParsed = true; + } else if (name == "sheet") { + sheetGroupParsed = true; + } else if (name == "sheet hbond" || name == "sheet hbound") { + // "sheet hbond" is the spelling used by getGroup() and getTag(); + // the old "sheet hbound" spelling is still accepted + sheetHboundgroupParsed = true; + } else if (name == "sheet order") { + sheetOrderGroupParsed = true; + } else if (name == "sheet range") { + sheetRangeGroupParsed = true; + } +} + +/** + * Return true if the group name is parsed, false otherwise + * @param name name of the group + * @return true if group is parsed, false otherwise (also for an unknown name) + */ +bool CifStructure::isGroupParsed(string name) { + if (name == "atom") { + return atomGroupParsed; + } else if (name == "helix") { + return helixGroupParsed; + } else if (name == "sheet") { + return sheetGroupParsed; + } else if (name == "sheet hbond" || name == "sheet hbound") { + return sheetHboundgroupParsed; + } else if (name == "sheet order") { + return sheetOrderGroupParsed; + } else if (name == "sheet range") { + return sheetRangeGroupParsed; + } + // unknown group: nothing was parsed for it + return false; +} + + + + + + + diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h new file mode 100644 index 0000000..039748c --- /dev/null +++ b/Biopool/Sources/CifStructure.h @@ -0,0 +1,143 @@ +/* + * File: CifStructure.h + * Author: marco + * + * Created on 1 giugno 2015, 11.36 + */ + +#ifndef CIFSTRUCTURE_H +#define CIFSTRUCTURE_H + + 
+// Includes: +#include +#include +#include +#include + +using std::string; +using std::istream; +using std::vector; + +// Global constants, typedefs, etc. (to avoid): + +namespace Victor { + namespace Biopool { + + /** + * Helper class used to hold information from CIF file + */ + class CifStructure { + public: + /** + * Constructor + * @param input input file stream + */ + CifStructure(istream& input); + + /** + * Destructor + */ + virtual ~CifStructure(); + + /** + * Returns the correct collection by group name. + * @param name name of the CIF group + * @return reference to the collection + */ + vector& getGroup(string name); + + /** + * Returns the tag by name. + * @param name name of tag + * @return CIF tag + */ + string getTag(string name); + + /** + * Returns the column number of the field. + * @param name name of the group + * @param field name of the field + * @return field column number + */ + int getGroupColumnNumber(string name, string field); + + /** + * Returns the field of the line at the columnNum column. + * @param name name of the group + * @param line line of the CIF + * @param columnNum number of column + * @return field at columnNum column + */ + string getGroupField(string name, string line, int columnNum); + + /** + * Parses group of CIF fields and creates a vector with columns positions. 
+ * @param name name of the group + */ + void parseGroup(string group, string line); + + /** + * Sets flag of the parsed group + * @param name name of the group + */ + void setParsedFlag(string name); + + /** + * Return true if the group name is parsed, false otherwise + * @param name name of the group + * @return true if group is parsed, false otherwise + */ + bool isGroupParsed(string name); + + private: + // CIF file + istream& input; + + // CIF tags + string header = "_struct_keywords.pdbx_keywords"; + string model = "pdbx_PDB_model_num"; + string helix = "_struct_conf."; + string helixStart = "beg_auth_seq_id"; + string helixEnd = "end_auth_seq_id"; + string helixChainId = "beg_auth_asym_id"; + string atom = "_atom_site."; + string residueNum = "auth_seq_id"; + string atomId = "id"; + string atomAltId = "label_alt_id"; + string tempFactor = "B_iso_or_equiv"; + string atomName = "auth_atom_id"; + string residueName = "auth_comp_id"; + string x = "Cartn_x"; + string y = "Cartn_y"; + string z = "Cartn_z"; + string chain = "auth_asym_id"; + string sheet = "_struct_sheet."; + string sheetOrder = "_struct_sheet_order."; + string sheetRange = "_struct_sheet_range."; + string sheetHbond = "_pdbx_struct_sheet_hbond."; + string sheetStart = "beg_auth_seq_id"; + string sheetEnd = "end_auth_seq_id"; + string sheetChainId = "beg_auth_asym_id"; + + // collections of CIF group fields + vector atomGroup; + vector helixGroup; + vector sheetGroup; + vector sheetOrderGroup; + vector sheetRangeGroup; + vector sheetHbondGroup; + + // flags + bool atomGroupParsed = false; + bool helixGroupParsed = false; + bool sheetGroupParsed = false; + bool sheetOrderGroupParsed = false; + bool sheetRangeGroupParsed = false; + bool sheetHboundgroupParsed = false; + }; + } + + +#endif /* CIFSTRUCTURE_H */ + diff --git a/Biopool/Sources/Makefile b/Biopool/Sources/Makefile index 86b43b9..5f7a08c 100644 --- a/Biopool/Sources/Makefile +++ b/Biopool/Sources/Makefile @@ -32,7 +32,7 @@ SOURCES = 
Identity.cc SimpleBond.cc Bond.cc \ AminoAcid.cc Spacer.cc IntSaver.cc IntLoader.cc SeqSaver.cc PdbLoader.cc \ PdbSaver.cc SeqLoader.cc IntCoordConverter.cc SeqConstructor.cc Ligand.cc \ LigandSet.cc SolvExpos.cc AminoAcidHydrogen.cc Nucleotide.cc \ - RelLoader.cc XyzSaver.cc RelSaver.cc XyzLoader.cc + RelLoader.cc XyzSaver.cc RelSaver.cc XyzLoader.cc CifLoader.cc CifStructure OBJECTS = Identity.o SimpleBond.o Bond.o \ @@ -41,7 +41,7 @@ OBJECTS = Identity.o SimpleBond.o Bond.o \ SeqSaver.o PdbLoader.o PdbSaver.o SeqLoader.o \ IntCoordConverter.o SeqConstructor.o Ligand.o LigandSet.o \ SolvExpos.o Protein.o AminoAcidHydrogen.o Nucleotide.o \ - RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o + RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o CifLoader.o CifStructure.o TARGETS = diff --git a/Biopool/Sources/PdbLoader.h b/Biopool/Sources/PdbLoader.h index f4c1bc9..9ed1a4b 100644 --- a/Biopool/Sources/PdbLoader.h +++ b/Biopool/Sources/PdbLoader.h @@ -148,7 +148,7 @@ namespace Victor { namespace Biopool { //virtual void loadSpacer(Spacer& sp); //virtual void loadLigandSet(LigandSet& l); - virtual void loadProtein(Protein& prot); + virtual void loadProtatein(Protein& prot); diff --git a/Makefile.global b/Makefile.global index 1f78a89..e8d267a 100644 --- a/Makefile.global +++ b/Makefile.global @@ -32,7 +32,7 @@ # # These flags should always be used. -STANDARDFLAGS = -Wall -ansi -pedantic -DNEXCEPTIONS -DLINUX -c +STANDARDFLAGS = -Wall -ansi -pedantic -DNEXCEPTIONS -DLINUX -c -std=c++11 DEBUGFLAGS = -g diff --git a/tools/String2Number.cc b/tools/String2Number.cc index ec41c69..68cf368 100644 --- a/tools/String2Number.cc +++ b/tools/String2Number.cc @@ -43,7 +43,7 @@ int stoiDEF(const string &s) { * @Description if the input is no number (e.g. char or string) then error, * if wrong input (integer expected but e.g. 
float given) then error * changes string into integer: - * */ + */ unsigned int stouiDEF(const string &s) { unsigned int i; double temp; @@ -201,9 +201,8 @@ vector sToVectorOfUIntDEF(const string& s_orig) { } /** - * @Description changes integer into sting: + * @Description changes integer into string: */ - string itosDEF(const int &i) { string s; @@ -213,8 +212,8 @@ string itosDEF(const int &i) { } /** - * @Description changes unsigned integer into sting: - * */ + * @Description changes unsigned integer into string: + */ string uitosDEF(const unsigned int &i) { string s; @@ -224,8 +223,8 @@ string uitosDEF(const unsigned int &i) { } /** - * @Description changes long into sting: - * */ + * @Description changes long into string: + */ string ltosDEF(const long &l) { string s; @@ -235,8 +234,8 @@ string ltosDEF(const long &l) { } /** - * @Description changes float into sting: - * */ + * @Description changes float into string: + */ string ftosDEF(const float &f) { string s; @@ -246,8 +245,8 @@ string ftosDEF(const float &f) { } /** - * @Description changes double into sting: - * */ + * @Description changes double into string: + */ string dtosDEF(const double &d) { string s; @@ -258,7 +257,7 @@ string dtosDEF(const double &d) { /** * @Description tokenize text - * */ + */ vector getTokens(const string& text) { istringstream ist(text.c_str()); char* charLine = new char[text.size() + 1]; // size of string From c21cc57520f569000bad91e496aaa5be49d25068 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Wed, 3 Jun 2015 23:17:11 +0200 Subject: [PATCH 02/16] strange compile errors --- Biopool/Sources/CifLoader.cc | 391 ++++++++++++++++---------------- Biopool/Sources/CifLoader.h | 163 +++++++------ Biopool/Sources/CifSaver.cc | 317 ++++++++++++++++++++++++++ Biopool/Sources/CifSaver.h | 145 ++++++++++++ Biopool/Sources/CifStructure.cc | 1 + Biopool/Sources/CifStructure.h | 3 +- Biopool/Sources/Makefile | 6 +- Biopool/Sources/PdbLoader.h | 2 +- 8 files changed, 755 
insertions(+), 273 deletions(-) create mode 100644 Biopool/Sources/CifSaver.cc create mode 100644 Biopool/Sources/CifSaver.h diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 4f2d539..5d58244 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -1,32 +1,33 @@ - /* This file is part of Victor. +/* This file is part of Victor. - Victor is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Victor is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with Victor. If not, see . + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ // Includes: -#include +#include +#include + #include #include #include -#include #include #include #include #include -#include -#include + +#include "CifLoader.h" // Global constants, typedefs, etc. 
(to avoid): @@ -40,47 +41,32 @@ CifLoader::CifLoader(istream& _input = cin, bool _permissive = false, bool _noHAtoms = false, bool _noHetAtoms = false, bool _noSecondary = false, bool _noConnection = false, bool _noWater = true, bool _verb = true, bool _allChains = false, string _NULL = "", bool _onlyMetal = false, - bool _noNucleotideChains = true) : - input(_input), permissive(_permissive), valid(true), noHAtoms(_noHAtoms), - noHetAtoms(_noHetAtoms), noSecondary(_noSecondary), noConnection(_noConnection), - noWater(_noWater), verbose(_verb), allChains(_allChains), chain(' '), - model(999), altAtom('A'), helixCode(_NULL), - //sheetCode(_NULL), helixData(), sheetData(), onlyMetalHetAtoms(_onlyMetal), - sheetCode(_NULL), onlyMetalHetAtoms(_onlyMetal), noNucleotideChains(_noNucleotideChains) { + bool _noNucleotideChains = true) : +input(_input), permissive(_permissive), valid(true), noHAtoms(_noHAtoms), +noHetAtoms(_noHetAtoms), noSecondary(_noSecondary), noConnection(_noConnection), +noWater(_noWater), verbose(_verb), allChains(_allChains), chain(' '), +model(999), altAtom('A'), helixCode(_NULL), +//sheetCode(_NULL), helixData(), sheetData(), onlyMetalHetAtoms(_onlyMetal), +sheetCode(_NULL), onlyMetalHetAtoms(_onlyMetal), noNucleotideChains(_noNucleotideChains) { cif = new CifStructure(input); } CifLoader::~CifLoader() { - PRINT_NAME; + PRINT_NAME; } // PREDICATES: /** - * Reads in the maximum allowed number of NMR models, zero otherwise. - * @param void + * If user selected a Model, it check validity of this choice, + * otherwise it select first available chain. 
+ * @param void + * @return void */ -unsigned int CifLoader::getMaxModels() { - input.clear(); // reset file error flags - input.seekg(0); - - string atomLine = readLine(input); - - unsigned int max = 0; - - // search column's number of the model field in the atom group - cif.parseGroup("atom", atomLine); - int col = cif.getGroupColumnNumber("atom", "model"); - - if (col != 0) { - while (input) { - if (atomLine.substr(0,4) == "ATOM") { - max = stoiDEF(cif.getGroupField("atom", atomLine, col)); - } - atomLine = readLine(input); - } +void CifLoader::checkModel() { + if ((model != 999) && (model > getMaxModels())) { + ERROR("Please check model number", exception); } - return max; } /** @@ -91,7 +77,7 @@ unsigned int CifLoader::getMaxModels() { */ void CifLoader::checkAndSetChain() { vector chainList = getAllChains(); - + if (chain != ' ') { bool validChain = false; for (unsigned int i = 0; i < chainList.size(); i++) @@ -108,34 +94,47 @@ void CifLoader::checkAndSetChain() { } /** - * If user selected a Model, it check validity of this choice, - * otherwise it select first available chain. - * @param void - * @return void + * Reads in the maximum allowed number of NMR models, zero otherwise. + * @param void */ -void CifLoader::checkModel() { - if ((model != 999) && (model > getMaxModels())) { - ERROR("Please check model number", exception); +unsigned int CifLoader::getMaxModels() { + input.clear(); // reset file error flags + input.seekg(0); + + string atomLine = readLine(input); + + unsigned int max = 0; + + // search column's number of the model field in the atom group + cif.parseGroup("atom", atomLine); + int col = cif.getGroupColumnNumber("atom", "model"); + + if (col != 0) { + while (input) { + if (atomLine.substr(0, 4) == "ATOM") { + max = stoiDEF(cif.getGroupField("atom", atomLine, col)); + } + atomLine = readLine(input); + } } + return max; } /** * Returns all available chain IDs for a PDB file. 
- * - * @param void - * @return vector of chars + * @return all available chain IDs */ vector CifLoader::getAllChains() { vector res; char lastChain = ' '; - + input.clear(); // reset file error flags input.seekg(0); - + string atomLine = readLine(input); - + unsigned int modelNum = 0; - + cif.parseGroup("atom", atomLine); int modelCol = cif.getGroupColumnNumber("atom", "model"); int chainCol = cif.getGroupColumnNumber("atom", "chain"); @@ -159,6 +158,21 @@ vector CifLoader::getAllChains() { return res; } +void CifLoader::setOnlyMetalHetAtoms() { + if (noHetAtoms) { + ERROR("can't load metal ions if hetAtoms option is disabled", exception); + } + onlyMetalHetAtoms = true; + noWater = true; +} + +void CifLoader::setWater() { + if (noHetAtoms || onlyMetalHetAtoms) { + ERROR("can't load water if hetAtoms option is disabled\nor onlyMetalHetAtoms is enabled", exception); + } + noWater = false; +} + // HELPERS /** @@ -193,7 +207,6 @@ bool CifLoader::inSideChain(const AminoAcid& aa, const Atom& at) { return true; // rest of aminoacid is its sidechain } - /** * Try to assigns the secondary structure from the PDB header. If not present * uses Spacer's setStateFromTorsionAngles(). @@ -226,23 +239,6 @@ void CifLoader::assignSecondary(Spacer& sp) { } } -//setOnlyMetalHetAtoms -void CifLoader::setOnlyMetalHetAtoms() { - if (noHetAtoms) { - ERROR("can't load metal ions if hetAtoms option is disabled", exception); - } - onlyMetalHetAtoms = true; - noWater = true; -} - -//setWater -void CifLoader::setWater() { - if (noHetAtoms || onlyMetalHetAtoms) { - ERROR("can't load water if hetAtoms option is disabled\nor onlyMetalHetAtoms is enabled", exception); - } - noWater = false; -} - /* void CifLoader::loadSpacer(Spacer& sp){ @@ -252,6 +248,109 @@ CifLoader::loadSpacer(Spacer& sp){ } */ +/** + * Parse a single line of a CIF file. 
+ * @param atomLine the whole CIF line as it is + * @param tag the first field (keyword) in a PDB line + * @param lig pointer to a ligan + * @param aa pointer to an amino acid + * @return Residue number read from the PDB line. + */ +int +CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { + // get atom id + int atNum = stoiDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "atom id"))); + // get residue number + int aaNum = stoiDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "residue num"))); + char altAaID = cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "alt id")).c_str()[0]; // "Code for insertion of residues" + + // get x, y, z coordinates + vgVector3 coord; + coord.x = stodDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "x"))); + coord.y = stodDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "y"))); + coord.z = stodDEF(cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "z"))); + + // get b-factor + double bfac = 0.0; + int colBfac = cif.getGroupColumnNumber("atom", "bfac"); + if (colBfac != -1) { + string sbfac = cif.getGroupField("atom", atomLine, colBfac); + if (sbfac != "?" 
|| sbfac != ".") { + bfac = stodDEF(sbfac); + } + } + + // get atom name + string atType = cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "atom name")); + + // get residue name + string aaType = cif.getGroupField("atom", atomLine, + cif.getGroupColumnNumber("atom", "residue name")); + + // take care of deuterium atoms + if (atType == "D") { + cerr << "--> " << atType << "\n"; + atType = "H"; + } + + // Initialize the Atom object + Atom* at = new Atom(); + at->setNumber(atNum); + at->setType(atType); + at->setCoords(coord); + at->setBFac(bfac); + + // Ligand object (includes DNA/RNA in "ATOM" field) + if ((tag == "HETATM") || + isKnownNucleotide(nucleotideThreeLetterTranslator(aaType))) { + if (noWater) { + if (!(aaType == "HOH")) { + lig->addAtom(*at); + lig->setType(aaType); + } + } else { + lig->addAtom(*at); + lig->setType(aaType); + } + }// AminoAcid + else if ((tag == "ATOM ")) { + // skip N-terminal ACE groups + if (aaType != "ACE") { + // DEBUG: it would be nice to load also alternative atoms + // skip alternative atoms, + if (altAaID != ' ') { + if (verbose) + cout << "Warning: Skipping extraneous amino acid entry " + << aaNum << " " << atNum << " " << altAaID << ".\n"; + } else { + aa->setType(aaType); + aa->getSideChain().setType(aaType); + + if (!noHAtoms || isHeavyAtom(at->getCode())) { + if (!inSideChain(*aa, *at)) + aa->addAtom(*at); + else { + aa->getSideChain().addAtom(*at); + } + } + } + } else { + if (verbose) + cout << "Warning: Skipping N-terminal ACE group " + << aaNum << " " << atNum << ".\n"; + } + } + delete at; + return aaNum; +} + /** * Core function for PDB file parsing. 
* @param prot (Protein&) @@ -290,8 +389,7 @@ void CifLoader::loadProtein(Protein& prot) { if (chain == ' ') { loadChain = true; chain = '#'; - } - // Load only selected chain + }// Load only selected chain else if (chainList[i] == chain) { loadChain = true; chain = '#'; @@ -331,39 +429,36 @@ void CifLoader::loadProtein(Protein& prot) { if (regex_search(atomLine, cif.getTag("header")) && (name == "")) { name = atomLine; sp->setType(name); - } - // read helix entry + }// read helix entry else if (regex_search(atomLine, cif.getTag("helix"))) { cif.parseGroup("helix", atomLine); int colS = cif.getGroupColumnNumber("helix", "helix start"); int colE = cif.getGroupColumnNumber("helix", "helix end"); - + start = stoiDEF(cif.getGroupField("helix", atomLine, colS)); end = stoiDEF(cif.getGroupField("helix", atomLine, colE)); helixData.push_back(pair(start, end)); int colC = cif.getGroupColumnNumber("helix", "helix chain"); helixCode += cif.getGroupField("helix", atomLine, colC); - } - // read sheet entry + }// read sheet entry else if (regex_search(atomLine, cif.getTag("sheet"))) { cif.parseGroup("sheet range", atomLine); int colS = cif.getGroupColumnNumber("sheet range", "sheet start"); int colE = cif.getGroupColumnNumber("sheet range", "sheet start"); - + start = stoiDEF(cif.getGroupField("sheet range", atomLine, colS)); end = stoiDEF(cif.getGroupField("sheet range", atomLine, colE)); sheetData.push_back(pair(start, end)); int colC = cif.getGroupColumnNumber("sheet range", "sheet chain"); sheetCode += cif.getGroupField("sheet range", atomLine, colC); - } - // Parse one line of the "ATOM" and "HETATM" fields - else if (atomLine.substr(0, 6) == "ATOM " || + }// Parse one line of the "ATOM" and "HETATM" fields + else if (atomLine.substr(0, 6) == "ATOM " || atomLine.substr(0, 6) == "HETATM") { tag = atomLine.substr(0, 6); cif.parseGroup("atom", atomLine); - + // Control model number int colM = cif.getGroupColumnNumber("atom", "model"); readingModel = 
stouiDEF(cif.getGroupField("atom", atomLine, colM)); @@ -373,10 +468,10 @@ void CifLoader::loadProtein(Protein& prot) { if (model == 999) { model = readingModel; } - + int colC = cif.getGroupColumnNumber("atom", "chain"); char chainID = cif.getGroupField("atom", atomLine, colC).c_str()[0]; - + if (chainList[i] == chainID) { if ((model == 999) || (model == readingModel)) { int colAa = cif.getGroupColumnNumber("atom", "residue num"); @@ -392,7 +487,7 @@ void CifLoader::loadProtein(Protein& prot) { << " oldAaNum:" << oldAaNum << " lastAa:" << lastAa << "\n"; */ // Skip the first empty AminoAcid - if ((aa->size() > 0) && (aa->getType1L() != 'X')) { + if ((aa->size() > 0) && (aa->getType1L() != 'X')) { if (sp->sizeAmino() == 0) { sp->setStartOffset(oldAaNum - 1); } else { @@ -417,7 +512,7 @@ void CifLoader::loadProtein(Protein& prot) { aa = new AminoAcid(); lig = new Ligand(); } - oldAaNum = parseCIFline(atomLine, tag, lig, aa); + oldAaNum = parseCifline(atomLine, tag, lig, aa); } // end model check } // end chain check } @@ -431,7 +526,7 @@ void CifLoader::loadProtein(Protein& prot) { << sp->maxPdbNumber() << " aaNum:" << aaNum << " oldAaNum:" << oldAaNum << " lastAa:" << lastAa << "\n"; */ - + // last residue/ligand // AminoAcid if ((aa->size() > 0) && (aa->getType1L() != 'X')) { @@ -561,108 +656,4 @@ void CifLoader::loadProtein(Protein& prot) { } // end loadChain } // chains iteration -} - -/** - * Parse a single line of a CIF file. - * @param atomLine the whole CIF line as it is - * @param tag the first field (keyword) in a PDB line - * @param lig pointer to a ligan - * @param aa pointer to an amino acid - * @return Residue number read from the PDB line. 
- */ -int -CifLoader::parseCIFline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { - // get atom id - int atNum = stoiDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "atom id"))); - // get residue number - int aaNum = stoiDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "residue num"))); - char altAaID = cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "alt id")).c_str()[0]; // "Code for insertion of residues" - - // get x, y, z coordinates - vgVector3 coord; - coord.x = stodDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "x"))); - coord.y = stodDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "y"))); - coord.z = stodDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "z"))); - - // get b-factor - double bfac = 0.0; - int colBfac = cif.getGroupColumnNumber("atom", "bfac"); - if (colBfac != -1) { - string sbfac = cif.getGroupField("atom", atomLine, colBfac); - if (sbfac != "?" 
|| sbfac != ".") { - bfac = stodDEF(sbfac); - } - } - - // get atom name - string atType = cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "atom name")); - - // get residue name - string aaType = cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "residue name")); - - // take care of deuterium atoms - if (atType == "D") { - cerr << "--> " << atType << "\n"; - atType = "H"; - } - - // Initialize the Atom object - Atom* at = new Atom(); - at->setNumber(atNum); - at->setType(atType); - at->setCoords(coord); - at->setBFac(bfac); - - // Ligand object (includes DNA/RNA in "ATOM" field) - if ((tag == "HETATM") || - isKnownNucleotide(nucleotideThreeLetterTranslator(aaType))) { - if (noWater) { - if (!(aaType == "HOH")) { - lig->addAtom(*at); - lig->setType(aaType); - } - } else { - lig->addAtom(*at); - lig->setType(aaType); - } - } - // AminoAcid - else if ((tag == "ATOM ")) { - // skip N-terminal ACE groups - if (aaType != "ACE") { - // DEBUG: it would be nice to load also alternative atoms - // skip alternative atoms, - if (altAaID != ' ') { - if (verbose) - cout << "Warning: Skipping extraneous amino acid entry " - << aaNum << " " << atNum << " " << altAaID << ".\n"; - } else { - aa->setType(aaType); - aa->getSideChain().setType(aaType); - - if (!noHAtoms || isHeavyAtom(at->getCode())) { - if (!inSideChain(*aa, *at)) - aa->addAtom(*at); - else { - aa->getSideChain().addAtom(*at); - } - } - } - } else { - if (verbose) - cout << "Warning: Skipping N-terminal ACE group " - << aaNum << " " << atNum << ".\n"; - } - } - delete at; - return aaNum; -} +} \ No newline at end of file diff --git a/Biopool/Sources/CifLoader.h b/Biopool/Sources/CifLoader.h index 1bd8d25..813a5ae 100644 --- a/Biopool/Sources/CifLoader.h +++ b/Biopool/Sources/CifLoader.h @@ -20,11 +20,13 @@ // Includes: #include #include + #include #include #include #include #include + #include "CifStructure.h" @@ -40,7 +42,6 @@ namespace Victor { public: // 
CONSTRUCTORS/DESTRUCTOR: - /** * Constructor. * @param _input = the CIF file object @@ -69,82 +70,35 @@ namespace Victor { // PREDICATES: - bool isValid() { - return valid; - } + bool isValid(); void checkModel(); //to check input values ​​ void checkAndSetChain(); //chosen by the user unsigned int getMaxModels(); - unsigned int getMaxModelsFast(); vector getAllChains(); - // MODIFIERS: - void setPermissive() { - permissive = true; - } - - void setNonPermissive() { - permissive = false; - } - - void setVerbose() { - verbose = true; - } - - void setNoVerbose() { - verbose = false; - } - - void setChain(char _ch) { - chain = _ch; - } - - void setModel(unsigned int _mod) { - model = _mod; - } - - void setAltAtom(char _a) { - altAtom = _a; - } - - void setNoHAtoms() { - noHAtoms = true; - } - - void setNoHetAtoms() { - noHetAtoms = true; - } + void setPermissive(); + void setNonPermissive(); + void setVerbose(); + void setNoVerbose(); + void setChain(char _ch); + void setModel(unsigned int _mod); + void setAltAtom(char _a); + void setNoHAtoms(); + void setNoHetAtoms(); void setOnlyMetalHetAtoms(); - - void setNoSecondary() { - noSecondary = true; - } - - void setWithSecondary() { - noSecondary = false; - } - - void setNoConnection() { - noConnection = true; - } - - void setWithConnection() { - noConnection = false; - } - + void setNoSecondary(); + void setWithSecondary(); + void setNoConnection(); + void setWithConnection(); void setWater(); - - void setAllChains() { - allChains = true; - } + void setAllChains(); //virtual void loadSpacer(Spacer& sp); //virtual void loadLigandSet(LigandSet& l); - virtual void loadProtein(Protein& prot); - + virtual void loadProtein(Protein& prot); //virtual void loadNucleotideChainSet(NucleotideChainSet& ns); //new class, new code by Damiano @@ -152,9 +106,8 @@ namespace Victor { // HELPERS: bool setBonds(Spacer& sp); bool inSideChain(const AminoAcid& aa, const Atom& at); - void loadSecondary(); void assignSecondary(Spacer& sp); - int 
parseCIFline(string atomLine, Ligand* lig, AminoAcid* aa); + int parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa); // ATTRIBUTES private: @@ -172,7 +125,6 @@ namespace Victor { char chain; // chain ID to be loaded unsigned int model; // model number to be loaded char altAtom; // ID of alternate atoms to be loaded - bool noNucleotideChains; // does not load nucleotide atoms string helixCode; // parallel vector of helix data, chain name for each helixData element @@ -184,7 +136,82 @@ namespace Victor { CifStructure cif; }; + inline + bool CifLoader::isValid() { + return valid; + } + + inline + void CifLoader::setPermissive() { + permissive = true; + } + + inline + void CifLoader::setNonPermissive() { + permissive = false; + } + + inline + void CifLoader::setVerbose() { + verbose = true; + } + + inline + void CifLoader::setNoVerbose() { + verbose = false; + } + + inline + void CifLoader::setChain(char _ch) { + chain = _ch; + } + + inline + void CifLoader::setModel(unsigned int _mod) { + model = _mod; + } + + inline + void CifLoader::setAltAtom(char _a) { + altAtom = _a; + } + + inline + void CifLoader::setNoHAtoms() { + noHAtoms = true; + } + + inline + void CifLoader::setNoHetAtoms() { + noHetAtoms = true; + } + + inline + void CifLoader::setNoSecondary() { + noSecondary = true; + } + + inline + void CifLoader::setWithSecondary() { + noSecondary = false; + } + + inline + void CifLoader::setNoConnection() { + noConnection = true; + } + + inline + void CifLoader::setWithConnection() { + noConnection = false; + } + + inline + void CifLoader::setAllChains() { + allChains = true; + } + } } //namespace -#endif //_PDB_LOADER_H_ +#endif //_CIF_LOADER_H_ diff --git a/Biopool/Sources/CifSaver.cc b/Biopool/Sources/CifSaver.cc new file mode 100644 index 0000000..9e81418 --- /dev/null +++ b/Biopool/Sources/CifSaver.cc @@ -0,0 +1,317 @@ +/* This file is part of Victor. 
+ + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + + +// Includes: +#include +#include + +#include "CifSaver.h" + +// Global constants, typedefs, etc. (to avoid): +using namespace Victor; +using namespace Victor::Biopool; +using namespace std; + +// CONSTRUCTORS/DESTRUCTOR: + +CifSaver::CifSaver(ostream& _output = cout) : +output(_output), writeSeq(true), writeSecStr(true), writeTer(true), +atomOffset(0), aminoOffset(0), ligandOffset(0), chain(' ') { +} + +CifSaver::~CifSaver() { + PRINT_NAME; +} + +// PREDICATES: + +void CifSaver::endFile() { + output << "END\n"; +} + +// MODIFIERS: + + +/** + * Saves a group in PDB format. 
+ * @param group reference + * @return void + */ +void PdbSaver::saveGroup(Group& gr) { + gr.sync(); + + for (unsigned int i = 0; i < gr.size(); i++) { + string atName = gr[i].getType(); + + if (atName == "OXT") // cosmetics: OXT has to be output after + continue; // the sidechain and therefore goes in saveSpacer + + // Added variable for correcting atom type H (last column in PDBs) + char atomOneLetter; + if (!isdigit(atName[0])) { + atomOneLetter = atName[0]; + } else { + atomOneLetter = atName[1]; + } + + // Added control for size by Damiano Piovesan + // example HG12 + if (!isdigit(atName[0]) && (atName.size() < 4)) + atName = ' ' + atName; + while (atName.size() < 4) + atName += ' '; + + output << "ATOM" << setw(7) << gr[i].getNumber() << " " << atName + << " " + << gr.getType() << " " << chain << setw(4) << aminoOffset << " " + << setw(8) << setprecision(3) << gr[i].getCoords().x + << setw(8) << setprecision(3) << gr[i].getCoords().y + << setw(8) << setprecision(3) << gr[i].getCoords().z + << " 1.00" << setw(6) << setprecision(2) << gr[i].getBFac() + << " " << atomOneLetter << "\n"; + + atomOffset = gr[i].getNumber() + 1; + } + + //aminoOffset++; +} + +/** + * Saves a sidechain in PDB format. + *@param sideChain reference + *@return void + */ +void PdbSaver::saveSideChain(SideChain& sc) { + saveGroup(sc); +} + +/** + * Saves an aminoacid in PDB format. + *@param AminoAcid reference + *@return void + */ +void PdbSaver::saveAminoAcid(AminoAcid& aa) { + saveGroup(aa); +} + +/** + * Saves a spacer in PDB format. + *@param Spacer reference + *@return void + */ +void PdbSaver::saveSpacer(Spacer& sp) { + PRINT_NAME; + + if (sp.size() > 0) { + unsigned int oldPrec = output.precision(); + ios::fmtflags oldFlags = output.flags(); + output.setf(ios::fixed, ios::floatfield); + + //method of class Component. 
It checks how deep is the spacer + if (sp.getDepth() == 0) { + if (writeTer) { + output << "HEADER " << sp.getType() << "\n" + << "REMARK created using Biopool2000 $Revision: 1.6.2.3 $ \n"; + } + aminoOffset = 0; + atomOffset = sp.getAtomStartOffset(); + } + + if (writeSeq) + writeSeqRes(sp); + if (writeSecStr) + writeSecondary(sp); + + aminoOffset = sp.getStartOffset(); + atomOffset = sp.getAtomStartOffset(); + + //saving is one ammino at a time + for (unsigned int i = 0; i < sp.sizeAmino(); i++) { + aminoOffset++; + while ((sp.isGap(aminoOffset)) && (aminoOffset < sp.maxPdbNumber())) { + aminoOffset++; + } + //cout << i << " " << aminoOffset << "\n"; + sp.getAmino(i).save(*this); + } + + // cosmetics: write OXT after last side chain + if (sp.getAmino(sp.sizeAmino() - 1).isMember(OXT)) { + unsigned int index = sp.sizeAmino() - 1; + output << "ATOM" << setw(7) << sp.getAmino(index)[OXT].getNumber() + << " OXT " + << sp.getAmino(index).getType() << " " << chain << setw(4) << aminoOffset + << " " << setw(8) << setprecision(3) + << sp.getAmino(index)[OXT].getCoords().x + << setw(8) << setprecision(3) + << sp.getAmino(index)[OXT].getCoords().y + << setw(8) << setprecision(3) + << sp.getAmino(index)[OXT].getCoords().z + << " 1.00" << setw(6) << setprecision(2) + << sp.getAmino(index)[OXT].getBFac() << " O\n"; + } + + if ((sp.getDepth() == 0) && (writeTer)) + output << "TER " << setw(4) << atomOffset + 1 << " " + << sp.getAmino(sp.sizeAmino() - 1).getType() << " " + << setw(4) << aminoOffset << "\n"; + + output.precision(oldPrec); + output.flags(oldFlags); + aminoOffset = 0; //necessary if the's more than one spacer + output << "TER\n"; + } + +} + +/** + * Saves a Ligand in PDB format. 
+ *@param Ligand reference + *@return void + */ +void PdbSaver::saveLigand(Ligand& gr) { + gr.sync(); + unsigned int oldPrec = output.precision(); + ios::fmtflags oldFlags = output.flags(); + output.setf(ios::fixed, ios::floatfield); + + string aaType = gr.getType(); + + + // DEBUG: write TER for DNA/RNA ligands + + string tag = "HETATM"; + if (isKnownNucleotide(nucleotideThreeLetterTranslator(aaType))) { + tag = "ATOM "; + } + + for (unsigned int i = 0; i < gr.size(); i++) //print all HETATM of a ligand + { + string atType = gr[i].getType(); + aaType = gr.getType(); + string atTypeShort; //last column in a Pdb File + unsigned int atNum = gr[i].getNumber(); + if (atType != aaType) { + atTypeShort = atType[0]; + atTypeShort = ' ' + atTypeShort; + atType = ' ' + atType; + } else { + atTypeShort = atType; + aaType = ' ' + aaType; + } + while (atType.size() < 4) + atType = atType + ' '; + while (aaType.size() < 3) + aaType = ' ' + aaType; + + + + output << tag << setw(5) << atNum << " " << setw(4) << atType << " " + << setw(3) << aaType << " " << chain << setw(4) << ligandOffset << " " + << setw(8) << setprecision(3) << gr[i].getCoords().x + << setw(8) << setprecision(3) << gr[i].getCoords().y + << setw(8) << setprecision(3) << gr[i].getCoords().z + << " 1.00" << setw(6) << setprecision(2) << gr[i].getBFac() + << " " << atTypeShort << "\n"; + } + if (tag == "ATOM ") { + output << "TER\n"; + } + + ligandOffset++; + output.precision(oldPrec); + output.flags(oldFlags); +} + +/** + * Saves a LigandSet in PDB format. + *@param LigandSet reference + *@return void + */ +void PdbSaver::saveLigandSet(LigandSet& ls) { + ligandOffset = ls.getStartOffset(); //set the offset for current LigandSet + + for (unsigned int i = 0; i < ls.sizeLigand(); i++) { + while ((ls.isGap(ligandOffset)) + && (ligandOffset < ls.maxPdbNumber())) + ligandOffset++; + ls[i].save(*this); + } +} + +/** + * Saves a Protein in PDB format. 
+ *@param Protein reference + *@return void + */ +void PdbSaver::saveProtein(Protein& prot) { + //if (prot.sizeProtein()==0) + // ERROR("Empty Protein",exception); + + Spacer* sp = NULL; + LigandSet* ls = NULL; + + + for (unsigned int i = 0; i < prot.sizeProtein(); i++) { + setChain(prot.getChainLetter(i)); //set the actual chain's ID + sp = prot.getSpacer(i); + saveSpacer(*sp); + + } + + for (unsigned int i = 0; i < prot.sizeProtein(); i++) { + setChain(prot.getChainLetter(i)); //set the actual chain's ID + ls = prot.getLigandSet(i); + + + if (ls != NULL) { + saveLigandSet(*ls); + } + } +} + +/** + * Writes the SEQRES entry (PDB format) for a spacer. + *@param Spacer reference + *@return void + */ +void PdbSaver::writeSeqRes(Spacer& sp) { + for (unsigned int i = 0; i < sp.sizeAmino() / 13; i++) { + output << "SEQRES " << setw(3) << i << " " << setw(3) + << sp.sizeAmino() << " "; + for (unsigned int j = 0; j < 13; j++) + output << sp.getAmino((i * 13) + j).getType() << " "; + output << "\n"; + } + if (sp.sizeAmino() % 13 > 0) { + output << "SEQRES " << setw(3) << sp.sizeAmino() / 13 + 1 << " " + << setw(3) << sp.sizeAmino() << " "; + for (unsigned int j = 13 * (sp.sizeAmino() / 13); j < sp.sizeAmino(); j++) + output << sp.getAmino(j).getType() << " "; + output << "\n"; + } +} + +/** + * Writes the secondary information (PDB format) for a spacer, e.g. HELIX, + * SHEET, etc. + *@param sideChain reference + *@return void + */ +void PdbSaver::writeSecondary(Spacer& sp) { + +} diff --git a/Biopool/Sources/CifSaver.h b/Biopool/Sources/CifSaver.h new file mode 100644 index 0000000..1d31817 --- /dev/null +++ b/Biopool/Sources/CifSaver.h @@ -0,0 +1,145 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + +#ifndef _PDB_SAVER_H_ +#define _PDB_SAVER_H_ + +// Includes: +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "CifStructure.h" + +// Global constants, typedefs, etc. (to avoid): + +namespace Victor { + namespace Biopool { + + /** + * @brief Saves components (Atoms, Groups, etc.) in standard PDB format + * */ + class CifSaver : public Saver { + public: + // CONSTRUCTORS/DESTRUCTOR: + + /** + * Basic constructor. By default it writes sequence, + * secondary structure and the term line. + * @param _output the output file object + */ + CifSaver(ostream& _output = cout); + + // this class uses the implicit copy operator. + + virtual ~CifSaver(); + + // PREDICATES: + + inline void endFile(); + + // MODIFIERS: + + void setWriteSecondaryStructure(); + void setDoNotWriteSecondaryStructure(); + void setWriteSeqRes(); + void setDoNotWriteSeqRes(); + void setWriteAtomOnly(); + void setWriteAll(); + void setChain(char _ch); + + /** + * Saves a group in PDB format. + * @param group reference + * @return void + */ + virtual void saveGroup(Group& gr); + virtual void saveSideChain(SideChain& sc); + virtual void saveAminoAcid(AminoAcid& aa); + virtual void saveSpacer(Spacer& sp); + virtual void saveLigand(Ligand& l); + virtual void saveLigandSet(LigandSet& l); + virtual void saveProtein(Protein& prot); + + protected: + + private: + // HELPERS: + void writeSeqRes(Spacer& sp); // writes SEQRES entry + void writeSecondary(Spacer& sp); + // writes secondary entries (SHEET, HELIX, etc.) 
+ // ATTRIBUTES + ostream& output; // output stream + bool writeSeq, writeSecStr, writeTer; + unsigned int atomOffset, ligandOffset; + int aminoOffset; + char chain; // chain ID + // offsets that determine at which atom, aminoacid and ligand number to start + }; + + inline + void CifSaver::setWriteSecondaryStructure() { + writeSecStr = true; + } + + inline + void CifSaver::setDoNotWriteSecondaryStructure() { + writeSecStr = false; + } + + inline + void CifSaver::setWriteSeqRes() { + writeSeq = true; + } + + inline + void CifSaver::setDoNotWriteSeqRes() { + writeSeq = false; + } + + inline + void CifSaver::setWriteAtomOnly() { + writeSecStr = false; + writeSeq = false; + writeTer = false; + } + + inline + void CifSaver::setWriteAll() { + writeSecStr = true; + writeSeq = true; + writeTer = true; + } + + inline + void CifSaver::setChain(char _ch) { + chain = _ch; + } + + } +} //namespace +#endif //_PDB_SAVER_H_ + + diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index bfe433c..308533e 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -7,6 +7,7 @@ #include #include + #include "CifStructure.h" using namespace Victor; diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h index 039748c..c8f86b8 100644 --- a/Biopool/Sources/CifStructure.h +++ b/Biopool/Sources/CifStructure.h @@ -1,4 +1,4 @@ -/* +/* * File: CifStructure.h * Author: marco * @@ -13,7 +13,6 @@ #include #include #include -#include using std::string; using std::istream; diff --git a/Biopool/Sources/Makefile b/Biopool/Sources/Makefile index 5f7a08c..7df3030 100644 --- a/Biopool/Sources/Makefile +++ b/Biopool/Sources/Makefile @@ -32,7 +32,8 @@ SOURCES = Identity.cc SimpleBond.cc Bond.cc \ AminoAcid.cc Spacer.cc IntSaver.cc IntLoader.cc SeqSaver.cc PdbLoader.cc \ PdbSaver.cc SeqLoader.cc IntCoordConverter.cc SeqConstructor.cc Ligand.cc \ LigandSet.cc SolvExpos.cc AminoAcidHydrogen.cc Nucleotide.cc \ - RelLoader.cc XyzSaver.cc 
RelSaver.cc XyzLoader.cc CifLoader.cc CifStructure + RelLoader.cc XyzSaver.cc RelSaver.cc XyzLoader.cc \ + CifLoader.cc CifStructure.cc CifSaver.cc OBJECTS = Identity.o SimpleBond.o Bond.o \ @@ -41,7 +42,8 @@ OBJECTS = Identity.o SimpleBond.o Bond.o \ SeqSaver.o PdbLoader.o PdbSaver.o SeqLoader.o \ IntCoordConverter.o SeqConstructor.o Ligand.o LigandSet.o \ SolvExpos.o Protein.o AminoAcidHydrogen.o Nucleotide.o \ - RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o CifLoader.o CifStructure.o + RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o \ + CifLoader.o CifStructure.o CifSaver.o TARGETS = diff --git a/Biopool/Sources/PdbLoader.h b/Biopool/Sources/PdbLoader.h index 9ed1a4b..f4c1bc9 100644 --- a/Biopool/Sources/PdbLoader.h +++ b/Biopool/Sources/PdbLoader.h @@ -148,7 +148,7 @@ namespace Victor { namespace Biopool { //virtual void loadSpacer(Spacer& sp); //virtual void loadLigandSet(LigandSet& l); - virtual void loadProtatein(Protein& prot); + virtual void loadProtein(Protein& prot); From a37deaeb7788dfddec9d7dbb9b8f3669de44b7ab Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Thu, 4 Jun 2015 12:16:22 +0200 Subject: [PATCH 03/16] compile errors resolved --- Biopool/Sources/CifLoader.cc | 132 +++++++++++++++----------------- Biopool/Sources/CifLoader.h | 37 ++++----- Biopool/Sources/CifSaver.cc | 22 ++---- Biopool/Sources/CifSaver.h | 25 ++---- Biopool/Sources/CifStructure.cc | 53 ++++++------- Biopool/Sources/CifStructure.h | 4 +- Biopool/Sources/Makefile | 8 +- Makefile.global | 2 +- 8 files changed, 119 insertions(+), 164 deletions(-) diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 5d58244..7fb321b 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -1,20 +1,10 @@ -/* This file is part of Victor. 
- - Victor is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Victor is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Victor. If not, see . +/* + * File: CifLoader.cc + * Author: marco + * + * Created on 3 giugno 2015, 23.20 */ - // Includes: #include #include @@ -37,18 +27,17 @@ using namespace std; // CONSTRUCTORS/DESTRUCTOR: -CifLoader::CifLoader(istream& _input = cin, bool _permissive = false, - bool _noHAtoms = false, bool _noHetAtoms = false, bool _noSecondary = false, - bool _noConnection = false, bool _noWater = true, bool _verb = true, - bool _allChains = false, string _NULL = "", bool _onlyMetal = false, - bool _noNucleotideChains = true) : +CifLoader::CifLoader(istream& _input, bool _permissive, bool _noHAtoms, + bool _noHetAtoms, bool _noSecondary, bool _noConnection, bool _noWater, + bool _verb, bool _allChains, string _NULL, bool _onlyMetal, + bool _noNucleotideChains ) : input(_input), permissive(_permissive), valid(true), noHAtoms(_noHAtoms), noHetAtoms(_noHetAtoms), noSecondary(_noSecondary), noConnection(_noConnection), noWater(_noWater), verbose(_verb), allChains(_allChains), chain(' '), model(999), altAtom('A'), helixCode(_NULL), //sheetCode(_NULL), helixData(), sheetData(), onlyMetalHetAtoms(_onlyMetal), sheetCode(_NULL), onlyMetalHetAtoms(_onlyMetal), noNucleotideChains(_noNucleotideChains) { - cif = new CifStructure(input); + cif = new CifStructure(_input); } CifLoader::~CifLoader() { @@ -106,13 +95,13 @@ unsigned int CifLoader::getMaxModels() { unsigned int max = 0; // search column's number of the model 
field in the atom group - cif.parseGroup("atom", atomLine); - int col = cif.getGroupColumnNumber("atom", "model"); + cif->parseGroup("atom", atomLine); + int col = cif->getGroupColumnNumber("atom", "model"); if (col != 0) { while (input) { if (atomLine.substr(0, 4) == "ATOM") { - max = stoiDEF(cif.getGroupField("atom", atomLine, col)); + max = stoiDEF(cif->getGroupField("atom", atomLine, col)); } atomLine = readLine(input); } @@ -135,19 +124,19 @@ vector CifLoader::getAllChains() { unsigned int modelNum = 0; - cif.parseGroup("atom", atomLine); - int modelCol = cif.getGroupColumnNumber("atom", "model"); - int chainCol = cif.getGroupColumnNumber("atom", "chain"); + cif->parseGroup("atom", atomLine); + int modelCol = cif->getGroupColumnNumber("atom", "model"); + int chainCol = cif->getGroupColumnNumber("atom", "chain"); while (input) { if (atomLine.substr(0, 4) == "ATOM") { - modelNum = stoiDEF(cif.getGroupField("atom", atomLine, modelCol)); + modelNum = stoiDEF(cif->getGroupField("atom", atomLine, modelCol)); // only consider first model: others duplicate chain IDs if (modelNum > 1) { break; } // check for new chains containing amino acids - char id = (cif.getGroupField("atom", atomLine, chainCol).c_str())[0]; + char id = (cif->getGroupField("atom", atomLine, chainCol).c_str())[0]; if (id != lastChain) { lastChain = id; res.push_back(id); @@ -259,40 +248,40 @@ CifLoader::loadSpacer(Spacer& sp){ int CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { // get atom id - int atNum = stoiDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "atom id"))); + int atNum = stoiDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "atom id"))); // get residue number - int aaNum = stoiDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "residue num"))); - char altAaID = cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "alt id")).c_str()[0]; // "Code for 
insertion of residues" + int aaNum = stoiDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "residue num"))); + char altAaID = cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "alt id")).c_str()[0]; // "Code for insertion of residues" // get x, y, z coordinates vgVector3 coord; - coord.x = stodDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "x"))); - coord.y = stodDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "y"))); - coord.z = stodDEF(cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "z"))); + coord.x = stodDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "x"))); + coord.y = stodDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "y"))); + coord.z = stodDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "z"))); // get b-factor double bfac = 0.0; - int colBfac = cif.getGroupColumnNumber("atom", "bfac"); + int colBfac = cif->getGroupColumnNumber("atom", "bfac"); if (colBfac != -1) { - string sbfac = cif.getGroupField("atom", atomLine, colBfac); + string sbfac = cif->getGroupField("atom", atomLine, colBfac); if (sbfac != "?" 
|| sbfac != ".") { bfac = stodDEF(sbfac); } } // get atom name - string atType = cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "atom name")); + string atType = cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "atom name")); // get residue name - string aaType = cif.getGroupField("atom", atomLine, - cif.getGroupColumnNumber("atom", "residue name")); + string aaType = cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "residue name")); // take care of deuterium atoms if (atType == "D") { @@ -426,42 +415,43 @@ void CifLoader::loadProtein(Protein& prot) { // read all lines do { // read header entry - if (regex_search(atomLine, cif.getTag("header")) && (name == "")) { + if (regex_search(atomLine, regex(cif->getTag("header"))) + && (name == "")) { name = atomLine; sp->setType(name); }// read helix entry - else if (regex_search(atomLine, cif.getTag("helix"))) { - cif.parseGroup("helix", atomLine); - int colS = cif.getGroupColumnNumber("helix", "helix start"); - int colE = cif.getGroupColumnNumber("helix", "helix end"); + else if (regex_search(atomLine, regex(cif->getTag("helix")))) { + cif->parseGroup("helix", atomLine); + int colS = cif->getGroupColumnNumber("helix", "helix start"); + int colE = cif->getGroupColumnNumber("helix", "helix end"); - start = stoiDEF(cif.getGroupField("helix", atomLine, colS)); - end = stoiDEF(cif.getGroupField("helix", atomLine, colE)); + start = stoiDEF(cif->getGroupField("helix", atomLine, colS)); + end = stoiDEF(cif->getGroupField("helix", atomLine, colE)); helixData.push_back(pair(start, end)); - int colC = cif.getGroupColumnNumber("helix", "helix chain"); - helixCode += cif.getGroupField("helix", atomLine, colC); + int colC = cif->getGroupColumnNumber("helix", "helix chain"); + helixCode += cif->getGroupField("helix", atomLine, colC); }// read sheet entry - else if (regex_search(atomLine, cif.getTag("sheet"))) { - cif.parseGroup("sheet range", atomLine); - int colS = 
cif.getGroupColumnNumber("sheet range", "sheet start"); - int colE = cif.getGroupColumnNumber("sheet range", "sheet start"); + else if (regex_search(atomLine, regex(cif->getTag("sheet")))) { + cif->parseGroup("sheet range", atomLine); + int colS = cif->getGroupColumnNumber("sheet range", "sheet start"); + int colE = cif->getGroupColumnNumber("sheet range", "sheet start"); - start = stoiDEF(cif.getGroupField("sheet range", atomLine, colS)); - end = stoiDEF(cif.getGroupField("sheet range", atomLine, colE)); + start = stoiDEF(cif->getGroupField("sheet range", atomLine, colS)); + end = stoiDEF(cif->getGroupField("sheet range", atomLine, colE)); sheetData.push_back(pair(start, end)); - int colC = cif.getGroupColumnNumber("sheet range", "sheet chain"); - sheetCode += cif.getGroupField("sheet range", atomLine, colC); + int colC = cif->getGroupColumnNumber("sheet range", "sheet chain"); + sheetCode += cif->getGroupField("sheet range", atomLine, colC); }// Parse one line of the "ATOM" and "HETATM" fields else if (atomLine.substr(0, 6) == "ATOM " || atomLine.substr(0, 6) == "HETATM") { tag = atomLine.substr(0, 6); - cif.parseGroup("atom", atomLine); + cif->parseGroup("atom", atomLine); // Control model number - int colM = cif.getGroupColumnNumber("atom", "model"); - readingModel = stouiDEF(cif.getGroupField("atom", atomLine, colM)); + int colM = cif->getGroupColumnNumber("atom", "model"); + readingModel = stouiDEF(cif->getGroupField("atom", atomLine, colM)); if (readingModel > model) break; // Get only the first model if not specified @@ -469,13 +459,13 @@ void CifLoader::loadProtein(Protein& prot) { model = readingModel; } - int colC = cif.getGroupColumnNumber("atom", "chain"); - char chainID = cif.getGroupField("atom", atomLine, colC).c_str()[0]; + int colC = cif->getGroupColumnNumber("atom", "chain"); + char chainID = cif->getGroupField("atom", atomLine, colC).c_str()[0]; if (chainList[i] == chainID) { if ((model == 999) || (model == readingModel)) { - int colAa = 
cif.getGroupColumnNumber("atom", "residue num"); - aaNum = stoiDEF(cif.getGroupField("atom", atomLine, colAa)); + int colAa = cif->getGroupColumnNumber("atom", "residue num"); + aaNum = stoiDEF(cif->getGroupField("atom", atomLine, colAa)); // Insert the Ligand object into LigandSet if (aaNum != oldAaNum) { diff --git a/Biopool/Sources/CifLoader.h b/Biopool/Sources/CifLoader.h index 813a5ae..9c7591a 100644 --- a/Biopool/Sources/CifLoader.h +++ b/Biopool/Sources/CifLoader.h @@ -1,26 +1,16 @@ -/* This file is part of Victor. - - Victor is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Victor is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Victor. If not, see . +/* + * File: CifLoader.h + * Author: marco + * + * Created on 3 giugno 2015, 23.20 */ -#ifndef _CIF_LOADER_H_ -#define _CIF_LOADER_H_ +#ifndef CIFLOADER_H +#define CIFLOADER_H // Includes: #include #include - #include #include #include @@ -34,10 +24,10 @@ namespace Victor { namespace Biopool { - + /** * @brief Loads components (Atoms, Groups, Spacer, etc.) in standard CIF format. 
- * */ + */ class CifLoader : public Loader { public: @@ -133,7 +123,7 @@ namespace Victor { vector > helixData; // begin and end of the helix vector > sheetData; - CifStructure cif; + CifStructure* cif; }; inline @@ -211,7 +201,8 @@ namespace Victor { allChains = true; } - } -} //namespace -#endif //_CIF_LOADER_H_ + } // namespace Biopool +} // namespace Victor + +#endif /* CIFLOADER_H */ diff --git a/Biopool/Sources/CifSaver.cc b/Biopool/Sources/CifSaver.cc index 9e81418..c2b21d5 100644 --- a/Biopool/Sources/CifSaver.cc +++ b/Biopool/Sources/CifSaver.cc @@ -1,20 +1,10 @@ -/* This file is part of Victor. - - Victor is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - Victor is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Victor. If not, see . +/* + * File: CifSaver.cc + * Author: marco + * + * Created on 3 giugno 2015, 23.20 */ - // Includes: #include #include @@ -314,4 +304,4 @@ void PdbSaver::writeSeqRes(Spacer& sp) { */ void PdbSaver::writeSecondary(Spacer& sp) { -} +} \ No newline at end of file diff --git a/Biopool/Sources/CifSaver.h b/Biopool/Sources/CifSaver.h index 1d31817..edf0dba 100644 --- a/Biopool/Sources/CifSaver.h +++ b/Biopool/Sources/CifSaver.h @@ -1,21 +1,12 @@ -/* This file is part of Victor. - - Victor is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. 
- - Victor is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with Victor. If not, see . +/* + * File: CifSaver.h + * Author: marco + * + * Created on 3 giugno 2015, 23.20 */ -#ifndef _PDB_SAVER_H_ -#define _PDB_SAVER_H_ +#ifndef CIFSAVER_H +#define CIFSAVER_H // Includes: #include @@ -140,6 +131,6 @@ namespace Victor { } } //namespace -#endif //_PDB_SAVER_H_ +#endif /* CIFSAVER_H */ diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index 308533e..0089896 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -6,7 +6,9 @@ */ #include -#include +#include + +#include #include "CifStructure.h" @@ -127,7 +129,7 @@ string CifStructure::getGroupField(string name, string line, int columnNum) { vector& group = getGroup(name); vector fields; string field; - for (int i = 0; i < group.size(); i++) { + for (unsigned int i = 0; i < group.size(); i++) { iss >> field; fields.push_back(field); } @@ -143,27 +145,25 @@ void CifStructure::parseGroup(string name, string line) { vector& group = getGroup(name); // exit the function if the group name is already parsed - if (isGroupParsed(name)) { - break; - } - - while (input) { - regex groupName(getTag(name)); - smatch match; - if (regex_search(line, match, groupName)) { - group.push_back(match.suffix().str()); - found = true; - } else { - found = false; - } - - // exit the loop when the research of the fields is completed - if (!found && group.size() > 1) { - setParsedFlag(name); - break; + if (!isGroupParsed(name)) { + while (input) { + regex groupName(getTag(name)); + smatch match; + if (regex_search(line, match, groupName)) { + group.push_back(match.suffix().str()); + found = true; + } else { + found = false; + } + + // exit 
the loop when the research of the fields is completed + if (!found && group.size() > 1) { + setParsedFlag(name); + break; + } + + line = readLine(input); } - - line = readLine(input); } } @@ -206,11 +206,4 @@ bool CifStructure::isGroupParsed(string name) { } else if (name == "sheet range") { return sheetRangeGroupParsed; } -} - - - - - - - +} \ No newline at end of file diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h index c8f86b8..71f195b 100644 --- a/Biopool/Sources/CifStructure.h +++ b/Biopool/Sources/CifStructure.h @@ -135,8 +135,8 @@ namespace Victor { bool sheetRangeGroupParsed = false; bool sheetHboundgroupParsed = false; }; - } - + } // namespace Biopool +} // namespace Victor #endif /* CIFSTRUCTURE_H */ diff --git a/Biopool/Sources/Makefile b/Biopool/Sources/Makefile index 7df3030..623fb72 100644 --- a/Biopool/Sources/Makefile +++ b/Biopool/Sources/Makefile @@ -32,8 +32,8 @@ SOURCES = Identity.cc SimpleBond.cc Bond.cc \ AminoAcid.cc Spacer.cc IntSaver.cc IntLoader.cc SeqSaver.cc PdbLoader.cc \ PdbSaver.cc SeqLoader.cc IntCoordConverter.cc SeqConstructor.cc Ligand.cc \ LigandSet.cc SolvExpos.cc AminoAcidHydrogen.cc Nucleotide.cc \ - RelLoader.cc XyzSaver.cc RelSaver.cc XyzLoader.cc \ - CifLoader.cc CifStructure.cc CifSaver.cc + RelLoader.cc XyzSaver.cc RelSaver.cc XyzLoader.cc ProvaLoader.cc \ + CifStructure.cc CifLoader.cc OBJECTS = Identity.o SimpleBond.o Bond.o \ @@ -42,8 +42,8 @@ OBJECTS = Identity.o SimpleBond.o Bond.o \ SeqSaver.o PdbLoader.o PdbSaver.o SeqLoader.o \ IntCoordConverter.o SeqConstructor.o Ligand.o LigandSet.o \ SolvExpos.o Protein.o AminoAcidHydrogen.o Nucleotide.o \ - RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o \ - CifLoader.o CifStructure.o CifSaver.o + RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o ProvaLoader.o \ + CifStructure.o CifLoader.o TARGETS = diff --git a/Makefile.global b/Makefile.global index e8d267a..a668d52 100644 --- a/Makefile.global +++ b/Makefile.global @@ -38,7 +38,7 @@ DEBUGFLAGS = -g 
NORMALFLAGS = $(DEBUGFLAGS) -FASTFLAGS = -O3 -ffast-math -DNDEBUG -ftemplate-depth-36 +FASTFLAGS = -O3 -ffast-math -DNDEBUG -ftemplate-depth-48 QUICKFLAGS = -DNDEBUG From 6f860e568868866842e214bdcc0543a5e0b55eb6 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Fri, 5 Jun 2015 11:45:23 +0200 Subject: [PATCH 04/16] regex library dependency removed --- Biopool/Sources/CifLoader.cc | 7 ++-- Biopool/Sources/CifLoader.h | 45 +++++++++---------------- Biopool/Sources/CifSaver.cc | 26 ++++++-------- Biopool/Sources/CifSaver.h | 40 +++++++++++----------- Biopool/Sources/CifStructure.cc | 42 ++++++++++++++++++++--- Biopool/Sources/CifStructure.h | 60 ++++++++++++++++----------------- Biopool/Sources/Makefile | 8 ++--- Makefile.global | 2 +- 8 files changed, 122 insertions(+), 108 deletions(-) diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 7fb321b..4264204 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -6,7 +6,6 @@ */ // Includes: -#include #include #include @@ -415,12 +414,12 @@ void CifLoader::loadProtein(Protein& prot) { // read all lines do { // read header entry - if (regex_search(atomLine, regex(cif->getTag("header"))) + if (atomLine.find(cif->getTag("header")) != string::npos && (name == "")) { name = atomLine; sp->setType(name); }// read helix entry - else if (regex_search(atomLine, regex(cif->getTag("helix")))) { + else if (atomLine.find(cif->getTag("helix")) != string::npos) { cif->parseGroup("helix", atomLine); int colS = cif->getGroupColumnNumber("helix", "helix start"); int colE = cif->getGroupColumnNumber("helix", "helix end"); @@ -432,7 +431,7 @@ void CifLoader::loadProtein(Protein& prot) { int colC = cif->getGroupColumnNumber("helix", "helix chain"); helixCode += cif->getGroupField("helix", atomLine, colC); }// read sheet entry - else if (regex_search(atomLine, regex(cif->getTag("sheet")))) { + else if (atomLine.find(cif->getTag("sheet")) != string::npos) { cif->parseGroup("sheet range", 
atomLine); int colS = cif->getGroupColumnNumber("sheet range", "sheet start"); int colE = cif->getGroupColumnNumber("sheet range", "sheet start"); diff --git a/Biopool/Sources/CifLoader.h b/Biopool/Sources/CifLoader.h index 9c7591a..f5888d4 100644 --- a/Biopool/Sources/CifLoader.h +++ b/Biopool/Sources/CifLoader.h @@ -126,78 +126,63 @@ namespace Victor { CifStructure* cif; }; - inline - bool CifLoader::isValid() { + inline bool CifLoader::isValid() { return valid; } - inline - void CifLoader::setPermissive() { + inline void CifLoader::setPermissive() { permissive = true; } - inline - void CifLoader::setNonPermissive() { + inline void CifLoader::setNonPermissive() { permissive = false; } - inline - void CifLoader::setVerbose() { + inline void CifLoader::setVerbose() { verbose = true; } - inline - void CifLoader::setNoVerbose() { + inline void CifLoader::setNoVerbose() { verbose = false; } - inline - void CifLoader::setChain(char _ch) { + inline void CifLoader::setChain(char _ch) { chain = _ch; } - inline - void CifLoader::setModel(unsigned int _mod) { + inline void CifLoader::setModel(unsigned int _mod) { model = _mod; } - inline - void CifLoader::setAltAtom(char _a) { + inline void CifLoader::setAltAtom(char _a) { altAtom = _a; } - inline - void CifLoader::setNoHAtoms() { + inline void CifLoader::setNoHAtoms() { noHAtoms = true; } - inline - void CifLoader::setNoHetAtoms() { + inline void CifLoader::setNoHetAtoms() { noHetAtoms = true; } - inline - void CifLoader::setNoSecondary() { + inline void CifLoader::setNoSecondary() { noSecondary = true; } - inline - void CifLoader::setWithSecondary() { + inline void CifLoader::setWithSecondary() { noSecondary = false; } - inline - void CifLoader::setNoConnection() { + inline void CifLoader::setNoConnection() { noConnection = true; } - inline - void CifLoader::setWithConnection() { + inline void CifLoader::setWithConnection() { noConnection = false; } - inline - void CifLoader::setAllChains() { + inline void 
CifLoader::setAllChains() { allChains = true; } diff --git a/Biopool/Sources/CifSaver.cc b/Biopool/Sources/CifSaver.cc index c2b21d5..0f73e4d 100644 --- a/Biopool/Sources/CifSaver.cc +++ b/Biopool/Sources/CifSaver.cc @@ -18,7 +18,7 @@ using namespace std; // CONSTRUCTORS/DESTRUCTOR: -CifSaver::CifSaver(ostream& _output = cout) : +CifSaver::CifSaver(ostream& _output) : output(_output), writeSeq(true), writeSecStr(true), writeTer(true), atomOffset(0), aminoOffset(0), ligandOffset(0), chain(' ') { } @@ -29,19 +29,15 @@ CifSaver::~CifSaver() { // PREDICATES: -void CifSaver::endFile() { - output << "END\n"; -} - // MODIFIERS: /** - * Saves a group in PDB format. + * Saves a group in CIF format. * @param group reference * @return void */ -void PdbSaver::saveGroup(Group& gr) { +void CifSaver::saveGroup(Group& gr) { gr.sync(); for (unsigned int i = 0; i < gr.size(); i++) { @@ -85,7 +81,7 @@ void PdbSaver::saveGroup(Group& gr) { *@param sideChain reference *@return void */ -void PdbSaver::saveSideChain(SideChain& sc) { +void CifSaver::saveSideChain(SideChain& sc) { saveGroup(sc); } @@ -94,7 +90,7 @@ void PdbSaver::saveSideChain(SideChain& sc) { *@param AminoAcid reference *@return void */ -void PdbSaver::saveAminoAcid(AminoAcid& aa) { +void CifSaver::saveAminoAcid(AminoAcid& aa) { saveGroup(aa); } @@ -103,7 +99,7 @@ void PdbSaver::saveAminoAcid(AminoAcid& aa) { *@param Spacer reference *@return void */ -void PdbSaver::saveSpacer(Spacer& sp) { +void CifSaver::saveSpacer(Spacer& sp) { PRINT_NAME; if (sp.size() > 0) { @@ -173,7 +169,7 @@ void PdbSaver::saveSpacer(Spacer& sp) { *@param Ligand reference *@return void */ -void PdbSaver::saveLigand(Ligand& gr) { +void CifSaver::saveLigand(Ligand& gr) { gr.sync(); unsigned int oldPrec = output.precision(); ios::fmtflags oldFlags = output.flags(); @@ -232,7 +228,7 @@ void PdbSaver::saveLigand(Ligand& gr) { *@param LigandSet reference *@return void */ -void PdbSaver::saveLigandSet(LigandSet& ls) { +void 
CifSaver::saveLigandSet(LigandSet& ls) { ligandOffset = ls.getStartOffset(); //set the offset for current LigandSet for (unsigned int i = 0; i < ls.sizeLigand(); i++) { @@ -248,7 +244,7 @@ void PdbSaver::saveLigandSet(LigandSet& ls) { *@param Protein reference *@return void */ -void PdbSaver::saveProtein(Protein& prot) { +void CifSaver::saveProtein(Protein& prot) { //if (prot.sizeProtein()==0) // ERROR("Empty Protein",exception); @@ -279,7 +275,7 @@ void PdbSaver::saveProtein(Protein& prot) { *@param Spacer reference *@return void */ -void PdbSaver::writeSeqRes(Spacer& sp) { +void CifSaver::writeSeqRes(Spacer& sp) { for (unsigned int i = 0; i < sp.sizeAmino() / 13; i++) { output << "SEQRES " << setw(3) << i << " " << setw(3) << sp.sizeAmino() << " "; @@ -302,6 +298,6 @@ void PdbSaver::writeSeqRes(Spacer& sp) { *@param sideChain reference *@return void */ -void PdbSaver::writeSecondary(Spacer& sp) { +void CifSaver::writeSecondary(Spacer& sp) { } \ No newline at end of file diff --git a/Biopool/Sources/CifSaver.h b/Biopool/Sources/CifSaver.h index edf0dba..bf582f6 100644 --- a/Biopool/Sources/CifSaver.h +++ b/Biopool/Sources/CifSaver.h @@ -49,7 +49,7 @@ namespace Victor { // PREDICATES: - inline void endFile(); + void endFile(); // MODIFIERS: @@ -62,7 +62,7 @@ namespace Victor { void setChain(char _ch); /** - * Saves a group in PDB format. + * Saves a group in CIF format. * @param group reference * @return void */ @@ -78,54 +78,56 @@ namespace Victor { private: // HELPERS: - void writeSeqRes(Spacer& sp); // writes SEQRES entry - void writeSecondary(Spacer& sp); + + // writes SEQRES entry + void writeSeqRes(Spacer& sp); + // writes secondary entries (SHEET, HELIX, etc.) 
+ void writeSecondary(Spacer& sp); + // ATTRIBUTES ostream& output; // output stream bool writeSeq, writeSecStr, writeTer; + // offsets that determine at which atom, + // aminoacid and ligand number to start unsigned int atomOffset, ligandOffset; int aminoOffset; char chain; // chain ID - // offsets that determine at which atom, aminoacid and ligand number to start }; - inline - void CifSaver::setWriteSecondaryStructure() { + inline void CifSaver::endFile() { + output << "END\n"; + } + + inline void CifSaver::setWriteSecondaryStructure() { writeSecStr = true; } - inline - void CifSaver::setDoNotWriteSecondaryStructure() { + inline void CifSaver::setDoNotWriteSecondaryStructure() { writeSecStr = false; } - inline - void CifSaver::setWriteSeqRes() { + inline void CifSaver::setWriteSeqRes() { writeSeq = true; } - inline - void CifSaver::setDoNotWriteSeqRes() { + inline void CifSaver::setDoNotWriteSeqRes() { writeSeq = false; } - inline - void CifSaver::setWriteAtomOnly() { + inline void CifSaver::setWriteAtomOnly() { writeSecStr = false; writeSeq = false; writeTer = false; } - inline - void CifSaver::setWriteAll() { + inline void CifSaver::setWriteAll() { writeSecStr = true; writeSeq = true; writeTer = true; } - inline - void CifSaver::setChain(char _ch) { + inline void CifSaver::setChain(char _ch) { chain = _ch; } diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index 0089896..dfe89b5 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -5,8 +5,8 @@ * Created on 1 giugno 2015, 11.36 */ -#include #include +#include #include @@ -17,6 +17,37 @@ using namespace Victor::Biopool; using namespace std; CifStructure::CifStructure(istream& input) : input(input) { + header = "_struct_keywords.pdbx_keywords"; + model = "pdbx_PDB_model_num"; + helix = "_struct_conf."; + helixStart = "beg_auth_seq_id"; + helixEnd = "end_auth_seq_id"; + helixChainId = "beg_auth_asym_id"; + atom = "_atom_site."; + residueNum = 
"auth_seq_id"; + atomId = "id"; + atomAltId = "label_alt_id"; + tempFactor = "B_iso_or_equiv"; + atomName = "auth_atom_id"; + residueName = "auth_comp_id"; + x = "Cartn_x"; + y = "Cartn_y"; + z = "Cartn_z"; + chain = "auth_asym_id"; + sheet = "_struct_sheet."; + sheetOrder = "_struct_sheet_order."; + sheetRange = "_struct_sheet_range."; + sheetHbond = "_pdbx_struct_sheet_hbond."; + sheetStart = "beg_auth_seq_id"; + sheetEnd = "end_auth_seq_id"; + sheetChainId = "beg_auth_asym_id"; + + atomGroupParsed = false; + helixGroupParsed = false; + sheetGroupParsed = false; + sheetOrderGroupParsed = false; + sheetRangeGroupParsed = false; + sheetHboundgroupParsed = false; } CifStructure::~CifStructure() { @@ -147,10 +178,11 @@ void CifStructure::parseGroup(string name, string line) { // exit the function if the group name is already parsed if (!isGroupParsed(name)) { while (input) { - regex groupName(getTag(name)); - smatch match; - if (regex_search(line, match, groupName)) { - group.push_back(match.suffix().str()); + string groupName(getTag(name)); + size_t pos = line.find(groupName); + if (pos != string::npos) { + group.push_back(line.substr(pos + groupName.size(), + line.size() - groupName.size())); found = true; } else { found = false; diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h index 71f195b..560e253 100644 --- a/Biopool/Sources/CifStructure.h +++ b/Biopool/Sources/CifStructure.h @@ -94,30 +94,30 @@ namespace Victor { istream& input; // CIF tags - string header = "_struct_keywords.pdbx_keywords"; - string model = "pdbx_PDB_model_num"; - string helix = "_struct_conf."; - string helixStart = "beg_auth_seq_id"; - string helixEnd = "end_auth_seq_id"; - string helixChainId = "beg_auth_asym_id"; - string atom = "_atom_site."; - string residueNum = "auth_seq_id"; - string atomId = "id"; - string atomAltId = "label_alt_id"; - string tempFactor = "B_iso_or_equiv"; - string atomName = "auth_atom_id"; - string residueName = "auth_comp_id"; - string 
x = "Cartn_x"; - string y = "Cartn_y"; - string z = "Cartn_z"; - string chain = "auth_asym_id"; - string sheet = "_struct_sheet."; - string sheetOrder = "_struct_sheet_order."; - string sheetRange = "_struct_sheet_range."; - string sheetHbond = "_pdbx_struct_sheet_hbond."; - string sheetStart = "beg_auth_seq_id"; - string sheetEnd = "end_auth_seq_id"; - string sheetChainId = "beg_auth_asym_id"; + string header; + string model; + string helix; + string helixStart; + string helixEnd; + string helixChainId; + string atom; + string residueNum; + string atomId; + string atomAltId; + string tempFactor; + string atomName; + string residueName; + string x; + string y; + string z; + string chain; + string sheet; + string sheetOrder; + string sheetRange; + string sheetHbond; + string sheetStart; + string sheetEnd; + string sheetChainId; // collections of CIF group fields vector atomGroup; @@ -128,12 +128,12 @@ namespace Victor { vector sheetHbondGroup; // flags - bool atomGroupParsed = false; - bool helixGroupParsed = false; - bool sheetGroupParsed = false; - bool sheetOrderGroupParsed = false; - bool sheetRangeGroupParsed = false; - bool sheetHboundgroupParsed = false; + bool atomGroupParsed; + bool helixGroupParsed; + bool sheetGroupParsed; + bool sheetOrderGroupParsed; + bool sheetRangeGroupParsed; + bool sheetHboundgroupParsed; }; } // namespace Biopool } // namespace Victor diff --git a/Biopool/Sources/Makefile b/Biopool/Sources/Makefile index 623fb72..8a96a1c 100644 --- a/Biopool/Sources/Makefile +++ b/Biopool/Sources/Makefile @@ -32,8 +32,8 @@ SOURCES = Identity.cc SimpleBond.cc Bond.cc \ AminoAcid.cc Spacer.cc IntSaver.cc IntLoader.cc SeqSaver.cc PdbLoader.cc \ PdbSaver.cc SeqLoader.cc IntCoordConverter.cc SeqConstructor.cc Ligand.cc \ LigandSet.cc SolvExpos.cc AminoAcidHydrogen.cc Nucleotide.cc \ - RelLoader.cc XyzSaver.cc RelSaver.cc XyzLoader.cc ProvaLoader.cc \ - CifStructure.cc CifLoader.cc + RelLoader.cc XyzSaver.cc RelSaver.cc XyzLoader.cc \ + CifStructure.cc 
CifLoader.cc CifSaver.cc OBJECTS = Identity.o SimpleBond.o Bond.o \ @@ -42,8 +42,8 @@ OBJECTS = Identity.o SimpleBond.o Bond.o \ SeqSaver.o PdbLoader.o PdbSaver.o SeqLoader.o \ IntCoordConverter.o SeqConstructor.o Ligand.o LigandSet.o \ SolvExpos.o Protein.o AminoAcidHydrogen.o Nucleotide.o \ - RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o ProvaLoader.o \ - CifStructure.o CifLoader.o + RelLoader.o XyzSaver.o RelSaver.o XyzLoader.o \ + CifStructure.o CifLoader.o CifSaver.o TARGETS = diff --git a/Makefile.global b/Makefile.global index a668d52..0a9ddda 100644 --- a/Makefile.global +++ b/Makefile.global @@ -32,7 +32,7 @@ # # These flags should always be used. -STANDARDFLAGS = -Wall -ansi -pedantic -DNEXCEPTIONS -DLINUX -c -std=c++11 +STANDARDFLAGS = -Wall -ansi -pedantic -DNEXCEPTIONS -DLINUX -c DEBUGFLAGS = -g From 593a6facc18b4bb0e421ef455bb13027cb951e76 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Sat, 6 Jun 2015 20:43:35 +0200 Subject: [PATCH 05/16] debug done, working --- Biopool/Sources/CifLoader.cc | 85 ++++++++++++++++++++---------- Biopool/Sources/CifLoader.h | 6 ++- Biopool/Sources/CifStructure.cc | 92 ++++++++++++++++++++++----------- Biopool/Sources/CifStructure.h | 18 +++++-- 4 files changed, 135 insertions(+), 66 deletions(-) diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 4264204..2934f33 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -26,17 +26,17 @@ using namespace std; // CONSTRUCTORS/DESTRUCTOR: -CifLoader::CifLoader(istream& _input, bool _permissive, bool _noHAtoms, +CifLoader::CifLoader(istream& _input, ostream& output, bool _permissive, bool _noHAtoms, bool _noHetAtoms, bool _noSecondary, bool _noConnection, bool _noWater, bool _verb, bool _allChains, string _NULL, bool _onlyMetal, bool _noNucleotideChains ) : -input(_input), permissive(_permissive), valid(true), noHAtoms(_noHAtoms), +input(_input), output(output), permissive(_permissive), valid(true), noHAtoms(_noHAtoms), 
noHetAtoms(_noHetAtoms), noSecondary(_noSecondary), noConnection(_noConnection), noWater(_noWater), verbose(_verb), allChains(_allChains), chain(' '), model(999), altAtom('A'), helixCode(_NULL), //sheetCode(_NULL), helixData(), sheetData(), onlyMetalHetAtoms(_onlyMetal), sheetCode(_NULL), onlyMetalHetAtoms(_onlyMetal), noNucleotideChains(_noNucleotideChains) { - cif = new CifStructure(_input); + cif = new CifStructure(_input, output); } CifLoader::~CifLoader() { @@ -113,6 +113,7 @@ unsigned int CifLoader::getMaxModels() { * @return all available chain IDs */ vector CifLoader::getAllChains() { + output << "IN getAllChains" << endl; vector res; char lastChain = ' '; @@ -124,18 +125,24 @@ vector CifLoader::getAllChains() { unsigned int modelNum = 0; cif->parseGroup("atom", atomLine); + cif->printGroup("atom"); + output << "line: " << atomLine << endl; int modelCol = cif->getGroupColumnNumber("atom", "model"); int chainCol = cif->getGroupColumnNumber("atom", "chain"); + output << "model: " << modelCol << ", chain: " << chainCol << endl; while (input) { if (atomLine.substr(0, 4) == "ATOM") { modelNum = stoiDEF(cif->getGroupField("atom", atomLine, modelCol)); + output << "riga: " << atomLine << endl; + output << "numero modello: " << modelNum << endl; // only consider first model: others duplicate chain IDs if (modelNum > 1) { break; } // check for new chains containing amino acids char id = (cif->getGroupField("atom", atomLine, chainCol).c_str())[0]; + output << "id" << endl; if (id != lastChain) { lastChain = id; res.push_back(id); @@ -143,6 +150,7 @@ vector CifLoader::getAllChains() { } atomLine = readLine(input); } + output << "OUT getAllChains" << endl; return res; } @@ -246,6 +254,7 @@ CifLoader::loadSpacer(Spacer& sp){ */ int CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { + output << "IN parseCifline" << endl; // get atom id int atNum = stoiDEF(cif->getGroupField("atom", atomLine, cif->getGroupColumnNumber("atom", "atom id"))); 
@@ -253,7 +262,7 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) int aaNum = stoiDEF(cif->getGroupField("atom", atomLine, cif->getGroupColumnNumber("atom", "residue num"))); char altAaID = cif->getGroupField("atom", atomLine, - cif->getGroupColumnNumber("atom", "alt id")).c_str()[0]; // "Code for insertion of residues" + cif->getGroupColumnNumber("atom", "residue ins")).c_str()[0]; // "Code for insertion of residues" // get x, y, z coordinates vgVector3 coord; @@ -313,7 +322,7 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) if (aaType != "ACE") { // DEBUG: it would be nice to load also alternative atoms // skip alternative atoms, - if (altAaID != ' ') { + if (altAaID != '?') { if (verbose) cout << "Warning: Skipping extraneous amino acid entry " << aaNum << " " << atNum << " " << altAaID << ".\n"; @@ -336,6 +345,7 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) } } delete at; + output << "OUT parseCifline" << endl; return aaNum; } @@ -350,7 +360,8 @@ void CifLoader::loadProtein(Protein& prot) { if (chainList.size() == 0) { if (verbose) - cout << "Warning: Missing chain ID in the CIF, assuming the same chain for the entire file.\n"; + cout << "Warning: Missing chain ID in the CIF," + "assuming the same chain for the entire file.\n"; chainList.push_back(char(' ')); } @@ -398,6 +409,7 @@ void CifLoader::loadProtein(Protein& prot) { string atomLine; atomLine = readLine(input); + output << "atomLine: " << atomLine << endl; int aaNum = -100000; // infinite negative int oldAaNum = -100000; @@ -418,35 +430,51 @@ void CifLoader::loadProtein(Protein& prot) { && (name == "")) { name = atomLine; sp->setType(name); - }// read helix entry + } + // read helix entry else if (atomLine.find(cif->getTag("helix")) != string::npos) { cif->parseGroup("helix", atomLine); - int colS = cif->getGroupColumnNumber("helix", "helix start"); - int colE = cif->getGroupColumnNumber("helix", 
"helix end"); - - start = stoiDEF(cif->getGroupField("helix", atomLine, colS)); - end = stoiDEF(cif->getGroupField("helix", atomLine, colE)); - - helixData.push_back(pair(start, end)); - int colC = cif->getGroupColumnNumber("helix", "helix chain"); - helixCode += cif->getGroupField("helix", atomLine, colC); - }// read sheet entry + + while (atomLine != "# ") { + int colS = cif->getGroupColumnNumber("helix", "helix start"); + int colE = cif->getGroupColumnNumber("helix", "helix end"); + start = stoiDEF(cif->getGroupField("helix", atomLine, colS)); + end = stoiDEF(cif->getGroupField("helix", atomLine, colE)); + helixData.push_back(pair(start, end)); + + int colC = cif->getGroupColumnNumber("helix", "helix chain"); + helixCode += cif->getGroupField("helix", atomLine, colC); + + char s[256]; + input.getline(s, 256); + atomLine.assign(s); + output << "atomLine: " << atomLine << endl; + } + } + // read sheet entry else if (atomLine.find(cif->getTag("sheet")) != string::npos) { cif->parseGroup("sheet range", atomLine); - int colS = cif->getGroupColumnNumber("sheet range", "sheet start"); - int colE = cif->getGroupColumnNumber("sheet range", "sheet start"); - - start = stoiDEF(cif->getGroupField("sheet range", atomLine, colS)); - end = stoiDEF(cif->getGroupField("sheet range", atomLine, colE)); - - sheetData.push_back(pair(start, end)); - int colC = cif->getGroupColumnNumber("sheet range", "sheet chain"); - sheetCode += cif->getGroupField("sheet range", atomLine, colC); - }// Parse one line of the "ATOM" and "HETATM" fields + + while (atomLine != "# ") { + int colS = cif->getGroupColumnNumber("sheet range", "sheet start"); + int colE = cif->getGroupColumnNumber("sheet range", "sheet start"); + start = stoiDEF(cif->getGroupField("sheet range", atomLine, colS)); + end = stoiDEF(cif->getGroupField("sheet range", atomLine, colE)); + sheetData.push_back(pair(start, end)); + + int colC = cif->getGroupColumnNumber("sheet range", "sheet chain"); + sheetCode += 
cif->getGroupField("sheet range", atomLine, colC); + + char s[256]; + input.getline(s, 256); + atomLine.assign(s); + output << "atomLine: " << atomLine << endl; + } + } + // Parse one line of the "ATOM" and "HETATM" fields else if (atomLine.substr(0, 6) == "ATOM " || atomLine.substr(0, 6) == "HETATM") { tag = atomLine.substr(0, 6); - cif->parseGroup("atom", atomLine); // Control model number int colM = cif->getGroupColumnNumber("atom", "model"); @@ -506,6 +534,7 @@ void CifLoader::loadProtein(Protein& prot) { } // end chain check } atomLine = readLine(input); + output << "atomLine: " << atomLine << endl; } while (input); /* diff --git a/Biopool/Sources/CifLoader.h b/Biopool/Sources/CifLoader.h index f5888d4..fc68773 100644 --- a/Biopool/Sources/CifLoader.h +++ b/Biopool/Sources/CifLoader.h @@ -34,7 +34,8 @@ namespace Victor { // CONSTRUCTORS/DESTRUCTOR: /** * Constructor. - * @param _input = the CIF file object + * @param _input = CIF file object + * @param _output = log file * @param _permissive = if true, allows loading residues with missing atoms * @param _noHAtoms = if true, doesn't load Hydrogens * @param _noHetAtoms = if true, doesn't load het atoms @@ -47,7 +48,7 @@ namespace Victor { * @param _onlyMetal = if true, load only metals as ligands * @param _noNucleotideChains = if true, doesn't load DNA/RNA chains */ - CifLoader(istream& _input = cin, bool _permissive = false, + CifLoader(istream& _input = cin, ostream& output = cout, bool _permissive = false, bool _noHAtoms = false, bool _noHetAtoms = false, bool _noSecondary = false, bool _noConnection = false, bool _noWater = true, bool _verb = true, bool _allChains = false, @@ -102,6 +103,7 @@ namespace Victor { // ATTRIBUTES private: istream& input; // input stream + ostream& output; bool permissive; // bool valid; // bool noHAtoms; // diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index dfe89b5..77b358f 100644 --- a/Biopool/Sources/CifStructure.cc +++ 
b/Biopool/Sources/CifStructure.cc @@ -16,31 +16,32 @@ using namespace Victor; using namespace Victor::Biopool; using namespace std; -CifStructure::CifStructure(istream& input) : input(input) { +CifStructure::CifStructure(istream& input, ostream& output) : +input(input), output(output) { header = "_struct_keywords.pdbx_keywords"; - model = "pdbx_PDB_model_num"; + model = "pdbx_PDB_model_num "; helix = "_struct_conf."; - helixStart = "beg_auth_seq_id"; - helixEnd = "end_auth_seq_id"; - helixChainId = "beg_auth_asym_id"; + helixStart = "beg_auth_seq_id "; + helixEnd = "end_auth_seq_id "; + helixChainId = "beg_auth_asym_id "; atom = "_atom_site."; - residueNum = "auth_seq_id"; - atomId = "id"; - atomAltId = "label_alt_id"; - tempFactor = "B_iso_or_equiv"; - atomName = "auth_atom_id"; - residueName = "auth_comp_id"; - x = "Cartn_x"; - y = "Cartn_y"; - z = "Cartn_z"; - chain = "auth_asym_id"; + residueNum = "auth_seq_id "; + atomId = "id "; + residueIns = "pdbx_PDB_ins_code "; + tempFactor = "B_iso_or_equiv "; + atomName = "auth_atom_id "; + residueName = "auth_comp_id "; + x = "Cartn_x "; + y = "Cartn_y "; + z = "Cartn_z "; + chain = "auth_asym_id "; sheet = "_struct_sheet."; sheetOrder = "_struct_sheet_order."; sheetRange = "_struct_sheet_range."; sheetHbond = "_pdbx_struct_sheet_hbond."; - sheetStart = "beg_auth_seq_id"; - sheetEnd = "end_auth_seq_id"; - sheetChainId = "beg_auth_asym_id"; + sheetStart = "beg_auth_seq_id "; + sheetEnd = "end_auth_seq_id "; + sheetChainId = "beg_auth_asym_id "; atomGroupParsed = false; helixGroupParsed = false; @@ -88,8 +89,8 @@ string CifStructure::getTag(string name) { return residueNum; } else if (name == "atom id") { return atomId; - } else if (name == "alt id") { - return atomAltId; + } else if (name == "residue ins") { + return residueIns; } else if (name == "x") { return x; } else if (name == "y") { @@ -98,8 +99,8 @@ string CifStructure::getTag(string name) { return z; } else if (name == "bfac") { return tempFactor; - } else if 
(name == "bfac") { - return tempFactor; + } else if (name == "atom name") { + return atomName; } else if (name == "residue name") { return residueName; } else if (name == "helix") { @@ -138,13 +139,19 @@ string CifStructure::getTag(string name) { * @return field column number */ int CifStructure::getGroupColumnNumber(string name, string field) { + output << "IN getGroupColumnNumber" << endl; int col = -1; - vector group = getGroup(name); - vector::iterator it; - it = find(group.begin(), group.end(), getTag(field)); - if (it != group.end()) { - col = it - group.begin(); + vector& group = getGroup(name); + string tag = getTag(field); + output << "tag: " << tag << endl; + for (vector::iterator it = group.begin(); it != group.end(); it++) { + //output << "it: " << *it << endl; + if (*it == tag) { + col = it - group.begin(); + break; + } } + output << "OUT getGroupColumnNumber" << endl; return col; } @@ -155,7 +162,8 @@ int CifStructure::getGroupColumnNumber(string name, string field) { * @param columnNum number of column * @return field at columnNum column */ -string CifStructure::getGroupField(string name, string line, int columnNum) { +string CifStructure::getGroupField(string name, string& line, int columnNum) { + output << "IN getGroupField" << endl; istringstream iss(line); vector& group = getGroup(name); vector fields; @@ -163,15 +171,24 @@ string CifStructure::getGroupField(string name, string line, int columnNum) { for (unsigned int i = 0; i < group.size(); i++) { iss >> field; fields.push_back(field); + //output << "field: " << field << endl; + } + if (columnNum != -1) { + output << "field: " << fields[columnNum] << endl; + output << "OUT getGroupField" << endl; + return fields[columnNum]; + } else { + output << "OUT getGroupField" << endl; + return "?"; } - return fields[columnNum]; } /** * parses group of CIF fields and creates a vector with columns positions * @param name name of the group */ -void CifStructure::parseGroup(string name, string line) { +void 
CifStructure::parseGroup(string name, string& line) { + output << "IN parseGroup" << endl; bool found = false; vector& group = getGroup(name); @@ -180,9 +197,12 @@ void CifStructure::parseGroup(string name, string line) { while (input) { string groupName(getTag(name)); size_t pos = line.find(groupName); + output << "line: " << line << endl; + output << "name: " << groupName << ", pos: " << pos << endl; if (pos != string::npos) { group.push_back(line.substr(pos + groupName.size(), line.size() - groupName.size())); + //output << "field: " << group.back() << endl; found = true; } else { found = false; @@ -190,6 +210,7 @@ void CifStructure::parseGroup(string name, string line) { // exit the loop when the research of the fields is completed if (!found && group.size() > 1) { + output << name << " group" << " parsed" << endl; setParsedFlag(name); break; } @@ -197,6 +218,7 @@ void CifStructure::parseGroup(string name, string line) { line = readLine(input); } } + output << "OUT parseGroup" << endl; } /** @@ -238,4 +260,12 @@ bool CifStructure::isGroupParsed(string name) { } else if (name == "sheet range") { return sheetRangeGroupParsed; } -} \ No newline at end of file +} + +void CifStructure::printGroup(string name) { + vector& group = getGroup(name); + + for (vector::iterator it = group.begin(); it != group.end(); it++) { + output << *it << endl; + } +} diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h index 560e253..4012f81 100644 --- a/Biopool/Sources/CifStructure.h +++ b/Biopool/Sources/CifStructure.h @@ -12,7 +12,7 @@ // Includes: #include #include -#include +#include using std::string; using std::istream; @@ -31,8 +31,9 @@ namespace Victor { /** * Constructor * @param input input file stream + * @param output output file stream */ - CifStructure(istream& input); + CifStructure(istream& input, ostream& output = cout); /** * Destructor @@ -68,13 +69,13 @@ namespace Victor { * @param columnNum number of column * @return field at columnNum 
column */ - string getGroupField(string name, string line, int columnNum); + string getGroupField(string name, string& line, int columnNum); /** * Parses group of CIF fields and creates a vector with columns positions. * @param name name of the group */ - void parseGroup(string group, string line); + void parseGroup(string group, string& line); /** * Sets flag of the parsed group @@ -88,10 +89,17 @@ namespace Victor { * @return true if group is parsed, false otherwise */ bool isGroupParsed(string name); + + /** + * Prints group records names into output stream. + * @param name name of the group + */ + void printGroup(string name); private: // CIF file istream& input; + ostream& output; // CIF tags string header; @@ -103,7 +111,7 @@ namespace Victor { string atom; string residueNum; string atomId; - string atomAltId; + string residueIns; string tempFactor; string atomName; string residueName; From bdaf56c3ee6b67d363b8a057d161c1e183b89f20 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Mon, 8 Jun 2015 11:25:51 +0200 Subject: [PATCH 06/16] debug output commented, minor changes --- Biopool/Sources/CifLoader.cc | 28 ++-- Biopool/Sources/CifStructure.cc | 257 ++++++++++++++++++++++---------- 2 files changed, 192 insertions(+), 93 deletions(-) diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 2934f33..bb01ff4 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -113,7 +113,7 @@ unsigned int CifLoader::getMaxModels() { * @return all available chain IDs */ vector CifLoader::getAllChains() { - output << "IN getAllChains" << endl; + //output << "IN getAllChains" << endl; vector res; char lastChain = ' '; @@ -126,23 +126,23 @@ vector CifLoader::getAllChains() { cif->parseGroup("atom", atomLine); cif->printGroup("atom"); - output << "line: " << atomLine << endl; + //output << "line: " << atomLine << endl; int modelCol = cif->getGroupColumnNumber("atom", "model"); int chainCol = cif->getGroupColumnNumber("atom", 
"chain"); - output << "model: " << modelCol << ", chain: " << chainCol << endl; + //output << "model: " << modelCol << ", chain: " << chainCol << endl; while (input) { if (atomLine.substr(0, 4) == "ATOM") { modelNum = stoiDEF(cif->getGroupField("atom", atomLine, modelCol)); - output << "riga: " << atomLine << endl; - output << "numero modello: " << modelNum << endl; + //output << "riga: " << atomLine << endl; + //output << "numero modello: " << modelNum << endl; // only consider first model: others duplicate chain IDs if (modelNum > 1) { break; } // check for new chains containing amino acids char id = (cif->getGroupField("atom", atomLine, chainCol).c_str())[0]; - output << "id" << endl; + //output << "id" << endl; if (id != lastChain) { lastChain = id; res.push_back(id); @@ -150,7 +150,7 @@ vector CifLoader::getAllChains() { } atomLine = readLine(input); } - output << "OUT getAllChains" << endl; + //output << "OUT getAllChains" << endl; return res; } @@ -254,7 +254,7 @@ CifLoader::loadSpacer(Spacer& sp){ */ int CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { - output << "IN parseCifline" << endl; + //output << "IN parseCifline" << endl; // get atom id int atNum = stoiDEF(cif->getGroupField("atom", atomLine, cif->getGroupColumnNumber("atom", "atom id"))); @@ -345,7 +345,7 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) } } delete at; - output << "OUT parseCifline" << endl; + //output << "OUT parseCifline" << endl; return aaNum; } @@ -409,7 +409,7 @@ void CifLoader::loadProtein(Protein& prot) { string atomLine; atomLine = readLine(input); - output << "atomLine: " << atomLine << endl; + //output << "atomLine: " << atomLine << endl; int aaNum = -100000; // infinite negative int oldAaNum = -100000; @@ -448,11 +448,11 @@ void CifLoader::loadProtein(Protein& prot) { char s[256]; input.getline(s, 256); atomLine.assign(s); - output << "atomLine: " << atomLine << endl; + //output << "atomLine: " << 
atomLine << endl; } } // read sheet entry - else if (atomLine.find(cif->getTag("sheet")) != string::npos) { + else if (atomLine.find(cif->getTag("sheet range")) != string::npos) { cif->parseGroup("sheet range", atomLine); while (atomLine != "# ") { @@ -468,7 +468,7 @@ void CifLoader::loadProtein(Protein& prot) { char s[256]; input.getline(s, 256); atomLine.assign(s); - output << "atomLine: " << atomLine << endl; + //output << "atomLine: " << atomLine << endl; } } // Parse one line of the "ATOM" and "HETATM" fields @@ -534,7 +534,7 @@ void CifLoader::loadProtein(Protein& prot) { } // end chain check } atomLine = readLine(input); - output << "atomLine: " << atomLine << endl; + //output << "atomLine: " << atomLine << endl; } while (input); /* diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index 77b358f..bc76361 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -16,7 +16,7 @@ using namespace Victor; using namespace Victor::Biopool; using namespace std; -CifStructure::CifStructure(istream& input, ostream& output) : +CifStructure::CifStructure(istream& input, ostream& output) : input(input), output(output) { header = "_struct_keywords.pdbx_keywords"; model = "pdbx_PDB_model_num "; @@ -42,7 +42,7 @@ input(input), output(output) { sheetStart = "beg_auth_seq_id "; sheetEnd = "end_auth_seq_id "; sheetChainId = "beg_auth_asym_id "; - + atomGroupParsed = false; helixGroupParsed = false; sheetGroupParsed = false; @@ -61,17 +61,17 @@ CifStructure::~CifStructure() { */ vector& CifStructure::getGroup(string name) { if (name == "atom") { - return atomGroup; + return atomGroup; } else if (name == "helix") { - return helixGroup; + return helixGroup; } else if (name == "sheet") { - return sheetGroup; + return sheetGroup; } else if (name == "sheet order") { - return sheetOrderGroup; + return sheetOrderGroup; } else if (name == "sheet range") { - return sheetRangeGroup; + return sheetRangeGroup; } else if (name == 
"sheet hbond") { - return sheetHbondGroup; + return sheetHbondGroup; } } @@ -82,53 +82,53 @@ vector& CifStructure::getGroup(string name) { */ string CifStructure::getTag(string name) { if (name == "header") { - return header; + return header; } else if (name == "atom") { - return atom; + return atom; } else if (name == "residue num") { - return residueNum; + return residueNum; } else if (name == "atom id") { - return atomId; + return atomId; } else if (name == "residue ins") { - return residueIns; + return residueIns; } else if (name == "x") { - return x; + return x; } else if (name == "y") { - return y; + return y; } else if (name == "z") { - return z; + return z; } else if (name == "bfac") { - return tempFactor; + return tempFactor; } else if (name == "atom name") { - return atomName; + return atomName; } else if (name == "residue name") { - return residueName; + return residueName; } else if (name == "helix") { - return helix; + return helix; } else if (name == "helix start") { - return helixStart; + return helixStart; } else if (name == "helix end") { - return helixEnd; + return helixEnd; } else if (name == "helix chain") { - return helixChainId; + return helixChainId; } else if (name == "model") { - return model; + return model; } else if (name == "chain") { - return chain; + return chain; } else if (name == "sheet") { - return sheet; + return sheet; } else if (name == "sheet order") { - return sheetOrder; + return sheetOrder; } else if (name == "sheet range") { - return sheetRange; + return sheetRange; } else if (name == "sheet hbond") { - return sheetHbond; + return sheetHbond; } else if (name == "sheet start") { - return sheetStart; + return sheetStart; } else if (name == "sheet end") { - return sheetEnd; + return sheetEnd; } else if (name == "sheet chain") { - return sheetChainId; + return sheetChainId; } } @@ -139,11 +139,11 @@ string CifStructure::getTag(string name) { * @return field column number */ int CifStructure::getGroupColumnNumber(string name, 
string field) { - output << "IN getGroupColumnNumber" << endl; + //output << "IN getGroupColumnNumber" << endl; int col = -1; vector& group = getGroup(name); string tag = getTag(field); - output << "tag: " << tag << endl; + //output << "tag: " << tag << endl; for (vector::iterator it = group.begin(); it != group.end(); it++) { //output << "it: " << *it << endl; if (*it == tag) { @@ -151,7 +151,7 @@ int CifStructure::getGroupColumnNumber(string name, string field) { break; } } - output << "OUT getGroupColumnNumber" << endl; + //output << "OUT getGroupColumnNumber" << endl; return col; } @@ -163,22 +163,22 @@ int CifStructure::getGroupColumnNumber(string name, string field) { * @return field at columnNum column */ string CifStructure::getGroupField(string name, string& line, int columnNum) { - output << "IN getGroupField" << endl; + //output << "IN getGroupField" << endl; istringstream iss(line); vector& group = getGroup(name); vector fields; string field; for (unsigned int i = 0; i < group.size(); i++) { - iss >> field; - fields.push_back(field); + iss >> field; + fields.push_back(field); //output << "field: " << field << endl; } if (columnNum != -1) { - output << "field: " << fields[columnNum] << endl; - output << "OUT getGroupField" << endl; + //output << "field: " << fields[columnNum] << endl; + //output << "OUT getGroupField" << endl; return fields[columnNum]; } else { - output << "OUT getGroupField" << endl; + //output << "OUT getGroupField" << endl; return "?"; } } @@ -188,37 +188,37 @@ string CifStructure::getGroupField(string name, string& line, int columnNum) { * @param name name of the group */ void CifStructure::parseGroup(string name, string& line) { - output << "IN parseGroup" << endl; + //output << "IN parseGroup" << endl; bool found = false; vector& group = getGroup(name); // exit the function if the group name is already parsed if (!isGroupParsed(name)) { - while (input) { - string groupName(getTag(name)); - size_t pos = line.find(groupName); - 
output << "line: " << line << endl; - output << "name: " << groupName << ", pos: " << pos << endl; - if (pos != string::npos) { - group.push_back(line.substr(pos + groupName.size(), + while (input) { + string groupName(getTag(name)); + size_t pos = line.find(groupName); + //output << "line: " << line << endl; + //output << "name: " << groupName << ", pos: " << pos << endl; + if (pos != string::npos) { + group.push_back(line.substr(pos + groupName.size(), line.size() - groupName.size())); //output << "field: " << group.back() << endl; - found = true; - } else { - found = false; - } + found = true; + } else { + found = false; + } - // exit the loop when the research of the fields is completed - if (!found && group.size() > 1) { - output << name << " group" << " parsed" << endl; - setParsedFlag(name); - break; - } + // exit the loop when the research of the fields is completed + if (!found && group.size() > 1) { + //output << name << " group" << " parsed" << endl; + setParsedFlag(name); + break; + } - line = readLine(input); - } + line = readLine(input); + } } - output << "OUT parseGroup" << endl; + //output << "OUT parseGroup" << endl; } /** @@ -227,17 +227,17 @@ void CifStructure::parseGroup(string name, string& line) { */ void CifStructure::setParsedFlag(string name) { if (name == "atom") { - atomGroupParsed = true; + atomGroupParsed = true; } else if (name == "helix") { - helixGroupParsed = true; + helixGroupParsed = true; } else if (name == "sheet") { - sheetGroupParsed = true; - } else if (name == "sheet hbound") { - sheetHboundgroupParsed = true; + sheetGroupParsed = true; + } else if (name == "sheet hbond") { + sheetHboundgroupParsed = true; } else if (name == "sheet order") { - sheetOrderGroupParsed = true; + sheetOrderGroupParsed = true; } else if (name == "sheet range") { - sheetRangeGroupParsed = true; + sheetRangeGroupParsed = true; } } @@ -248,24 +248,123 @@ void CifStructure::setParsedFlag(string name) { */ bool CifStructure::isGroupParsed(string name) 
{ if (name == "atom") { - return atomGroupParsed; + return atomGroupParsed; } else if (name == "helix") { - return helixGroupParsed; + return helixGroupParsed; } else if (name == "sheet") { - return sheetGroupParsed; - } else if (name == "sheet hbound") { - return sheetHboundgroupParsed; + return sheetGroupParsed; + } else if (name == "sheet hbond") { + return sheetHboundgroupParsed; } else if (name == "sheet order") { - return sheetOrderGroupParsed; + return sheetOrderGroupParsed; } else if (name == "sheet range") { - return sheetRangeGroupParsed; + return sheetRangeGroupParsed; } } void CifStructure::printGroup(string name) { vector& group = getGroup(name); - - for (vector::iterator it = group.begin(); it != group.end(); it++) { - output << *it << endl; + + output << "loop_" << endl; + + if (isGroupParsed(group)) { + for (vector::iterator it = group.begin(); it != group.end(); it++) { + output << *it << endl; + } + } else { + if (name == "atom") { + output << "_atom_site.group_PDB " << endl << + "_atom_site.id " << endl << + "_atom_site.type_symbol " << endl << + "_atom_site.label_atom_id " << endl << + "_atom_site.label_alt_id " << endl << + "_atom_site.label_comp_id " << endl << + "_atom_site.label_asym_id " << endl << + "_atom_site.label_entity_id " << endl << + "_atom_site.label_seq_id " << endl << + "_atom_site.pdbx_PDB_ins_code " << endl << + "_atom_site.Cartn_x " << endl << + "_atom_site.Cartn_y " << endl << + "_atom_site.Cartn_z " << endl << + "_atom_site.occupancy " << endl << + "_atom_site.B_iso_or_equiv " << endl << + "_atom_site.Cartn_x_esd " << endl << + "_atom_site.Cartn_y_esd " << endl << + "_atom_site.Cartn_z_esd " << endl << + "_atom_site.occupancy_esd " << endl << + "_atom_site.B_iso_or_equiv_esd " << endl << + "_atom_site.pdbx_formal_charge " << endl << + "_atom_site.auth_seq_id " << endl << + "_atom_site.auth_comp_id " << endl << + "_atom_site.auth_asym_id " << endl << + "_atom_site.auth_atom_id " << endl << + "_atom_site.pdbx_PDB_model_num " 
<< endl; + } else if (name == "helix") { + output << "_struct_conf.conf_type_id " << endl << + "_struct_conf.id " << endl << + "_struct_conf.pdbx_PDB_helix_id " << endl << + "_struct_conf.beg_label_comp_id " << endl << + "_struct_conf.beg_label_asym_id " << endl << + "_struct_conf.beg_label_seq_id " << endl << + "_struct_conf.pdbx_beg_PDB_ins_code " << endl << + "_struct_conf.end_label_comp_id " << endl << + "_struct_conf.end_label_asym_id " << endl << + "_struct_conf.end_label_seq_id " << endl << + "_struct_conf.pdbx_end_PDB_ins_code " << endl << + "_struct_conf.beg_auth_comp_id " << endl << + "_struct_conf.beg_auth_asym_id " << endl << + "_struct_conf.beg_auth_seq_id " << endl << + "_struct_conf.end_auth_comp_id " << endl << + "_struct_conf.end_auth_asym_id " << endl << + "_struct_conf.end_auth_seq_id " << endl << + "_struct_conf.pdbx_PDB_helix_class " << endl << + "_struct_conf.details " << endl << + "_struct_conf.pdbx_PDB_helix_length " << endl; + } else if (name == "sheet") { + output << "_struct_sheet.id " << endl << + "_struct_sheet.type " << endl << + "_struct_sheet.number_strands " << endl << + "_struct_sheet.details " << endl; + } else if (name == "sheet range") { + output << "_struct_sheet_range.sheet_id " << endl << + "_struct_sheet_range.id " << endl << + "_struct_sheet_range.beg_label_comp_id " << endl << + "_struct_sheet_range.beg_label_asym_id " << endl << + "_struct_sheet_range.beg_label_seq_id " << endl << + "_struct_sheet_range.pdbx_beg_PDB_ins_code " << endl << + "_struct_sheet_range.end_label_comp_id " << endl << + "_struct_sheet_range.end_label_asym_id " << endl << + "_struct_sheet_range.end_label_seq_id " << endl << + "_struct_sheet_range.pdbx_end_PDB_ins_code " << endl << + "_struct_sheet_range.symmetry " << endl << + "_struct_sheet_range.beg_auth_comp_id " << endl << + "_struct_sheet_range.beg_auth_asym_id " << endl << + "_struct_sheet_range.beg_auth_seq_id " << endl << + "_struct_sheet_range.end_auth_comp_id " << endl << + 
"_struct_sheet_range.end_auth_asym_id " << endl << + "_struct_sheet_range.end_auth_seq_id " << endl; + } else if (name = "sheet hbond") { + output << "_pdbx_struct_sheet_hbond.sheet_id " << endl << + "_pdbx_struct_sheet_hbond.range_id_1 " << endl << + "_pdbx_struct_sheet_hbond.range_id_2 " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_seq_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_PDB_ins_code " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_seq_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_seq_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_PDB_ins_code " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_seq_id " << endl; + } } } From d4c72cac27f4a482874c15bf52bb2df91539a8bc Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Mon, 8 Jun 2015 11:39:11 +0200 Subject: [PATCH 07/16] debug --- Biopool/Sources/CifStructure.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index bc76361..53b4d35 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -267,7 +267,7 @@ void CifStructure::printGroup(string name) { output << "loop_" << endl; - if (isGroupParsed(group)) { + if 
(isGroupParsed(name)) { for (vector::iterator it = group.begin(); it != group.end(); it++) { output << *it << endl; } From be599c420d0c76b43a10f26cdca74d51e77b72dd Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Wed, 10 Jun 2015 00:31:25 +0200 Subject: [PATCH 08/16] tests implementation started --- Biopool/APPS/Makefile | 8 +-- Biopool/Sources/CifLoader.cc | 87 ++++++++++++---------- Biopool/Sources/CifStructure.cc | 19 +---- Biopool/Tests/Makefile | 15 ++-- Biopool/Tests/TestCif.cc | 63 ++++++++++++++++ Biopool/Tests/TestCifLoader.h | 120 +++++++++++++++++++++++++++++++ Biopool/Tests/data/modelTest.cif | 43 +++++++++++ 7 files changed, 291 insertions(+), 64 deletions(-) create mode 100644 Biopool/Tests/TestCif.cc create mode 100644 Biopool/Tests/TestCifLoader.h create mode 100644 Biopool/Tests/data/modelTest.cif diff --git a/Biopool/APPS/Makefile b/Biopool/APPS/Makefile index cb26b82..dd121ff 100644 --- a/Biopool/APPS/Makefile +++ b/Biopool/APPS/Makefile @@ -27,16 +27,16 @@ INC_PATH = -I. -I../../tools/ -I../../Biopool/Sources -I../../Energy/Sources -I. 
# SOURCES = PdbCorrector.cc PdbSecondary.cc PdbEditor.cc Pdb2Seq.cc pdb2secondary.cc pdbshifter.cc \ - pdbMover.cc + pdbMover.cc CifEditor.cc OBJECTS = PdbCorrector.o PdbSecondary.o PdbEditor.o Pdb2Seq.o pdb2secondary.o pdbshifter.o \ - pdbMover.o + pdbMover.o CifEditor.o TARGETS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ - pdbMover + pdbMover CifEditor EXECS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ - pdbMover + pdbMover CifEditor LIBRARY = APPSlibBiopool.a diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index bb01ff4..8bd4083 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -125,7 +125,6 @@ vector CifLoader::getAllChains() { unsigned int modelNum = 0; cif->parseGroup("atom", atomLine); - cif->printGroup("atom"); //output << "line: " << atomLine << endl; int modelCol = cif->getGroupColumnNumber("atom", "model"); int chainCol = cif->getGroupColumnNumber("atom", "chain"); @@ -193,13 +192,15 @@ bool CifLoader::setBonds(Spacer& sp) { * @return bool */ bool CifLoader::inSideChain(const AminoAcid& aa, const Atom& at) { - if (isBackboneAtom(at.getCode())) + if (isBackboneAtom(at.getCode())) { return false; + } if ((at.getType() == "H") || (at.getType() == "HN") || ((at.getType() == "HA") && (!aa.isMember(HA))) || (at.getType() == "1HA") || (at.getType() == "1H") - || (at.getType() == "2H") || (at.getType() == "3H")) + || (at.getType() == "2H") || (at.getType() == "3H")) { return false; // special case for GLY H (code HA) + } return true; // rest of aminoacid is its sidechain } @@ -218,21 +219,27 @@ void CifLoader::assignSecondary(Spacer& sp) { if (helixCode[i] == chain) { for (int j = helixData[i].first; j <= const_cast (helixData[i].second); j++) { // important: keep ifs separated to avoid errors - if (j < sp.maxPdbNumber()) - if (!sp.isGap(sp.getIndexFromPdbNumber(j))) + if (j < sp.maxPdbNumber()) { + if (!sp.isGap(sp.getIndexFromPdbNumber(j))) { 
sp.getAmino(sp.getIndexFromPdbNumber(j)).setState(HELIX); + } + } } } } - for (unsigned int i = 0; i < sheetData.size(); i++) - if (sheetCode[i] == chain) + for (unsigned int i = 0; i < sheetData.size(); i++) { + if (sheetCode[i] == chain) { for (int j = sheetData[i].first; j <= const_cast (sheetData[i].second); j++) { // important: keep ifs separated to avoid errors - if (j < sp.maxPdbNumber()) - if (!sp.isGap(sp.getIndexFromPdbNumber(j))) + if (j < sp.maxPdbNumber()) { + if (!sp.isGap(sp.getIndexFromPdbNumber(j))) { sp.getAmino(sp.getIndexFromPdbNumber(j)).setState(STRAND); + } + } } + } + } } /* @@ -261,8 +268,9 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) // get residue number int aaNum = stoiDEF(cif->getGroupField("atom", atomLine, cif->getGroupColumnNumber("atom", "residue num"))); + // get code for insertion of residues char altAaID = cif->getGroupField("atom", atomLine, - cif->getGroupColumnNumber("atom", "residue ins")).c_str()[0]; // "Code for insertion of residues" + cif->getGroupColumnNumber("atom", "residue ins")).c_str()[0]; // get x, y, z coordinates vgVector3 coord; @@ -278,7 +286,7 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) int colBfac = cif->getGroupColumnNumber("atom", "bfac"); if (colBfac != -1) { string sbfac = cif->getGroupField("atom", atomLine, colBfac); - if (sbfac != "?" || sbfac != ".") { + if (sbfac != "?" && sbfac != ".") { bfac = stodDEF(sbfac); } } @@ -322,7 +330,7 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) if (aaType != "ACE") { // DEBUG: it would be nice to load also alternative atoms // skip alternative atoms, - if (altAaID != '?') { + if (altAaID != '?' 
&& altAaID != '.') { if (verbose) cout << "Warning: Skipping extraneous amino acid entry " << aaNum << " " << atNum << " " << altAaID << ".\n"; @@ -388,7 +396,8 @@ void CifLoader::loadProtein(Protein& prot) { if (chain == ' ') { loadChain = true; chain = '#'; - }// Load only selected chain + } + // Load only selected chain else if (chainList[i] == chain) { loadChain = true; chain = '#'; @@ -435,35 +444,35 @@ void CifLoader::loadProtein(Protein& prot) { else if (atomLine.find(cif->getTag("helix")) != string::npos) { cif->parseGroup("helix", atomLine); - while (atomLine != "# ") { - int colS = cif->getGroupColumnNumber("helix", "helix start"); - int colE = cif->getGroupColumnNumber("helix", "helix end"); - start = stoiDEF(cif->getGroupField("helix", atomLine, colS)); - end = stoiDEF(cif->getGroupField("helix", atomLine, colE)); + while (atomLine != "# " && input) { + start = stoiDEF(cif->getGroupField("helix", atomLine, + cif->getGroupColumnNumber("helix", "helix start"))); + end = stoiDEF(cif->getGroupField("helix", atomLine, + cif->getGroupColumnNumber("helix", "helix end"))); helixData.push_back(pair(start, end)); - int colC = cif->getGroupColumnNumber("helix", "helix chain"); - helixCode += cif->getGroupField("helix", atomLine, colC); + helixCode += cif->getGroupField("helix", atomLine, + cif->getGroupColumnNumber("helix", "helix chain")); char s[256]; input.getline(s, 256); atomLine.assign(s); //output << "atomLine: " << atomLine << endl; - } + } } // read sheet entry else if (atomLine.find(cif->getTag("sheet range")) != string::npos) { cif->parseGroup("sheet range", atomLine); - while (atomLine != "# ") { - int colS = cif->getGroupColumnNumber("sheet range", "sheet start"); - int colE = cif->getGroupColumnNumber("sheet range", "sheet start"); - start = stoiDEF(cif->getGroupField("sheet range", atomLine, colS)); - end = stoiDEF(cif->getGroupField("sheet range", atomLine, colE)); + while (atomLine != "# " && input) { + start = stoiDEF(cif->getGroupField("sheet 
range", atomLine, + cif->getGroupColumnNumber("sheet range", "sheet start"))); + end = stoiDEF(cif->getGroupField("sheet range", atomLine, + cif->getGroupColumnNumber("sheet range", "sheet end"))); sheetData.push_back(pair(start, end)); - int colC = cif->getGroupColumnNumber("sheet range", "sheet chain"); - sheetCode += cif->getGroupField("sheet range", atomLine, colC); + sheetCode += cif->getGroupField("sheet range", atomLine, + cif->getGroupColumnNumber("sheet range", "sheet chain")); char s[256]; input.getline(s, 256); @@ -477,8 +486,8 @@ void CifLoader::loadProtein(Protein& prot) { tag = atomLine.substr(0, 6); // Control model number - int colM = cif->getGroupColumnNumber("atom", "model"); - readingModel = stouiDEF(cif->getGroupField("atom", atomLine, colM)); + readingModel = stouiDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "model"))); if (readingModel > model) break; // Get only the first model if not specified @@ -486,13 +495,13 @@ void CifLoader::loadProtein(Protein& prot) { model = readingModel; } - int colC = cif->getGroupColumnNumber("atom", "chain"); - char chainID = cif->getGroupField("atom", atomLine, colC).c_str()[0]; + char chainID = cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "chain")).c_str()[0]; if (chainList[i] == chainID) { if ((model == 999) || (model == readingModel)) { - int colAa = cif->getGroupColumnNumber("atom", "residue num"); - aaNum = stoiDEF(cif->getGroupField("atom", atomLine, colAa)); + aaNum = stoiDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "residue num"))); // Insert the Ligand object into LigandSet if (aaNum != oldAaNum) { @@ -595,8 +604,9 @@ void CifLoader::loadProtein(Protein& prot) { } } } - if (verbose) + if (verbose) { cout << "Removed incomplete residues\n"; + } // connect aminoacids if (!noConnection) { if (!setBonds(*sp)) { // connect atoms... 
@@ -616,9 +626,9 @@ void CifLoader::loadProtein(Protein& prot) { vgVector3 tmp(0.0, 0.0, 0.0); sp->getAmino(0)[N].setTrans(tmp); sp->getAmino(0).adjustLeadingN(); - if (verbose) + if (verbose) { cout << "Fixed leading N atom\n"; - + } // Add H atoms if (!noHAtoms) { for (unsigned int j = 0; j < sp->sizeAmino(); j++) { @@ -656,7 +666,8 @@ void CifLoader::loadProtein(Protein& prot) { cout << "Loaded AminoAcids: " << sp->size() << "\n"; } if (!(noHetAtoms)) { - if (ls->sizeLigand() > 0) { //insertion only if LigandSet is not empty + //insertion only if LigandSet is not empty + if (ls->sizeLigand() > 0) { pol->insertComponent(ls); if (verbose) { cout << "Loaded Ligands: " << ls->size() << "\n"; diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index 53b4d35..5c19939 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -139,19 +139,15 @@ string CifStructure::getTag(string name) { * @return field column number */ int CifStructure::getGroupColumnNumber(string name, string field) { - //output << "IN getGroupColumnNumber" << endl; int col = -1; vector& group = getGroup(name); string tag = getTag(field); - //output << "tag: " << tag << endl; for (vector::iterator it = group.begin(); it != group.end(); it++) { - //output << "it: " << *it << endl; if (*it == tag) { col = it - group.begin(); break; } } - //output << "OUT getGroupColumnNumber" << endl; return col; } @@ -163,7 +159,6 @@ int CifStructure::getGroupColumnNumber(string name, string field) { * @return field at columnNum column */ string CifStructure::getGroupField(string name, string& line, int columnNum) { - //output << "IN getGroupField" << endl; istringstream iss(line); vector& group = getGroup(name); vector fields; @@ -171,14 +166,10 @@ string CifStructure::getGroupField(string name, string& line, int columnNum) { for (unsigned int i = 0; i < group.size(); i++) { iss >> field; fields.push_back(field); - //output << "field: " << field << endl; } if 
(columnNum != -1) { - //output << "field: " << fields[columnNum] << endl; - //output << "OUT getGroupField" << endl; return fields[columnNum]; } else { - //output << "OUT getGroupField" << endl; return "?"; } } @@ -188,7 +179,6 @@ string CifStructure::getGroupField(string name, string& line, int columnNum) { * @param name name of the group */ void CifStructure::parseGroup(string name, string& line) { - //output << "IN parseGroup" << endl; bool found = false; vector& group = getGroup(name); @@ -197,12 +187,9 @@ void CifStructure::parseGroup(string name, string& line) { while (input) { string groupName(getTag(name)); size_t pos = line.find(groupName); - //output << "line: " << line << endl; - //output << "name: " << groupName << ", pos: " << pos << endl; if (pos != string::npos) { group.push_back(line.substr(pos + groupName.size(), line.size() - groupName.size())); - //output << "field: " << group.back() << endl; found = true; } else { found = false; @@ -210,15 +197,13 @@ void CifStructure::parseGroup(string name, string& line) { // exit the loop when the research of the fields is completed if (!found && group.size() > 1) { - //output << name << " group" << " parsed" << endl; setParsedFlag(name); break; } - + line = readLine(input); } } - //output << "OUT parseGroup" << endl; } /** @@ -343,7 +328,7 @@ void CifStructure::printGroup(string name) { "_struct_sheet_range.end_auth_comp_id " << endl << "_struct_sheet_range.end_auth_asym_id " << endl << "_struct_sheet_range.end_auth_seq_id " << endl; - } else if (name = "sheet hbond") { + } else if (name == "sheet hbond") { output << "_pdbx_struct_sheet_hbond.sheet_id " << endl << "_pdbx_struct_sheet_hbond.range_id_1 " << endl << "_pdbx_struct_sheet_hbond.range_id_2 " << endl << diff --git a/Biopool/Tests/Makefile b/Biopool/Tests/Makefile index 6f35654..2a56f49 100644 --- a/Biopool/Tests/Makefile +++ b/Biopool/Tests/Makefile @@ -26,15 +26,20 @@ INC_PATH = -I. 
# Objects and headers # -SOURCES = TestBiopool.cc TestAtom.h TestAminoAcid.h TestGroup.h TestSpacer.h +#SOURCES = TestBiopool.cc TestAtom.h TestAminoAcid.h TestGroup.h TestSpacer.h +SOURCES = TestCif.cc TestCifLoader.h -OBJECTS = $(SOURCES:.cpp=.o) +#OBJECTS = $(SOURCES:.cpp=.o) +OBJECTS = $(SOURCES:.cc=.o) -TARGETS = TestBiopool +#TARGETS = TestBiopool +TARGETS = TestCif -EXECS = TestBiopool +#EXECS = TestBiopool +EXECS = TestCif -LIBRARY = TESTlibBiopool.a +#LIBRARY = TESTlibBiopool.a +LIBRARY = TESTlibCif.a # # Install rule diff --git a/Biopool/Tests/TestCif.cc b/Biopool/Tests/TestCif.cc new file mode 100644 index 0000000..2770ba3 --- /dev/null +++ b/Biopool/Tests/TestCif.cc @@ -0,0 +1,63 @@ +/* + * File: TestCif.cc + * Author: marco + * + * Created on 9-giu-2015, 10.45.40 + */ + +/* +#include +#include +#include +#include +#include +#include +*/ + +#include +#include + + +#include + +using std::cout; +using std::endl; + +int main() { + + CppUnit::TextUi::TestRunner runner; + + cout << "Creating Test Suites:" << endl; + + runner.addTest(TestCifLoader::suite()); + + cout << "Running the unit tests." << endl; + + runner.run(); + + return 0; + + /* + // Create the event manager and test controller + CPPUNIT_NS::TestResult controller; + + // Add a listener that colllects test result + CPPUNIT_NS::TestResultCollector result; + controller.addListener(&result); + + // Add a listener that print dots as test run. + CPPUNIT_NS::BriefTestProgressListener progress; + controller.addListener(&progress); + + // Add the top suite to the test runner + CPPUNIT_NS::TestRunner runner; + runner.addTest(CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest()); + runner.run(controller); + + // Print test in a compiler compatible format. + CPPUNIT_NS::CompilerOutputter outputter(&result, CPPUNIT_NS::stdCOut()); + outputter.write(); + + return result.wasSuccessful() ? 
0 : 1; + */ +} diff --git a/Biopool/Tests/TestCifLoader.h b/Biopool/Tests/TestCifLoader.h new file mode 100644 index 0000000..c844053 --- /dev/null +++ b/Biopool/Tests/TestCifLoader.h @@ -0,0 +1,120 @@ +/* + * File: TestCifLoader.h + * Author: marco + * + * Created on 9-giu-2015, 10.45.39 + */ + +#ifndef TESTCIFLOADER_H +#define TESTCIFLOADER_H + +#include +#include + +#include +#include +#include +#include +#include + +#include + +using namespace std; +using namespace Victor::Biopool; +using namespace CppUnit; + +class TestCifLoader : public TestFixture { +public: + + TestCifLoader() { + } + + virtual ~TestCifLoader() { + } + + static Test* suite() { + TestSuite* suiteOfTests = new TestSuite("TestCifLoader"); + + suiteOfTests->addTest(new TestCaller("Test 1 - Get max numebers of models", + &TestCifLoader::testGetMaxModels)); + + suiteOfTests->addTest(new TestCaller("Test 2 - Get all chain ids", + &TestCifLoader::testGetAllChains)); + + return suiteOfTests; + } + + void setUp() { + // inizialize CifLoader + string path = getenv("VICTOR_ROOT"); + string input = path + "Biopool/Tests/data/modelTest.cif"; + inFile = new ifstream(input.c_str()); + cl = new CifLoader(*inFile); + + // initialize test parameters + maxModel = 5; + chainIds.push_back('A'); + chainIds.push_back('B'); + chainIds.push_back('C'); + } + + void tearDown() { + delete cl; + delete inFile; + } + +private: + + void testGetMaxModels() { + int max = cl->getMaxModels(); + CPPUNIT_ASSERT_EQUAL(max, maxModel); + } + + void testGetAllChains() { + vector testChain = cl->getAllChains(); + CPPUNIT_ASSERT(chainIds[0] == testChain[0] && + chainIds[1] == testChain[1] && + chainIds[2] == testChain[2]); + } + + int maxModel; + vector chainIds; + + ifstream* inFile; + CifLoader* cl; +}; + + +/* +#include +#include "CifLoader.h" +#include + +using namespace::Victor::Biopool; +using namespace::CppUnit; + +class TestCifLoader : public CPPUNIT_NS::TestFixture { + CPPUNIT_TEST_SUITE(TestCifLoader); + + 
CPPUNIT_TEST(testGetMaxModels); + CPPUNIT_TEST(testGetAllChains); + + CPPUNIT_TEST_SUITE_END(); + +public: + TestCifLoader(); + virtual ~TestCifLoader(); + void setUp(); + void tearDown(); + +private: + void testGetMaxModels(); + void testGetAllChains(); + + CifLoader* cl; + int maxModel; + vector chainIds; +}; + */ +#endif /* TESTCIFLOADER_H */ + diff --git a/Biopool/Tests/data/modelTest.cif b/Biopool/Tests/data/modelTest.cif new file mode 100644 index 0000000..6af1b88 --- /dev/null +++ b/Biopool/Tests/data/modelTest.cif @@ -0,0 +1,43 @@ +loop_ +_atom_site.group_PDB +_atom_site.id +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_asym_id +_atom_site.label_entity_id +_atom_site.label_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.B_iso_or_equiv +_atom_site.Cartn_x_esd +_atom_site.Cartn_y_esd +_atom_site.Cartn_z_esd +_atom_site.occupancy_esd +_atom_site.B_iso_or_equiv_esd +_atom_site.pdbx_formal_charge +_atom_site.auth_seq_id +_atom_site.auth_comp_id +_atom_site.auth_asym_id +_atom_site.auth_atom_id +_atom_site.pdbx_PDB_model_num +ATOM 1 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? A ? 1 +ATOM 2 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? B ? 1 +ATOM 3 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? C ? 1 +ATOM 4 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? A ? 2 +ATOM 5 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? B ? 2 +ATOM 6 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? C ? 2 +ATOM 7 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? A ? 3 +ATOM 8 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? B ? 3 +ATOM 9 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? C ? 3 +ATOM 10 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? A ? 4 +ATOM 11 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? B ? 4 +ATOM 12 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? C ? 4 +ATOM 13 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? A ? 5 +ATOM 14 ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? B ? 5 +ATOM 15 ? ? 
? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? C ? 5 +# \ No newline at end of file From 08ff9d61d638415f08ed4fdd90c8c61f7686c318 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Wed, 10 Jun 2015 19:12:07 +0200 Subject: [PATCH 09/16] added tests, parser completed --- Biopool/Sources/CifLoader.cc | 14 ++----------- Biopool/Sources/CifStructure.cc | 28 ++++++++++++++++++++------ Biopool/Sources/CifStructure.h | 17 ++++++++++++++++ Biopool/Tests/Makefile | 2 +- Biopool/Tests/TestCif.cc | 35 ++------------------------------- Biopool/Tests/TestCifLoader.h | 35 +-------------------------------- 6 files changed, 45 insertions(+), 86 deletions(-) diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 8bd4083..5d733e0 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -113,7 +113,6 @@ unsigned int CifLoader::getMaxModels() { * @return all available chain IDs */ vector CifLoader::getAllChains() { - //output << "IN getAllChains" << endl; vector res; char lastChain = ' '; @@ -125,23 +124,18 @@ vector CifLoader::getAllChains() { unsigned int modelNum = 0; cif->parseGroup("atom", atomLine); - //output << "line: " << atomLine << endl; int modelCol = cif->getGroupColumnNumber("atom", "model"); int chainCol = cif->getGroupColumnNumber("atom", "chain"); - //output << "model: " << modelCol << ", chain: " << chainCol << endl; while (input) { if (atomLine.substr(0, 4) == "ATOM") { - modelNum = stoiDEF(cif->getGroupField("atom", atomLine, modelCol)); - //output << "riga: " << atomLine << endl; - //output << "numero modello: " << modelNum << endl; + modelNum = stoiDEF(cif->getGroupField("atom", atomLine, modelCol));; // only consider first model: others duplicate chain IDs if (modelNum > 1) { break; } // check for new chains containing amino acids char id = (cif->getGroupField("atom", atomLine, chainCol).c_str())[0]; - //output << "id" << endl; if (id != lastChain) { lastChain = id; res.push_back(id); @@ -149,7 +143,6 @@ vector 
CifLoader::getAllChains() { } atomLine = readLine(input); } - //output << "OUT getAllChains" << endl; return res; } @@ -261,7 +254,6 @@ CifLoader::loadSpacer(Spacer& sp){ */ int CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { - //output << "IN parseCifline" << endl; // get atom id int atNum = stoiDEF(cif->getGroupField("atom", atomLine, cif->getGroupColumnNumber("atom", "atom id"))); @@ -353,7 +345,6 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) } } delete at; - //output << "OUT parseCifline" << endl; return aaNum; } @@ -437,8 +428,7 @@ void CifLoader::loadProtein(Protein& prot) { // read header entry if (atomLine.find(cif->getTag("header")) != string::npos && (name == "")) { - name = atomLine; - sp->setType(name); + sp->setType(cif->getInlineField(atomLine)); } // read helix entry else if (atomLine.find(cif->getTag("helix")) != string::npos) { diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index 5c19939..b2b4d08 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -18,7 +18,7 @@ using namespace std; CifStructure::CifStructure(istream& input, ostream& output) : input(input), output(output) { - header = "_struct_keywords.pdbx_keywords"; + header = "_entry.id"; model = "pdbx_PDB_model_num "; helix = "_struct_conf."; helixStart = "beg_auth_seq_id "; @@ -55,7 +55,7 @@ CifStructure::~CifStructure() { } /** - * returns the correct collection by group name + * Returns the correct collection by group name * @param name name of the CIF group * @return reference to the collection */ @@ -76,7 +76,7 @@ vector& CifStructure::getGroup(string name) { } /** - * returns the tag by name + * Returns the tag by name * @param name name of tag * @return CIF tag */ @@ -133,7 +133,7 @@ string CifStructure::getTag(string name) { } /** - * returns the column number of the field + * Returns the column number of the field * @param name name of the group * 
@param field name of the field * @return field column number @@ -152,7 +152,7 @@ int CifStructure::getGroupColumnNumber(string name, string field) { } /** - * returns the field of the line at the columnNum column + * Returns the field of the line at the columnNum column * @param name name of the group * @param line line of the CIF * @param columnNum number of column @@ -175,7 +175,19 @@ string CifStructure::getGroupField(string name, string& line, int columnNum) { } /** - * parses group of CIF fields and creates a vector with columns positions +* Returns the field present in the line +* @param line line of the CIF +* @return field of the line +*/ +string CifStructure::getInlineField(string& line) { + istringstream iss(line); + string tag, field; + iss >> tag >> field; + return field; +} + +/** + * Parses group of CIF fields and creates a vector with columns positions * @param name name of the group */ void CifStructure::parseGroup(string name, string& line) { @@ -247,6 +259,10 @@ bool CifStructure::isGroupParsed(string name) { } } +/** +* Prints group records names into output stream. +* @param name name of the group +*/ void CifStructure::printGroup(string name) { vector& group = getGroup(name); diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h index 4012f81..9b8fc13 100644 --- a/Biopool/Sources/CifStructure.h +++ b/Biopool/Sources/CifStructure.h @@ -71,6 +71,13 @@ namespace Victor { */ string getGroupField(string name, string& line, int columnNum); + /** + * Returns the field present in the line + * @param line + * @return + */ + string getInlineField(string& line); + /** * Parses group of CIF fields and creates a vector with columns positions. * @param name name of the group @@ -95,6 +102,12 @@ namespace Victor { * @param name name of the group */ void printGroup(string name); + + /** + * Returns the input stream. 
+ * @return input stream + */ + istream& getInput(); private: // CIF file @@ -143,6 +156,10 @@ namespace Victor { bool sheetRangeGroupParsed; bool sheetHboundgroupParsed; }; + + inline istream& CifStructure::getInput() { + return input; + } } // namespace Biopool } // namespace Victor diff --git a/Biopool/Tests/Makefile b/Biopool/Tests/Makefile index 2a56f49..d740be5 100644 --- a/Biopool/Tests/Makefile +++ b/Biopool/Tests/Makefile @@ -27,7 +27,7 @@ INC_PATH = -I. # #SOURCES = TestBiopool.cc TestAtom.h TestAminoAcid.h TestGroup.h TestSpacer.h -SOURCES = TestCif.cc TestCifLoader.h +SOURCES = TestCif.cc TestCifLoader.h TestCifStructure.h #OBJECTS = $(SOURCES:.cpp=.o) OBJECTS = $(SOURCES:.cc=.o) diff --git a/Biopool/Tests/TestCif.cc b/Biopool/Tests/TestCif.cc index 2770ba3..430ce24 100644 --- a/Biopool/Tests/TestCif.cc +++ b/Biopool/Tests/TestCif.cc @@ -5,20 +5,11 @@ * Created on 9-giu-2015, 10.45.40 */ -/* -#include -#include -#include -#include -#include -#include -*/ - #include #include - #include +#include using std::cout; using std::endl; @@ -30,6 +21,7 @@ int main() { cout << "Creating Test Suites:" << endl; runner.addTest(TestCifLoader::suite()); + runner.addTest(TestCifStructure::suite()); cout << "Running the unit tests." << endl; @@ -37,27 +29,4 @@ int main() { return 0; - /* - // Create the event manager and test controller - CPPUNIT_NS::TestResult controller; - - // Add a listener that colllects test result - CPPUNIT_NS::TestResultCollector result; - controller.addListener(&result); - - // Add a listener that print dots as test run. - CPPUNIT_NS::BriefTestProgressListener progress; - controller.addListener(&progress); - - // Add the top suite to the test runner - CPPUNIT_NS::TestRunner runner; - runner.addTest(CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest()); - runner.run(controller); - - // Print test in a compiler compatible format. 
- CPPUNIT_NS::CompilerOutputter outputter(&result, CPPUNIT_NS::stdCOut()); - outputter.write(); - - return result.wasSuccessful() ? 0 : 1; - */ } diff --git a/Biopool/Tests/TestCifLoader.h b/Biopool/Tests/TestCifLoader.h index c844053..63795d2 100644 --- a/Biopool/Tests/TestCifLoader.h +++ b/Biopool/Tests/TestCifLoader.h @@ -67,7 +67,7 @@ class TestCifLoader : public TestFixture { void testGetMaxModels() { int max = cl->getMaxModels(); - CPPUNIT_ASSERT_EQUAL(max, maxModel); + CPPUNIT_ASSERT_EQUAL(maxModel, max); } void testGetAllChains() { @@ -83,38 +83,5 @@ class TestCifLoader : public TestFixture { ifstream* inFile; CifLoader* cl; }; - - -/* -#include -#include "CifLoader.h" -#include - -using namespace::Victor::Biopool; -using namespace::CppUnit; - -class TestCifLoader : public CPPUNIT_NS::TestFixture { - CPPUNIT_TEST_SUITE(TestCifLoader); - - CPPUNIT_TEST(testGetMaxModels); - CPPUNIT_TEST(testGetAllChains); - - CPPUNIT_TEST_SUITE_END(); - -public: - TestCifLoader(); - virtual ~TestCifLoader(); - void setUp(); - void tearDown(); - -private: - void testGetMaxModels(); - void testGetAllChains(); - - CifLoader* cl; - int maxModel; - vector chainIds; -}; - */ #endif /* TESTCIFLOADER_H */ From eaf7fe5fcc2057a9ccab1fd5fdbe4297236e2071 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Fri, 12 Jun 2015 17:31:31 +0200 Subject: [PATCH 10/16] cif saver completed --- Biopool/Sources/AminoAcid.h | 18 +- Biopool/Sources/AminoAcidHydrogen.cc | 10 + Biopool/Sources/Atom.cc | 7 +- Biopool/Sources/Atom.h | 46 ++++ Biopool/Sources/CifLoader.cc | 29 +- Biopool/Sources/CifSaver.cc | 388 +++++++++++++++------------ Biopool/Sources/CifSaver.h | 5 + Biopool/Sources/CifStructure.cc | 314 +++++++++++++--------- Biopool/Sources/CifStructure.h | 45 +++- Biopool/Sources/PdbSaver.cc | 10 +- 10 files changed, 535 insertions(+), 337 deletions(-) diff --git a/Biopool/Sources/AminoAcid.h b/Biopool/Sources/AminoAcid.h index f497833..0fd9e0e 100644 --- a/Biopool/Sources/AminoAcid.h +++ 
b/Biopool/Sources/AminoAcid.h @@ -58,9 +58,9 @@ namespace Victor { namespace Biopool { unsigned int size() const; unsigned int sizeBackbone() const; - double getPhi(bool override = false); - double getPsi(bool override = false); - double getOmega(bool override = false); + double getPhi(bool bypass = false); + double getPsi(bool bypass = false); + double getOmega(bool bypass = false); double getChi(unsigned int n); vector getChi(); unsigned int getMaxChi(); @@ -187,8 +187,8 @@ namespace Victor { namespace Biopool { } inline double - AminoAcid::getPhi(bool override) { - if ((phi > 990) || (override)) + AminoAcid::getPhi(bool bypass) { + if ((phi > 990) || (bypass)) if (sizeInBonds()) phi = RAD2DEG * icc.getTorsionAngle(getInBond(0)[C], (*this)[N], (*this)[CA], (*this)[C]); @@ -196,8 +196,8 @@ namespace Victor { namespace Biopool { } inline double - AminoAcid::getPsi(bool override) { - if ((psi > 990) || (override)) + AminoAcid::getPsi(bool bypass) { + if ((psi > 990) || (bypass)) if (sizeOutBonds()) psi = RAD2DEG * icc.getTorsionAngle((*this)[N], (*this)[CA], (*this)[C], getOutBond(0)[N]); @@ -205,8 +205,8 @@ namespace Victor { namespace Biopool { } inline double - AminoAcid::getOmega(bool override) { - if ((omega > 990) || (override)) + AminoAcid::getOmega(bool bypass) { + if ((omega > 990) || (bypass)) if (sizeOutBonds()) omega = RAD2DEG * icc.getTorsionAngle((*this)[CA], (*this)[C], getOutBond(0)[N], getOutBond(0)[CA]); diff --git a/Biopool/Sources/AminoAcidHydrogen.cc b/Biopool/Sources/AminoAcidHydrogen.cc index 9fd5003..4b6cc4f 100644 --- a/Biopool/Sources/AminoAcidHydrogen.cc +++ b/Biopool/Sources/AminoAcidHydrogen.cc @@ -121,6 +121,11 @@ AminoAcidHydrogen::setHydrogen(AminoAcid* aa, bool verbose) { IntCoordConverter icc; Atom atH; atH.setType("H"); + + // extra CIF fields + atH.setAsymId(aa->getAtom(0).getAsymId()); + atH.setEntityId(aa->getAtom(0).getEntityId()); + atH.setModel(aa->getAtom(0).getModel()); AminoAcid before = (*aa).getInBond(0); 
atH.bindIn((*aa)[N]); @@ -170,6 +175,11 @@ AminoAcidHydrogen::setHydrogen(AminoAcid* aa, bool verbose) { chiral = atoi(args[8].c_str()); atH.setType(args[2]); + + // extra CIF fields + atH.setAsymId(aa->getAtom(0).getAsymId()); + atH.setEntityId(aa->getAtom(0).getEntityId()); + atH.setModel(aa->getAtom(0).getModel()); if (aa->getSideChain().isMember(atBindCod)) { diff --git a/Biopool/Sources/Atom.cc b/Biopool/Sources/Atom.cc index ebc2f10..61cbe15 100644 --- a/Biopool/Sources/Atom.cc +++ b/Biopool/Sources/Atom.cc @@ -34,7 +34,7 @@ using namespace Victor; using namespace Victor::Biopool; */ Atom::Atom(unsigned int mI, unsigned int mO) : SimpleBond(mI, mO), superior(NULL), type(X), coords(0, 0, 0), Bfac(0.0), trans(0, 0, 0), rot(1), -modified(false) { +modified(false), asymId('X'), entityId(0), occupancy(0.0), model(0) { PRINT_NAME; } @@ -133,6 +133,11 @@ Atom::copy(const Atom& orig) { type = orig.type; coords = orig.coords; Bfac = orig.Bfac; + + asymId = orig.asymId; + entityId = orig.entityId; + occupancy = orig.occupancy; + model = orig.model; trans = orig.trans; rot = orig.rot; diff --git a/Biopool/Sources/Atom.h b/Biopool/Sources/Atom.h index 3df6f47..9c4281a 100644 --- a/Biopool/Sources/Atom.h +++ b/Biopool/Sources/Atom.h @@ -53,6 +53,11 @@ namespace Victor { namespace Biopool { double getBFac() { return Bfac; } + + char getAsymId(); + int getEntityId(); + double getOccupancy(); + int getModel(); double distance(Atom& other); @@ -83,6 +88,10 @@ namespace Victor { namespace Biopool { void setBFac(double _b) { Bfac = _b; } + void setAsymId(char aId); + void setEntityId(int eId); + void setOccupancy(double occ); + void setModel(int mod); void setTrans(vgVector3 t); void addTrans(vgVector3 t); @@ -117,6 +126,11 @@ namespace Victor { namespace Biopool { vgVector3 coords; // xyz-Coords double Bfac; // B-factor + + char asymId; + int entityId; + double occupancy; + int model; vgVector3 trans; // relative translation vgMatrix3 rot; // relative rotation @@ -179,6 
+193,22 @@ namespace Victor { namespace Biopool { Atom::inSync() { return (!modified); } + + inline char Atom::getAsymId() { + return asymId; + } + + inline int Atom::getEntityId() { + return entityId; + } + + inline double Atom::getOccupancy() { + return occupancy; + } + + inline int Atom::getModel() { + return model; + } // MODIFIERS: @@ -279,6 +309,22 @@ namespace Victor { namespace Biopool { this->superior = gr; } + inline void Atom::setAsymId(char aId) { + asymId = aId; + } + + inline void Atom::setEntityId(int eId) { + entityId = eId; + } + + inline void Atom::setOccupancy(double occ) { + occupancy = occ; + } + + inline void Atom::setModel(int mod) { + model = mod; + } + // OPERATORS: diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 5d733e0..7a3d9b7 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -40,6 +40,7 @@ sheetCode(_NULL), onlyMetalHetAtoms(_onlyMetal), noNucleotideChains(_noNucleotid } CifLoader::~CifLoader() { + delete cif; PRINT_NAME; } @@ -296,16 +297,36 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) cerr << "--> " << atType << "\n"; atType = "H"; } - + + // get asym id + char asymId = cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "atom asym")).c_str()[0]; + + // get entity id + int entityId = stoiDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "atom entity"))); + + // get occupancy + double occ = stodDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "occupancy"))); + + // get model + int model = stoiDEF(cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "model"))); + // Initialize the Atom object Atom* at = new Atom(); at->setNumber(atNum); at->setType(atType); at->setCoords(coord); at->setBFac(bfac); + at->setAsymId(asymId); + at->setEntityId(entityId); + at->setOccupancy(occ); + at->setModel(model); // Ligand object (includes DNA/RNA in "ATOM" 
field) - if ((tag == "HETATM") || + if (tag == "HETATM" || isKnownNucleotide(nucleotideThreeLetterTranslator(aaType))) { if (noWater) { if (!(aaType == "HOH")) { @@ -317,11 +338,11 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) lig->setType(aaType); } }// AminoAcid - else if ((tag == "ATOM ")) { + else if (tag == "ATOM ") { // skip N-terminal ACE groups if (aaType != "ACE") { // DEBUG: it would be nice to load also alternative atoms - // skip alternative atoms, + // skip alternative atoms if (altAaID != '?' && altAaID != '.') { if (verbose) cout << "Warning: Skipping extraneous amino acid entry " diff --git a/Biopool/Sources/CifSaver.cc b/Biopool/Sources/CifSaver.cc index 0f73e4d..d218ba7 100644 --- a/Biopool/Sources/CifSaver.cc +++ b/Biopool/Sources/CifSaver.cc @@ -20,10 +20,13 @@ using namespace std; CifSaver::CifSaver(ostream& _output) : output(_output), writeSeq(true), writeSecStr(true), writeTer(true), -atomOffset(0), aminoOffset(0), ligandOffset(0), chain(' ') { +atomOffset(0), aminoOffset(0), ligandOffset(0), chain(' '), +atomGroupPrinted(false) { + cif = new CifStructure(_output); } CifSaver::~CifSaver() { + delete cif; PRINT_NAME; } @@ -31,7 +34,6 @@ CifSaver::~CifSaver() { // MODIFIERS: - /** * Saves a group in CIF format. 
* @param group reference @@ -40,134 +42,173 @@ CifSaver::~CifSaver() { void CifSaver::saveGroup(Group& gr) { gr.sync(); + if(!atomGroupPrinted) { + cif->printGroup("atom"); + atomGroupPrinted = true; + } + for (unsigned int i = 0; i < gr.size(); i++) { - string atName = gr[i].getType(); - - if (atName == "OXT") // cosmetics: OXT has to be output after - continue; // the sidechain and therefore goes in saveSpacer - - // Added variable for correcting atom type H (last column in PDBs) - char atomOneLetter; - if (!isdigit(atName[0])) { - atomOneLetter = atName[0]; - } else { - atomOneLetter = atName[1]; - } - - // Added control for size by Damiano Piovesan - // example HG12 - if (!isdigit(atName[0]) && (atName.size() < 4)) - atName = ' ' + atName; - while (atName.size() < 4) - atName += ' '; - - output << "ATOM" << setw(7) << gr[i].getNumber() << " " << atName - << " " - << gr.getType() << " " << chain << setw(4) << aminoOffset << " " - << setw(8) << setprecision(3) << gr[i].getCoords().x - << setw(8) << setprecision(3) << gr[i].getCoords().y - << setw(8) << setprecision(3) << gr[i].getCoords().z - << " 1.00" << setw(6) << setprecision(2) << gr[i].getBFac() - << " " << atomOneLetter << "\n"; - - atomOffset = gr[i].getNumber() + 1; + string atName = gr[i].getType(); + + // cosmetics: OXT has to be output after + // the sidechain and therefore goes in saveSpacer + if (atName == "OXT") { + continue; + } + + // Added variable for correcting atom type H (last column in PDBs) + char atomOneLetter; + if (!isdigit(atName[0])) { + atomOneLetter = atName[0]; + } else { + atomOneLetter = atName[1]; + } + + // Added control for size by Damiano Piovesan + // example HG12 + if (!isdigit(atName[0]) && (atName.size() < 4)) { + atName = ' ' + atName; + } + while (atName.size() < 4) { + atName += ' '; + } + + output << setw(7) << left << "ATOM" << + setw(6) << gr[i].getNumber() << + setw(2) << atomOneLetter << + setw(5) << left << atName << + setw(2) << "." 
<< + setw(4) << gr.getType() << + setw(2) << gr[i].getAsymId() << + setw(2) << gr[i].getEntityId() << + setw(4) << aminoOffset << + setw(2) << "?" << + setw(8) << setprecision(3) << gr[i].getCoords().x << + setw(8) << setprecision(3) << gr[i].getCoords().y << + setw(8) << setprecision(3) << gr[i].getCoords().z << + setw(6) << setprecision(2) << gr[i].getOccupancy() << + setw(7) << left << setprecision(2) << gr[i].getBFac() << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(4) << aminoOffset << + setw(4) << gr.getType() << + setw(2) << chain << + setw(5) << left << atName << + setw(2) << gr[i].getModel() << + endl; + + atomOffset = gr[i].getNumber() + 1; } //aminoOffset++; } /** - * Saves a sidechain in PDB format. - *@param sideChain reference - *@return void + * Saves a sidechain in CIF format. + * @param sideChain side chain to save */ void CifSaver::saveSideChain(SideChain& sc) { saveGroup(sc); } /** - * Saves an aminoacid in PDB format. - *@param AminoAcid reference - *@return void + * Saves an aminoacid in CIF format. + * @param AminoAcid aminoacid to save */ void CifSaver::saveAminoAcid(AminoAcid& aa) { saveGroup(aa); } /** - * Saves a spacer in PDB format. - *@param Spacer reference - *@return void + * Saves a spacer in CIF format. + * @param Spacer spacer to save */ void CifSaver::saveSpacer(Spacer& sp) { PRINT_NAME; if (sp.size() > 0) { - unsigned int oldPrec = output.precision(); - ios::fmtflags oldFlags = output.flags(); - output.setf(ios::fixed, ios::floatfield); - - //method of class Component. 
It checks how deep is the spacer - if (sp.getDepth() == 0) { - if (writeTer) { - output << "HEADER " << sp.getType() << "\n" - << "REMARK created using Biopool2000 $Revision: 1.6.2.3 $ \n"; - } - aminoOffset = 0; - atomOffset = sp.getAtomStartOffset(); - } - - if (writeSeq) - writeSeqRes(sp); - if (writeSecStr) - writeSecondary(sp); - - aminoOffset = sp.getStartOffset(); - atomOffset = sp.getAtomStartOffset(); - - //saving is one ammino at a time - for (unsigned int i = 0; i < sp.sizeAmino(); i++) { - aminoOffset++; - while ((sp.isGap(aminoOffset)) && (aminoOffset < sp.maxPdbNumber())) { - aminoOffset++; - } - //cout << i << " " << aminoOffset << "\n"; - sp.getAmino(i).save(*this); - } - - // cosmetics: write OXT after last side chain - if (sp.getAmino(sp.sizeAmino() - 1).isMember(OXT)) { - unsigned int index = sp.sizeAmino() - 1; - output << "ATOM" << setw(7) << sp.getAmino(index)[OXT].getNumber() - << " OXT " - << sp.getAmino(index).getType() << " " << chain << setw(4) << aminoOffset - << " " << setw(8) << setprecision(3) - << sp.getAmino(index)[OXT].getCoords().x - << setw(8) << setprecision(3) - << sp.getAmino(index)[OXT].getCoords().y - << setw(8) << setprecision(3) - << sp.getAmino(index)[OXT].getCoords().z - << " 1.00" << setw(6) << setprecision(2) - << sp.getAmino(index)[OXT].getBFac() << " O\n"; - } - - if ((sp.getDepth() == 0) && (writeTer)) - output << "TER " << setw(4) << atomOffset + 1 << " " - << sp.getAmino(sp.sizeAmino() - 1).getType() << " " - << setw(4) << aminoOffset << "\n"; - - output.precision(oldPrec); - output.flags(oldFlags); - aminoOffset = 0; //necessary if the's more than one spacer - output << "TER\n"; + unsigned int oldPrec = output.precision(); + ios::fmtflags oldFlags = output.flags(); + output.setf(ios::fixed, ios::floatfield); + + //method of class Component. 
It checks how deep is the spacer + if (sp.getDepth() == 0) { + if (writeTer) { + output << "data_" << sp.getType() << endl; + output << "# " << endl; + output << cif->getTag("header") << " " << sp.getType() + << " " << endl; + output << "# " << endl; + } + aminoOffset = 0; + atomOffset = sp.getAtomStartOffset(); + } + + if (writeSeq) + writeSeqRes(sp); + if (writeSecStr) + writeSecondary(sp); + + aminoOffset = sp.getStartOffset(); + atomOffset = sp.getAtomStartOffset(); + + //saving is one ammino at a time + for (unsigned int i = 0; i < sp.sizeAmino(); i++) { + aminoOffset++; + while ((sp.isGap(aminoOffset)) && (aminoOffset < sp.maxPdbNumber())) { + aminoOffset++; + } + //cout << i << " " << aminoOffset << "\n"; + sp.getAmino(i).save(*this); + } + + // cosmetics: write OXT after last side chain + if (sp.getAmino(sp.sizeAmino() - 1).isMember(OXT)) { + unsigned int index = sp.sizeAmino() - 1; + + output << setw(7) << left << "ATOM" << + setw(6) << sp.getAmino(index)[OXT].getNumber() << + setw(2) << "O" << + setw(5) << left << "OXT" << + setw(2) << "." << + setw(4) << sp.getAmino(index).getType() << + setw(2) << sp.getAmino(index)[OXT].getAsymId() << + setw(2) << sp.getAmino(index)[OXT].getEntityId() << + setw(4) << aminoOffset << + setw(2) << "?" << + setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().x << + setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().y << + setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().z << + setw(6) << setprecision(2) << sp.getAmino(index)[OXT].getOccupancy() << + setw(7) << left << setprecision(2) << sp.getAmino(index)[OXT].getBFac() << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" 
<< + setw(4) << aminoOffset << + setw(4) << sp.getAmino(index).getType() << + setw(2) << chain << + setw(5) << "OXT" << + setw(2) << sp.getAmino(index)[OXT].getModel() << + endl; + } + + output.precision(oldPrec); + output.flags(oldFlags); + aminoOffset = 0; //necessary if the's more than one spacer } } /** - * Saves a Ligand in PDB format. - *@param Ligand reference - *@return void + * Saves a Ligand in CIF format. + * @param Ligand ligand to save */ void CifSaver::saveLigand(Ligand& gr) { gr.sync(); @@ -177,46 +218,53 @@ void CifSaver::saveLigand(Ligand& gr) { string aaType = gr.getType(); - - // DEBUG: write TER for DNA/RNA ligands - string tag = "HETATM"; if (isKnownNucleotide(nucleotideThreeLetterTranslator(aaType))) { - tag = "ATOM "; + tag = "ATOM "; } - for (unsigned int i = 0; i < gr.size(); i++) //print all HETATM of a ligand + //print all HETATM of a ligand + for (unsigned int i = 0; i < gr.size(); i++) { - string atType = gr[i].getType(); - aaType = gr.getType(); - string atTypeShort; //last column in a Pdb File - unsigned int atNum = gr[i].getNumber(); - if (atType != aaType) { - atTypeShort = atType[0]; - atTypeShort = ' ' + atTypeShort; - atType = ' ' + atType; - } else { - atTypeShort = atType; - aaType = ' ' + aaType; - } - while (atType.size() < 4) - atType = atType + ' '; - while (aaType.size() < 3) - aaType = ' ' + aaType; - - - - output << tag << setw(5) << atNum << " " << setw(4) << atType << " " - << setw(3) << aaType << " " << chain << setw(4) << ligandOffset << " " - << setw(8) << setprecision(3) << gr[i].getCoords().x - << setw(8) << setprecision(3) << gr[i].getCoords().y - << setw(8) << setprecision(3) << gr[i].getCoords().z - << " 1.00" << setw(6) << setprecision(2) << gr[i].getBFac() - << " " << atTypeShort << "\n"; - } - if (tag == "ATOM ") { - output << "TER\n"; + string atType = gr[i].getType(); + aaType = gr.getType(); + string atTypeShort; //last column in a Pdb File + + if (atType != aaType) { + atTypeShort = atType[0]; + } else { 
+ atTypeShort = atType; + } + + output << setw(7) << left << tag << + setw(6) << gr[i].getNumber() << + setw(2) << atTypeShort << + setw(5) << left << atType << + setw(2) << "." << + setw(4) << aaType << + setw(2) << gr[i].getAsymId() << + setw(2) << gr[i].getEntityId() << + setw(4) << aminoOffset << + setw(2) << "?" << + setw(8) << setprecision(3) << gr[i].getCoords().x << + setw(8) << setprecision(3) << gr[i].getCoords().y << + setw(8) << setprecision(3) << gr[i].getCoords().z << + setw(6) << setprecision(2) << gr[i].getOccupancy() << + setw(7) << left << setprecision(2) << gr[i].getBFac() << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(4) << aminoOffset << + setw(4) << aaType << + setw(2) << chain << + setw(5) << atType << + setw(2) << gr[i].getModel() << + endl; } + output << "# " << endl; ligandOffset++; output.precision(oldPrec); @@ -224,25 +272,23 @@ void CifSaver::saveLigand(Ligand& gr) { } /** - * Saves a LigandSet in PDB format. - *@param LigandSet reference - *@return void + * Saves a LigandSet in CIF format. + * @param LigandSet set of ligands to save */ void CifSaver::saveLigandSet(LigandSet& ls) { ligandOffset = ls.getStartOffset(); //set the offset for current LigandSet for (unsigned int i = 0; i < ls.sizeLigand(); i++) { - while ((ls.isGap(ligandOffset)) - && (ligandOffset < ls.maxPdbNumber())) - ligandOffset++; - ls[i].save(*this); + while ((ls.isGap(ligandOffset)) + && (ligandOffset < ls.maxPdbNumber())) + ligandOffset++; + ls[i].save(*this); } } /** - * Saves a Protein in PDB format. - *@param Protein reference - *@return void + * Saves a Protein in PDB format. 
+ * @param Protein protein to save */ void CifSaver::saveProtein(Protein& prot) { //if (prot.sizeProtein()==0) @@ -251,52 +297,44 @@ void CifSaver::saveProtein(Protein& prot) { Spacer* sp = NULL; LigandSet* ls = NULL; - for (unsigned int i = 0; i < prot.sizeProtein(); i++) { - setChain(prot.getChainLetter(i)); //set the actual chain's ID - sp = prot.getSpacer(i); - saveSpacer(*sp); - + setChain(prot.getChainLetter(i)); //set the actual chain's ID + sp = prot.getSpacer(i); + saveSpacer(*sp); } for (unsigned int i = 0; i < prot.sizeProtein(); i++) { - setChain(prot.getChainLetter(i)); //set the actual chain's ID - ls = prot.getLigandSet(i); - + setChain(prot.getChainLetter(i)); //set the actual chain's ID + ls = prot.getLigandSet(i); - if (ls != NULL) { - saveLigandSet(*ls); - } + if (ls != NULL) { + saveLigandSet(*ls); + } } } /** - * Writes the SEQRES entry (PDB format) for a spacer. - *@param Spacer reference - *@return void + * Writes the SEQRES entry (CIF format) for a spacer. + * @param Spacer spacer to write */ void CifSaver::writeSeqRes(Spacer& sp) { - for (unsigned int i = 0; i < sp.sizeAmino() / 13; i++) { - output << "SEQRES " << setw(3) << i << " " << setw(3) - << sp.sizeAmino() << " "; - for (unsigned int j = 0; j < 13; j++) - output << sp.getAmino((i * 13) + j).getType() << " "; - output << "\n"; - } - if (sp.sizeAmino() % 13 > 0) { - output << "SEQRES " << setw(3) << sp.sizeAmino() / 13 + 1 << " " - << setw(3) << sp.sizeAmino() << " "; - for (unsigned int j = 13 * (sp.sizeAmino() / 13); j < sp.sizeAmino(); j++) - output << sp.getAmino(j).getType() << " "; - output << "\n"; + cif->printGroup("entity poly"); + + for (unsigned int i = 0; i< sp.sizeAmino(); i++) { + output << setw(2) << left << sp.getAmino(i).getAtom(0).getEntityId() << + setw(4) << i + 1 << + setw(4) << sp.getAmino(i).getType() << + setw(2) << "n" << + endl; } + output << "# " << endl; } /** * Writes the secondary information (PDB format) for a spacer, e.g. HELIX, - * SHEET, etc. 
- *@param sideChain reference - *@return void + * SHEET, etc. + * @param sideChain reference + * @return void */ void CifSaver::writeSecondary(Spacer& sp) { diff --git a/Biopool/Sources/CifSaver.h b/Biopool/Sources/CifSaver.h index bf582f6..2c26b72 100644 --- a/Biopool/Sources/CifSaver.h +++ b/Biopool/Sources/CifSaver.h @@ -88,11 +88,16 @@ namespace Victor { // ATTRIBUTES ostream& output; // output stream bool writeSeq, writeSecStr, writeTer; + // offsets that determine at which atom, // aminoacid and ligand number to start unsigned int atomOffset, ligandOffset; int aminoOffset; char chain; // chain ID + + bool atomGroupPrinted; + + CifStructure* cif; }; inline void CifSaver::endFile() { diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index b2b4d08..14438e9 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -16,25 +16,58 @@ using namespace Victor; using namespace Victor::Biopool; using namespace std; +/** + * Constructor + * @param input input file stream + * @param output output file stream + */ CifStructure::CifStructure(istream& input, ostream& output) : input(input), output(output) { + setData(); +} + +/** + * Constructor + * @param output output file stream + */ +CifStructure::CifStructure(ostream& output) : +output(output), input(cin) { + setData(); +} + +/** + * Destructor + */ +CifStructure::~CifStructure() { +} + +/** + * Sets data members + */ +void CifStructure::setData() { header = "_entry.id"; - model = "pdbx_PDB_model_num "; - helix = "_struct_conf."; - helixStart = "beg_auth_seq_id "; - helixEnd = "end_auth_seq_id "; - helixChainId = "beg_auth_asym_id "; + atom = "_atom_site."; - residueNum = "auth_seq_id "; atomId = "id "; + chain = "auth_asym_id "; + asymId = "label_asym_id "; + entityId = "label_entity_id "; residueIns = "pdbx_PDB_ins_code "; - tempFactor = "B_iso_or_equiv "; - atomName = "auth_atom_id "; - residueName = "auth_comp_id "; x = "Cartn_x "; y = "Cartn_y "; z = "Cartn_z 
"; - chain = "auth_asym_id "; + occupancy = "occupancy "; + tempFactor = "B_iso_or_equiv "; + residueNum = "auth_seq_id "; + residueName = "auth_comp_id "; + atomName = "auth_atom_id "; + model = "pdbx_PDB_model_num "; + + helix = "_struct_conf."; + helixStart = "beg_auth_seq_id "; + helixEnd = "end_auth_seq_id "; + helixChainId = "beg_auth_asym_id "; + sheet = "_struct_sheet."; sheetOrder = "_struct_sheet_order."; sheetRange = "_struct_sheet_range."; @@ -51,9 +84,6 @@ input(input), output(output) { sheetHboundgroupParsed = false; } -CifStructure::~CifStructure() { -} - /** * Returns the correct collection by group name * @param name name of the CIF group @@ -83,12 +113,17 @@ vector& CifStructure::getGroup(string name) { string CifStructure::getTag(string name) { if (name == "header") { return header; - } else if (name == "atom") { + }// atom group + else if (name == "atom") { return atom; - } else if (name == "residue num") { - return residueNum; } else if (name == "atom id") { return atomId; + } else if (name == "chain") { + return chain; + } else if (name == "atom asym") { + return asymId; + } else if (name == "atom entity") { + return entityId; } else if (name == "residue ins") { return residueIns; } else if (name == "x") { @@ -97,13 +132,20 @@ string CifStructure::getTag(string name) { return y; } else if (name == "z") { return z; + } else if (name == "occupancy") { + return occupancy; } else if (name == "bfac") { return tempFactor; - } else if (name == "atom name") { - return atomName; + } else if (name == "residue num") { + return residueNum; } else if (name == "residue name") { return residueName; - } else if (name == "helix") { + } else if (name == "atom name") { + return atomName; + } else if (name == "model") { + return model; + }// helix group + else if (name == "helix") { return helix; } else if (name == "helix start") { return helixStart; @@ -111,11 +153,8 @@ string CifStructure::getTag(string name) { return helixEnd; } else if (name == "helix chain") 
{ return helixChainId; - } else if (name == "model") { - return model; - } else if (name == "chain") { - return chain; - } else if (name == "sheet") { + }// sheet group + else if (name == "sheet") { return sheet; } else if (name == "sheet order") { return sheetOrder; @@ -175,10 +214,10 @@ string CifStructure::getGroupField(string name, string& line, int columnNum) { } /** -* Returns the field present in the line -* @param line line of the CIF -* @return field of the line -*/ + * Returns the field present in the line + * @param line line of the CIF + * @return field of the line + */ string CifStructure::getInlineField(string& line) { istringstream iss(line); string tag, field; @@ -212,7 +251,7 @@ void CifStructure::parseGroup(string name, string& line) { setParsedFlag(name); break; } - + line = readLine(input); } } @@ -260,112 +299,123 @@ bool CifStructure::isGroupParsed(string name) { } /** -* Prints group records names into output stream. -* @param name name of the group -*/ + * Prints group records names into output stream. 
+ * @param name name of the group + */ void CifStructure::printGroup(string name) { - vector& group = getGroup(name); output << "loop_" << endl; - if (isGroupParsed(name)) { - for (vector::iterator it = group.begin(); it != group.end(); it++) { - output << *it << endl; - } - } else { - if (name == "atom") { - output << "_atom_site.group_PDB " << endl << - "_atom_site.id " << endl << - "_atom_site.type_symbol " << endl << - "_atom_site.label_atom_id " << endl << - "_atom_site.label_alt_id " << endl << - "_atom_site.label_comp_id " << endl << - "_atom_site.label_asym_id " << endl << - "_atom_site.label_entity_id " << endl << - "_atom_site.label_seq_id " << endl << - "_atom_site.pdbx_PDB_ins_code " << endl << - "_atom_site.Cartn_x " << endl << - "_atom_site.Cartn_y " << endl << - "_atom_site.Cartn_z " << endl << - "_atom_site.occupancy " << endl << - "_atom_site.B_iso_or_equiv " << endl << - "_atom_site.Cartn_x_esd " << endl << - "_atom_site.Cartn_y_esd " << endl << - "_atom_site.Cartn_z_esd " << endl << - "_atom_site.occupancy_esd " << endl << - "_atom_site.B_iso_or_equiv_esd " << endl << - "_atom_site.pdbx_formal_charge " << endl << - "_atom_site.auth_seq_id " << endl << - "_atom_site.auth_comp_id " << endl << - "_atom_site.auth_asym_id " << endl << - "_atom_site.auth_atom_id " << endl << - "_atom_site.pdbx_PDB_model_num " << endl; - } else if (name == "helix") { - output << "_struct_conf.conf_type_id " << endl << - "_struct_conf.id " << endl << - "_struct_conf.pdbx_PDB_helix_id " << endl << - "_struct_conf.beg_label_comp_id " << endl << - "_struct_conf.beg_label_asym_id " << endl << - "_struct_conf.beg_label_seq_id " << endl << - "_struct_conf.pdbx_beg_PDB_ins_code " << endl << - "_struct_conf.end_label_comp_id " << endl << - "_struct_conf.end_label_asym_id " << endl << - "_struct_conf.end_label_seq_id " << endl << - "_struct_conf.pdbx_end_PDB_ins_code " << endl << - "_struct_conf.beg_auth_comp_id " << endl << - "_struct_conf.beg_auth_asym_id " << endl << - 
"_struct_conf.beg_auth_seq_id " << endl << - "_struct_conf.end_auth_comp_id " << endl << - "_struct_conf.end_auth_asym_id " << endl << - "_struct_conf.end_auth_seq_id " << endl << - "_struct_conf.pdbx_PDB_helix_class " << endl << - "_struct_conf.details " << endl << - "_struct_conf.pdbx_PDB_helix_length " << endl; - } else if (name == "sheet") { - output << "_struct_sheet.id " << endl << - "_struct_sheet.type " << endl << - "_struct_sheet.number_strands " << endl << - "_struct_sheet.details " << endl; - } else if (name == "sheet range") { - output << "_struct_sheet_range.sheet_id " << endl << - "_struct_sheet_range.id " << endl << - "_struct_sheet_range.beg_label_comp_id " << endl << - "_struct_sheet_range.beg_label_asym_id " << endl << - "_struct_sheet_range.beg_label_seq_id " << endl << - "_struct_sheet_range.pdbx_beg_PDB_ins_code " << endl << - "_struct_sheet_range.end_label_comp_id " << endl << - "_struct_sheet_range.end_label_asym_id " << endl << - "_struct_sheet_range.end_label_seq_id " << endl << - "_struct_sheet_range.pdbx_end_PDB_ins_code " << endl << - "_struct_sheet_range.symmetry " << endl << - "_struct_sheet_range.beg_auth_comp_id " << endl << - "_struct_sheet_range.beg_auth_asym_id " << endl << - "_struct_sheet_range.beg_auth_seq_id " << endl << - "_struct_sheet_range.end_auth_comp_id " << endl << - "_struct_sheet_range.end_auth_asym_id " << endl << - "_struct_sheet_range.end_auth_seq_id " << endl; - } else if (name == "sheet hbond") { - output << "_pdbx_struct_sheet_hbond.sheet_id " << endl << - "_pdbx_struct_sheet_hbond.range_id_1 " << endl << - "_pdbx_struct_sheet_hbond.range_id_2 " << endl << - "_pdbx_struct_sheet_hbond.range_1_label_atom_id " << endl << - "_pdbx_struct_sheet_hbond.range_1_label_comp_id " << endl << - "_pdbx_struct_sheet_hbond.range_1_label_asym_id " << endl << - "_pdbx_struct_sheet_hbond.range_1_label_seq_id " << endl << - "_pdbx_struct_sheet_hbond.range_1_PDB_ins_code " << endl << - "_pdbx_struct_sheet_hbond.range_1_auth_atom_id 
" << endl << - "_pdbx_struct_sheet_hbond.range_1_auth_comp_id " << endl << - "_pdbx_struct_sheet_hbond.range_1_auth_asym_id " << endl << - "_pdbx_struct_sheet_hbond.range_1_auth_seq_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_label_atom_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_label_comp_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_label_asym_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_label_seq_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_PDB_ins_code " << endl << - "_pdbx_struct_sheet_hbond.range_2_auth_atom_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_auth_comp_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_auth_asym_id " << endl << - "_pdbx_struct_sheet_hbond.range_2_auth_seq_id " << endl; - } + if (name == "atom") { + output << "_atom_site.group_PDB " << endl << + "_atom_site.id " << endl << + "_atom_site.type_symbol " << endl << + "_atom_site.label_atom_id " << endl << + "_atom_site.label_alt_id " << endl << + "_atom_site.label_comp_id " << endl << + "_atom_site.label_asym_id " << endl << + "_atom_site.label_entity_id " << endl << + "_atom_site.label_seq_id " << endl << + "_atom_site.pdbx_PDB_ins_code " << endl << + "_atom_site.Cartn_x " << endl << + "_atom_site.Cartn_y " << endl << + "_atom_site.Cartn_z " << endl << + "_atom_site.occupancy " << endl << + "_atom_site.B_iso_or_equiv " << endl << + "_atom_site.Cartn_x_esd " << endl << + "_atom_site.Cartn_y_esd " << endl << + "_atom_site.Cartn_z_esd " << endl << + "_atom_site.occupancy_esd " << endl << + "_atom_site.B_iso_or_equiv_esd " << endl << + "_atom_site.pdbx_formal_charge " << endl << + "_atom_site.auth_seq_id " << endl << + "_atom_site.auth_comp_id " << endl << + "_atom_site.auth_asym_id " << endl << + "_atom_site.auth_atom_id " << endl << + "_atom_site.pdbx_PDB_model_num " << endl; + } else if (name == "helix") { + output << "_struct_conf.conf_type_id " << endl << + "_struct_conf.id " << endl << + "_struct_conf.pdbx_PDB_helix_id " << endl << + 
"_struct_conf.beg_label_comp_id " << endl << + "_struct_conf.beg_label_asym_id " << endl << + "_struct_conf.beg_label_seq_id " << endl << + "_struct_conf.pdbx_beg_PDB_ins_code " << endl << + "_struct_conf.end_label_comp_id " << endl << + "_struct_conf.end_label_asym_id " << endl << + "_struct_conf.end_label_seq_id " << endl << + "_struct_conf.pdbx_end_PDB_ins_code " << endl << + "_struct_conf.beg_auth_comp_id " << endl << + "_struct_conf.beg_auth_asym_id " << endl << + "_struct_conf.beg_auth_seq_id " << endl << + "_struct_conf.end_auth_comp_id " << endl << + "_struct_conf.end_auth_asym_id " << endl << + "_struct_conf.end_auth_seq_id " << endl << + "_struct_conf.pdbx_PDB_helix_class " << endl << + "_struct_conf.details " << endl << + "_struct_conf.pdbx_PDB_helix_length " << endl; + } else if (name == "sheet") { + output << "_struct_sheet.id " << endl << + "_struct_sheet.type " << endl << + "_struct_sheet.number_strands " << endl << + "_struct_sheet.details " << endl; + } else if (name == "sheet range") { + output << "_struct_sheet_range.sheet_id " << endl << + "_struct_sheet_range.id " << endl << + "_struct_sheet_range.beg_label_comp_id " << endl << + "_struct_sheet_range.beg_label_asym_id " << endl << + "_struct_sheet_range.beg_label_seq_id " << endl << + "_struct_sheet_range.pdbx_beg_PDB_ins_code " << endl << + "_struct_sheet_range.end_label_comp_id " << endl << + "_struct_sheet_range.end_label_asym_id " << endl << + "_struct_sheet_range.end_label_seq_id " << endl << + "_struct_sheet_range.pdbx_end_PDB_ins_code " << endl << + "_struct_sheet_range.symmetry " << endl << + "_struct_sheet_range.beg_auth_comp_id " << endl << + "_struct_sheet_range.beg_auth_asym_id " << endl << + "_struct_sheet_range.beg_auth_seq_id " << endl << + "_struct_sheet_range.end_auth_comp_id " << endl << + "_struct_sheet_range.end_auth_asym_id " << endl << + "_struct_sheet_range.end_auth_seq_id " << endl; + } else if (name == "sheet hbond") { + output << "_pdbx_struct_sheet_hbond.sheet_id " << 
endl << + "_pdbx_struct_sheet_hbond.range_id_1 " << endl << + "_pdbx_struct_sheet_hbond.range_id_2 " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_label_seq_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_PDB_ins_code " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_1_auth_seq_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_label_seq_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_PDB_ins_code " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_atom_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_comp_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_asym_id " << endl << + "_pdbx_struct_sheet_hbond.range_2_auth_seq_id " << endl; + } else if (name == "entity poly") { + output << "_entity_poly_seq.entity_id " << endl << + "_entity_poly_seq.num " << endl << + "_entity_poly_seq.mon_id " << endl << + "_entity_poly_seq.hetero " << endl; + } else if (name == "pdbx poly") { + output << "_pdbx_poly_seq_scheme.asym_id " << endl << + "_pdbx_poly_seq_scheme.entity_id " << endl << + "_pdbx_poly_seq_scheme.seq_id " << endl << + "_pdbx_poly_seq_scheme.mon_id " << endl << + "_pdbx_poly_seq_scheme.ndb_seq_num " << endl << + "_pdbx_poly_seq_scheme.pdb_seq_num " << endl << + "_pdbx_poly_seq_scheme.auth_seq_num " << endl << + "_pdbx_poly_seq_scheme.pdb_mon_id " << endl << + "_pdbx_poly_seq_scheme.auth_mon_id " << endl << + "_pdbx_poly_seq_scheme.pdb_strand_id " << endl << + 
"_pdbx_poly_seq_scheme.pdb_ins_code " << endl << + "_pdbx_poly_seq_scheme.hetero " << endl; } } diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h index 9b8fc13..9eb4c58 100644 --- a/Biopool/Sources/CifStructure.h +++ b/Biopool/Sources/CifStructure.h @@ -34,6 +34,13 @@ namespace Victor { * @param output output file stream */ CifStructure(istream& input, ostream& output = cout); + + /** + * Constructor + * @param output output file stream + * @param input input file stream + */ + CifStructure(ostream& output); /** * Destructor @@ -110,28 +117,42 @@ namespace Victor { istream& getInput(); private: + /** + * Sets data members + */ + void setData(); + // CIF file istream& input; ostream& output; // CIF tags string header; + + // atom group + string atom; + string atomId; + string chain; + string asymId; + string entityId; + string residueIns; + string x; + string y; + string z; + string occupancy; + string tempFactor; + string residueNum; + string residueName; + string atomName; string model; - string helix; + + // helix group + string helix; string helixStart; string helixEnd; string helixChainId; - string atom; - string residueNum; - string atomId; - string residueIns; - string tempFactor; - string atomName; - string residueName; - string x; - string y; - string z; - string chain; + + // sheet group string sheet; string sheetOrder; string sheetRange; diff --git a/Biopool/Sources/PdbSaver.cc b/Biopool/Sources/PdbSaver.cc index 861ff01..cd1810d 100644 --- a/Biopool/Sources/PdbSaver.cc +++ b/Biopool/Sources/PdbSaver.cc @@ -274,15 +274,17 @@ void PdbSaver::saveProtein(Protein& prot) { */ void PdbSaver::writeSeqRes(Spacer& sp) { for (unsigned int i = 0; i < sp.sizeAmino() / 13; i++) { - output << "SEQRES " << setw(3) << i << " " << setw(3) - << sp.sizeAmino() << " "; + output << "SEQRES " << setw(3) << i << " " << + setw(1) << chain << " " << + setw(4) << sp.sizeAmino() << " "; for (unsigned int j = 0; j < 13; j++) output << sp.getAmino((i * 13) 
+ j).getType() << " "; output << "\n"; } if (sp.sizeAmino() % 13 > 0) { - output << "SEQRES " << setw(3) << sp.sizeAmino() / 13 + 1 << " " - << setw(3) << sp.sizeAmino() << " "; + output << "SEQRES " << setw(3) << sp.sizeAmino() / 13 + 1 << " " << + setw(1) << chain << " " << + setw(4) << sp.sizeAmino() << " "; for (unsigned int j = 13 * (sp.sizeAmino() / 13); j < sp.sizeAmino(); j++) output << sp.getAmino(j).getType() << " "; output << "\n"; From 539b064927ab033ca424f5ef0c352982086af337 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Fri, 12 Jun 2015 19:00:51 +0200 Subject: [PATCH 11/16] added APPS for CIF loader and saver --- Biopool/APPS/Cif2Secondary.cc | 120 +++++++++++++++++++ Biopool/APPS/Cif2Seq.cc | 120 +++++++++++++++++++ Biopool/APPS/CifCorrector.cc | 56 +++++++++ Biopool/APPS/CifEditor.cc | 75 ++++++++++++ Biopool/APPS/CifMover.cc | 193 ++++++++++++++++++++++++++++++ Biopool/APPS/CifSecondary.cc | 198 +++++++++++++++++++++++++++++++ Biopool/APPS/CifShifter.cc | 131 ++++++++++++++++++++ Biopool/APPS/Makefile | 12 +- Biopool/Tests/TestCifStructure.h | 107 +++++++++++++++++ 9 files changed, 1008 insertions(+), 4 deletions(-) create mode 100644 Biopool/APPS/Cif2Secondary.cc create mode 100644 Biopool/APPS/Cif2Seq.cc create mode 100644 Biopool/APPS/CifCorrector.cc create mode 100644 Biopool/APPS/CifEditor.cc create mode 100644 Biopool/APPS/CifMover.cc create mode 100644 Biopool/APPS/CifSecondary.cc create mode 100644 Biopool/APPS/CifShifter.cc create mode 100644 Biopool/Tests/TestCifStructure.h diff --git a/Biopool/APPS/Cif2Secondary.cc b/Biopool/APPS/Cif2Secondary.cc new file mode 100644 index 0000000..0630af3 --- /dev/null +++ b/Biopool/APPS/Cif2Secondary.cc @@ -0,0 +1,120 @@ +/* + * File: cif2secondary.cc + * Author: marco + * + * Created on 12 giugno 2015, 17.42 + */ + +#include +#include +#include +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; + +void sShowHelp() { + cout << "Cif 2 Seq $Revision: 0.1 $ -- converts a 
CIF file into SEQ\n" + << "(torsion angles) protein structure backbone torsion angles\n" + << " Options: \n" + << "\t-i \t Input CIF file\n" + << "\t-o \t Output to file (default stdout)\n" + << "\t-c \t Chain identifier to read\n" + << "\t--all \t All chains\n" + << "\t-m \t Model number to read (NMR only, default is first model)\n" + << "\t--chi \t Write Chi angles (default false)\n" + << "\t-v \t verbose output\n\n" + << "\tIf both -c and --all are missing, only the first chain is processed.\n\n"; + +} + +int main(int argc, char** argv) { + + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + } + + string inputFile, outputFile, chainID; + unsigned int modelNum; + bool chi, all; + + getArg("i", inputFile, argc, argv, "!"); + getArg("o", outputFile, argc, argv, "!"); + getArg("c", chainID, argc, argv, "!"); + getArg("m", modelNum, argc, argv, 999); + all = getArg("-all", argc, argv); + chi = getArg("-chi", argc, argv); + + // Check input file + if (inputFile == "!") { + cout << "Missing input file specification. Aborting. 
(-h for help)" << endl; + return -1; + } + ifstream inFile(inputFile.c_str()); + if (!inFile) + ERROR("Input file not found.", exception); + + + CifLoader cl(inFile); + + // Set CifLoader variables + cl.setModel(modelNum); + cl.setNoHAtoms(); + cl.setNoHetAtoms(); + cl.setNoSecondary(); + if (!getArg("v", argc, argv)) { + cl.setNoVerbose(); + } + + + // Check chain args + if ((chainID != "!") && all) { + ERROR("You can use --all or -c, not both", error); + } + // User selected chain + if (chainID != "!") { + if (chainID.size() > 1) + ERROR("You can choose only 1 chain", error); + cl.setChain(chainID[0]); + }// All chains + else if (all) { + cl.setAllChains(); + }// First chain + else { + cl.setChain(cl.getAllChains()[0]); + } + + // Load the protein object + Protein prot; + prot.load(cl); + + // Open the proper output stream (file or stdout) + std::ostream* os = &cout; + std::ofstream fout; + if (outputFile != "!") { + fout.open(outputFile.c_str()); + if (!fout) { + ERROR("Could not open file for writing.", exception); + } else { + os = &fout; + } + } + + + Spacer* sp; + for (unsigned int i = 0; i < prot.sizeProtein(); i++) { + + sp = prot.getSpacer(i); + + // Write the sequence + SeqSaver ss(*os); + if (!chi) + ss.setWriteChi(false); + sp->save(ss); + } + + return 0; +} + diff --git a/Biopool/APPS/Cif2Seq.cc b/Biopool/APPS/Cif2Seq.cc new file mode 100644 index 0000000..5c88e83 --- /dev/null +++ b/Biopool/APPS/Cif2Seq.cc @@ -0,0 +1,120 @@ +/* + * File: Cif2Seq.cc + * Author: marco + * + * Created on 12 giugno 2015, 18.51 + */ + +#include +#include +#include +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; + +void sShowHelp() { + cout << "Cif 2 Seq $Revision: 0.1 $ -- converts a CIF file into SEQ\n" + << "(torsion angles) protein structure backbone torsion angles\n" + << " Options: \n" + << "\t-i \t Input CIF file\n" + << "\t-o \t Output to file (default stdout)\n" + << "\t-c \t Chain identifier to read\n" + << "\t--all \t All 
chains\n" + << "\t-m \t Model number to read (NMR only, default is first model)\n" + << "\t--chi \t Write Chi angles (default false)\n" + << "\t-v \t verbose output\n\n" + << "\tIf both -c and --all are missing, only the first chain is processed.\n\n"; + +} + +int main(int argc, char** argv) { + + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + } + + string inputFile, outputFile, chainID; + unsigned int modelNum; + bool chi, all; + + getArg("i", inputFile, argc, argv, "!"); + getArg("o", outputFile, argc, argv, "!"); + getArg("c", chainID, argc, argv, "!"); + getArg("m", modelNum, argc, argv, 999); + all = getArg("-all", argc, argv); + chi = getArg("-chi", argc, argv); + + // Check input file + if (inputFile == "!") { + cout << "Missing input file specification. Aborting. (-h for help)" << endl; + return -1; + } + ifstream inFile(inputFile.c_str()); + if (!inFile) + ERROR("Input file not found.", exception); + + + CifLoader cl(inFile); + + // Set PdbLoader variables + cl.setModel(modelNum); + cl.setNoHAtoms(); + cl.setNoHetAtoms(); + cl.setNoSecondary(); + if (!getArg("v", argc, argv)) { + cl.setNoVerbose(); + } + + + // Check chain args + if ((chainID != "!") && all) { + ERROR("You can use --all or -c, not both", error); + } + // User selected chain + if (chainID != "!") { + if (chainID.size() > 1) + ERROR("You can choose only 1 chain", error); + cl.setChain(chainID[0]); + }// All chains + else if (all) { + cl.setAllChains(); + }// First chain + else { + cl.setChain(cl.getAllChains()[0]); + } + + // Load the protein object + Protein prot; + prot.load(cl); + + // Open the proper output stream (file or stdout) + std::ostream* os = &cout; + std::ofstream fout; + if (outputFile != "!") { + fout.open(outputFile.c_str()); + if (!fout) { + ERROR("Could not open file for writing.", exception); + } else { + os = &fout; + } + } + + + Spacer* sp; + for (unsigned int i = 0; i < prot.sizeProtein(); i++) { + + sp = prot.getSpacer(i); + + // Write the sequence + 
SeqSaver ss(*os); + if (!chi) + ss.setWriteChi(false); + sp->save(ss); + } + + return 0; +} + diff --git a/Biopool/APPS/CifCorrector.cc b/Biopool/APPS/CifCorrector.cc new file mode 100644 index 0000000..e0374aa --- /dev/null +++ b/Biopool/APPS/CifCorrector.cc @@ -0,0 +1,56 @@ +/* + * File: CifCorrector.cc + * Author: marco + * + * Created on 12 giugno 2015, 18.48 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; + +/* + * + */ +int main(int argc, char** argv) { + + if (argc != 2) { + cout << "Cif Corrector $Revision: 0.1 $ -- adds missing oxygen atoms to " + << "protein structure backbones" << endl; + cout << " Usage: \t\t CifCorrector \n"; + return 1; + }; + + Spacer sp; + ifstream inFile(argv[1]); + if (!inFile) + ERROR("File not found.", exception); + + CifLoader il(inFile); + sp.load(il); + + for (unsigned int i = 0; i < sp.sizeAmino(); i++) + sp.getAmino(i).addMissingO(); + + ofstream outFile2(argv[1]); + + if (!outFile2) + ERROR("Couldn't write file.", exception); + + CifSaver pss2(outFile2); + sp.save(pss2); + + return 0; +} + diff --git a/Biopool/APPS/CifEditor.cc b/Biopool/APPS/CifEditor.cc new file mode 100644 index 0000000..dfe12d6 --- /dev/null +++ b/Biopool/APPS/CifEditor.cc @@ -0,0 +1,75 @@ +/* + * File: CifEditor.cc + * Author: marco + * + * Created on 8 giugno 2015, 12.32 + */ + +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; + +int main(int argc, char** argv) { + if (argc != 3) { + cout << "CIF Editor $Revision: 0.1 $ -- allows sequential manipulation of " + << "protein structure backbone torsion angles" << endl; + cout << " Usage: \t\t CifEditor \n"; + return 1; + }; + + ifstream inFile(argv[1]); + if (!inFile) + ERROR("Input file not found.", exception); + + CifLoader cl(inFile); + Spacer sp; + sp.load(cl); + + cout << "Editing " << argv[1] << " output goes to " << argv[2] << "\n"; + + 
int aaid = -1; + do { + cout << "Aminoacid# or -1: "; + cin >> aaid; + if (aaid <= -1) { + cout << "Bye.\n"; + return 0; + }; + + if (aaid >= (int) sp.sizeAmino()) { + cout << "\t Invalid aa#!\n"; + } else { + double newVal = 999; + cout << "\t " << aaid << " " << sp.getAmino(aaid).getType() << "\n"; + cout << "Phi= " << sp.getAmino(aaid).getPhi() + << "\t new phi or 999: "; + cin >> newVal; + if (newVal != 999) + sp.getAmino(aaid).setPhi(newVal); + cout << "Psi= " << sp.getAmino(aaid).getPsi() + << "\t new psi or 999: "; + cin >> newVal; + if (newVal != 999) + sp.getAmino(aaid).setPsi(newVal); + cout << "Omega= " << sp.getAmino(aaid).getOmega() + << "\t new omega or 999: "; + cin >> newVal; + if (newVal != 999) + sp.getAmino(aaid).setOmega(newVal); + + ofstream outFile2(argv[2]); + + if (!outFile2) + ERROR("Couldn't write output file.", exception); + + CifSaver pss2(outFile2); + + sp.save(pss2); + }; + } while (aaid != -1); + + return 0; +} + diff --git a/Biopool/APPS/CifMover.cc b/Biopool/APPS/CifMover.cc new file mode 100644 index 0000000..83d1aba --- /dev/null +++ b/Biopool/APPS/CifMover.cc @@ -0,0 +1,193 @@ +/* + * File: cifMover.cc + * Author: marco + * + * Created on 12 giugno 2015, 18.39 + */ + +#include +#include +#include +#include +#include +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; + +// minimum distance between neighbouring CAs +const double LAMBDA = 1.5; + +void sShowHelp() { + cout << "CIF Shifter\n" + << "Allows to move all residues in file by fixed offset.\n" + << " Options: \n" + << "\t-i \t\t Input CIF file\n" + << "\t-o \t\t Output CIF file\n" + << "\t[-r ] \t\t Repeat unit length (default = no rotation)\n" + << "\t[-l ] \t\t Angle lambda factor (default = 0.1)\n" + << "\t[-s ] \t\t Start residue of fragment (default = first)\n" + << "\t[-e ] \t\t End residue of fragment (default = last)\n" + << "\n"; +} + +void sAddLine() { + cout << "-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-\n"; 
+} + +int main(int argc, char** argv) { + + // -------------------------------------------------- + // 0. treat options + // -------------------------------------------------- + + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + }; + + string inputFile, outputFile; + unsigned int startOffset, endOffset, repeatLength; + double lambdaAngle; + + getArg("i", inputFile, argc, argv, "!"); + getArg("o", outputFile, argc, argv, "!"); + getArg("s", startOffset, argc, argv, 0); + getArg("e", endOffset, argc, argv, 9999); + getArg("r", repeatLength, argc, argv, 9999); + getArg("l", lambdaAngle, argc, argv, 0.1); + + vgVector3 transOff; + for (unsigned int i = 0; i < 3; i++) + transOff[i] = 0.0; + + if ((inputFile == "!") || (outputFile == "!")) { + cout << "Missing file specification. Aborting. (-h for help)" << endl; + return -1; + } + + // -------------------------------------------------- + // 1. read structure + // -------------------------------------------------- + + Spacer sp; + + ifstream inFile(inputFile.c_str()); + + if (!inFile) + ERROR("File does not exist.\n", exception); + + CifLoader cl(inFile); + + sp.load(cl); + inFile.close(); + + + endOffset = sp.getIndexFromPdbNumber(endOffset); + if (startOffset > 0) + startOffset = sp.getIndexFromPdbNumber(startOffset); + + // -------------------------------------------------- + // 2. 
rotate spacer + // -------------------------------------------------- + + if (repeatLength < 9999) { + IntCoordConverter icc; + + // find offset + vgVector3 firstA = sp.getAmino(endOffset + - (3 * repeatLength / 4))[CA].getCoords() + - sp.getAmino(endOffset)[CA].getCoords(); + + vgVector3 firstB = sp.getAmino(endOffset + - (1 * repeatLength / 4))[CA].getCoords() + - sp.getAmino(endOffset)[CA].getCoords(); + + vgVector3 firstNorm = (firstA.normalize()).cross(firstB.normalize()); + + vgVector3 secondA = sp.getAmino(endOffset + repeatLength + - (3 * repeatLength / 4))[CA].getCoords() + - sp.getAmino(endOffset)[CA].getCoords(); + + vgVector3 secondB = sp.getAmino(endOffset + repeatLength + - (1 * repeatLength / 4))[CA].getCoords() + - sp.getAmino(endOffset)[CA].getCoords(); + + vgVector3 secondNorm = (secondA.normalize()).cross(secondB.normalize()); + + firstNorm.normalize(); + secondNorm.normalize(); + + double scalar = icc.getAngle(firstNorm, secondNorm) * lambdaAngle; + + cout << "Scalar = " << setw(5) << setprecision(3) + << RAD2DEG * scalar / lambdaAngle << "\n"; + + + vgVector3 axis = (firstNorm.normalize()).cross(secondNorm.normalize()); + vgMatrix3 res = vgMatrix3::createRotationMatrix(axis, scalar); + + sp.getAmino(startOffset)[N].addRot(res); + } + + // -------------------------------------------------- + // 3. 
translate spacer + // -------------------------------------------------- + + if (endOffset < sp.sizeAmino() - 1) { + sp.getAmino(endOffset).unbindOut(sp.getAmino(endOffset + 1)); + sp.getAmino(endOffset)[C].unbindOut(sp.getAmino(endOffset + 1)[N]); + + // find offset + vgVector3 first = sp.getAmino(endOffset)[C].getCoords(); + vgVector3 second = sp.getAmino(endOffset + 1)[N].getCoords(); + + double d = sp.getAmino(endOffset)[C].distance( + sp.getAmino(endOffset + 1)[N]); + + double frac = (d - LAMBDA) / d; + + for (unsigned int i = 0; i < 3; i++) + transOff[i] = (second[i] - first[i]) * frac; + } else { + if (startOffset != 0) + ERROR("Both start and end offset are undefined.", exception); + + endOffset = sp.sizeAmino() - 1; + + // NB: unbind has to be reversed after moving the atoms if the model + // is to be used further in the same program + sp.getAmino(startOffset - 1).unbindOut(sp.getAmino(startOffset)); + sp.getAmino(startOffset - 1)[C].unbindOut(sp.getAmino(startOffset)[N]); + + // find offset + vgVector3 first = sp.getAmino(startOffset + 1)[N].getCoords(); + vgVector3 second = sp.getAmino(startOffset)[C].getCoords(); + + double d = sp.getAmino(startOffset + 1)[N].distance( + sp.getAmino(startOffset)[C]); + + double frac = (d - LAMBDA) / d; + + for (unsigned int i = 0; i < 3; i++) + transOff[i] = (second[i] - first[i]) * frac; + } + + sp.getAmino(startOffset)[N].addTrans(transOff); + + // -------------------------------------------------- + // 4. 
write model to disk + // -------------------------------------------------- + + ofstream outFile(outputFile.c_str()); + if (!outFile) + ERROR("File not found.", exception); + CifSaver cs(outFile); + + sp.save(cs); + outFile.close(); + + return 0; +} + diff --git a/Biopool/APPS/CifSecondary.cc b/Biopool/APPS/CifSecondary.cc new file mode 100644 index 0000000..1f9c132 --- /dev/null +++ b/Biopool/APPS/CifSecondary.cc @@ -0,0 +1,198 @@ +/* + * File: CifSecondary.cc + * Author: marco + * + * Created on 8 giugno 2015, 12.46 + */ + +#include +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; + +void sShowHelp() { + cout << "CIF Secondary $Revision: 0.1 $ -- calculate the secondary structure\n" + << "(torsion angles) protein structure backbone torsion angles\n" + << "\tOptions: \n" + << "\t-i \t Input CIF file\n" + << "\t-o \t Output to file(the chain letter is appended)\n" + << "\t-c \t Chain identifier to read(default is all chains)\n" + << "\t-m \t Model number to read (NMR only, default is first model)\n" + << "\t-s <1,2,3> \t SS calculation(default 3): 1 = CIF fields, 2 = torsion angles, 3 = DSSP\n" + << "\t \t 1,2:\t H = helix, E = extended(strand,sheet), . = other.\n" + << "\t \t 3:\t H = alpha-helix, E = sheet, B = bridge,\n" + << "\t \t G = 3-10-helix, I = pi-helix, T = n-Turn, S = bend\n" + << "\t--ext \t Extended output (default false). 
Write the type of aminoacid:\n" + << "\t \t N = negative charge, \t P = positive charge\n" + << "\t \t h = hydrophilic, \t + = hydrophobic\n" + << "\t \t , = neutral (hydrophobic)\n" + << "\t-v \t verbose output\n\n" + << "\tWhen parsing multiple chains, one file for each chain is created\n\n"; +} + +void writeOutput(Spacer* sp, bool ext, int ssType, ostream& os) { + + if (ext) { + for (unsigned int i = 0; i <= sp->sizeAmino(); i++) { + if ((i + 1) % 10 == 0) + os << setw(1) << (((i + 1) % 100) / 10); + else + os << " "; + if ((i + 1) % 60 == 0) + os << "\n"; + } + os << "\n"; + + for (unsigned int i = 0; i < sp->sizeAmino(); i++) { + os << sp->getAmino(i).getType1L(); + if ((i + 1) % 60 == 0) + os << "\n"; + } + os << "\n"; + + for (unsigned int i = 0; i < sp->sizeAmino(); i++) { + switch (sp->getAmino(i).getCode()) { + case ASP: + case GLU: + os << "P"; + break; + case LYS: + case ARG: + os << "N"; + break; + case ASN: + case GLN: + case SER: + case THR: + case HIS: + os << "h"; + break; + case VAL: + case LEU: + case ILE: + os << "+"; + break; + default: + os << ","; + }; + if ((i + 1) % 60 == 0) + os << "\n"; + } + os << "\n"; + } + if (ssType != 3) { + for (unsigned int i = 0; i < sp->sizeAmino(); i++) { + switch (sp->getAmino(i).getState()) { + case HELIX: + os << "H"; + break; + case STRAND: + os << "E"; + break; + default: + os << "."; + }; + if ((i + 1) % 60 == 0) + os << "\n"; + } + } else { + vector > ss = sp->getDSSP(); + for (unsigned int i = 0; i < ss.size(); i++) { + if (!ss[i].empty()) { + for (set ::iterator it = ss[i].begin(); it != ss[i].end(); ++it) { + os << (*it); + } + } else { + os << "."; + } + if ((i + 1) % 60 == 0) + os << "\n"; + } + } + os << "\n"; +} + +int main(int argc, char** argv) { + + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + } + + string inputFile, outputFile, chainID; + unsigned int modelNum; + unsigned int ssType; + bool extendedOutput, all; + + getArg("i", inputFile, argc, argv, "!"); + getArg("o", 
outputFile, argc, argv, "!"); + getArg("c", chainID, argc, argv, "!"); + getArg("m", modelNum, argc, argv, 999); + getArg("s", ssType, argc, argv, 3); + all = getArg("-all", argc, argv); + extendedOutput = getArg("-ext", argc, argv); + + // Check input file + if (inputFile == "!") { + cout << "Missing input file specification. Aborting. (-h for help)" << endl; + return -1; + } + ifstream inFile(inputFile.c_str()); + if (!inFile) + ERROR("Input file not found.", exception); + + CifLoader cl(inFile); + + // Set CifLoader variables + cl.setModel(modelNum); + cl.setNoHetAtoms(); + + if (!getArg("v", argc, argv)) { + cl.setNoVerbose(); + } + + // Check chain args + if ((chainID != "!") && all) { + ERROR("You can use --all or -c, not both", error); + } + // User selected chain + if (chainID != "!") { + if (chainID.size() > 1) + ERROR("You can choose only 1 chain", error); + cl.setChain(chainID[0]); + }// All chains + else if (all) { + cl.setAllChains(); + }// First chain + else { + cl.setChain(cl.getAllChains()[0]); + } + + // Load the protein object + Protein prot; + prot.load(cl); + + // Open the proper output stream (file or stdout) + std::ostream* os = &cout; + std::ofstream fout; + if (outputFile != "!") { + fout.open(outputFile.c_str()); + if (!fout) { + ERROR("Could not open file for writing.", exception); + } else { + os = &fout; + } + } + + Spacer* sp; + for (unsigned int i = 0; i < prot.sizeProtein(); i++) { + + sp = prot.getSpacer(i); + writeOutput(sp, extendedOutput, ssType, (*os)); + } + + return 0; +} + diff --git a/Biopool/APPS/CifShifter.cc b/Biopool/APPS/CifShifter.cc new file mode 100644 index 0000000..117a56a --- /dev/null +++ b/Biopool/APPS/CifShifter.cc @@ -0,0 +1,131 @@ +/* + * File: cifShifter.cc + * Author: marco + * + * Created on 12 giugno 2015, 18.45 + */ + +#include +#include +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; + +void sShowHelp() { + cout << "CIF Shifter\n" + << "Allows to shift all residues in 
file by fixed offset.\n" + << " Options: \n" + << "\t-i \t\t Input CIF file\n" + << "\t-o \t\t Output CIF file\n" + << "\t[-p ] \t\t Positive *residue* offset\n" + << "\t[-n ] \t\t Negative *residue* offset\n" + << "\t[-P ] \t\t Positive *atom* offset\n" + << "\t[-N ] \t\t Negative *atom* offset\n" + << "\t[--nohydrogen] \t\t Skip hydrogen atoms\n" + << "\t[--renum] \t\t Reset residue numbering starting from 1\n" + << "\n"; +} + +void sAddLine() { + cout << "-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-x-\n"; +} + +void sRenumberAtoms(Spacer& sp) { + unsigned int counter = 1; + for (unsigned int i = 0; i < sp.sizeAmino(); i++) + for (unsigned int j = 0; j < sp.getAmino(i).size(); j++) { + sp.getAmino(i)[j].setNumber(counter); + counter++; + } +} + +int main(int argc, char** argv) { + + // -------------------------------------------------- + // 0. treat options + // -------------------------------------------------- + + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + }; + + string inputFile, outputFile; + int offset, offsetAtom, tmp; + getArg("i", inputFile, argc, argv, "!"); + getArg("o", outputFile, argc, argv, "!"); + + getArg("p", offset, argc, argv, 0); + getArg("n", tmp, argc, argv, 0); + offset -= tmp; + getArg("P", offsetAtom, argc, argv, 0); + getArg("N", tmp, argc, argv, 0); + offsetAtom -= tmp; + + bool noHydrogen = getArg("-nohydrogen", argc, argv); + bool renumber = getArg("-renum", argc, argv); + + if ((inputFile == "!") || (outputFile == "!")) { + cout << "Missing file specification. Aborting. (-h for help)" << endl; + return -1; + } + + if ((offset == 0) && (!renumber) && (!noHydrogen)) { + cout << "Warning: Offset is zero. Mistake? \n"; + } + // -------------------------------------------------- + // 1. 
read structure + // -------------------------------------------------- + + Spacer sp; + + ifstream inFile(inputFile.c_str()); + + if (!inFile) + ERROR("File does not exist.\n", exception); + + CifLoader cl(inFile); + + if (noHydrogen) + cl.setNoHAtoms(); + + sp.load(cl); + inFile.close(); + + // -------------------------------------------------- + // 1.1 renumber residues (if necessary) + // -------------------------------------------------- + + if (renumber) { + cout << "Renumbering...\n"; + sp.setStartOffset(1); + sp.removeAllGaps(); + sRenumberAtoms(sp); + } + + // -------------------------------------------------- + // 2. shift offset + // -------------------------------------------------- + + sp.setStartOffset(offset + sp.getStartOffset()); + + if (offsetAtom != 0) + sp.setAtomStartOffset(offsetAtom + sp.getAtomStartOffset()); + + // -------------------------------------------------- + // 3. write model to disk + // -------------------------------------------------- + + ofstream outFile(outputFile.c_str()); + if (!outFile) + ERROR("File not found.", exception); + CifSaver cs(outFile); + + sp.save(cs); + outFile.close(); + + return 0; +} + diff --git a/Biopool/APPS/Makefile b/Biopool/APPS/Makefile index dd121ff..4eba391 100644 --- a/Biopool/APPS/Makefile +++ b/Biopool/APPS/Makefile @@ -27,16 +27,20 @@ INC_PATH = -I. -I../../tools/ -I../../Biopool/Sources -I../../Energy/Sources -I. 
# SOURCES = PdbCorrector.cc PdbSecondary.cc PdbEditor.cc Pdb2Seq.cc pdb2secondary.cc pdbshifter.cc \ - pdbMover.cc CifEditor.cc + pdbMover.cc CifEditor.cc CifSecondary.cc Cif2Secondary.cc CifMover.cc \ + CifShifter.cc CifCorrector.cc Cif2Seq.cc OBJECTS = PdbCorrector.o PdbSecondary.o PdbEditor.o Pdb2Seq.o pdb2secondary.o pdbshifter.o \ - pdbMover.o CifEditor.o + pdbMover.o CifEditor.o CifSecondary.o Cif2Secondary.o CifMover.o \ + CifShifter.o CifCorrector.o Cif2Seq.o TARGETS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ - pdbMover CifEditor + pdbMover CifEditor CifSecondary Cif2Secondary CifMover CifShifter \ + CifCorrector Cif2Seq EXECS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ - pdbMover CifEditor + pdbMover CifEditor CifSecondary Cif2Secondary CifMover CifShifter \ + CifCorrector Cif2Seq LIBRARY = APPSlibBiopool.a diff --git a/Biopool/Tests/TestCifStructure.h b/Biopool/Tests/TestCifStructure.h new file mode 100644 index 0000000..e985d94 --- /dev/null +++ b/Biopool/Tests/TestCifStructure.h @@ -0,0 +1,107 @@ +/* + * File: TestCifStructure.h + * Author: marco + * + * Created on 10 giugno 2015, 10.37 + */ + +#ifndef TESTCIFSTRUCTURE_H +#define TESTCIFSTRUCTURE_H + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace std; +using namespace Victor::Biopool; +using namespace CppUnit; + +class TestCifStructure : public TestFixture { +public: + + TestCifStructure() { + } + + virtual ~TestCifStructure() { + } + + static Test* suite() { + TestSuite* suiteOfTests = new TestSuite("TestCifLoader"); + + suiteOfTests->addTest(new TestCaller("Get group column number", + &TestCifStructure::testGetGroupColumnNumber)); + + suiteOfTests->addTest(new TestCaller("Get group field", + &TestCifStructure::testGetGroupField)); + + suiteOfTests->addTest(new TestCaller("Get inline field", + &TestCifStructure::testGetInlineField)); + + return suiteOfTests; + } + + 
void setUp() { + // inizialize CifLoader + string path = getenv("VICTOR_ROOT"); + string input = path + "Biopool/Tests/data/modelTest.cif"; + inFile = new ifstream(input.c_str()); + cs = new CifStructure(*inFile); + + // initialize test parameters + idColumn = 1; + atomId = 1; + headerLine = "_entry.id 25C8 "; + headerId = "25C8"; + } + + void tearDown() { + delete cs; + delete inFile; + } + +private: + + void testGetGroupField() { + string line = readLine(cs->getInput()); + cs->parseGroup("atom", line); + int field = stoiDEF(cs->getGroupField("atom", line, + cs->getGroupColumnNumber("atom", "atom id"))); + + CPPUNIT_ASSERT_EQUAL(atomId, field); + } + + void testGetGroupColumnNumber() { + string line = readLine(cs->getInput()); + cs->parseGroup("atom", line); + int col = cs->getGroupColumnNumber("atom", "atom id"); + + CPPUNIT_ASSERT_EQUAL(idColumn, col); + } + + void testGetInlineField() { + string field = cs->getInlineField(headerLine); + + CPPUNIT_ASSERT_EQUAL(headerId, field); + } + + int idColumn; + int atomId; + string headerLine; + string headerId; + + ifstream* inFile; + CifStructure* cs; + +}; + +#endif /* TESTCIFSTRUCTURE_H */ + From 129bfc5dd538fb591220d28e2cf3c0a86577cd8c Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Sat, 13 Jun 2015 18:41:30 +0200 Subject: [PATCH 12/16] all APPS working --- Biopool/APPS/Cif2Secondary.cc | 148 +++++++++++++++------------------- Biopool/APPS/CifEditor.cc | 12 +-- Biopool/APPS/CifMover.cc | 15 ++-- Biopool/APPS/CifSecondary.cc | 10 +++ Biopool/APPS/CifShifter.cc | 7 +- 5 files changed, 95 insertions(+), 97 deletions(-) diff --git a/Biopool/APPS/Cif2Secondary.cc b/Biopool/APPS/Cif2Secondary.cc index 0630af3..d18d836 100644 --- a/Biopool/APPS/Cif2Secondary.cc +++ b/Biopool/APPS/Cif2Secondary.cc @@ -5,28 +5,21 @@ * Created on 12 giugno 2015, 17.42 */ +#include +#include +#include #include #include -#include -#include -#include using namespace Victor; using namespace Victor::Biopool; void sShowHelp() { - cout << "Cif 2 
Seq $Revision: 0.1 $ -- converts a CIF file into SEQ\n" - << "(torsion angles) protein structure backbone torsion angles\n" - << " Options: \n" - << "\t-i \t Input CIF file\n" - << "\t-o \t Output to file (default stdout)\n" - << "\t-c \t Chain identifier to read\n" - << "\t--all \t All chains\n" - << "\t-m \t Model number to read (NMR only, default is first model)\n" - << "\t--chi \t Write Chi angles (default false)\n" - << "\t-v \t verbose output\n\n" - << "\tIf both -c and --all are missing, only the first chain is processed.\n\n"; - + cout << "Pdb 2 Secondary Structure converter\n" + << "\t H = helix, \t E = extended (strand, sheet), \t . = other.\n" + << " Options: \n" + << "\t-i \t\t Input file for PDB structure\n" + << "\n"; } int main(int argc, char** argv) { @@ -34,86 +27,77 @@ int main(int argc, char** argv) { if (getArg("h", argc, argv)) { sShowHelp(); return 1; - } - - string inputFile, outputFile, chainID; - unsigned int modelNum; - bool chi, all; - + }; + vector allCh; + string chainID = "!"; + string inputFile; getArg("i", inputFile, argc, argv, "!"); - getArg("o", outputFile, argc, argv, "!"); - getArg("c", chainID, argc, argv, "!"); - getArg("m", modelNum, argc, argv, 999); - all = getArg("-all", argc, argv); - chi = getArg("-chi", argc, argv); - // Check input file if (inputFile == "!") { - cout << "Missing input file specification. Aborting. (-h for help)" << endl; + cout << "Missing file specification. Aborting. 
(-h for help)" << endl; return -1; } + ifstream inFile(inputFile.c_str()); - if (!inFile) - ERROR("Input file not found.", exception); - - - CifLoader cl(inFile); - - // Set CifLoader variables - cl.setModel(modelNum); - cl.setNoHAtoms(); - cl.setNoHetAtoms(); - cl.setNoSecondary(); - if (!getArg("v", argc, argv)) { - cl.setNoVerbose(); + if (!inFile) { + ERROR("File not found.", exception); } - - // Check chain args - if ((chainID != "!") && all) { - ERROR("You can use --all or -c, not both", error); - } - // User selected chain - if (chainID != "!") { - if (chainID.size() > 1) - ERROR("You can choose only 1 chain", error); - cl.setChain(chainID[0]); - }// All chains - else if (all) { - cl.setAllChains(); - }// First chain - else { - cl.setChain(cl.getAllChains()[0]); + CifLoader il(inFile); + il.setNoHAtoms(); + allCh = il.getAllChains(); + + for (unsigned int i = 0; i < allCh.size(); i++) { + cout << "\t," << allCh[i] << ","; } + cout << "\n"; - // Load the protein object - Protein prot; - prot.load(cl); - - // Open the proper output stream (file or stdout) - std::ostream* os = &cout; - std::ofstream fout; - if (outputFile != "!") { - fout.open(outputFile.c_str()); - if (!fout) { - ERROR("Could not open file for writing.", exception); - } else { - os = &fout; + /*check on validity of chain: + if user select a chain then check validity + else select first valid one by default*/ + if (chainID != "!") { + bool validChain = false; + for (unsigned int i = 0; i < allCh.size(); i++) { + if (allCh[i] == chainID[0]) { + il.setChain(chainID[0]); + cout << "Loading chain " << chainID << "\n"; + validChain = true; + break; + } + } + if (!validChain) { + cout << "Chain " << chainID << " is not available\n"; + return -1; } - } - - - Spacer* sp; - for (unsigned int i = 0; i < prot.sizeProtein(); i++) { - - sp = prot.getSpacer(i); - // Write the sequence - SeqSaver ss(*os); - if (!chi) - ss.setWriteChi(false); - sp->save(ss); + } else { + chainID[0] = allCh[0]; + cout << "Using 
chain " << chainID << "\n"; + } + + Protein prot; + prot.load(il); + Spacer *sp; + sp = prot.getSpacer(chainID[0]); + + allCh = il.getAllChains(); + cout << ">" << inputFile << "\n"; + + for (unsigned int i = 0; i < sp->sizeAmino(); i++) { + switch (sp->getAmino(i).getState()) { + case HELIX: + cout << "H"; + break; + case STRAND: + cout << "E"; + break; + default: + cout << "."; + }; + if ((i + 1) % 60 == 0) + cout << "\n"; } + cout << "\n"; return 0; } diff --git a/Biopool/APPS/CifEditor.cc b/Biopool/APPS/CifEditor.cc index dfe12d6..409c5ac 100644 --- a/Biopool/APPS/CifEditor.cc +++ b/Biopool/APPS/CifEditor.cc @@ -17,15 +17,17 @@ int main(int argc, char** argv) { << "protein structure backbone torsion angles" << endl; cout << " Usage: \t\t CifEditor \n"; return 1; - }; + } ifstream inFile(argv[1]); if (!inFile) ERROR("Input file not found.", exception); CifLoader cl(inFile); - Spacer sp; - sp.load(cl); + Protein prot; + prot.load(cl); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); cout << "Editing " << argv[1] << " output goes to " << argv[2] << "\n"; @@ -36,7 +38,7 @@ int main(int argc, char** argv) { if (aaid <= -1) { cout << "Bye.\n"; return 0; - }; + } if (aaid >= (int) sp.sizeAmino()) { cout << "\t Invalid aa#!\n"; @@ -67,7 +69,7 @@ int main(int argc, char** argv) { CifSaver pss2(outFile2); sp.save(pss2); - }; + } } while (aaid != -1); return 0; diff --git a/Biopool/APPS/CifMover.cc b/Biopool/APPS/CifMover.cc index 83d1aba..6dcc9e4 100644 --- a/Biopool/APPS/CifMover.cc +++ b/Biopool/APPS/CifMover.cc @@ -71,16 +71,17 @@ int main(int argc, char** argv) { // 1. 
read structure // -------------------------------------------------- - Spacer sp; - ifstream inFile(inputFile.c_str()); if (!inFile) ERROR("File does not exist.\n", exception); - + CifLoader cl(inFile); - - sp.load(cl); + Protein prot; + prot.load(cl); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); + inFile.close(); @@ -181,10 +182,10 @@ int main(int argc, char** argv) { // -------------------------------------------------- ofstream outFile(outputFile.c_str()); - if (!outFile) + if (!outFile) { ERROR("File not found.", exception); + } CifSaver cs(outFile); - sp.save(cs); outFile.close(); diff --git a/Biopool/APPS/CifSecondary.cc b/Biopool/APPS/CifSecondary.cc index 1f9c132..ca21c59 100644 --- a/Biopool/APPS/CifSecondary.cc +++ b/Biopool/APPS/CifSecondary.cc @@ -5,8 +5,18 @@ * Created on 8 giugno 2015, 12.46 */ +#include +#include +#include +#include +#include #include #include +#include +#include +#include +#include +#include #include using namespace Victor; diff --git a/Biopool/APPS/CifShifter.cc b/Biopool/APPS/CifShifter.cc index 117a56a..c3faf8a 100644 --- a/Biopool/APPS/CifShifter.cc +++ b/Biopool/APPS/CifShifter.cc @@ -79,8 +79,6 @@ int main(int argc, char** argv) { // 1. 
read structure // -------------------------------------------------- - Spacer sp; - ifstream inFile(inputFile.c_str()); if (!inFile) @@ -91,7 +89,10 @@ int main(int argc, char** argv) { if (noHydrogen) cl.setNoHAtoms(); - sp.load(cl); + Protein prot; + prot.load(cl); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); inFile.close(); // -------------------------------------------------- From ae1f7534535ee6816f2aacc29496759ef3d3622f Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Sat, 13 Jun 2015 18:51:52 +0200 Subject: [PATCH 13/16] pdb APPS debugged --- Biopool/APPS/CifCorrector.cc | 6 ++++-- Biopool/APPS/PdbCorrector.cc | 6 ++++-- Biopool/APPS/PdbEditor.cc | 6 ++++-- Biopool/APPS/pdbMover.cc | 8 ++++---- Biopool/APPS/pdbshifter.cc | 7 ++++--- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Biopool/APPS/CifCorrector.cc b/Biopool/APPS/CifCorrector.cc index e0374aa..4d78a31 100644 --- a/Biopool/APPS/CifCorrector.cc +++ b/Biopool/APPS/CifCorrector.cc @@ -32,13 +32,15 @@ int main(int argc, char** argv) { return 1; }; - Spacer sp; ifstream inFile(argv[1]); if (!inFile) ERROR("File not found.", exception); CifLoader il(inFile); - sp.load(il); + Protein prot; + prot.load(il); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); for (unsigned int i = 0; i < sp.sizeAmino(); i++) sp.getAmino(i).addMissingO(); diff --git a/Biopool/APPS/PdbCorrector.cc b/Biopool/APPS/PdbCorrector.cc index 83e6b04..593cb61 100644 --- a/Biopool/APPS/PdbCorrector.cc +++ b/Biopool/APPS/PdbCorrector.cc @@ -36,13 +36,15 @@ int main(int nArgs, char* argv[]) { return 1; }; - Spacer sp; ifstream inFile(argv[1]); if (!inFile) ERROR("File not found.", exception); PdbLoader il(inFile); - sp.load(il); + Protein prot; + prot.load(il); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); for (unsigned int i = 0; i < sp.sizeAmino(); i++) sp.getAmino(i).addMissingO(); diff --git a/Biopool/APPS/PdbEditor.cc b/Biopool/APPS/PdbEditor.cc index 33ab8fd..a952ee4 
100644 --- a/Biopool/APPS/PdbEditor.cc +++ b/Biopool/APPS/PdbEditor.cc @@ -31,13 +31,15 @@ int main(int nArgs, char* argv[]) { return 1; }; - Spacer sp; ifstream inFile(argv[1]); if (!inFile) ERROR("File not found.", exception); PdbLoader il(inFile); - sp.load(il); + Protein prot; + prot.load(il); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); cout << "Editing " << argv[1] << " output goes to " << argv[2] << "\n"; diff --git a/Biopool/APPS/pdbMover.cc b/Biopool/APPS/pdbMover.cc index 21d6b35..95a62f2 100644 --- a/Biopool/APPS/pdbMover.cc +++ b/Biopool/APPS/pdbMover.cc @@ -78,16 +78,16 @@ int main(int nArgs, char* argv[]) { // 1. read structure // -------------------------------------------------- - Spacer sp; - ifstream inFile(inputFile.c_str()); if (!inFile) ERROR("File does not exist.\n", exception); PdbLoader pl(inFile); - - sp.load(pl); + Protein prot; + prot.load(pl); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); inFile.close(); diff --git a/Biopool/APPS/pdbshifter.cc b/Biopool/APPS/pdbshifter.cc index 8a6d3db..07891d3 100644 --- a/Biopool/APPS/pdbshifter.cc +++ b/Biopool/APPS/pdbshifter.cc @@ -87,8 +87,6 @@ int main(int nArgs, char* argv[]) { // 1. 
read structure // -------------------------------------------------- - Spacer sp; - ifstream inFile(inputFile.c_str()); if (!inFile) @@ -99,7 +97,10 @@ int main(int nArgs, char* argv[]) { if (noHydrogen) pl.setNoHAtoms(); - sp.load(pl); + Protein prot; + prot.load(pl); + unsigned int zero = 0; + Spacer sp = *(prot.getSpacer(zero)); inFile.close(); // -------------------------------------------------- From 7510164d0793d448fbd02940bbc09285c4ced32e Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Sun, 14 Jun 2015 21:17:38 +0200 Subject: [PATCH 14/16] debug --- Biopool/APPS/Cif2Secondary.cc | 19 +++-- Biopool/APPS/Cif2Seq.cc | 19 +++-- Biopool/APPS/CifCorrector.cc | 19 +++-- Biopool/APPS/CifEditor.cc | 19 +++-- Biopool/APPS/CifMover.cc | 19 +++-- Biopool/APPS/CifReaderWriter.cc | 122 +++++++++++++++++++++++++++++++ Biopool/APPS/CifSecondary.cc | 19 +++-- Biopool/APPS/CifShifter.cc | 19 +++-- Biopool/APPS/Makefile | 8 +- Biopool/Sources/Atom.cc | 2 +- Biopool/Sources/Atom.h | 10 +-- Biopool/Sources/CifLoader.cc | 34 +++++---- Biopool/Sources/CifLoader.h | 19 +++-- Biopool/Sources/CifSaver.cc | 43 +++++++++-- Biopool/Sources/CifSaver.h | 21 ++++-- Biopool/Sources/CifStructure.cc | 85 +++++---------------- Biopool/Sources/CifStructure.h | 38 ++++++---- Biopool/Tests/TestCifLoader.h | 19 +++-- Biopool/Tests/TestCifStructure.h | 19 +++-- 19 files changed, 382 insertions(+), 171 deletions(-) create mode 100644 Biopool/APPS/CifReaderWriter.cc diff --git a/Biopool/APPS/Cif2Secondary.cc b/Biopool/APPS/Cif2Secondary.cc index d18d836..b8e2e50 100644 --- a/Biopool/APPS/Cif2Secondary.cc +++ b/Biopool/APPS/Cif2Secondary.cc @@ -1,8 +1,17 @@ -/* - * File: cif2secondary.cc - * Author: marco - * - * Created on 12 giugno 2015, 17.42 +/* This file is part of Victor. 
+ + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #include diff --git a/Biopool/APPS/Cif2Seq.cc b/Biopool/APPS/Cif2Seq.cc index 5c88e83..b784830 100644 --- a/Biopool/APPS/Cif2Seq.cc +++ b/Biopool/APPS/Cif2Seq.cc @@ -1,8 +1,17 @@ -/* - * File: Cif2Seq.cc - * Author: marco - * - * Created on 12 giugno 2015, 18.51 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #include diff --git a/Biopool/APPS/CifCorrector.cc b/Biopool/APPS/CifCorrector.cc index 4d78a31..e062dfa 100644 --- a/Biopool/APPS/CifCorrector.cc +++ b/Biopool/APPS/CifCorrector.cc @@ -1,8 +1,17 @@ -/* - * File: CifCorrector.cc - * Author: marco - * - * Created on 12 giugno 2015, 18.48 +/* This file is part of Victor. 
+ + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #include diff --git a/Biopool/APPS/CifEditor.cc b/Biopool/APPS/CifEditor.cc index 409c5ac..39f7afd 100644 --- a/Biopool/APPS/CifEditor.cc +++ b/Biopool/APPS/CifEditor.cc @@ -1,8 +1,17 @@ -/* - * File: CifEditor.cc - * Author: marco - * - * Created on 8 giugno 2015, 12.32 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #include diff --git a/Biopool/APPS/CifMover.cc b/Biopool/APPS/CifMover.cc index 6dcc9e4..2eabd86 100644 --- a/Biopool/APPS/CifMover.cc +++ b/Biopool/APPS/CifMover.cc @@ -1,8 +1,17 @@ -/* - * File: cifMover.cc - * Author: marco - * - * Created on 12 giugno 2015, 18.39 +/* This file is part of Victor. 
+ + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #include diff --git a/Biopool/APPS/CifReaderWriter.cc b/Biopool/APPS/CifReaderWriter.cc new file mode 100644 index 0000000..d90dee6 --- /dev/null +++ b/Biopool/APPS/CifReaderWriter.cc @@ -0,0 +1,122 @@ +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace Victor; +using namespace Victor::Biopool; +using namespace std; + +void sShowHelp() { + cout << "CIF Reader / Writer \n" + << "Allows to read and write a CIF or a PDB file without modifying them.\n" + << " Options: \n" + << "\t-i \t\t Input CIF/PDB file\n" + << "\t-o \t\t Output CIF/PDB file\n" + << "\n"; +} + +int main(int argc, char** argv) { + + // -------------------------------------------------- + // 0. 
treat options + // -------------------------------------------------- + + if (getArg("h", argc, argv)) { + sShowHelp(); + return 1; + }; + + string inputFile, outputFile; + Protein prot; + + getArg("i", inputFile, argc, argv, "!"); + getArg("o", outputFile, argc, argv, "!"); + + if ((inputFile == "!") || (outputFile == "!")) { + cout << "Missing file specification. Aborting. (-h for help)" << endl; + return -1; + } + + // -------------------------------------------------- + // 1. read structure + // -------------------------------------------------- + + if (inputFile.find("pdb") != string::npos) { + ifstream inFile(inputFile.c_str()); + if (!inFile) + ERROR("File does not exist.\n", exception); + + cout << "Loading PDB file..." << endl; + + PdbLoader pl(inFile); + prot.load(pl); + + } else if (inputFile.find("cif") != string::npos) { + ifstream inFile(inputFile.c_str()); + if (!inFile) + ERROR("File does not exist.\n", exception); + + cout << "Loading CIF file..." << endl; + + CifLoader cl(inFile); + prot.load(cl); + + } else { + cout << "Uknown input file format. Aborting. (-h for help)" << endl; + return -2; + } + + // -------------------------------------------------- + // 2. write structure + // -------------------------------------------------- + + if (outputFile.find("pdb") != string::npos) { + ofstream outFile(outputFile.c_str()); + if (!outFile) + ERROR("File not found.", exception); + + cout << "Saving PDB file..." << endl; + + PdbSaver ps(outFile); + prot.save(ps); + + } else if (outputFile.find("cif") != string::npos) { + ofstream outFile(outputFile.c_str()); + if (!outFile) + ERROR("File not found.", exception); + + cout << "Saving CIF file..." << endl; + + CifSaver cs(outFile); + prot.save(cs); + + } else { + cout << "Uknown output file format. Aborting. 
(-h for help)" << endl; + return -3; + } + + return 0; +} + diff --git a/Biopool/APPS/CifSecondary.cc b/Biopool/APPS/CifSecondary.cc index ca21c59..18dfdd4 100644 --- a/Biopool/APPS/CifSecondary.cc +++ b/Biopool/APPS/CifSecondary.cc @@ -1,8 +1,17 @@ -/* - * File: CifSecondary.cc - * Author: marco - * - * Created on 8 giugno 2015, 12.46 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #include diff --git a/Biopool/APPS/CifShifter.cc b/Biopool/APPS/CifShifter.cc index c3faf8a..f25370e 100644 --- a/Biopool/APPS/CifShifter.cc +++ b/Biopool/APPS/CifShifter.cc @@ -1,8 +1,17 @@ -/* - * File: cifShifter.cc - * Author: marco - * - * Created on 12 giugno 2015, 18.45 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . 
*/ #include diff --git a/Biopool/APPS/Makefile b/Biopool/APPS/Makefile index 4eba391..d52c2b6 100644 --- a/Biopool/APPS/Makefile +++ b/Biopool/APPS/Makefile @@ -28,19 +28,19 @@ INC_PATH = -I. -I../../tools/ -I../../Biopool/Sources -I../../Energy/Sources -I. SOURCES = PdbCorrector.cc PdbSecondary.cc PdbEditor.cc Pdb2Seq.cc pdb2secondary.cc pdbshifter.cc \ pdbMover.cc CifEditor.cc CifSecondary.cc Cif2Secondary.cc CifMover.cc \ - CifShifter.cc CifCorrector.cc Cif2Seq.cc + CifShifter.cc CifCorrector.cc Cif2Seq.cc CifReaderWriter.cc OBJECTS = PdbCorrector.o PdbSecondary.o PdbEditor.o Pdb2Seq.o pdb2secondary.o pdbshifter.o \ pdbMover.o CifEditor.o CifSecondary.o Cif2Secondary.o CifMover.o \ - CifShifter.o CifCorrector.o Cif2Seq.o + CifShifter.o CifCorrector.o Cif2Seq.o CifReaderWriter.o TARGETS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ pdbMover CifEditor CifSecondary Cif2Secondary CifMover CifShifter \ - CifCorrector Cif2Seq + CifCorrector Cif2Seq CifReaderWriter EXECS = PdbCorrector PdbSecondary PdbEditor Pdb2Seq pdb2secondary pdbshifter \ pdbMover CifEditor CifSecondary Cif2Secondary CifMover CifShifter \ - CifCorrector Cif2Seq + CifCorrector Cif2Seq CifReaderWriter LIBRARY = APPSlibBiopool.a diff --git a/Biopool/Sources/Atom.cc b/Biopool/Sources/Atom.cc index 61cbe15..48fffc5 100644 --- a/Biopool/Sources/Atom.cc +++ b/Biopool/Sources/Atom.cc @@ -34,7 +34,7 @@ using namespace Victor; using namespace Victor::Biopool; */ Atom::Atom(unsigned int mI, unsigned int mO) : SimpleBond(mI, mO), superior(NULL), type(X), coords(0, 0, 0), Bfac(0.0), trans(0, 0, 0), rot(1), -modified(false), asymId('X'), entityId(0), occupancy(0.0), model(0) { +modified(false), asymId('X'), entityId("0"), occupancy(0.0), model(0) { PRINT_NAME; } diff --git a/Biopool/Sources/Atom.h b/Biopool/Sources/Atom.h index 9c4281a..2c1fa6c 100644 --- a/Biopool/Sources/Atom.h +++ b/Biopool/Sources/Atom.h @@ -55,7 +55,7 @@ namespace Victor { namespace Biopool { } char getAsymId(); - 
int getEntityId(); + string getEntityId(); double getOccupancy(); int getModel(); @@ -89,7 +89,7 @@ namespace Victor { namespace Biopool { Bfac = _b; } void setAsymId(char aId); - void setEntityId(int eId); + void setEntityId(string eId); void setOccupancy(double occ); void setModel(int mod); @@ -128,7 +128,7 @@ namespace Victor { namespace Biopool { double Bfac; // B-factor char asymId; - int entityId; + string entityId; double occupancy; int model; @@ -198,7 +198,7 @@ namespace Victor { namespace Biopool { return asymId; } - inline int Atom::getEntityId() { + inline string Atom::getEntityId() { return entityId; } @@ -313,7 +313,7 @@ namespace Victor { namespace Biopool { asymId = aId; } - inline void Atom::setEntityId(int eId) { + inline void Atom::setEntityId(string eId) { entityId = eId; } diff --git a/Biopool/Sources/CifLoader.cc b/Biopool/Sources/CifLoader.cc index 7a3d9b7..d4ad53a 100644 --- a/Biopool/Sources/CifLoader.cc +++ b/Biopool/Sources/CifLoader.cc @@ -1,8 +1,17 @@ -/* - * File: CifLoader.cc - * Author: marco - * - * Created on 3 giugno 2015, 23.20 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ // Includes: @@ -49,8 +58,6 @@ CifLoader::~CifLoader() { /** * If user selected a Model, it check validity of this choice, * otherwise it select first available chain. 
- * @param void - * @return void */ void CifLoader::checkModel() { if ((model != 999) && (model > getMaxModels())) { @@ -61,8 +68,6 @@ void CifLoader::checkModel() { /** * If user selected a chain, it check validity of this choice, * otherwise it select first available chain. - * @param void - * @return void */ void CifLoader::checkAndSetChain() { vector chainList = getAllChains(); @@ -84,7 +89,6 @@ void CifLoader::checkAndSetChain() { /** * Reads in the maximum allowed number of NMR models, zero otherwise. - * @param void */ unsigned int CifLoader::getMaxModels() { input.clear(); // reset file error flags @@ -248,10 +252,10 @@ CifLoader::loadSpacer(Spacer& sp){ /** * Parse a single line of a CIF file. * @param atomLine the whole CIF line as it is - * @param tag the first field (keyword) in a PDB line + * @param tag the first field (keyword) in a CIF line * @param lig pointer to a ligan * @param aa pointer to an amino acid - * @return Residue number read from the PDB line. + * @return Residue number read from the CIF line. */ int CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) { @@ -303,8 +307,8 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) cif->getGroupColumnNumber("atom", "atom asym")).c_str()[0]; // get entity id - int entityId = stoiDEF(cif->getGroupField("atom", atomLine, - cif->getGroupColumnNumber("atom", "atom entity"))); + string entityId = cif->getGroupField("atom", atomLine, + cif->getGroupColumnNumber("atom", "atom entity")); // get occupancy double occ = stodDEF(cif->getGroupField("atom", atomLine, @@ -370,7 +374,7 @@ CifLoader::parseCifline(string atomLine, string tag, Ligand* lig, AminoAcid* aa) } /** - * Core function for PDB file parsing. + * Core function for CIF file parsing. 
* @param prot (Protein&) */ void CifLoader::loadProtein(Protein& prot) { diff --git a/Biopool/Sources/CifLoader.h b/Biopool/Sources/CifLoader.h index fc68773..ab93bed 100644 --- a/Biopool/Sources/CifLoader.h +++ b/Biopool/Sources/CifLoader.h @@ -1,8 +1,17 @@ -/* - * File: CifLoader.h - * Author: marco - * - * Created on 3 giugno 2015, 23.20 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #ifndef CIFLOADER_H diff --git a/Biopool/Sources/CifSaver.cc b/Biopool/Sources/CifSaver.cc index d218ba7..de5fe09 100644 --- a/Biopool/Sources/CifSaver.cc +++ b/Biopool/Sources/CifSaver.cc @@ -1,8 +1,17 @@ -/* - * File: CifSaver.cc - * Author: marco - * - * Created on 3 giugno 2015, 23.20 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . 
*/ // Includes: @@ -73,14 +82,32 @@ void CifSaver::saveGroup(Group& gr) { atName += ' '; } + // if fields have default values (0 or X), assigns the CIF unknown value (?) + // or a possibly correct value + char asymId = gr[i].getAsymId(); + string entityId = gr[i].getEntityId(); + int model = gr[i].getModel(); + + if (asymId == 'X') { + asymId = '?'; + } + + if (entityId == "0") { + entityId = "?"; + } + + if (model == 0) { + model = 1; + } + output << setw(7) << left << "ATOM" << setw(6) << gr[i].getNumber() << setw(2) << atomOneLetter << setw(5) << left << atName << setw(2) << "." << setw(4) << gr.getType() << - setw(2) << gr[i].getAsymId() << - setw(2) << gr[i].getEntityId() << + setw(2) << asymId << + setw(2) << entityId << setw(4) << aminoOffset << setw(2) << "?" << setw(8) << setprecision(3) << gr[i].getCoords().x << @@ -98,7 +125,7 @@ void CifSaver::saveGroup(Group& gr) { setw(4) << gr.getType() << setw(2) << chain << setw(5) << left << atName << - setw(2) << gr[i].getModel() << + setw(2) << model << endl; atomOffset = gr[i].getNumber() + 1; diff --git a/Biopool/Sources/CifSaver.h b/Biopool/Sources/CifSaver.h index 2c26b72..49e28cf 100644 --- a/Biopool/Sources/CifSaver.h +++ b/Biopool/Sources/CifSaver.h @@ -1,8 +1,17 @@ -/* - * File: CifSaver.h - * Author: marco - * - * Created on 3 giugno 2015, 23.20 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . 
*/ #ifndef CIFSAVER_H @@ -30,7 +39,7 @@ namespace Victor { namespace Biopool { /** - * @brief Saves components (Atoms, Groups, etc.) in standard PDB format + * @brief Saves components (Atoms, Groups, etc.) in standard PDB format. * */ class CifSaver : public Saver { public: diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index 14438e9..3225a20 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -1,8 +1,17 @@ -/* - * File: CifStructure.cc - * Author: marco - * - * Created on 1 giugno 2015, 11.36 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . 
*/ #include @@ -16,35 +25,20 @@ using namespace Victor; using namespace Victor::Biopool; using namespace std; -/** - * Constructor - * @param input input file stream - * @param output output file stream - */ CifStructure::CifStructure(istream& input, ostream& output) : input(input), output(output) { - setData(); + initData(); } -/** - * Constructor - * @param output output file stream - */ CifStructure::CifStructure(ostream& output) : output(output), input(cin) { - setData(); + initData(); } -/** - * Destructor - */ CifStructure::~CifStructure() { } -/** - * Sets data members - */ -void CifStructure::setData() { +void CifStructure::initData() { header = "_entry.id"; atom = "_atom_site."; @@ -84,11 +78,6 @@ void CifStructure::setData() { sheetHboundgroupParsed = false; } -/** - * Returns the correct collection by group name - * @param name name of the CIF group - * @return reference to the collection - */ vector& CifStructure::getGroup(string name) { if (name == "atom") { return atomGroup; @@ -105,11 +94,6 @@ vector& CifStructure::getGroup(string name) { } } -/** - * Returns the tag by name - * @param name name of tag - * @return CIF tag - */ string CifStructure::getTag(string name) { if (name == "header") { return header; @@ -171,12 +155,6 @@ string CifStructure::getTag(string name) { } } -/** - * Returns the column number of the field - * @param name name of the group - * @param field name of the field - * @return field column number - */ int CifStructure::getGroupColumnNumber(string name, string field) { int col = -1; vector& group = getGroup(name); @@ -190,13 +168,6 @@ int CifStructure::getGroupColumnNumber(string name, string field) { return col; } -/** - * Returns the field of the line at the columnNum column - * @param name name of the group - * @param line line of the CIF - * @param columnNum number of column - * @return field at columnNum column - */ string CifStructure::getGroupField(string name, string& line, int columnNum) { istringstream iss(line); 
vector& group = getGroup(name); @@ -213,11 +184,6 @@ string CifStructure::getGroupField(string name, string& line, int columnNum) { } } -/** - * Returns the field present in the line - * @param line line of the CIF - * @return field of the line - */ string CifStructure::getInlineField(string& line) { istringstream iss(line); string tag, field; @@ -225,10 +191,6 @@ string CifStructure::getInlineField(string& line) { return field; } -/** - * Parses group of CIF fields and creates a vector with columns positions - * @param name name of the group - */ void CifStructure::parseGroup(string name, string& line) { bool found = false; vector& group = getGroup(name); @@ -257,10 +219,6 @@ void CifStructure::parseGroup(string name, string& line) { } } -/** - * Sets flag of the parsed group - * @param name name of the group - */ void CifStructure::setParsedFlag(string name) { if (name == "atom") { atomGroupParsed = true; @@ -277,11 +235,6 @@ void CifStructure::setParsedFlag(string name) { } } -/** - * Return true if the group name is parsed, false otherwise - * @param name name of the group - * @return true if group is parsed, false otherwise - */ bool CifStructure::isGroupParsed(string name) { if (name == "atom") { return atomGroupParsed; @@ -298,10 +251,6 @@ bool CifStructure::isGroupParsed(string name) { } } -/** - * Prints group records names into output stream. - * @param name name of the group - */ void CifStructure::printGroup(string name) { output << "loop_" << endl; diff --git a/Biopool/Sources/CifStructure.h b/Biopool/Sources/CifStructure.h index 9eb4c58..cd2baea 100644 --- a/Biopool/Sources/CifStructure.h +++ b/Biopool/Sources/CifStructure.h @@ -1,8 +1,17 @@ -/* - * File: CifStructure.h - * Author: marco - * - * Created on 1 giugno 2015, 11.36 +/* This file is part of Victor. 
+ + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #ifndef CIFSTRUCTURE_H @@ -16,6 +25,7 @@ using std::string; using std::istream; +using std::ostream; using std::vector; // Global constants, typedefs, etc. (to avoid): @@ -24,26 +34,26 @@ namespace Victor { namespace Biopool { /** - * Helper class used to hold information from CIF file + * @brief Helper class used to hold information from CIF file. */ class CifStructure { public: /** - * Constructor + * Constructor. * @param input input file stream * @param output output file stream */ CifStructure(istream& input, ostream& output = cout); /** - * Constructor + * Constructor. * @param output output file stream * @param input input file stream */ CifStructure(ostream& output); /** - * Destructor + * Destructor. */ virtual ~CifStructure(); @@ -79,7 +89,7 @@ namespace Victor { string getGroupField(string name, string& line, int columnNum); /** - * Returns the field present in the line + * Returns the field present in the CIF line. * @param line * @return */ @@ -92,13 +102,13 @@ namespace Victor { void parseGroup(string group, string& line); /** - * Sets flag of the parsed group + * Sets the flag of the parsed group. * @param name name of the group */ void setParsedFlag(string name); /** - * Return true if the group name is parsed, false otherwise + * Returns true if the group name is parsed, false otherwise. 
* @param name name of the group * @return true if group is parsed, false otherwise */ @@ -118,9 +128,9 @@ namespace Victor { private: /** - * Sets data members + * Initializes data members. */ - void setData(); + void initData(); // CIF file istream& input; diff --git a/Biopool/Tests/TestCifLoader.h b/Biopool/Tests/TestCifLoader.h index 63795d2..8ca520c 100644 --- a/Biopool/Tests/TestCifLoader.h +++ b/Biopool/Tests/TestCifLoader.h @@ -1,8 +1,17 @@ -/* - * File: TestCifLoader.h - * Author: marco - * - * Created on 9-giu-2015, 10.45.39 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #ifndef TESTCIFLOADER_H diff --git a/Biopool/Tests/TestCifStructure.h b/Biopool/Tests/TestCifStructure.h index e985d94..507aea2 100644 --- a/Biopool/Tests/TestCifStructure.h +++ b/Biopool/Tests/TestCifStructure.h @@ -1,8 +1,17 @@ -/* - * File: TestCifStructure.h - * Author: marco - * - * Created on 10 giugno 2015, 10.37 +/* This file is part of Victor. + + Victor is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Victor is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Victor. If not, see . */ #ifndef TESTCIFSTRUCTURE_H From a8e57925575bca61f137e9e7a0645ba557d69e94 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Sun, 14 Jun 2015 21:56:51 +0200 Subject: [PATCH 15/16] debug --- Biopool/Sources/CifSaver.cc | 142 +++++++++++++++++++++----------- Biopool/Sources/CifStructure.cc | 17 +++- 2 files changed, 109 insertions(+), 50 deletions(-) diff --git a/Biopool/Sources/CifSaver.cc b/Biopool/Sources/CifSaver.cc index de5fe09..d58b0da 100644 --- a/Biopool/Sources/CifSaver.cc +++ b/Biopool/Sources/CifSaver.cc @@ -29,7 +29,7 @@ using namespace std; CifSaver::CifSaver(ostream& _output) : output(_output), writeSeq(true), writeSecStr(true), writeTer(true), -atomOffset(0), aminoOffset(0), ligandOffset(0), chain(' '), +atomOffset(0), aminoOffset(0), ligandOffset(0), chain(' '), atomGroupPrinted(false) { cif = new CifStructure(_output); } @@ -51,7 +51,7 @@ CifSaver::~CifSaver() { void CifSaver::saveGroup(Group& gr) { gr.sync(); - if(!atomGroupPrinted) { + if (!atomGroupPrinted) { cif->printGroup("atom"); atomGroupPrinted = true; } @@ -62,7 +62,7 @@ void CifSaver::saveGroup(Group& gr) { // cosmetics: OXT has to be output after // the sidechain and therefore goes in saveSpacer if (atName == "OXT") { - continue; + continue; } // Added variable for correcting atom type H (last column in PDBs) @@ -81,31 +81,31 @@ void CifSaver::saveGroup(Group& gr) { while (atName.size() < 4) { atName += ' '; } - + // if fields have default values (0 or X), assigns the CIF unknown value (?) 
// or a possibly correct value char asymId = gr[i].getAsymId(); string entityId = gr[i].getEntityId(); int model = gr[i].getModel(); - + if (asymId == 'X') { asymId = '?'; } - + if (entityId == "0") { entityId = "?"; } - + if (model == 0) { model = 1; } - + output << setw(7) << left << "ATOM" << setw(6) << gr[i].getNumber() << setw(2) << atomOneLetter << setw(5) << left << atName << setw(2) << "." << - setw(4) << gr.getType() << + setw(4) << gr.getType() << setw(2) << asymId << setw(2) << entityId << setw(4) << aminoOffset << @@ -122,7 +122,7 @@ void CifSaver::saveGroup(Group& gr) { setw(2) << "?" << setw(2) << "?" << setw(4) << aminoOffset << - setw(4) << gr.getType() << + setw(4) << gr.getType() << setw(2) << chain << setw(5) << left << atName << setw(2) << model << @@ -196,34 +196,52 @@ void CifSaver::saveSpacer(Spacer& sp) { // cosmetics: write OXT after last side chain if (sp.getAmino(sp.sizeAmino() - 1).isMember(OXT)) { unsigned int index = sp.sizeAmino() - 1; - + + // if fields have default values (0 or X), assigns the CIF unknown value (?) + // or a possibly correct value + char asymId = sp.getAmino(index)[OXT].getAsymId(); + string entityId = sp.getAmino(index)[OXT].getEntityId(); + int model = sp.getAmino(index)[OXT].getModel(); + + if (asymId == 'X') { + asymId = '?'; + } + + if (entityId == "0") { + entityId = "?"; + } + + if (model == 0) { + model = 1; + } + output << setw(7) << left << "ATOM" << - setw(6) << sp.getAmino(index)[OXT].getNumber() << - setw(2) << "O" << - setw(5) << left << "OXT" << - setw(2) << "." << - setw(4) << sp.getAmino(index).getType() << - setw(2) << sp.getAmino(index)[OXT].getAsymId() << - setw(2) << sp.getAmino(index)[OXT].getEntityId() << - setw(4) << aminoOffset << - setw(2) << "?" 
<< - setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().x << - setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().y << - setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().z << - setw(6) << setprecision(2) << sp.getAmino(index)[OXT].getOccupancy() << - setw(7) << left << setprecision(2) << sp.getAmino(index)[OXT].getBFac() << - setw(2) << "?" << - setw(2) << "?" << - setw(2) << "?" << - setw(2) << "?" << - setw(2) << "?" << - setw(2) << "?" << - setw(4) << aminoOffset << - setw(4) << sp.getAmino(index).getType() << - setw(2) << chain << - setw(5) << "OXT" << - setw(2) << sp.getAmino(index)[OXT].getModel() << - endl; + setw(6) << sp.getAmino(index)[OXT].getNumber() << + setw(2) << "O" << + setw(5) << left << "OXT" << + setw(2) << "." << + setw(4) << sp.getAmino(index).getType() << + setw(2) << asymId << + setw(2) << entityId << + setw(4) << aminoOffset << + setw(2) << "?" << + setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().x << + setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().y << + setw(8) << setprecision(3) << sp.getAmino(index)[OXT].getCoords().z << + setw(6) << setprecision(2) << sp.getAmino(index)[OXT].getOccupancy() << + setw(7) << left << setprecision(2) << sp.getAmino(index)[OXT].getBFac() << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" << + setw(2) << "?" 
<< + setw(4) << aminoOffset << + setw(4) << sp.getAmino(index).getType() << + setw(2) << chain << + setw(5) << "OXT" << + setw(2) << model << + endl; } output.precision(oldPrec); @@ -251,27 +269,44 @@ void CifSaver::saveLigand(Ligand& gr) { } //print all HETATM of a ligand - for (unsigned int i = 0; i < gr.size(); i++) - { + for (unsigned int i = 0; i < gr.size(); i++) { string atType = gr[i].getType(); aaType = gr.getType(); string atTypeShort; //last column in a Pdb File - + if (atType != aaType) { atTypeShort = atType[0]; } else { atTypeShort = atType; } + // if fields have default values (0 or X), assigns the CIF unknown value (?) + // or a possibly correct value + char asymId = gr[i].getAsymId(); + string entityId = gr[i].getEntityId(); + int model = gr[i].getModel(); + + if (asymId == 'X') { + asymId = '?'; + } + + if (entityId == "0") { + entityId = "?"; + } + + if (model == 0) { + model = 1; + } + output << setw(7) << left << tag << setw(6) << gr[i].getNumber() << setw(2) << atTypeShort << setw(5) << left << atType << setw(2) << "." << - setw(4) << aaType << - setw(2) << gr[i].getAsymId() << - setw(2) << gr[i].getEntityId() << - setw(4) << aminoOffset << + setw(4) << aaType << + setw(2) << asymId << + setw(2) << entityId << + setw(4) << ligandOffset << setw(2) << "?" << setw(8) << setprecision(3) << gr[i].getCoords().x << setw(8) << setprecision(3) << gr[i].getCoords().y << @@ -285,10 +320,10 @@ void CifSaver::saveLigand(Ligand& gr) { setw(2) << "?" << setw(2) << "?" 
<< setw(4) << aminoOffset << - setw(4) << aaType << + setw(4) << aaType << setw(2) << chain << setw(5) << atType << - setw(2) << gr[i].getModel() << + setw(2) << model << endl; } output << "# " << endl; @@ -346,9 +381,18 @@ void CifSaver::saveProtein(Protein& prot) { */ void CifSaver::writeSeqRes(Spacer& sp) { cif->printGroup("entity poly"); - - for (unsigned int i = 0; i< sp.sizeAmino(); i++) { - output << setw(2) << left << sp.getAmino(i).getAtom(0).getEntityId() << + + + for (unsigned int i = 0; i < sp.sizeAmino(); i++) { + // if fields have default values (0 or X), assigns the CIF unknown value (?) + // or a possibly correct value + string entityId = sp.getAmino(i).getAtom(0).getEntityId(); + + if (entityId == "0") { + entityId = "?"; + } + + output << setw(2) << left << entityId << setw(4) << i + 1 << setw(4) << sp.getAmino(i).getType() << setw(2) << "n" << diff --git a/Biopool/Sources/CifStructure.cc b/Biopool/Sources/CifStructure.cc index 3225a20..81daebb 100644 --- a/Biopool/Sources/CifStructure.cc +++ b/Biopool/Sources/CifStructure.cc @@ -18,6 +18,7 @@ #include #include +#include #include "CifStructure.h" @@ -41,6 +42,7 @@ CifStructure::~CifStructure() { void CifStructure::initData() { header = "_entry.id"; + // atom group atom = "_atom_site."; atomId = "id "; chain = "auth_asym_id "; @@ -56,12 +58,14 @@ void CifStructure::initData() { residueName = "auth_comp_id "; atomName = "auth_atom_id "; model = "pdbx_PDB_model_num "; - + + // helix group helix = "_struct_conf."; helixStart = "beg_auth_seq_id "; helixEnd = "end_auth_seq_id "; helixChainId = "beg_auth_asym_id "; + // sheet group sheet = "_struct_sheet."; sheetOrder = "_struct_sheet_order."; sheetRange = "_struct_sheet_range."; @@ -70,6 +74,7 @@ void CifStructure::initData() { sheetEnd = "end_auth_seq_id "; sheetChainId = "beg_auth_asym_id "; + // flags atomGroupParsed = false; helixGroupParsed = false; sheetGroupParsed = false; @@ -91,6 +96,8 @@ vector& CifStructure::getGroup(string name) { return 
sheetRangeGroup; } else if (name == "sheet hbond") { return sheetHbondGroup; + } else { + ERROR("getGroup (CifStructure): invalid group name", exception); } } @@ -152,6 +159,8 @@ string CifStructure::getTag(string name) { return sheetEnd; } else if (name == "sheet chain") { return sheetChainId; + } else { + ERROR("getTag (CifStructure): invalid tag name", exception); } } @@ -232,6 +241,8 @@ void CifStructure::setParsedFlag(string name) { sheetOrderGroupParsed = true; } else if (name == "sheet range") { sheetRangeGroupParsed = true; + } else { + ERROR("setParsedFlag (CifStructure): invalid flag name", exception); } } @@ -248,6 +259,8 @@ bool CifStructure::isGroupParsed(string name) { return sheetOrderGroupParsed; } else if (name == "sheet range") { return sheetRangeGroupParsed; + } else { + ERROR("isGroupParsed (CifStructure): invalid group name", exception); } } @@ -366,5 +379,7 @@ void CifStructure::printGroup(string name) { "_pdbx_poly_seq_scheme.pdb_strand_id " << endl << "_pdbx_poly_seq_scheme.pdb_ins_code " << endl << "_pdbx_poly_seq_scheme.hetero " << endl; + } else { + ERROR("printGroup (CifStructure): invalid group name", exception); } } From a6454c40644cec66dce55a56329699870ecce7e8 Mon Sep 17 00:00:00 2001 From: Marco Pezzutti Date: Thu, 16 Jul 2015 23:46:36 +0200 Subject: [PATCH 16/16] APPS debug --- Biopool/APPS/Cif2Secondary.cc | 6 ++++-- Biopool/APPS/Cif2Seq.cc | 4 ++++ Biopool/APPS/CifCorrector.cc | 12 ++++++++---- Biopool/APPS/CifMover.cc | 3 ++- Biopool/APPS/CifReaderWriter.cc | 30 ++++++++++++++---------------- Biopool/APPS/CifSecondary.cc | 4 ++++ Biopool/APPS/CifShifter.cc | 1 + Biopool/APPS/PdbCorrector.cc | 4 ++++ 8 files changed, 41 insertions(+), 23 deletions(-) diff --git a/Biopool/APPS/Cif2Secondary.cc b/Biopool/APPS/Cif2Secondary.cc index b8e2e50..5db0500 100644 --- a/Biopool/APPS/Cif2Secondary.cc +++ b/Biopool/APPS/Cif2Secondary.cc @@ -24,10 +24,10 @@ using namespace Victor; using namespace Victor::Biopool; void sShowHelp() { - cout << 
"Pdb 2 Secondary Structure converter\n" + cout << "Cif 2 Secondary Structure converter\n" << "\t H = helix, \t E = extended (strand, sheet), \t . = other.\n" << " Options: \n" - << "\t-i \t\t Input file for PDB structure\n" + << "\t-i \t\t Input file for CIF structure\n" << "\n"; } @@ -92,6 +92,8 @@ int main(int argc, char** argv) { allCh = il.getAllChains(); cout << ">" << inputFile << "\n"; + inFile.close(); + for (unsigned int i = 0; i < sp->sizeAmino(); i++) { switch (sp->getAmino(i).getState()) { case HELIX: diff --git a/Biopool/APPS/Cif2Seq.cc b/Biopool/APPS/Cif2Seq.cc index b784830..b62e096 100644 --- a/Biopool/APPS/Cif2Seq.cc +++ b/Biopool/APPS/Cif2Seq.cc @@ -98,6 +98,8 @@ int main(int argc, char** argv) { // Load the protein object Protein prot; prot.load(cl); + + inFile.close(); // Open the proper output stream (file or stdout) std::ostream* os = &cout; @@ -124,6 +126,8 @@ int main(int argc, char** argv) { sp->save(ss); } + fout.close(); + return 0; } diff --git a/Biopool/APPS/CifCorrector.cc b/Biopool/APPS/CifCorrector.cc index e062dfa..0882727 100644 --- a/Biopool/APPS/CifCorrector.cc +++ b/Biopool/APPS/CifCorrector.cc @@ -53,14 +53,18 @@ int main(int argc, char** argv) { for (unsigned int i = 0; i < sp.sizeAmino(); i++) sp.getAmino(i).addMissingO(); + + inFile.close(); - ofstream outFile2(argv[1]); + ofstream outFile(argv[1]); - if (!outFile2) + if (!outFile) ERROR("Couldn't write file.", exception); - CifSaver pss2(outFile2); - sp.save(pss2); + CifSaver pss(outFile); + sp.save(pss); + + outFile.close(); return 0; } diff --git a/Biopool/APPS/CifMover.cc b/Biopool/APPS/CifMover.cc index 2eabd86..74dc560 100644 --- a/Biopool/APPS/CifMover.cc +++ b/Biopool/APPS/CifMover.cc @@ -29,7 +29,7 @@ using namespace Victor::Biopool; const double LAMBDA = 1.5; void sShowHelp() { - cout << "CIF Shifter\n" + cout << "CIF Mover\n" << "Allows to move all residues in file by fixed offset.\n" << " Options: \n" << "\t-i \t\t Input CIF file\n" @@ -196,6 +196,7 @@ int 
main(int argc, char** argv) { } CifSaver cs(outFile); sp.save(cs); + outFile.close(); return 0; diff --git a/Biopool/APPS/CifReaderWriter.cc b/Biopool/APPS/CifReaderWriter.cc index d90dee6..85487d6 100644 --- a/Biopool/APPS/CifReaderWriter.cc +++ b/Biopool/APPS/CifReaderWriter.cc @@ -63,21 +63,18 @@ int main(int argc, char** argv) { // 1. read structure // -------------------------------------------------- + ifstream inFile(inputFile.c_str()); + if (!inFile) { + ERROR("File does not exist.\n", exception); + } + if (inputFile.find("pdb") != string::npos) { - ifstream inFile(inputFile.c_str()); - if (!inFile) - ERROR("File does not exist.\n", exception); - cout << "Loading PDB file..." << endl; PdbLoader pl(inFile); prot.load(pl); } else if (inputFile.find("cif") != string::npos) { - ifstream inFile(inputFile.c_str()); - if (!inFile) - ERROR("File does not exist.\n", exception); - cout << "Loading CIF file..." << endl; CifLoader cl(inFile); @@ -87,26 +84,25 @@ int main(int argc, char** argv) { cout << "Uknown input file format. Aborting. (-h for help)" << endl; return -2; } + + inFile.close(); // -------------------------------------------------- // 2. write structure // -------------------------------------------------- + ofstream outFile(outputFile.c_str()); + if (!outFile) { + ERROR("File not found.", exception); + } + if (outputFile.find("pdb") != string::npos) { - ofstream outFile(outputFile.c_str()); - if (!outFile) - ERROR("File not found.", exception); - cout << "Saving PDB file..." << endl; PdbSaver ps(outFile); prot.save(ps); } else if (outputFile.find("cif") != string::npos) { - ofstream outFile(outputFile.c_str()); - if (!outFile) - ERROR("File not found.", exception); - cout << "Saving CIF file..." 
<< endl; CifSaver cs(outFile); @@ -117,6 +113,8 @@ int main(int argc, char** argv) { return -3; } + outFile.close(); + return 0; } diff --git a/Biopool/APPS/CifSecondary.cc b/Biopool/APPS/CifSecondary.cc index 18dfdd4..cde7b87 100644 --- a/Biopool/APPS/CifSecondary.cc +++ b/Biopool/APPS/CifSecondary.cc @@ -192,6 +192,8 @@ int main(int argc, char** argv) { // Load the protein object Protein prot; prot.load(cl); + + inFile.close(); // Open the proper output stream (file or stdout) std::ostream* os = &cout; @@ -211,6 +213,8 @@ int main(int argc, char** argv) { sp = prot.getSpacer(i); writeOutput(sp, extendedOutput, ssType, (*os)); } + + fout.close(); return 0; } diff --git a/Biopool/APPS/CifShifter.cc b/Biopool/APPS/CifShifter.cc index f25370e..e620c64 100644 --- a/Biopool/APPS/CifShifter.cc +++ b/Biopool/APPS/CifShifter.cc @@ -69,6 +69,7 @@ int main(int argc, char** argv) { getArg("p", offset, argc, argv, 0); getArg("n", tmp, argc, argv, 0); offset -= tmp; + getArg("P", offsetAtom, argc, argv, 0); getArg("N", tmp, argc, argv, 0); offsetAtom -= tmp; diff --git a/Biopool/APPS/PdbCorrector.cc b/Biopool/APPS/PdbCorrector.cc index 593cb61..777245f 100644 --- a/Biopool/APPS/PdbCorrector.cc +++ b/Biopool/APPS/PdbCorrector.cc @@ -48,6 +48,8 @@ int main(int nArgs, char* argv[]) { for (unsigned int i = 0; i < sp.sizeAmino(); i++) sp.getAmino(i).addMissingO(); + + inFile.close(); ofstream outFile2(argv[1]); @@ -57,5 +59,7 @@ int main(int nArgs, char* argv[]) { PdbSaver pss2(outFile2); sp.save(pss2); + outFile2.close(); + return 0; }