halapi
hierarchicalalignmentformatapi
 All Classes Namespaces Functions Pages
halCommon.h
1 /*
2  * Copyright (C) 2012 by Glenn Hickey (hickey@soe.ucsc.edu)
3  *
4  * Released under the MIT license, see LICENSE.txt
5  */
6 
7 #ifndef _HALCOMMON_H
8 #define _HALCOMMON_H
9 
10 #include <map>
11 #include <set>
12 #include <string>
13 #include <vector>
14 #include <locale>
15 #include <cassert>
16 #include <sstream>
17 #include "hal.h"
18 
19 namespace hal {
20 
21 inline bool compatibleWithVersion(const std::string& version)
22 {
23  double myVersion, inVersion;
24  // assume versions are strings tho we treat as floats for now.
25  std::stringstream ss, ss2;
26  ss << HAL_VERSION;
27  ss >> myVersion;
28  ss2 << version;
29  ss2 >> inVersion;
30  return (int)myVersion == (int)inVersion;
31 }
32 
/** Declaration only; the definition is not part of this header.
 * NOTE(review): judging by the name and signature, this presumably splits
 * inString at occurrences of separator and returns the resulting pieces --
 * confirm against the implementation (in particular how empty tokens are
 * handled). */
34 std::vector<std::string> chopString(const std::string& inString,
35  const std::string& separator);
36 
/** Return the complementary base for a single nucleotide character.
 *
 * A<->T and C<->G are swapped, and the case of the input is kept.  Any
 * other character (for example 'N' or a gap) is returned unchanged.
 *
 * @param c character to complement
 * @return the complement of c, or c itself when it is not one of ACGTacgt */
inline char reverseComplement(char c)
{
  // Paired tables: the base at index i of bases maps to complements[i].
  const char* const bases       = "AaCcGgTt";
  const char* const complements = "TtGgCcAa";
  for (int i = 0; bases[i] != '\0'; ++i)
  {
    if (bases[i] == c)
    {
      return complements[i];
    }
  }
  // Not a recognised base: hand it back untouched.
  return c;
}
56 
/** String overload; declaration only, the definition is not in this header.
 * NOTE(review): s is taken by non-const reference, so this presumably
 * reverse-complements the sequence in place -- confirm against the
 * implementation. */
58 void reverseComplement(std::string& s);
59 
/** Test whether a character is one of the recognised nucleotide codes.
 *
 * The accepted set is A, C, G, T and N in either upper or lower case.
 *
 * @param c character to test
 * @return true for ACGTN / acgtn, false for everything else */
inline bool isNucleotide(char c)
{
  return c == 'A' || c == 'a' ||
         c == 'C' || c == 'c' ||
         c == 'G' || c == 'g' ||
         c == 'T' || c == 't' ||
         c == 'N' || c == 'n';
}
81 
82 inline bool isTransition(char c1, char c2)
83 {
84  assert(isNucleotide(c1) && isNucleotide(c2));
85  char x = std::toupper((char)c1);
86  char y = std::toupper((char)c2);
87  switch(x)
88  {
89  case 'A' : return y == 'G';
90  case 'C' : return y == 'T';
91  case 'G' : return y == 'A';
92  case 'T' : return y == 'C';
93  default: break;
94  }
95  return false;
96 }
97 
/** Test whether two characters differ, ignoring case.
 *
 * @param c1 first character
 * @param c2 second character
 * @return true when the upper-cased characters are not equal */
inline bool isSubstitution(char c1, char c2)
{
  // std::toupper is only defined for values representable as unsigned char
  // (or EOF), so route the plain char through unsigned char first.
  return std::toupper(static_cast<unsigned char>(c1)) !=
         std::toupper(static_cast<unsigned char>(c2));
}
102 
103 inline bool isTransversion(char c1, char c2)
104 {
105  char x = std::toupper((char)c1);
106  char y = std::toupper((char)c2);
107  return (x != y && x != 'N' && y != 'N' && !isTransition(c1, c2));
108 }
109 
/** Test whether a character is the missing-data code N (either case).
 *
 * @param c character to test
 * @return true for 'N' or 'n', false otherwise */
inline bool isMissingData(char c)
{
  switch (c)
  {
  case 'N' :
  case 'n' :
    return true;
  default :
    return false;
  }
}
114 
/** Test whether a character equals its own lower-case form.
 *
 * Lower-case letters yield true and upper-case letters yield false.  Note
 * that characters without a distinct lower-case form (digits, '-', ...) are
 * left unchanged by std::tolower and therefore also yield true.
 *
 * @param c character to test
 * @return true when c is unchanged by conversion to lower case */
inline bool isMasked(char c)
{
  // std::tolower is only defined for values representable as unsigned char
  // (or EOF), so route the plain char through unsigned char first.
  return c == static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
}
119 
/** Test a pair of bases against the fourfold-degenerate table below.
 *
 * After upper-casing both arguments the result is true when
 *   - c2 is 'T' or 'G' and c1 is 'C' or 'G', or
 *   - c2 is 'C' and c1 is any of 'A', 'C', 'G', 'T';
 * every other combination yields false.
 *
 * NOTE(review): c1 and c2 are presumably the first and second bases of a
 * codon, the result saying whether the third position is fourfold
 * degenerate -- confirm against the callers.
 *
 * @param c1 first base
 * @param c2 second base
 * @return true when the pair matches one of the combinations above */
inline bool isFourfoldDegenerate(char c1, char c2)
{
  // std::toupper is only defined for values representable as unsigned char
  // (or EOF), so route the plain char through unsigned char first.
  const char x1 = static_cast<char>(std::toupper(static_cast<unsigned char>(c1)));
  const char x2 = static_cast<char>(std::toupper(static_cast<unsigned char>(c2)));
  if (x2 == 'T' || x2 == 'G')
  {
    return x1 == 'C' || x1 == 'G';
  }
  else if (x2 == 'C')
  {
    return x1 == 'A' || x1 == 'C' || x1 == 'G' || x1 == 'T';
  }
  return false;
}
135 
137 inline hal_size_t hammingDistance(const std::string& s1, const std::string& s2)
138 {
139  assert(s1.length() == s2.length());
140  hal_size_t dist = 0;
141  for (size_t i = 0; i < s1.length(); ++i)
142  {
143  if (isSubstitution(s1[i], s2[i]) == true)
144  {
145  ++dist;
146  }
147  }
148  return dist;
149 }
150 
/** Declaration only; the definition is not part of this header.
 * NOTE(review): judging by the name, this presumably returns the lowest
 * common ancestor, in the alignment tree, of all genomes in inputSet --
 * confirm against the implementation, including what is returned for an
 * empty set. */
151 const Genome* getLowestCommonAncestor(const std::set<const Genome*>& inputSet);
152 
153 /* Given a set of genomes (input set), find all genomes in the spanning
154  * tree, including the inputs (root should be the root of the alignment).
 * Declaration only.  NOTE(review): outputSet is a non-const reference, so
 * the result is presumably written into it -- confirm whether existing
 * contents are kept or cleared. */
155 void getGenomesInSpanningTree(const std::set<const Genome*>& inputSet,
156  std::set<const Genome*>& outputSet);
157 
158 /* Given a node (root), return it and all genomes (including internal nodes)
159  * below it in the tree.
 * Declaration only.  NOTE(review): the function returns void and takes
 * outputSet by non-const reference, so the genomes are presumably added to
 * outputSet -- confirm whether existing contents are kept or cleared. */
160 void getGenomesInSubTree(const Genome* root,
161  std::set<const Genome*>& outputSet);
162 
163 }
164 
165 #endif
std::vector< std::string > chopString(const std::string &inString, const std::string &separator)
hal_size_t hammingDistance(const std::string &s1, const std::string &s2)
Definition: halCommon.h:137
bool isNucleotide(char c)
Definition: halCommon.h:61
bool isFourfoldDegenerate(char c1, char c2)
Definition: halCommon.h:121
char reverseComplement(char c)
Definition: halCommon.h:40