00001 00002 /* 00003 | Author: Michael Schmidt; 00004 | Gunnar Jehl 00005 | 00006 | ************************* SOFTWARE LICENSE AGREEMENT *********************** 00007 | This source code is published under the BSD License. 00008 | 00009 | See file 'LICENSE.txt' that comes with this distribution or 00010 | http://dbis.informatik.uni-freiburg.de/index.php?project=GCX/license.php 00011 | for the full license agreement. 00012 | 00013 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00014 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00015 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00016 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00017 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00018 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00019 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00020 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00021 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00022 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00023 | POSSIBILITY OF SUCH DAMAGE. 00024 | **************************************************************************** 00025 */ 00026 00035 #include "pathexpressionadornment.h" 00036 00037 PathExpressionAdornment::PathExpressionAdornment(PathExpression * _path): 00038 adorned_path(NULL), rewritten_path(NULL), is_child_node_path(false), 00039 is_descendant_node_path(false), is_dos_node_path(false), 00040 is_child_text_path(false), is_descendant_text_path(false) { 00041 00042 // The fact that we, in addition to the Suciu XPath containment paper, 00043 // also consider node() and text() expressions, as well as dos-axes at the 00044 // end makes the implementation of the algorithm a little tricky. The key 00045 // idea is to replace interior // node() steps by * and interior text() steps 00046 // lead to unsatisfyable paths. Consequently, we clean the path by rewriting 00047 // these constructs. The information if node() and text() appear at the 00048 // end is stored externally in some class member variables and will be 00049 // handled using a series of special cases. The implementation of Suciu's 00050 // path containment algorithm can be found in class PathExpression. 00051 00052 // if the original path is empty or selects no node, set adorned_path=NULL 00053 if (!_path || _path->selectsNoNode() || _path->isEmptyPath()) { 00054 return; 00055 } 00056 // otherwise we take the original path as a starting point 00057 rewritten_path = new PathExpression(); 00058 rewritten_path-> 00059 addPathStep(new PathStepTagExpression(at_child, TAG_SHADOW_FRONT)); 00060 for (unsigned i = 0; i < _path->getPathSize(); i++) { 00061 rewritten_path->addPathStep(_path->getPathStepAt(i)->clone()); 00062 } 00063 00064 // if the path is of the form [...]/axis::text()/dos::node() we drop 00065 // the final (redundant) dos::node() path step 00066 unsigned path_size = rewritten_path->getPathSize(); 00067 00068 if (path_size > 1 && 00069 rewritten_path->getPathStepAt(path_size - 1)->isDosNodeStep() && 00070 rewritten_path->getPathStepAt(path_size - 2)->isTextNodeTest()) { 00071 delete rewritten_path->getPathStepAt(path_size - 1); 00072 00073 rewritten_path->getPathSteps()->pop_back(); 00074 } 00075 // if the path is of the form [...]/axis::node()/dos::node() we 00076 // we replace it by [...]//node() 00077 path_size = rewritten_path->getPathSize(); 00078 if (path_size > 1 && 00079 rewritten_path->getPathStepAt(path_size - 1)->isDosNodeStep() && 00080 rewritten_path->getPathStepAt(path_size - 2)->isNodeNodeTest()) { 00081 00082 rewritten_path->getPathStepAt(path_size - 00083 2)->setAxisType(at_descendant); 00084 delete rewritten_path->getPathStepAt(path_size - 1); 00085 00086 rewritten_path->getPathSteps()->pop_back(); 00087 } 00088 // if the tail path step is of the form axis::node(), remove this 00089 // tail step and store the information 00090 PathStepExpression *tail = rewritten_path->getTailPathStep(); 00091 00092 if (tail->isNodeNodeTest()) { 00093 00094 switch (tail->getAxisType()) { 00095 case at_child: 00096 is_child_node_path = true; 00097 break; 00098 case at_descendant: 00099 is_descendant_node_path = true; 00100 break; 00101 case at_dos: 00102 is_dos_node_path = true; 00103 break; 00104 } 00105 rewritten_path->getPathSteps()->pop_back(); 00106 delete tail; 00107 00108 // if the tail path step is axis::text(), remove it and store information 00109 // (note that patterns of the form [...]/axis::text()/axis::node() have 00110 // been removed previously, either by containsInnerTextNode() or the 00111 // axis::text()/dos::node() condition) 00112 } else if (tail->isTextNodeTest()) { 00113 00114 switch (tail->getAxisType()) { 00115 case at_child: 00116 is_child_text_path = true; 00117 break; 00118 case at_descendant: 00119 is_descendant_text_path = true; 00120 break; 00121 default: 00122 // should never happen 00123 break; 00124 } 00125 rewritten_path->getPathSteps()->pop_back(); 00126 delete tail; 00127 } 00128 // now we have a regular token (i.e. a tag or a wildcard at the end) 00129 00130 // if the rewritten path has become empty now, we set it to NULL and return 00131 if (rewritten_path->isEmptyPath()) { 00132 delete rewritten_path; 00133 00134 rewritten_path = NULL; 00135 return; 00136 } 00137 // a little trick: we append //* at the end and allow this guy to match 00138 // text nodes too; this way, the path can be used for standard containment 00139 // checking and matches text() at the same time 00140 //if (is_descendant_node_path) { 00141 // is_descendant_text_path=true; 00142 // rewritten_path->addPathStep(new PathStepStarExpression(at_descendant)); 00143 //} 00144 00145 // now we are in the position to add the descendant shadow leaf ... 00146 rewritten_path-> 00147 addPathStep(new PathStepTagExpression(at_descendant, TAG_SHADOW_TAIL)); 00148 00149 // ... and to replace interior node() path steps by star labels 00150 vector < PathStepExpression * >*steps = rewritten_path->getPathSteps(); 00151 for (unsigned i = 0; i < steps->size() - 1; i++) { 00152 if ((*steps)[i]->isNodeNodeTest()) { 00153 rewritten_path->replacePathStepAt(i, 00154 new 00155 PathStepStarExpression((*steps) 00156 [i]-> 00157 getAxisType 00158 ())); 00159 } 00160 } 00161 00162 // compute adornment in a one-pass traversal over the path) 00163 adorned_path = new PathExpression(); 00164 unsigned adornment_ctr = 0; // #eliminated predecessor-pathsteps 00165 00166 for (unsigned i = 0; i < steps->size(); i++) { 00167 00168 PathStepExpression *step = (*steps)[i]; // cur path step 00169 00170 // an axis::* path step can be eliminated whenever there is a 00171 // follow-up path step AND it is not of the form /*/ (note that 00172 // this second condition implicitly holds if adornment_ctr>0, 00173 // since in this case the current axis is rewritten to //); 00174 // moreover, it can be eliminated if the path is something like 00175 // /*/axis_1::*/axis_2::*/.../axis_n::tag, where at least one 00176 // axis_n is a descendant axis (note that there are no more 00177 // dos-axes in the rewritten_path) 00178 if (step->isStarNodeTest() && i < steps->size() - 1 && 00179 (adornment_ctr > 0 || step->getAxisType() == at_descendant || 00180 (*steps)[i + 1]->getAxisType() == at_descendant || 00181 rewritten_path->containsStarDescendantSequence(i + 1))) { 00182 adornment_ctr++; 00183 } else { 00184 PathStepExpression *p = (*steps)[i]->clone(); 00185 00186 if (adornment_ctr > 0) { 00187 p->setAxisType(at_descendant); // force descendant 00188 } 00189 adorned_path->addPathStep(p); 00190 path_adornments.push_back(adornment_ctr); 00191 00192 // reset variables 00193 adornment_ctr = 0; 00194 } 00195 } 00196 } 00197 00198 PathExpressionAdornment::~PathExpressionAdornment() { 00199 delete rewritten_path; 00200 delete adorned_path; 00201 } 00202 00203 void PathExpressionAdornment::print(OutputStream & dos) const { 00204 if (!adorned_path) { 00205 dos << "empty"; 00206 } else { 00207 for (unsigned i = 0; i < adorned_path->getPathSize(); i++) { 00208 dos << *(adorned_path-> 00209 getPathStepAt(i)) << "(>=" << path_adornments[i] << ")"; 00210 } 00211 } 00212 dos << " [cn/dn/dosn/ct/dt=" << is_child_node_path << "/" 00213 << is_descendant_node_path << "/" << is_dos_node_path << "/" 00214 << is_child_text_path << "/" << is_descendant_text_path << "]"; 00215 }