00001 00002 /* 00003 | Author: Michael Schmidt; 00004 | Gunnar Jehl 00005 | 00006 | ************************* SOFTWARE LICENSE AGREEMENT *********************** 00007 | This source code is published under the BSD License. 00008 | 00009 | See file 'LICENSE.txt' that comes with this distribution or 00010 | http://dbis.informatik.uni-freiburg.de/index.php?project=GCX/license.php 00011 | for the full license agreement. 00012 | 00013 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00014 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00015 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00016 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 00017 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 00018 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 00019 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 00020 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 00021 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 00022 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 00023 | POSSIBILITY OF SUCH DAMAGE. 00024 | **************************************************************************** 00025 */ 00026 00035 #include "pathexpression.h" 00036 00037 PathExpression::PathExpression(): 00038 Expression(et_path), adornment(NULL) { 00039 } 00040 00041 PathExpression::~PathExpression() { 00042 for (unsigned i = 0; i < pathsteps.size(); i++) { 00043 delete pathsteps[i]; 00044 } 00045 delete adornment; 00046 } 00047 00048 void PathExpression::print(OutputStream & dos) const { 00049 for (unsigned i = 0; i < pathsteps.size(); i++) { 00050 dos << *(pathsteps[i]); 00051 } 00052 } 00053 00054 PathStepExpression *PathExpression::getTailPathStep() { 00055 if (pathsteps.size() == 0) { 00056 return NULL; 00057 } 00058 00059 return pathsteps[pathsteps.size() - 1]; 00060 } 00061 00062 PathStepExpression *PathExpression::getPathStepAfterTextNodeTest() { 00063 if (pathsteps.size() == 0) { 00064 return NULL; 00065 } else { 00066 for (unsigned i = 0; i < pathsteps.size(); i++) { 00067 if (pathsteps[i]->isTextNodeTest()) { 00068 return pathsteps[i + 1]; 00069 } 00070 } 00071 } 00072 00073 return NULL; 00074 } 00075 00076 unsigned PathExpression::getWeight() { 00077 unsigned weight = 0; 00078 00079 if (pathsteps.size() > 0) { 00080 for (unsigned i = 0; i < (pathsteps.size() - 1); i++) { 00081 weight += pathsteps[i]->getStepWeight(false); 00082 } 00083 00084 weight += pathsteps[pathsteps.size() - 1]->getStepWeight(true); 00085 } 00086 00087 return weight; 00088 } 00089 00090 PathExpressionAdornment *PathExpression::getAdornment() { 00091 if (adornment == NULL) { 00092 adornment = new PathExpressionAdornment(this); 00093 } 00094 00095 return adornment; 00096 } 00097 00098 bool PathExpression::isDosNodePath() { 00099 if (pathsteps.size() > 1 || pathsteps.size() == 0) { 00100 return false; 00101 } 00102 00103 return ((pathsteps[0]->getAxisType() == at_dos) 00104 && (pathsteps[0]->isNodeNodeTest())); 00105 } 00106 00107 bool PathExpression::isSyntacticallyEqualTo(PathExpression * path) { 00108 if (path) { 00109 if (pathsteps.size() == path->getPathSize()) { 00110 vector < PathStepExpression * >*steps = path->getPathSteps(); 00111 for (unsigned i = 0; i < pathsteps.size(); i++) { 00112 if (!pathsteps[i]->isSyntacticallyEqualTo((*steps)[i])) { 00113 return false; 00114 } 00115 } 00116 return true; 00117 } 00118 return false; 00119 } 00120 return false; 00121 } 00122 00123 bool PathExpression::isSemanticallyContainedIn(PathExpression * path) { 00124 00125 PathExpressionAdornment *path_adornment = path->getAdornment(); 00126 PathExpressionAdornment *this_adornment = getAdornment(); 00127 00128 // please note that the following checks are not necessarily 00129 // complete, but never ever return true errorneously 00130 00131 // first we catch the scenario where the adorned path is NULL, 00132 // hence is at most of the form /axis::node() or /axis::text() 00133 if (this_adornment->getAdornedPath() == NULL) { 00134 00135 // case 1: /child::node() is always contained in 00136 // /child::node(), /descendant::node(), and /dos::node() 00137 if (this_adornment->isChildNodePath()) { 00138 return path_adornment->getAdornedPath() == NULL && 00139 (path_adornment->isChildNodePath() || 00140 path_adornment->isDescendantNodePath() || 00141 path_adornment->isDosNodePath()); 00142 00143 // case 2: /descendant::node() is contained in 00144 // /descendant::node() and /dos::node() 00145 } else if (this_adornment->isDescendantNodePath()) { 00146 return path_adornment->getAdornedPath() == NULL && 00147 (path_adornment->isDescendantNodePath() || 00148 path_adornment->isDosNodePath()); 00149 00150 // case 3: /dos::node() is contained in /dos::node() 00151 } else if (this_adornment->isDosNodePath()) { 00152 return path_adornment->getAdornedPath() == NULL && 00153 path_adornment->isDosNodePath(); 00154 00155 // case 4: /child::text() is always contained in 00156 // /child::node(), /descendant::node(), /dos::node(), 00157 // /child::text(), and /descendant::text() 00158 } else if (this_adornment->isChildTextPath()) { 00159 return path_adornment->getAdornedPath() == NULL && 00160 (path_adornment->isChildNodePath() || 00161 path_adornment->isDescendantNodePath() || 00162 path_adornment->isDosNodePath() || 00163 path_adornment->isChildTextPath() || 00164 path_adornment->isDescendantTextPath()); 00165 00166 // case 5: /descendant::text() is always contained in 00167 // /descendant::node(), /dos::node(), 00168 // and /descendant::text() 00169 } else if (this_adornment->isDescendantTextPath()) { 00170 return path_adornment->getAdornedPath() == NULL && 00171 (path_adornment->isDescendantNodePath() || 00172 path_adornment->isDosNodePath() || 00173 path_adornment->isDescendantTextPath()); 00174 } else { 00175 // otherwise the current path selects no node at all, 00176 // thus being trivially contained in the other path 00177 return true; 00178 } 00179 00180 // second we catch the scenario where the adorned path tested to 00181 // contain the current path is NULL, hence is at most of the form 00182 // /axis::node() or /axis::text() 00183 } else if (path_adornment->getAdornedPath() == NULL) { 00184 00185 // case 1: /descendant::node() and /dos::node() always contains 00186 // everything that is non-empty 00187 if (path_adornment->isDosNodePath() || 00188 path_adornment->isDescendantNodePath()) { 00189 return true; 00190 00191 // case 2: /descendant::text() contains everything that 00192 // selects some text 00193 } else if (path_adornment->isDescendantTextPath()) { 00194 return this_adornment->isChildTextPath() || 00195 this_adornment->isDescendantTextPath(); 00196 00197 // case 3: although there might be containment in some rare cases, 00198 // we return false here (this is always sound) 00199 } else { 00200 return false; 00201 } 00202 00203 } else { 00204 // we now assert that the final path step in the original path matches 00205 // the final path step in the path we check containment for; 00206 // again, note that this is not necessarily complete, but always sound 00207 00208 // case 1: dos::node() is matched only by dos::node() 00209 if (this_adornment->isDosNodePath()) { 00210 if (!path_adornment->isDosNodePath() && 00211 !(path_adornment->isDescendantNodePath() && 00212 path_adornment->isDescendantTextPath())) 00213 return false; 00214 00215 00216 // case 2: descendant::node() is matched by dos::node() 00217 // and descendant::node() 00218 } else if (this_adornment->isDescendantNodePath()) { 00219 if (!(path_adornment->isDosNodePath() || 00220 path_adornment->isDescendantNodePath())) 00221 return false; 00222 00223 // case 3: child::node() is matched by dos::node(), descendant::node(), 00224 // and child::node() 00225 } else if (this_adornment->isChildNodePath()) { 00226 if (!(path_adornment->isDosNodePath() || 00227 path_adornment->isDescendantNodePath() || 00228 path_adornment->isChildNodePath())) 00229 return false; 00230 00231 // case 4: descendant::text() is matched by dos::node(), 00232 // descendant::node(), and descendant::text() 00233 } else if (this_adornment->isDescendantTextPath()) { 00234 if (!(path_adornment->isDosNodePath() || 00235 path_adornment->isDescendantNodePath() || 00236 path_adornment->isDescendantTextPath())) 00237 return false; 00238 00239 // case 5: child::text() is matched by dos::node(), 00240 // descendant::node(), child::node(), descendant::text(), 00241 // and child::text() 00242 } else if (this_adornment->isChildTextPath()) { 00243 if (!(path_adornment->isDosNodePath() || 00244 path_adornment->isDescendantNodePath() || 00245 path_adornment->isChildNodePath() || 00246 path_adornment->isDescendantTextPath() || 00247 path_adornment->isChildTextPath())) 00248 return false; 00249 00250 // case 6: if the path is a path without any of the endings above, 00251 // we must assert that the other path is so, too, or a path 00252 // of the for dos::node() (which causes no harm) 00253 } else { 00254 if (path_adornment->isDescendantNodePath() || 00255 path_adornment->isChildNodePath() || 00256 path_adornment->isDescendantTextPath() || 00257 path_adornment->isChildTextPath()) 00258 return false; 00259 } 00260 } 00261 00262 00263 // now we are in the position to run the homomorphism check algorithm; we 00264 // do not need to consider the special cases above anymore; if we reach 00265 // this position, it suffices to check for path containment 00266 PathExpression *p = this_adornment->getRewrittenPath(); 00267 unsigned p_size = p->getPathSize(); 00268 00269 PathExpression *p_prime = path_adornment->getAdornedPath(); 00270 vector < unsigned >*p_prime_ad = path_adornment->getPathAdornments(); 00271 unsigned p_prime_size = (int) p_prime->getPathSize(); 00272 00273 bool C[p_size][p_prime_size]; 00274 int D[p_size][p_prime_size]; 00275 00276 for (unsigned i = 0; i < p_size; i++) { 00277 for (unsigned j = 0; j < p_prime_size; j++) { 00278 C[i][j] = false; 00279 D[i][j] = 0; 00280 } 00281 } 00282 00283 // core algorithm (Algorithm 3 in paper) 00284 for (int x = p_size - 1; x >= 0; x--) { 00285 for (int y = p_prime_size - 1; y >= 0; y--) { 00286 00287 bool x_at_end = (unsigned) x == p_size - 1; 00288 bool y_at_end = (unsigned) y == p_prime_size - 1; 00289 00290 // part 1: compute C[x][y] 00291 bool cond = false; 00292 00293 if (p_prime->getPathStepAt(y)->isStarNodeTest()) { 00294 cond = true; 00295 } else if (p_prime->getPathStepAt(y)->isTagNodeTest() && 00296 p->getPathStepAt(x)->isTagNodeTest()) { 00297 PathStepTagExpression *ps_tag = 00298 (PathStepTagExpression *) p->getPathStepAt(x); 00299 PathStepTagExpression *ps_prime_tag = 00300 (PathStepTagExpression *) p_prime->getPathStepAt(y); 00301 cond = ps_tag->getNodeTest() == ps_prime_tag->getNodeTest(); 00302 } 00303 00304 if (cond && !y_at_end) { 00305 switch (p_prime->getPathStepAt(y + 1)->getAxisType()) { 00306 case at_child: 00307 cond = !x_at_end && C[x + 1][y + 1] && 00308 p->getPathStepAt(x + 1)->getAxisType() == at_child; 00309 break; 00310 case at_descendant: 00311 cond = D[x][y + 1] >= (int) (1 + (*p_prime_ad)[y + 1]); 00312 break; 00313 default: 00314 // should never happen 00315 break; 00316 } 00317 } 00318 00319 C[x][y] = cond; 00320 00321 00322 // part 2: compute d 00323 int d = C[x][y] ? 0 : -1; // -1 := -infty 00324 00325 // part 3: comput D[x][y] 00326 int max2 = -1; // -1 := -infty 00327 00328 if (!x_at_end && D[x + 1][y] >= 0) { 00329 max2 = 1 + D[x + 1][y]; 00330 } 00331 00332 int max = max2 > d ? max2 : d; 00333 00334 D[x][y] = max; 00335 } 00336 } 00337 00338 return C[0][0]; 00339 } 00340 00341 bool PathExpression::hasInnerTextNodeTest() { 00342 for (unsigned i = 0; i < pathsteps.size(); i++) { 00343 if ((pathsteps[i]->isTextNodeTest()) && (i < pathsteps.size() - 1)) { 00344 return true; 00345 } 00346 } 00347 00348 return false; 00349 } 00350 00351 bool PathExpression::hasTerminatingTextNodeTest() { 00352 if (pathsteps.size() == 0) 00353 return false; 00354 return pathsteps[pathsteps.size() - 1]->isTextNodeTest(); 00355 } 00356 00357 bool PathExpression::hasFollowingDescendantOrDosFrom(unsigned ps_idx) { 00358 for (unsigned i = (ps_idx + 1); i < pathsteps.size(); i++) { 00359 if (pathsteps[i]->getAxisType() == at_descendant 00360 || pathsteps[i]->getAxisType() == at_dos) { 00361 return true; 00362 } 00363 } 00364 00365 return false; 00366 } 00367 00368 bool PathExpression::hasPreviousDescendantOrDosUpTo(unsigned ps_idx) { 00369 if (ps_idx == 0 || ps_idx >= pathsteps.size()) { 00370 return false; 00371 } 00372 00373 for (unsigned i = 0; i < ps_idx; i++) { 00374 if (pathsteps[i]->getAxisType() == at_descendant 00375 || pathsteps[i]->getAxisType() == at_dos) { 00376 return true; 00377 } 00378 } 00379 00380 return false; 00381 } 00382 00383 bool PathExpression::selectsNoNode() { 00384 for (unsigned i = 0; i < pathsteps.size() - 1; i++) { 00385 if (i != pathsteps.size() - 2 || !pathsteps[i + 1]->isDosNodeStep()) { 00386 if (pathsteps[i]->isTextNodeTest()) 00387 return true; 00388 } 00389 } 00390 00391 return false; 00392 } 00393 00394 bool PathExpression::containsStarDescendantSequence(unsigned pos) { 00395 // break if end of path reached 00396 if (pos >= pathsteps.size()) { 00397 return false; 00398 } else if (pathsteps[pos]->getAxisType() == at_descendant) { 00399 return true; 00400 } else { 00401 return pathsteps[pos]->isStarNodeTest()? 00402 containsStarDescendantSequence(pos + 1) : false; 00403 } 00404 } 00405 00406 bool PathExpression::mightHasChildDescendantConflict(PathExpression * p) { 00407 PathStepExpression *this_last_nondos_ps = NULL; 00408 PathStepExpression *path_last_nondos_ps = NULL; 00409 00410 unsigned this_size = getPathSize(); 00411 unsigned path_size = p->getPathSize(); 00412 00413 if (this_size == 0 || (this_size == 1 && getPathStepAt(0)->isDosNodeStep())) { 00414 return false; // no conflict 00415 } else { 00416 this_last_nondos_ps = getPathStepAt(this_size - 1)->isDosNodeStep()? 00417 getPathStepAt(this_size - 2) : getPathStepAt(this_size - 1); 00418 } 00419 00420 00421 if (path_size == 0 00422 || (path_size == 1 && p->getPathStepAt(0)->isDosNodeStep())) { 00423 return false; // no conflict 00424 } else { 00425 path_last_nondos_ps = 00426 p->getPathStepAt(path_size - 00427 1)->isDosNodeStep()? p-> 00428 getPathStepAt(path_size - 2) : p->getPathStepAt(path_size - 1); 00429 } 00430 00431 // note: both this_last_nondos_ps and path_last_nondis_ps differ from NULL 00432 AXIS_TYPE this_type = this_last_nondos_ps->getAxisType(); 00433 AXIS_TYPE path_type = path_last_nondos_ps->getAxisType(); 00434 00435 if ((this_type == at_child && path_type == at_descendant) || 00436 (this_type == at_descendant && path_type == at_child)) { 00437 if (this_last_nondos_ps->isTagNodeTest() && 00438 path_last_nondos_ps->isTagNodeTest()) { 00439 // compare tags 00440 PathStepTagExpression *t1 = 00441 (PathStepTagExpression *) this_last_nondos_ps; 00442 PathStepTagExpression *t2 = 00443 (PathStepTagExpression *) path_last_nondos_ps; 00444 return t1->getNodeTest() == t2->getNodeTest(); // confl. if identical 00445 } else if (this_last_nondos_ps->isTextNodeTest() || 00446 path_last_nondos_ps->isTextNodeTest()) { 00447 return false; // no conflict 00448 } else { 00449 // yes, we have a conflict for node()=a, a=node(), *=a, a=*, *=*, 00450 // *=node(), node()=node(), ... 00451 return true; 00452 } 00453 } else { 00454 return false; 00455 } 00456 } 00457 00458 void PathExpression::replacePathStepAt(unsigned idx, PathStepExpression * ps) { 00459 if (ps && idx <= pathsteps.size()) { 00460 delete pathsteps[idx]; 00461 00462 pathsteps[idx] = ps; 00463 } 00464 } 00465 00466 PathExpression *PathExpression::clone() { 00467 PathExpression *p = new PathExpression(); 00468 00469 for (unsigned i = 0; i < pathsteps.size(); i++) { 00470 p->addPathStep(pathsteps[i]->clone()); 00471 } 00472 00473 return p; 00474 } 00475 00476 PathExpression *PathExpression::cloneWithoutFinalDosNodeAndAttributes() { 00477 PathExpression *p = new PathExpression(); 00478 00479 for (unsigned i = 0; i < pathsteps.size(); i++) { 00480 if (!(i == pathsteps.size() - 1 && pathsteps[i]->isNodeNodeTest() && 00481 pathsteps[i]->getAxisType() == at_dos)) { 00482 p->addPathStep(pathsteps[i]->cloneWithoutAttributes()); 00483 } 00484 } 00485 00486 return p; 00487 }