http://xml.apache.org/ http://www.apache.org/ http://www.w3.org/

Home

Readme
Release Info

Installation
Download
Build

FAQs
Samples
API Docs

DOM C++ Binding
Programming
Migration Guide

Feedback
Bug-Reporting
PDF Document

CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

XMLChar.hpp

Go to the documentation of this file.
00001 /*
00002  * The Apache Software License, Version 1.1
00003  *
00004  * Copyright (c) 2002 The Apache Software Foundation.  All rights
00005  * reserved.
00006  *
00007  * Redistribution and use in source and binary forms, with or without
00008  * modification, are permitted provided that the following conditions
00009  * are met:
00010  *
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  *
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in
00016  *    the documentation and/or other materials provided with the
00017  *    distribution.
00018  *
00019  * 3. The end-user documentation included with the redistribution,
00020  *    if any, must include the following acknowledgment:
00021  *       "This product includes software developed by the
00022  *        Apache Software Foundation (http://www.apache.org/)."
00023  *    Alternately, this acknowledgment may appear in the software itself,
00024  *    if and wherever such third-party acknowledgments normally appear.
00025  *
00026  * 4. The names "Xerces" and "Apache Software Foundation" must
00027  *    not be used to endorse or promote products derived from this
00028  *    software without prior written permission. For written
00029  *    permission, please contact apache\@apache.org.
00030  *
00031  * 5. Products derived from this software may not be called "Apache",
00032  *    nor may "Apache" appear in their name, without prior written
00033  *    permission of the Apache Software Foundation.
00034  *
00035  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
00036  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00037  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00038  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
00039  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00040  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00041  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
00042  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00043  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
00044  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
00045  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00046  * SUCH DAMAGE.
00047  * ====================================================================
00048  *
00049  * This software consists of voluntary contributions made by many
00050  * individuals on behalf of the Apache Software Foundation, and was
00051  * originally based on software copyright (c) 1999, International
00052  * Business Machines, Inc., http://www.ibm.com .  For more information
00053  * on the Apache Software Foundation, please see
00054  * <http://www.apache.org/>.
00055  */
00056 
00057 /*
00058  * $Log: XMLChar.hpp,v $
00059  * Revision 1.2  2003/08/14 02:57:27  knoaman
00060  * Code refactoring to improve performance of validation.
00061  *
00062  * Revision 1.1  2002/12/20 22:10:21  tng
00063  * XML 1.1
00064  *
00065  */
00066 
00067 #if !defined(XMLCHAR_HPP)
00068 #define XMLCHAR_HPP
00069 
00070 #include <xercesc/util/XMLUniDefs.hpp>
00071 
00072 XERCES_CPP_NAMESPACE_BEGIN
00073 
00074 // ---------------------------------------------------------------------------
00075 //  This file defines character classification utilities that conform to XML 1.0 and XML 1.1
00076 // ---------------------------------------------------------------------------
00077 // Masks for the fgCharCharsTable1_0 array
00078 const XMLByte   gLetterCharMask             = 0x1;
00079 const XMLByte   gFirstNameCharMask          = 0x2;
00080 const XMLByte   gNameCharMask               = 0x4;
00081 const XMLByte   gPlainContentCharMask       = 0x8;
00082 const XMLByte   gSpecialStartTagCharMask    = 0x10;
00083 const XMLByte   gControlCharMask            = 0x20;
00084 const XMLByte   gXMLCharMask                = 0x40;
00085 const XMLByte   gWhitespaceCharMask         = 0x80;
00086 
00087 // ---------------------------------------------------------------------------
00088 //  This class is for XML 1.0
00089 // ---------------------------------------------------------------------------
00090 class  XMLChar1_0
00091 {
00092 public:
00093     // -----------------------------------------------------------------------
00094     //  Public, static methods, check the string
00095     // -----------------------------------------------------------------------
00096     static bool isAllSpaces
00097     (
00098         const   XMLCh* const    toCheck
00099         , const unsigned int    count
00100     );
00101 
00102     static bool containsWhiteSpace
00103     (
00104         const   XMLCh* const    toCheck
00105         , const unsigned int    count
00106     );
00107 
00108     static bool isValidName
00109     (
00110         const   XMLCh* const    toCheck
00111         , const unsigned int    count
00112     );
00113 
00114     static bool isValidNCName
00115     (
00116         const   XMLCh* const    toCheck
00117         , const unsigned int    count
00118     );
00119 
00120     static bool isValidQName
00121     (
00122         const   XMLCh* const    toCheck
00123         , const unsigned int    count
00124     );
00125 
00126     // -----------------------------------------------------------------------
00127     //  Public, static methods, check the XMLCh
00128     //  surrogate pair is assumed if second parameter is not null
00129     // -----------------------------------------------------------------------
00130     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00131     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00132     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00133     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00134     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00135     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00136     static bool isWhitespace(const XMLCh toCheck);
00137     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
00138     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00139 
00140     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00141 
00142     // -----------------------------------------------------------------------
00143     //  Special Non-conformant Public, static methods
00144     // -----------------------------------------------------------------------
00148     static bool isNELRecognized();
00149 
00153     static void enableNELWS();
00154 
00155 private:
00156     // -----------------------------------------------------------------------
00157     //  Static data members
00158     //
00159     //  fgCharCharsTable1_0
00160     //      The character characteristics table. Bits in each byte, represent
00161     //      the characteristics of each character. It is generated via some
00162     //      code and then hard coded into the cpp file for speed.
00163     //
00164     //  fNEL
00165     //      Flag to respresents whether NEL and LSEP newline recognition is enabled
00166     //      or disabled
00167     // -----------------------------------------------------------------------
00168     static XMLByte  fgCharCharsTable1_0[0x10000];
00169     static bool     enableNEL;
00170 
00171     friend class XMLReader;
00172 };
00173 
00174 
00175 // ---------------------------------------------------------------------------
00176 //  XMLChar1_0: Public, static methods
00177 // ---------------------------------------------------------------------------
00178 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00179 {
00180     if (!toCheck2)
00181         return ((fgCharCharsTable1_0[toCheck] & gLetterCharMask) != 0);
00182     return false;
00183 }
00184 
00185 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00186 {
00187     if (!toCheck2)
00188         return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
00189     return false;
00190 }
00191 
00192 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00193 {
00194     if (!toCheck2)
00195         return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
00196     return false;
00197 }
00198 
00199 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00200 {
00201     if (!toCheck2)
00202         return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
00203     else {
00204         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00205            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00206                return true;
00207     }
00208     return false;
00209 }
00210 
00211 
00212 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00213 {
00214     if (!toCheck2)
00215         return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
00216     return false;
00217 }
00218 
00219 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00220 {
00221     if (!toCheck2)
00222         return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
00223     else {
00224         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00225            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00226                return true;
00227     }
00228     return false;
00229 }
00230 
00231 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
00232 {
00233     return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00234 }
00235 
00236 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00237 {
00238     if (!toCheck2)
00239         return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00240     return false;
00241 }
00242 
00243 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00244 {
00245     if (!toCheck2)
00246         return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
00247     return false;
00248 }
00249 
00250 inline bool XMLChar1_0::isNELRecognized() {
00251 
00252     return enableNEL;
00253 }
00254 
00255 
00256 // ---------------------------------------------------------------------------
00257 //  This class is for XML 1.1
00258 // ---------------------------------------------------------------------------
00259 class  XMLChar1_1
00260 {
00261 public:
00262     // -----------------------------------------------------------------------
00263     //  Public, static methods, check the string
00264     // -----------------------------------------------------------------------
00265     static bool isAllSpaces
00266     (
00267         const   XMLCh* const    toCheck
00268         , const unsigned int    count
00269     );
00270 
00271     static bool containsWhiteSpace
00272     (
00273         const   XMLCh* const    toCheck
00274         , const unsigned int    count
00275     );
00276 
00277     static bool isValidName
00278     (
00279         const   XMLCh* const    toCheck
00280         , const unsigned int    count
00281     );
00282 
00283     static bool isValidNCName
00284     (
00285         const   XMLCh* const    toCheck
00286         , const unsigned int    count
00287     );
00288 
00289     static bool isValidQName
00290     (
00291         const   XMLCh* const    toCheck
00292         , const unsigned int    count
00293     );
00294 
00295     // -----------------------------------------------------------------------
00296     //  Public, static methods, check the XMLCh
00297     // -----------------------------------------------------------------------
00298     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00299     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00300     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00301     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00302     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00303     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00304     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00305     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00306 
00307     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00308 
00309 private:
00310     // -----------------------------------------------------------------------
00311     //  Static data members
00312     //
00313     //  fgCharCharsTable1_1
00314     //      The character characteristics table. Bits in each byte, represent
00315     //      the characteristics of each character. It is generated via some
00316     //      code and then hard coded into the cpp file for speed.
00317     //
00318     // -----------------------------------------------------------------------
00319     static XMLByte  fgCharCharsTable1_1[0x10000];
00320 
00321     friend class XMLReader;
00322 };
00323 
00324 
00325 // ---------------------------------------------------------------------------
00326 //  XMLChar1_1: Public, static methods
00327 // ---------------------------------------------------------------------------
00328 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00329 {
00330     if (!toCheck2)
00331         return ((fgCharCharsTable1_1[toCheck] & gLetterCharMask) != 0);
00332     return false;
00333 }
00334 
00335 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00336 {
00337     if (!toCheck2)
00338         return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
00339     else {
00340         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00341            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00342                return true;
00343     }
00344     return false;
00345 }
00346 
00347 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00348 {
00349     if (!toCheck2)
00350         return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
00351     else {
00352         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00353            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00354                return true;
00355     }
00356     return false;
00357 }
00358 
00359 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00360 {
00361     if (!toCheck2)
00362         return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
00363     else {
00364         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00365            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00366                return true;
00367     }
00368     return false;
00369 }
00370 
00371 
00372 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00373 {
00374     if (!toCheck2)
00375         return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
00376     return false;
00377 }
00378 
00379 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00380 {
00381     if (!toCheck2)
00382         return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
00383     else {
00384         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00385            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00386                return true;
00387     }
00388     return false;
00389 }
00390 
00391 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00392 {
00393     if (!toCheck2)
00394         return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
00395     return false;
00396 }
00397 
00398 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00399 {
00400     if (!toCheck2)
00401         return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
00402     return false;
00403 }
00404 
00405 
00406 XERCES_CPP_NAMESPACE_END
00407 
00408 #endif


Copyright © 2003 The Apache Software Foundation. All Rights Reserved.