Main Page   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Namespace Members   Compound Members   File Members  

Builder.h

00001 #ifndef _XML_XMLUTF8Builder_H_  // -*-c++-*-
00002 
00003 #define _XML_XMLUTF8Builder_H_
00004 
00005 #ifdef __GNUG__
00006 #  pragma interface
00007 #endif
00008 
00009 /* $URL: svn://svn.omnifarious.org/home/hopper/src/svn-gp/trunk/C++/libNet/xml/xml/utf8/Builder.h $
00010  * $Author: hopper $
00011  * $Date: 2003-01-16 16:21:54 -0600 (Thu, 16 Jan 2003) $
00012  * $Rev: 48 $
00013  */
00014 
00015 #include <string>
00016 #include <cstddef>
00017 
00018 namespace strmod {
00019 namespace xml {
00020 namespace utf8 {
00021 
00022 /** \class Builder Builder.h xml/utf8/Builder.h
00023  * An interface class for a Lexer to use to put tokens together into some other
00024  * structure.
00025  *
00026  * This is so the Lexer doesn't have to know the details of how the parser (or
00027  * whatever is interpreting the output of the Lexer) works.  It follows the
00028  * Builder pattern from Design Patterns.
00029  *
00030  * In the various member functions that follow, there are various parameters
00031  * describing the positions of things.  In order to make this description
00032  * clearer, here is a diagram:
00033  * <pre>
00034  * &lt;LongTagName withan="attribute">And some element text&lt;/LongTagName> 
00035  * ^            ^       ^        ^^^                    ^             ^
00036  * |            |       |        |||                    |             |
00037  * `->selbegin  |       |        ||`->selend  celbegin<-'     celend<-'
00038  *              |       |        ||
00039  *   attrbegin<-'       |        |`->attrend
00040  *                      |        |
00041  *            valbegin<-'        `->valend
00042  * </pre>
00043  */
00044 class Builder
00045 {
00046  public:
00047    /** \class BufHandle Builder.h xml/utf8/Builder.h
00048     * Handle for positions from previous calls to Lexer::lex
00049     */
00050    union BufHandle {
00051       unsigned long ulval_;  //!< If the user uses an integral type for the handle.
00052       void *ptrval_; //!< If the user uses a pointer type for the handle.
00053    };
00054    /** \class Position Builder.h xml/utf8/Builder.h
00055     * The position of a token start, end, or other feature.
00056     *
00057     * Since the Lexer works on character buffers, and a given token may extend
00058     * across several buffers, it also needs a way to communicate which buffer a
00059     * particular token started or ended in.  To this end, since the Lexer
00060     * reports the positions in the buffer of token beginnings and endings, it
00061     * reports those positions using an identifier (i.e. handle) for which buffer
00062     * the position was in, and what the offset is within that buffer.
00063     *
00064     * An offset may point one past the last character in the buffer identified
00065     * by the handle.
00066     *
00067     * This whole system is designed with three goals:
00068     * - Reduce the amount of information that has to be copied.
00069     * - Open up as much code to the optimizer as possible.
00070     * - Make sure each character in an XML document is only processed once
00071     *    on the way to a parse tree.
00072     */
00073    struct Position {
00074       //! A buffer identifier passed into Lexer::lex
00075       BufHandle bufhdl_;
00076       //! The offset from the beginning of the buffer represented by bufhdl_
00077       // This may point one past the last character in the buffer identified by
00078       // the handle.
00079       size_t bufoffset_;
00080 
00081       //! Convenience initializing constructor
00082       Position(const BufHandle &bufhdl, const size_t &bufoffset)
00083            : bufhdl_(bufhdl), bufoffset_(bufoffset)
00084       {
00085       }
00086       //! Give things reasonable default values (zero handle, zero offset).
00087       Position() : bufoffset_(0) { bufhdl_.ulval_ = 0; }
00088    };
00089 
00090    //! It's an interface, so this doesn't do anything.
00091    Builder() { }
00092    //! It's an interface, so this doesn't do anything.
00093    virtual ~Builder() {}
00094 
00095    /** The lexer encountered an element open tag; attributes may follow.
00096     *
00097     * @param selbegin The buffer position of the '<' of the tag.
00098     * @param name The name of the element being opened.
00099     */
00100    virtual void startElementTag(const Position &selbegin,
00101                                 const ::std::string &name) = 0;
00102    /** The lexer encountered an attribute of an element open tag.
00103     *
00104     * @param attrbegin  The buffer position of the first character of the attribute name.
00105     *
00106     * @param attrend    The buffer position one past the closing single
00107     *     or double quote of the attribute value.
00108     *
00109     * @param valbegin   The buffer position of the first character after
00110     *     the opening single or double quote of the attribute value.
00111     *
00112     * @param valend     The buffer position of the closing single or
00113     *     double quote of the attribute value.
00114     *
00115     * @param name       The attribute name.
00116     */
00117    virtual void addAttribute(const Position &attrbegin, const Position &attrend,
00118                              const Position &valbegin, const Position &valend,
00119                              const ::std::string &name) = 0;
00120    /** The lexer encountered the closing '>' of an element open tag.
00121     *
00122     * @param selend The buffer position one past the '>'.
00123     * @param wasempty Was the tag of the form &lt;br/>?
00124     */
00125    virtual void endElementTag(const Position &selend, bool wasempty) = 0;
00126    /** The lexer encountered a close element tag (a tag of the form &lt;/p>).
00127     *
00128     * @param celbegin The buffer position of the '<'.
00129     * @param celend The buffer position one past the '>'.
00130     * @param name The name of the element being closed.
00131     */
00132    virtual void closeElementTag(const Position &celbegin,
00133                                 const Position &celend,
00134                                 const ::std::string &name) = 0;
00135 };
00136 
00137 } // namespace utf8
00138 } // namespace xml
00139 } // namespace strmod
00140 
00141 #endif

Generated on Wed Jan 29 00:32:43 2003 for libNet by doxygen1.3-rc1