00001 #ifndef _XML_XMLUTF8Builder_H_ // -*-c++-*- 00002 00003 #define _XML_XMLUTF8Builder_H_ 00004 00005 #ifdef __GNUG__ 00006 # pragma interface 00007 #endif 00008 00009 /* $URL: svn://svn.omnifarious.org/home/hopper/src/svn-gp/trunk/C++/libNet/xml/xml/utf8/Builder.h $ 00010 * $Author: hopper $ 00011 * $Date: 2003-01-16 16:21:54 -0600 (Thu, 16 Jan 2003) $ 00012 * $Rev: 48 $ 00013 */ 00014 00015 #include <string> 00016 #include <cstddef> 00017 00018 namespace strmod { 00019 namespace xml { 00020 namespace utf8 { 00021 00022 /** \class Builder Builder.h xml/utf8/Builder.h 00023 * An interface class for an Lexer to use to put tokens together into some other 00024 * structure. 00025 * 00026 * This is so the Lexer doesn't have to know the details of how the parser (or 00027 * whatever is interpreting the output of the Lexer) works. It follows the 00028 * Builder pattern from Design Patterns. 00029 * 00030 * In the various member functions that follow, there are various parameters 00031 * describing the positions of things. In order to make this description 00032 * clearer, here is a diagram: 00033 * <pre> 00034 * <LongTagName withan="attribute">And some element text</LongTagName> 00035 * ^ ^ ^ ^^^ ^ ^ 00036 * | | | ||| | | 00037 * `->selbegin | | ||`->selend celbegin<-' celend<-' 00038 * | | || 00039 * attrbegin<-' | |`->attrend 00040 * | | 00041 * valbegin<-' `->valend 00042 * </pre> 00043 */ 00044 class Builder 00045 { 00046 public: 00047 /** \class BufHandle Builder.h xml/utf8/Builder.h 00048 * Handle for positions from previous calls to Lexer::lex 00049 */ 00050 union BufHandle { 00051 unsigned long ulval_; //!< If the user uses an integral type for the handle. 00052 void *ptrval_; //!< If the user uses a pointer type for the handle. 00053 }; 00054 /** \class Position Builder.h xml/utf8/Builder.h 00055 * The position of a token start, end, or other feature. 00056 * 00057 * Since the Lexer works on character buffers, and a given token may extend 00058 * across several buffers, it also needs a way to communicate which buffer a 00059 * particular token started or ended in. To this end, since the Lexer 00060 * reports the positions in the buffer of token beginnings and endings, it 00061 * reports those positions using an identifier (i.e. handle) for which buffer 00062 * the position was in, what the offset is from that buffer. 00063 * 00064 * An offset may point one past the last achracter in the buffer identified 00065 * by the handle. 00066 * 00067 * This whole system is designed with three goals: 00068 * - Reduce the amount of information that has to be copied. 00069 * - Open up as much code to the optimizer as possible. 00070 * - Make sure each character in an XML document is only processed once 00071 * on the way to a parse tree. 00072 */ 00073 struct Position { 00074 //! A buffer identifier passed into Lexer::lex 00075 BufHandle bufhdl_; 00076 //! The offset from the beginning of the buffer represented by bufhdl_ 00077 // This may point one past the last achracter in the buffer identified by 00078 // the handle. 00079 size_t bufoffset_; 00080 00081 //! Convenience initializing contructor 00082 Position(const BufHandle &bufhdl, const size_t &bufoffset) 00083 : bufhdl_(bufhdl), bufoffset_(bufoffset) 00084 { 00085 } 00086 //! Give things reasonable default values. 00087 Position() : bufoffset_(0) { bufhdl_.ulval_ = 0; } 00088 }; 00089 00090 //! It's an interface, so this doesn't do anything. 00091 Builder() { } 00092 //! It's an interface, so this doesn't do anything. 00093 virtual ~Builder() {} 00094 00095 /** The lexer encountered an element open tag, atributes may follow 00096 * 00097 * @param selbegin The buffer position of the '<' of the tag. 00098 * @param name The name of the element being opened. 00099 */ 00100 virtual void startElementTag(const Position &selbegin, 00101 const ::std::string &name) = 0; 00102 /** The lexer encountered an attribute of an element open tag. 00103 * 00104 * @param attrbegin The buffer position of the first character of the tag name. 00105 * 00106 * @param attrend The buffer position one past the closing single 00107 * or double quote of the attribute value. 00108 * 00109 * @param valbegin The buffer position of the first character after 00110 * the opening single or double quote of the attribute value. 00111 * 00112 * @param valend The buffer position of the closing single or 00113 * double quote of the attribute value. 00114 * 00115 * @param name The attribute name. 00116 */ 00117 virtual void addAttribute(const Position &attrbegin, const Position &attrend, 00118 const Position &valbegin, const Position &valend, 00119 const ::std::string &name) = 0; 00120 /** The lexer encountered the closing '>' of an element open tag. 00121 * 00122 * @param selend The buffer position one past the '>'. 00123 * @param wasempty Was the tag of the form <br/>? 00124 */ 00125 virtual void endElementTag(const Position &selend, bool wasempty) = 0; 00126 /** The lexer encountered the close element tag (a tag of the form </p> 00127 * 00128 * @param celbegin The buffer position of the '<'. 00129 * @param celend The buffer position one past the '>'. 00130 * @param name The name of the element being closed. 00131 */ 00132 virtual void closeElementTag(const Position &celbegin, 00133 const Position &celend, 00134 const ::std::string &name) = 0; 00135 }; 00136 00137 } // namespace utf8 00138 } // namespace xml 00139 } // namespace strmod 00140 00141 #endif
1.3-rc1