rapidjson
A fast JSON parser/generator for C++ with both SAX/DOM style API
 All Classes Functions Variables Typedefs Pages
rapidjson.h
1 #ifndef RAPIDJSON_RAPIDJSON_H_
2 #define RAPIDJSON_RAPIDJSON_H_
3 
4 // Copyright (c) 2011-2012 Milo Yip (miloyip@gmail.com)
5 // Version 0.11
6 
7 #include <cstdlib> // malloc(), realloc(), free()
8 #include <cstring> // memcpy()
9 
10 ///////////////////////////////////////////////////////////////////////////////
11 // RAPIDJSON_NO_INT64DEFINE
12 
13 // Defines the int64_t and uint64_t types in the global namespace.
14 // Users who already have their own definitions can define RAPIDJSON_NO_INT64DEFINE to disable this.
15 #ifndef RAPIDJSON_NO_INT64DEFINE
16 #ifdef _MSC_VER
17 typedef __int64 int64_t;
18 typedef unsigned __int64 uint64_t;
19 #else
20 #include <inttypes.h>
21 #endif
22 #endif // RAPIDJSON_NO_INT64DEFINE
23 
24 ///////////////////////////////////////////////////////////////////////////////
25 // RAPIDJSON_ENDIAN
26 #define RAPIDJSON_LITTLEENDIAN 0 //!< Little endian machine
27 #define RAPIDJSON_BIGENDIAN 1 //!< Big endian machine
28 
29 //! Endianness of the machine.
30 /*! GCC provided macro for detecting endianness of the target machine. But other
31  compilers may not have this. User can define RAPIDJSON_ENDIAN to either
32  RAPIDJSON_LITTLEENDIAN or RAPIDJSON_BIGENDIAN.
33 */
34 #ifndef RAPIDJSON_ENDIAN
35 #ifdef __BYTE_ORDER__
36 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
37 #define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
38 #else
39 #define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
40 #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
41 #else
42 #define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN // Assumes little endian otherwise.
43 #endif // __BYTE_ORDER__
44 #endif // RAPIDJSON_ENDIAN
45 
46 ///////////////////////////////////////////////////////////////////////////////
47 // RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD
48 
49 // Enable SSE2 optimization.
50 //#define RAPIDJSON_SSE2
51 
52 // Enable SSE4.2 optimization.
53 //#define RAPIDJSON_SSE42
54 
55 #if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
56 #define RAPIDJSON_SIMD
57 #endif
58 
59 ///////////////////////////////////////////////////////////////////////////////
60 // RAPIDJSON_NO_SIZETYPEDEFINE
61 
62 #ifndef RAPIDJSON_NO_SIZETYPEDEFINE
63 namespace rapidjson {
64 //! Use 32-bit array/string indices even for 64-bit platform, instead of using size_t.
65 /*! User may override the SizeType by defining RAPIDJSON_NO_SIZETYPEDEFINE.
66 */
67 typedef unsigned SizeType;
68 } // namespace rapidjson
69 #endif
70 
71 ///////////////////////////////////////////////////////////////////////////////
72 // RAPIDJSON_ASSERT
73 
74 //! Assertion.
75 /*! By default, rapidjson uses C assert() for assertion.
76  User can override it by defining RAPIDJSON_ASSERT(x) macro.
77 */
78 #ifndef RAPIDJSON_ASSERT
79 #include <cassert>
80 #define RAPIDJSON_ASSERT(x) assert(x)
81 #endif // RAPIDJSON_ASSERT
82 
83 ///////////////////////////////////////////////////////////////////////////////
84 // Helpers
85 
86 #define RAPIDJSON_MULTILINEMACRO_BEGIN do {
87 #define RAPIDJSON_MULTILINEMACRO_END \
88 } while((void)0, 0)
89 
90 namespace rapidjson {
91 
92 ///////////////////////////////////////////////////////////////////////////////
93 // Allocator
94 
95 /*! \class rapidjson::Allocator
96  \brief Concept for allocating, resizing and freeing memory block.
97 
98  Note that Malloc() and Realloc() are non-static but Free() is static.
99 
100  So if an allocator need to support Free(), it needs to put its pointer in
101  the header of memory block.
102 
103 \code
104 concept Allocator {
105  static const bool kNeedFree; //!< Whether this allocator needs to call Free().
106 
107  // Allocate a memory block.
108  // \param size of the memory block in bytes.
109  // \returns pointer to the memory block.
110  void* Malloc(size_t size);
111 
112  // Resize a memory block.
113  // \param originalPtr The pointer to current memory block. Null pointer is permitted.
114  // \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.)
115  // \param newSize the new size in bytes.
116  void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
117 
118  // Free a memory block.
119  // \param pointer to the memory block. Null pointer is permitted.
120  static void Free(void *ptr);
121 };
122 \endcode
123 */
124 
125 ///////////////////////////////////////////////////////////////////////////////
126 // CrtAllocator
127 
128 //! C-runtime library allocator.
129 /*! This class is just wrapper for standard C library memory routines.
130  \implements Allocator
131 */
class CrtAllocator {
public:
    //! Blocks obtained from this allocator must be released with Free(). (concept Allocator)
    static const bool kNeedFree = true;

    //! Allocates a memory block with malloc(). (concept Allocator)
    /*! \param size Size of the memory block in bytes.
        \return Whatever malloc() returns for that size.
    */
    void* Malloc(size_t size) { return malloc(size); }

    //! Resizes a memory block with realloc(). (concept Allocator)
    /*! \param originalPtr Pointer to the current block; forwarded unchanged to realloc().
        \param originalSize Unused: realloc() does not need the current size.
        \param newSize Requested size in bytes.
        \return Whatever realloc() returns.
    */
    void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
        (void)originalSize; // only part of the signature to satisfy the Allocator concept
        return realloc(originalPtr, newSize);
    }

    //! Releases a memory block with free(). (concept Allocator)
    static void Free(void* ptr) { free(ptr); }
};
139 
140 ///////////////////////////////////////////////////////////////////////////////
141 // MemoryPoolAllocator
142 
143 //! Default memory allocator used by the parser and DOM.
144 /*! This allocator allocates memory blocks from pre-allocated memory chunks.
145 
146  It does not free memory blocks. Realloc() either grows the most recent block in place or allocates new memory; it never releases the original block.
147 
148  The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default.
149 
150  User may also supply a buffer as the first chunk.
151 
152  If the user-buffer is full then additional chunks are allocated by BaseAllocator.
153 
154  The user-buffer is not deallocated by this allocator.
155 
156  \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator.
157  \implements Allocator
158 */
159 template <typename BaseAllocator = CrtAllocator>
161 public:
162  static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
163 
164  //! Constructor with chunkSize.
165  /*! \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
166  \param baseAllocator The allocator for allocating memory chunks.
167  */
168  MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
169  chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
170  {
171  if (!baseAllocator_)
172  ownBaseAllocator_ = baseAllocator_ = new BaseAllocator();
173  AddChunk(chunk_capacity_);
174  }
175 
176  //! Constructor with user-supplied buffer.
177  /*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size.
178 
179  The user buffer will not be deallocated when this allocator is destructed.
180 
181  \param buffer User supplied buffer.
182  \param size Size of the buffer in bytes. It must at least larger than sizeof(ChunkHeader).
183  \param chunkSize The size of memory chunk. The default is kDefaultChunkSize.
184  \param baseAllocator The allocator for allocating memory chunks.
185  */
186  MemoryPoolAllocator(char *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
187  chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
188  {
189  RAPIDJSON_ASSERT(buffer != 0);
190  RAPIDJSON_ASSERT(size > sizeof(ChunkHeader));
191  chunkHead_ = (ChunkHeader*)buffer;
192  chunkHead_->capacity = size - sizeof(ChunkHeader);
193  chunkHead_->size = 0;
194  chunkHead_->next = 0;
195  }
196 
197  //! Destructor.
198  /*! This deallocates all memory chunks, excluding the user-supplied buffer.
199  */
201  Clear();
202  delete ownBaseAllocator_;
203  }
204 
205  //! Deallocates all memory chunks, excluding the user-supplied buffer.
206  void Clear() {
207  while(chunkHead_ != 0 && chunkHead_ != (ChunkHeader *)userBuffer_) {
208  ChunkHeader* next = chunkHead_->next;
209  baseAllocator_->Free(chunkHead_);
210  chunkHead_ = next;
211  }
212  }
213 
214  //! Computes the total capacity of allocated memory chunks.
215  /*! \return total capacity in bytes.
216  */
217  size_t Capacity() {
218  size_t capacity = 0;
219  for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
220  capacity += c->capacity;
221  return capacity;
222  }
223 
224  //! Computes the memory blocks allocated.
225  /*! \return total used bytes.
226  */
227  size_t Size() {
228  size_t size = 0;
229  for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
230  size += c->size;
231  return size;
232  }
233 
234  //! Allocates a memory block. (concept Allocator)
235  void* Malloc(size_t size) {
236  size = (size + 3) & ~3; // Force aligning size to 4
237 
238  if (chunkHead_->size + size > chunkHead_->capacity)
239  AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size);
240 
241  char *buffer = (char *)(chunkHead_ + 1) + chunkHead_->size;
242  RAPIDJSON_ASSERT(((uintptr_t)buffer & 3) == 0); // returned buffer is aligned to 4
243  chunkHead_->size += size;
244 
245  return buffer;
246  }
247 
248  //! Resizes a memory block (concept Allocator)
249  void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
250  if (originalPtr == 0)
251  return Malloc(newSize);
252 
253  // Do not shrink if new size is smaller than original
254  if (originalSize >= newSize)
255  return originalPtr;
256 
257  // Simply expand it if it is the last allocation and there is sufficient space
258  if (originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - originalSize) {
259  size_t increment = newSize - originalSize;
260  increment = (increment + 3) & ~3; // Force aligning size to 4
261  if (chunkHead_->size + increment <= chunkHead_->capacity) {
262  chunkHead_->size += increment;
263  RAPIDJSON_ASSERT(((uintptr_t)originalPtr & 3) == 0); // returned buffer is aligned to 4
264  return originalPtr;
265  }
266  }
267 
268  // Realloc process: allocate and copy memory, do not free original buffer.
269  void* newBuffer = Malloc(newSize);
270  RAPIDJSON_ASSERT(newBuffer != 0); // Do not handle out-of-memory explicitly.
271  return memcpy(newBuffer, originalPtr, originalSize);
272  }
273 
274  //! Frees a memory block (concept Allocator)
275  static void Free(void *) {} // Do nothing
276 
277 private:
278  //! Creates a new chunk.
279  /*! \param capacity Capacity of the chunk in bytes.
280  */
281  void AddChunk(size_t capacity) {
282  ChunkHeader* chunk = (ChunkHeader*)baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity);
283  chunk->capacity = capacity;
284  chunk->size = 0;
285  chunk->next = chunkHead_;
286  chunkHead_ = chunk;
287  }
288 
289  static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity.
290 
291  //! Chunk header for perpending to each chunk.
292  /*! Chunks are stored as a singly linked list.
293  */
294  struct ChunkHeader {
295  size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself).
296  size_t size; //!< Current size of allocated memory in bytes.
297  ChunkHeader *next; //!< Next chunk in the linked list.
298  };
299 
300  ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation.
301  size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated.
302  char *userBuffer_; //!< User supplied buffer.
303  BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks.
304  BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object.
305 };
306 
307 ///////////////////////////////////////////////////////////////////////////////
308 // Encoding
309 
310 /*! \class rapidjson::Encoding
311  \brief Concept for encoding of Unicode characters.
312 
313 \code
314 concept Encoding {
315  typename Ch; //! Type of character.
316 
317  //! \brief Encode a Unicode codepoint to a buffer.
318  //! \param buffer pointer to destination buffer to store the result. It should have sufficient size of encoding one character.
319  //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively.
320  //! \returns the pointer to the next character after the encoded data.
321  static Ch* Encode(Ch *buffer, unsigned codepoint);
322 };
323 \endcode
324 */
325 
326 ///////////////////////////////////////////////////////////////////////////////
327 // UTF8
328 
329 //! UTF-8 encoding.
330 /*! http://en.wikipedia.org/wiki/UTF-8
331  \tparam CharType Type for storing 8-bit UTF-8 data. Default is char.
332  \implements Encoding
333 */
334 template<typename CharType = char>
335 struct UTF8 {
336  typedef CharType Ch;
337 
338  static Ch* Encode(Ch *buffer, unsigned codepoint) {
339  if (codepoint <= 0x7F)
340  *buffer++ = codepoint & 0xFF;
341  else if (codepoint <= 0x7FF) {
342  *buffer++ = 0xC0 | ((codepoint >> 6) & 0xFF);
343  *buffer++ = 0x80 | ((codepoint & 0x3F));
344  }
345  else if (codepoint <= 0xFFFF) {
346  *buffer++ = 0xE0 | ((codepoint >> 12) & 0xFF);
347  *buffer++ = 0x80 | ((codepoint >> 6) & 0x3F);
348  *buffer++ = 0x80 | (codepoint & 0x3F);
349  }
350  else {
351  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
352  *buffer++ = 0xF0 | ((codepoint >> 18) & 0xFF);
353  *buffer++ = 0x80 | ((codepoint >> 12) & 0x3F);
354  *buffer++ = 0x80 | ((codepoint >> 6) & 0x3F);
355  *buffer++ = 0x80 | (codepoint & 0x3F);
356  }
357  return buffer;
358  }
359 };
360 
361 ///////////////////////////////////////////////////////////////////////////////
362 // UTF16
363 
364 //! UTF-16 encoding.
365 /*! http://en.wikipedia.org/wiki/UTF-16
366  \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
367  \implements Encoding
368 */
369 template<typename CharType = wchar_t>
370 struct UTF16 {
371  typedef CharType Ch;
372 
373  static Ch* Encode(Ch* buffer, unsigned codepoint) {
374  if (codepoint <= 0xFFFF) {
375  RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
376  *buffer++ = static_cast<Ch>(codepoint);
377  }
378  else {
379  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
380  unsigned v = codepoint - 0x10000;
381  *buffer++ = static_cast<Ch>((v >> 10) + 0xD800);
382  *buffer++ = (v & 0x3FF) + 0xDC00;
383  }
384  return buffer;
385  }
386 };
387 
388 ///////////////////////////////////////////////////////////////////////////////
389 // UTF32
390 
391 //! UTF-32 encoding.
392 /*! http://en.wikipedia.org/wiki/UTF-32
393  \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
394  \implements Encoding
395 */
396 template<typename CharType = unsigned>
397 struct UTF32 {
398  typedef CharType Ch;
399 
400  static Ch *Encode(Ch* buffer, unsigned codepoint) {
401  RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
402  *buffer++ = codepoint;
403  return buffer;
404  }
405 };
406 
407 ///////////////////////////////////////////////////////////////////////////////
408 // Stream
409 
410 /*! \class rapidjson::Stream
411  \brief Concept for reading and writing characters.
412 
413  For read-only stream, no need to implement PutBegin(), Put() and PutEnd().
414 
415  For write-only stream, only need to implement Put().
416 
417 \code
418 concept Stream {
419  typename Ch; //!< Character type of the stream.
420 
421  //! Read the current character from stream without moving the read cursor.
422  Ch Peek() const;
423 
424  //! Read the current character from stream and moving the read cursor to next character.
425  Ch Take();
426 
427  //! Get the current read cursor.
428  //! \return Number of characters read from start.
429  size_t Tell();
430 
431  //! Begin writing operation at the current read pointer.
432  //! \return The begin writer pointer.
433  Ch* PutBegin();
434 
435  //! Write a character.
436  void Put(Ch c);
437 
438  //! End the writing operation.
439  //! \param begin The begin write pointer returned by PutBegin().
440  //! \return Number of characters written.
441  size_t PutEnd(Ch* begin);
442 }
443 \endcode
444 */
445 
446 //! Put N copies of a character to a stream.
template<typename Stream, typename Ch>
inline void PutN(Stream& stream, Ch c, size_t n) {
    // n is a by-value copy, so it can serve directly as the countdown.
    while (n > 0) {
        stream.Put(c);
        --n;
    }
}
452 
453 ///////////////////////////////////////////////////////////////////////////////
454 // StringStream
455 
456 //! Read-only string stream.
457 /*! \implements Stream
458 */
459 template <typename Encoding>
461  typedef typename Encoding::Ch Ch;
462 
463  GenericStringStream(const Ch *src) : src_(src), head_(src) {}
464 
465  Ch Peek() const { return *src_; }
466  Ch Take() { return *src_++; }
467  size_t Tell() const { return src_ - head_; }
468 
469  Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
470  void Put(Ch) { RAPIDJSON_ASSERT(false); }
471  size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
472 
473  const Ch* src_; //!< Current read position.
474  const Ch* head_; //!< Original head of the string.
475 };
476 
478 
479 ///////////////////////////////////////////////////////////////////////////////
480 // InsituStringStream
481 
482 //! A read-write string stream.
483 /*! This string stream is particularly designed for in-situ parsing.
484  \implements Stream
485 */
template <typename Encoding>
struct GenericInsituStringStream {
    typedef typename Encoding::Ch Ch; //!< Character type of the stream.

    //! Starts reading at \c src; writing is disabled until PutBegin() is called.
    GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}

    // Read
    //! Returns the current character without advancing.
    Ch Peek() const { return *src_; }
    //! Returns the current character and advances to the next one.
    Ch Take() { return *src_++; }
    //! Returns the number of characters consumed since construction.
    size_t Tell() const { return static_cast<size_t>(src_ - head_); }

    // Write
    //! Starts writing at the current read position and returns that position.
    Ch* PutBegin() { return dst_ = src_; }
    //! Writes one character at the write position; PutBegin() must have been called first.
    void Put(Ch c) { RAPIDJSON_ASSERT(dst_ != 0); *dst_++ = c; }
    //! Returns the number of characters written since \c begin.
    size_t PutEnd(Ch* begin) { return static_cast<size_t>(dst_ - begin); }

    Ch* src_;   //!< Current read position.
    Ch* dst_;   //!< Current write position; null until PutBegin() is called.
    Ch* head_;  //!< Original head of the string.
};
506 
508 
509 ///////////////////////////////////////////////////////////////////////////////
510 // Type
511 
512 //! Type of JSON value
enum Type {
    kNullType = 0,      //!< null
    kFalseType = 1,     //!< false
    kTrueType = 2,      //!< true
    kObjectType = 3,    //!< object
    kArrayType = 4,     //!< array
    kStringType = 5,    //!< string
    kNumberType = 6     //!< number (no trailing comma: not allowed before C++11)
};
522 
523 } // namespace rapidjson
524 
525 #endif // RAPIDJSON_RAPIDJSON_H_