You've already forked cmf-bindings
Add the C bindings.
This is meant for embedded, so slightly different API optimized for speed.
This commit is contained in:
@@ -0,0 +1,163 @@
|
|||||||
|
#include "cmf.h"
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <string.h> // for memcpy
|
||||||
|
|
||||||
|
/*
 * Decode one variable-width unsigned number from a byte stream.
 *
 * The encoding is the one produced by cmf_serialize(): most significant
 * group first, 7 payload bits per byte, bit 0x80 set on every byte except
 * the last one. Every continuation byte additionally adds 1 to the value
 * accumulated so far.
 *
 * data        in/out read cursor. Advanced past the number on success,
 *             left untouched on failure.
 * endMessage  one past the last readable byte.
 * result      receives the decoded value on success and is left untouched
 *             on failure. Any previous content of *result is ignored.
 *
 * Returns true when a complete number was decoded. Returns false when the
 * input ends before the terminating byte, or when the encoded number does
 * not fit in an unsigned long (the input is treated as malformed instead of
 * silently wrapping around).
 */
static bool cmf_unserialize(const unsigned char **data, const unsigned char * const endMessage, unsigned long *result)
{
    assert(data);
    assert(*data);
    assert(result);
    assert(endMessage);

    const unsigned long max_value = ~0UL;
    // largest accumulator that can still be shifted left by 7 without losing bits
    const unsigned long shift_limit = max_value >> 7;

    unsigned long value = 0;
    const unsigned char *ptr = *data;
    while (ptr < endMessage) {
        const unsigned char byte = *ptr++;
        if (value > shift_limit)
            return false; // too many bytes for an unsigned long
        value = (value << 7) | (byte & 0x7F);
        if (!(byte & 0x80)) { // last byte of the number
            *data = ptr;
            *result = value;
            return true;
        }
        if (value == max_value)
            return false; // the continuation's +1 would wrap to zero
        value += 1;
    }
    return false; // ran out of input in the middle of a number
}
|
||||||
|
|
||||||
|
/*
 * Append 'value' to the output in the variable-width encoding.
 *
 * The number is cut into 7-bit groups. Every group except the least
 * significant one is stored with bit 0x80 set and has 1 subtracted from the
 * remaining value before it is cut, which is what the decoder's "+1 per
 * continuation byte" undoes. The most significant group is written first.
 *
 * data   in/out write cursor; advanced past the bytes written. The caller
 *        must guarantee enough room, no bounds checking is done here.
 */
static void cmf_serialize(unsigned char **data, unsigned long value)
{
    // groups are produced least-significant first, so collect them and emit in reverse
    unsigned char groups[sizeof(unsigned long) * 8 / 7 + 1];
    unsigned int count = 0;

    groups[count++] = (unsigned char) (value & 0x7F); // final byte: no continuation bit
    while (value > 0x7F) {
        value = (value >> 7) - 1;
        groups[count++] = (unsigned char) ((value & 0x7F) | 0x80);
    }

    unsigned char *out = *data;
    while (count > 0)
        *out++ = groups[--count];
    *data = out;
}
|
||||||
|
|
||||||
|
/*
 * Write the header of one token: its tag and its type.
 *
 * The low 3 bits of the first byte hold 'type', the high 5 bits hold the
 * tag. A tag below 31 fits in those 5 bits. For any larger tag the 5 bits
 * are all set to 1 and the tag itself follows in the variable-width
 * encoding written by cmf_serialize().
 *
 * data   in/out write cursor; advanced past the bytes written.
 */
static void cmf_write(unsigned char **data, unsigned int tag, short type) {
    assert(type < 8);

    if (tag < 31) { // tag and type share a single byte
        assert(tag < 32);
        **data = (unsigned char) ((tag << 3) + type);
        ++*data;
        return;
    }

    // use more than 1 byte: set the 'tag' bits to all 1s, then append the real tag
    **data = (unsigned char) (type | 0xF8);
    ++*data;
    cmf_serialize(data, tag);
}
|
||||||
|
|
||||||
|
void cmfbuilder_add_int(unsigned char **ptr, unsigned int tag, int value)
|
||||||
|
{
|
||||||
|
short type;
|
||||||
|
if (value >= 0) {
|
||||||
|
type = CMFMT_POSITIVE_NUMBER;
|
||||||
|
} else {
|
||||||
|
type = CMFMT_NEGATIVE_NUMBER;
|
||||||
|
value *= -1;
|
||||||
|
}
|
||||||
|
cmf_write(ptr, tag, type);
|
||||||
|
cmf_serialize(ptr, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cmfbuilder_add_ulong(unsigned char **ptr, unsigned int tag, unsigned long value)
|
||||||
|
{
|
||||||
|
cmf_write(ptr, tag, CMFMT_POSITIVE_NUMBER);
|
||||||
|
cmf_serialize(ptr, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cmfbuilder_add_bytes(unsigned char **ptr, unsigned int tag, const char *data, int length, enum cmf_message_format fmt)
|
||||||
|
{
|
||||||
|
assert(fmt == CMFMT_STRING_UTF8 || fmt == CMFMT_BYTES);
|
||||||
|
cmf_write(ptr, tag, fmt);
|
||||||
|
cmf_serialize(ptr, length);
|
||||||
|
memcpy(*ptr, data, length);
|
||||||
|
*ptr += length;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cmfbuilder_add_bool(unsigned char **ptr, unsigned int tag, bool value)
|
||||||
|
{
|
||||||
|
cmf_write(ptr, tag, value ? CMFMT_BOOL_TRUE : CMFMT_BOOL_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
enum cmf_parser_result cmfparser_next(const unsigned char **ptr, const unsigned char * const endMessage, struct cmf_message_parser_token *token)
|
||||||
|
{
|
||||||
|
if (*ptr >= endMessage)
|
||||||
|
return CMF_DOCUMENT_END;
|
||||||
|
|
||||||
|
unsigned char byte = *ptr[0];
|
||||||
|
token->fmt = (enum cmf_message_format)(byte & 0x07);
|
||||||
|
token->tag = byte >> 3;
|
||||||
|
if (token->tag == 31) { // the tag is stored in the next byte(s)
|
||||||
|
unsigned long tag = 0;
|
||||||
|
*ptr += 1;
|
||||||
|
bool ok = cmf_unserialize(ptr, endMessage, &tag);
|
||||||
|
if (!ok || tag > 0xFFFFFFFF) {
|
||||||
|
*ptr -= 1;
|
||||||
|
return CMF_PARSER_ERROR;
|
||||||
|
}
|
||||||
|
*ptr -= 1;
|
||||||
|
token->tag = (unsigned int) tag;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long value = 0;
|
||||||
|
|
||||||
|
switch (token->fmt) {
|
||||||
|
case CMFMT_POSITIVE_NUMBER:
|
||||||
|
case CMFMT_NEGATIVE_NUMBER: {
|
||||||
|
*ptr += 1;
|
||||||
|
bool ok = cmf_unserialize(ptr, endMessage, &value);
|
||||||
|
if (!ok) {
|
||||||
|
*ptr -= 1;
|
||||||
|
return CMF_PARSER_ERROR;
|
||||||
|
}
|
||||||
|
if (token->fmt == CMFMT_NEGATIVE_NUMBER)
|
||||||
|
token->signed_num = (long) (value * -1);
|
||||||
|
else
|
||||||
|
token->big_num = value;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case CMFMT_BYTES:
|
||||||
|
case CMFMT_STRING_UTF8: {
|
||||||
|
*ptr += 1;
|
||||||
|
bool ok = cmf_unserialize(ptr, endMessage, &value);
|
||||||
|
if (!ok) {
|
||||||
|
*ptr -= 1;
|
||||||
|
return CMF_PARSER_ERROR;
|
||||||
|
}
|
||||||
|
token->begin = *ptr;
|
||||||
|
token->end = token->begin + value;
|
||||||
|
if (token->end > endMessage) // The actual value is not included in the message
|
||||||
|
return CMF_PARSER_ERROR;
|
||||||
|
*ptr += value;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case CMFMT_BOOL_TRUE:
|
||||||
|
case CMFMT_BOOL_FALSE:
|
||||||
|
*ptr += 1;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return CMF_PARSER_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
return CMF_FOUND_TOKEN;
|
||||||
|
}
|
||||||
@@ -0,0 +1,70 @@
|
|||||||
|
#ifndef _CMF_H
|
||||||
|
#define _CMF_H
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
/*
 * Type of a token. The numeric values are part of the encoded stream (they
 * are stored in the low 3 bits of a token's first byte), so they must not
 * be renumbered.
 */
enum cmf_message_format {
    CMFMT_POSITIVE_NUMBER = 0,  /* unsigned number */
    CMFMT_NEGATIVE_NUMBER = 1,  /* negative number, stored as its magnitude */
    CMFMT_STRING_UTF8     = 2,  /* length-prefixed UTF-8 text */
    CMFMT_BYTES           = 3,  /* length-prefixed raw bytes */
    CMFMT_BOOL_TRUE       = 4,  /* boolean true, no payload */
    CMFMT_BOOL_FALSE      = 5   /* boolean false, no payload */
    // TODO double
};
|
||||||
|
|
||||||
|
/* Outcome of a single cmfparser_next() call. */
enum cmf_parser_result {
    CMF_FOUND_TOKEN,    /* a token was parsed and stored in the token struct */
    CMF_PARSER_ERROR,   /* the data could not be parsed */
    CMF_DOCUMENT_END    /* the end of the message was reached, no token read */
};
|
||||||
|
|
||||||
|
/* builder API */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The cmfbuilder_add_* range of methods append data to an existing buffer.
|
||||||
|
* The amount of data added is relative to the actual value passed because a variable-width byte encoding is being used.
|
||||||
|
* The 'tag' takes at most 5 bytes. It takes no extra bytes if the value is < 31, because it then shares a byte with the format.
|
||||||
|
* The format (int/string/etc) is encoded and takes 1 byte.
|
||||||
|
* The actual byte count used for the value is format dependent.
|
||||||
|
* numbers take up to 10 bytes for a 64-bit value. (Note that negative numbers are multiplied by -1 before being encoded, so -1 takes just 1 byte.)
|
||||||
|
* byte-arrays are just copied. Additional byte-count is the length. Also var-encoded.
|
||||||
|
* booleans are free. No bytes taken.
|
||||||
|
*
|
||||||
|
* Please make sure enough bytes are available in the buffer as no effort is being made to avoid appending after the buffer.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void cmfbuilder_add_int(unsigned char **ptr, unsigned int tag, int value);
|
||||||
|
void cmfbuilder_add_ulong(unsigned char **ptr, unsigned int tag, unsigned long value);
|
||||||
|
/*
|
||||||
|
* Add-bytes allows the caller to specify the format specifically because the compact message format
|
||||||
|
* supports both byte-arrays as well as utf-8 encoded strings.
|
||||||
|
* Pass the one you want encoded. Notice that if you pass anything other than CMFMT_STRING_UTF8 or CMFMT_BYTES your
|
||||||
|
* stream will be corrupted
|
||||||
|
*/
|
||||||
|
void cmfbuilder_add_bytes(unsigned char **ptr, unsigned int tag, const char *data, int length, enum cmf_message_format fmt);
|
||||||
|
void cmfbuilder_add_bool(unsigned char **ptr, unsigned int tag, bool value);
|
||||||
|
|
||||||
|
/* parser API */
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The cmf parser method is essentially a SAX-style parser that allows really fast parsing and zero data-copy.
|
||||||
|
*
|
||||||
|
* The cmfparser_next() method can be called repeatedly until a certain token you wish to find has been located.
|
||||||
|
* The actual token values are stored in the cmf_message_parser_token struct which can be reused for all calls.
|
||||||
|
*/
|
||||||
|
struct cmf_message_parser_token {
|
||||||
|
int tag;
|
||||||
|
enum cmf_message_format fmt;
|
||||||
|
union {
|
||||||
|
unsigned long big_num; /* Used when fmt is CMFMT_POSITIVE_NUMBER */
|
||||||
|
long signed_num; /* Used when fmt is CMFMT_NEGATIVE_NUMBER */
|
||||||
|
};
|
||||||
|
const unsigned char *begin, *end; /* used for byte arrays and strings */
|
||||||
|
};
|
||||||
|
|
||||||
|
enum cmf_parser_result cmfparser_next(const unsigned char **ptr, const unsigned char * const endMessage, struct cmf_message_parser_token *token);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -0,0 +1,151 @@
|
|||||||
|
#include "cmf.h"
|
||||||
|
|
||||||
|
#ifdef NDEBUG
|
||||||
|
# error run the test in debug mode otherwise you wont get any results
|
||||||
|
#endif
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
void basic_test1()
|
||||||
|
{
|
||||||
|
unsigned char buf[100];
|
||||||
|
unsigned char *ptr = buf;
|
||||||
|
struct cmf_message_parser_token token;
|
||||||
|
enum cmf_parser_result found;
|
||||||
|
|
||||||
|
cmfbuilder_add_int(&ptr, 15, 6512);
|
||||||
|
assert(ptr > buf);
|
||||||
|
// printf( "size: %d\n", ptr - buf);
|
||||||
|
assert(ptr - buf == 3);
|
||||||
|
assert(buf[0] == 120);
|
||||||
|
assert((unsigned char) buf[1] == 177);
|
||||||
|
assert(buf[2] == 112);
|
||||||
|
|
||||||
|
const unsigned char *parsePtr = buf;
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_FOUND_TOKEN);
|
||||||
|
assert(token.tag == 15);
|
||||||
|
assert(token.fmt == CMFMT_POSITIVE_NUMBER);
|
||||||
|
assert(token.big_num == 6512);
|
||||||
|
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_DOCUMENT_END);
|
||||||
|
}
|
||||||
|
|
||||||
|
void basic_test2()
|
||||||
|
{
|
||||||
|
unsigned char buf[100];
|
||||||
|
unsigned char *ptr = buf;
|
||||||
|
struct cmf_message_parser_token token;
|
||||||
|
enum cmf_parser_result found;
|
||||||
|
|
||||||
|
cmfbuilder_add_int(&ptr, 129, 6512);
|
||||||
|
assert(ptr > buf);
|
||||||
|
// printf( "size: %d\n", ptr - buf);
|
||||||
|
assert(ptr - buf == 5);
|
||||||
|
assert((unsigned char) buf[0] == 248);
|
||||||
|
assert((unsigned char) buf[1] == 128);
|
||||||
|
assert(buf[2] == 1);
|
||||||
|
assert((unsigned char) buf[3] == 177);
|
||||||
|
assert(buf[4] == 112);
|
||||||
|
|
||||||
|
const unsigned char *parsePtr = buf;
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_FOUND_TOKEN);
|
||||||
|
assert(token.tag == 129);
|
||||||
|
assert(token.fmt == CMFMT_POSITIVE_NUMBER);
|
||||||
|
assert(token.big_num == 6512);
|
||||||
|
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_DOCUMENT_END);
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_types()
|
||||||
|
{
|
||||||
|
unsigned char buf[100];
|
||||||
|
unsigned char *ptr = buf;
|
||||||
|
struct cmf_message_parser_token token;
|
||||||
|
enum cmf_parser_result found;
|
||||||
|
|
||||||
|
const char *foo = "Föo";
|
||||||
|
assert(strlen(foo) == 4); // someone changed encoding of this source file
|
||||||
|
cmfbuilder_add_bytes(&ptr, 1, foo, 4, CMFMT_STRING_UTF8);
|
||||||
|
const char *hihi = "hihi";
|
||||||
|
cmfbuilder_add_bytes(&ptr, 200, hihi, 4, CMFMT_BYTES);
|
||||||
|
cmfbuilder_add_bool(&ptr, 3, true);
|
||||||
|
cmfbuilder_add_bool(&ptr, 40, false);
|
||||||
|
assert(ptr > buf);
|
||||||
|
// printf( "size: %d\n", ptr - buf);
|
||||||
|
assert(ptr - buf == 17);
|
||||||
|
// string '1'
|
||||||
|
assert((unsigned char) buf[0] == 10);
|
||||||
|
assert((unsigned char) buf[1] == 4); // serialized string length
|
||||||
|
assert((unsigned char) buf[2] == 70);
|
||||||
|
assert((unsigned char) buf[3] == 195);
|
||||||
|
assert((unsigned char) buf[4] == 182);
|
||||||
|
assert((unsigned char) buf[5] == 111);
|
||||||
|
|
||||||
|
// blob '200'
|
||||||
|
assert((unsigned char) buf[6] == 251);
|
||||||
|
assert((unsigned char) buf[7] == 128);
|
||||||
|
assert((unsigned char) buf[8] == 72);
|
||||||
|
assert((unsigned char) buf[9] == 4); // length of bytearray
|
||||||
|
assert((unsigned char) buf[10] == 104); // 'h'
|
||||||
|
assert((unsigned char) buf[11] == 105); // 'i'
|
||||||
|
assert((unsigned char) buf[12] == 104); // 'h'
|
||||||
|
assert((unsigned char) buf[13] == 105); // 'i'
|
||||||
|
|
||||||
|
// bool-true '3'
|
||||||
|
assert((unsigned char) buf[14] == 28);
|
||||||
|
|
||||||
|
// bool-false '40'
|
||||||
|
assert((unsigned char) buf[15] == 253);
|
||||||
|
assert((unsigned char) buf[16] == 40);
|
||||||
|
|
||||||
|
const unsigned char *parsePtr = buf;
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_FOUND_TOKEN);
|
||||||
|
assert(token.tag == 1);
|
||||||
|
assert(token.fmt == CMFMT_STRING_UTF8);
|
||||||
|
assert(token.begin > buf);
|
||||||
|
assert(token.begin < ptr);
|
||||||
|
assert(token.end > buf);
|
||||||
|
assert(token.end < ptr);
|
||||||
|
assert(token.end - token.begin == 4);
|
||||||
|
assert(memcmp(foo, token.begin, 4) == 0); // be careful, no trailing zero!
|
||||||
|
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_FOUND_TOKEN);
|
||||||
|
assert(token.tag == 200);
|
||||||
|
assert(token.fmt == CMFMT_BYTES);
|
||||||
|
assert(token.begin > buf);
|
||||||
|
assert(token.begin < ptr);
|
||||||
|
assert(token.end > buf);
|
||||||
|
assert(token.end < ptr);
|
||||||
|
assert(token.end - token.begin == 4);
|
||||||
|
assert(memcmp(hihi, token.begin, 4) == 0); // be careful, no trailing zero!
|
||||||
|
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_FOUND_TOKEN);
|
||||||
|
assert(token.tag == 3);
|
||||||
|
assert(token.fmt == CMFMT_BOOL_TRUE);
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_FOUND_TOKEN);
|
||||||
|
assert(token.tag == 40);
|
||||||
|
assert(token.fmt == CMFMT_BOOL_FALSE);
|
||||||
|
|
||||||
|
found = cmfparser_next(&parsePtr, ptr, &token);
|
||||||
|
assert(found == CMF_DOCUMENT_END);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
basic_test1();
|
||||||
|
basic_test2();
|
||||||
|
test_types();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
Reference in New Issue
Block a user