Add the C bindings.

These bindings are meant for embedded use, so the API is slightly different
and optimized for speed.
This commit is contained in:
TomZ
2017-01-16 15:40:38 +01:00
parent c6997bbb0c
commit 39fd61e400
3 changed files with 384 additions and 0 deletions
+163
View File
@@ -0,0 +1,163 @@
#include "cmf.h"
#include <assert.h>
#include <string.h> // for memcpy
/*
 * Decode one variable-width unsigned integer.
 *
 * Every byte carries 7 value bits; a set high bit (0x80) means another byte
 * follows. After each continuation byte the accumulator is incremented by
 * one, mirroring the "- 1" applied by cmf_serialize().
 *
 * data       in/out: read position. Advanced past the number on success,
 *            left untouched on failure.
 * endMessage one past the last readable byte.
 * result     receives the decoded value on success only; its previous
 *            content is ignored.
 *
 * Returns false when the input ends before the number is complete, or when
 * the encoded value does not fit in an unsigned long. Rejecting the latter
 * keeps over-long input from silently wrapping around to a small value.
 */
static bool cmf_unserialize(const unsigned char **data, const unsigned char * const endMessage, unsigned long *result)
{
    assert(data);
    assert(*data);
    assert(result);
    assert(endMessage);

    const unsigned long max_value = ~0UL;
    unsigned long acc = 0;
    for (const unsigned char *cur = *data; cur < endMessage; ) {
        const unsigned char byte = *cur++;
        if (acc > (max_value >> 7))
            return false; // the shift below would drop high bits
        acc = (acc << 7) | (byte & 0x7F);
        if (!(byte & 0x80)) { // last byte of the number
            *data = cur;
            *result = acc;
            return true;
        }
        if (acc == max_value)
            return false; // the increment below would wrap to zero
        ++acc;
    }
    return false; // ran out of input in the middle of a number
}
/*
 * Append 'value' to the buffer at *data using the variable-width encoding
 * and advance *data past the bytes written.
 *
 * Groups of 7 bits are produced least-significant first and then emitted in
 * the opposite order, so the most significant group ends up first in the
 * stream. Every byte except the final one has its high bit (0x80) set. One is
 * subtracted after each shift, which is the counterpart of the increment done
 * while decoding.
 *
 * No bounds checking is done; the caller must provide enough room.
 */
static void cmf_serialize(unsigned char **data, unsigned long value)
{
    // enough room for every 7-bit group of an unsigned long
    unsigned char groups[(sizeof(unsigned long) * 8 + 6) / 7];
    int count = 0;

    // the least significant group is the terminator: no continuation bit
    groups[count++] = (unsigned char) (value & 0x7F);
    while (value > 0x7F) {
        value = (value >> 7) - 1;
        groups[count++] = (unsigned char) ((value & 0x7F) | 0x80);
    }

    // write the groups out most-significant first
    unsigned char *out = *data;
    while (count > 0)
        *out++ = groups[--count];
    *data = out;
}
/*
 * Write the header of a token: its tag and its type (format).
 *
 * The low 3 bits of the header byte hold the type, the high 5 bits hold the
 * tag. Tags below 31 fit in that byte. For larger tags the 5 tag bits are all
 * set as an escape marker and the real tag follows in variable-width form.
 *
 * *data is advanced past everything written. No bounds checking is done.
 */
static void cmf_write(unsigned char **data, unsigned int tag, short type) {
    assert(type < 8);
    unsigned char *out = *data;

    if (tag < 31) {
        // tag and type share a single byte
        *out++ = (unsigned char) ((tag << 3) + type);
        *data = out;
        return;
    }

    // escape marker: all five tag bits set, the real tag comes next
    *out++ = (unsigned char) (type | 0xF8);
    *data = out;
    cmf_serialize(data, tag);
}
/*
 * Append a signed integer token.
 *
 * Non-negative values are written as CMFMT_POSITIVE_NUMBER. Negative values
 * are written as CMFMT_NEGATIVE_NUMBER followed by their magnitude.
 *
 * ptr   in/out: write position, advanced past the token.
 * tag   tag to store with the value.
 * value the number to encode.
 */
void cmfbuilder_add_int(unsigned char **ptr, unsigned int tag, int value)
{
    if (value >= 0) {
        cmf_write(ptr, tag, CMFMT_POSITIVE_NUMBER);
        cmf_serialize(ptr, (unsigned long) value);
        return;
    }

    // Negate in unsigned arithmetic: -INT_MIN is not representable as an int,
    // so negating the int itself would be undefined behavior.
    cmf_write(ptr, tag, CMFMT_NEGATIVE_NUMBER);
    cmf_serialize(ptr, 0UL - (unsigned long) value);
}
/*
 * Append an unsigned number token: a CMFMT_POSITIVE_NUMBER header for 'tag'
 * followed by 'value' in variable-width encoding.
 * *ptr is advanced past the bytes written; no bounds checking is done.
 */
void cmfbuilder_add_ulong(unsigned char **ptr, unsigned int tag, unsigned long value)
{
cmf_write(ptr, tag, CMFMT_POSITIVE_NUMBER);
cmf_serialize(ptr, value);
}
/*
 * Append a byte-array or UTF-8 string token: header, variable-width length,
 * then the raw bytes (no terminator is added).
 *
 * ptr    in/out: write position, advanced past the token.
 * tag    tag to store with the data.
 * data   bytes to copy; may be NULL only when length is 0.
 * length number of bytes to copy; must not be negative.
 * fmt    CMFMT_STRING_UTF8 or CMFMT_BYTES.
 *
 * A negative length trips an assert in debug builds. In release builds it is
 * treated as 0, so it can never reach memcpy() as a huge unsigned count.
 */
void cmfbuilder_add_bytes(unsigned char **ptr, unsigned int tag, const char *data, int length, enum cmf_message_format fmt)
{
    assert(fmt == CMFMT_STRING_UTF8 || fmt == CMFMT_BYTES);
    assert(length >= 0);
    assert(data || length == 0);

    if (length < 0)
        length = 0;
    const size_t count = (size_t) length;

    cmf_write(ptr, tag, fmt);
    cmf_serialize(ptr, count);
    // memcpy() requires valid pointers even for a zero count, so skip it
    if (count > 0) {
        memcpy(*ptr, data, count);
        *ptr += count;
    }
}
/*
 * Append a boolean token. The value is carried by the token type itself
 * (CMFMT_BOOL_TRUE or CMFMT_BOOL_FALSE), so only the header is written.
 */
void cmfbuilder_add_bool(unsigned char **ptr, unsigned int tag, bool value)
{
    short type = CMFMT_BOOL_FALSE;
    if (value)
        type = CMFMT_BOOL_TRUE;
    cmf_write(ptr, tag, type);
}
/*
 * Parse the token starting at *ptr.
 *
 * ptr        in/out: read position. Advanced past the token when
 *            CMF_FOUND_TOKEN is returned, left untouched otherwise.
 * endMessage one past the last byte of the message.
 * token      receives tag, format and value. For strings and byte arrays,
 *            begin/end point into the message itself; nothing is copied.
 *
 * Returns CMF_DOCUMENT_END when *ptr is at or beyond endMessage,
 * CMF_PARSER_ERROR when the token is truncated, has an unknown format or an
 * out-of-range tag, and CMF_FOUND_TOKEN otherwise.
 */
enum cmf_parser_result cmfparser_next(const unsigned char **ptr, const unsigned char * const endMessage, struct cmf_message_parser_token *token)
{
    // Work on a private cursor and commit it only on success, so every error
    // path leaves *ptr where the caller put it.
    const unsigned char *cursor = *ptr;
    if (cursor >= endMessage)
        return CMF_DOCUMENT_END;

    const unsigned char header = *cursor++;
    token->fmt = (enum cmf_message_format)(header & 0x07);
    token->tag = header >> 3;
    if (token->tag == 31) { // the tag is stored in the next byte(s)
        unsigned long tag = 0;
        if (!cmf_unserialize(&cursor, endMessage, &tag) || tag > 0xFFFFFFFF)
            return CMF_PARSER_ERROR;
        token->tag = (unsigned int) tag;
    }

    unsigned long value = 0;
    switch (token->fmt) {
    case CMFMT_POSITIVE_NUMBER:
    case CMFMT_NEGATIVE_NUMBER:
        if (!cmf_unserialize(&cursor, endMessage, &value))
            return CMF_PARSER_ERROR;
        if (token->fmt == CMFMT_NEGATIVE_NUMBER)
            token->signed_num = (long) (value * -1);
        else
            token->big_num = value;
        break;
    case CMFMT_BYTES:
    case CMFMT_STRING_UTF8:
        if (!cmf_unserialize(&cursor, endMessage, &value))
            return CMF_PARSER_ERROR;
        // Compare the length against what is left instead of forming
        // cursor + value first: the length comes from the message, and
        // pointer arithmetic beyond the buffer is undefined behavior.
        if (value > (unsigned long) (endMessage - cursor))
            return CMF_PARSER_ERROR; // the actual value is not included in the message
        token->begin = cursor;
        token->end = cursor + value;
        cursor = token->end;
        break;
    case CMFMT_BOOL_TRUE:
    case CMFMT_BOOL_FALSE:
        // the header byte is the whole token
        break;
    default:
        return CMF_PARSER_ERROR;
    }

    *ptr = cursor;
    return CMF_FOUND_TOKEN;
}
+70
View File
@@ -0,0 +1,70 @@
#ifndef _CMF_H
#define _CMF_H
#include <stdbool.h>
/*
 * Type of a token. The value is stored in the low 3 bits of the token's
 * header byte.
 */
enum cmf_message_format {
CMFMT_POSITIVE_NUMBER = 0, /* followed by the value, variable-width encoded */
CMFMT_NEGATIVE_NUMBER = 1, /* followed by the magnitude (value * -1), variable-width encoded */
CMFMT_STRING_UTF8 = 2, /* followed by a variable-width length and that many bytes */
CMFMT_BYTES = 3, /* followed by a variable-width length and that many bytes */
CMFMT_BOOL_TRUE = 4, /* no payload */
CMFMT_BOOL_FALSE = 5 /* no payload */
// TODO double
};
/* Outcome of one call to cmfparser_next(). */
enum cmf_parser_result {
CMF_FOUND_TOKEN, /* a token was parsed and stored in the caller's token struct */
CMF_PARSER_ERROR, /* the data is truncated or uses an unknown format */
CMF_DOCUMENT_END /* the read position reached the end of the message */
};
/* builder API */
/*
 * The cmfbuilder_add_* functions append data to an existing buffer and advance
 * *ptr past the bytes written.
 * The amount of data added depends on the actual value passed because a
 * variable-width byte encoding is being used.
 * The format (int/string/etc) is encoded in a header byte and takes 1 byte.
 * The 'tag' shares that header byte, taking zero extra bytes, if it is < 31.
 * Larger tags take additional bytes (at most 5 for a 32-bit tag).
 * The actual byte count used for the value is format dependent:
 *  - numbers take up to 10 bytes for the largest 64-bit values. (Notice that
 *    negative numbers are multiplied by -1 before being encoded, so -1 takes
 *    just 1 byte.)
 *  - byte-arrays are just copied. The byte count is stored in front of them,
 *    also variable-width encoded.
 *  - booleans are free. No bytes taken besides the header.
 *
 * Please make sure enough bytes are available in the buffer, as no effort is
 * made to avoid writing past the end of the buffer.
 */
void cmfbuilder_add_int(unsigned char **ptr, unsigned int tag, int value);
void cmfbuilder_add_ulong(unsigned char **ptr, unsigned int tag, unsigned long value);
/*
 * cmfbuilder_add_bytes() lets the caller choose the format explicitly because
 * the compact message format supports both byte-arrays and utf-8 encoded
 * strings. Pass the one you want encoded. Notice that if you pass anything
 * other than CMFMT_STRING_UTF8 or CMFMT_BYTES an assert fires in debug builds,
 * and without asserts your stream will be corrupted.
 * 'length' bytes are copied from 'data'; no trailing zero is added.
 */
void cmfbuilder_add_bytes(unsigned char **ptr, unsigned int tag, const char *data, int length, enum cmf_message_format fmt);
/* Booleans are encoded in the token header itself; no value bytes follow. */
void cmfbuilder_add_bool(unsigned char **ptr, unsigned int tag, bool value);
/* parser API */
/*
 * The cmf parser is essentially a SAX-like streaming parser that allows really
 * fast parsing and zero data-copy.
 *
 * The cmfparser_next() function can be called repeatedly until the token you
 * wish to find has been located.
 * The actual token values are stored in the cmf_message_parser_token struct,
 * which can be reused for all calls.
 */
struct cmf_message_parser_token {
int tag; /* tag of the token that was found */
enum cmf_message_format fmt; /* type of the token; selects which value field below is valid */
union {
unsigned long big_num; /* Used when fmt is CMFMT_POSITIVE_NUMBER */
long signed_num; /* Used when fmt is CMFMT_NEGATIVE_NUMBER */
};
/*
 * Used for byte arrays and strings. Both point into the parsed message;
 * 'end' is one past the last byte. There is no trailing zero.
 */
const unsigned char *begin, *end; /* used for byte arrays and strings */
};
/*
 * Parse the token at *ptr.
 *  ptr        in/out: read position; moved past the token on CMF_FOUND_TOKEN.
 *  endMessage one past the last byte of the message.
 *  token      filled in with the tag, format and value of the token.
 * Returns CMF_DOCUMENT_END once *ptr has reached endMessage. After
 * CMF_PARSER_ERROR, stop parsing; do not rely on the position of *ptr.
 */
enum cmf_parser_result cmfparser_next(const unsigned char **ptr, const unsigned char * const endMessage, struct cmf_message_parser_token *token);
#endif
+151
View File
@@ -0,0 +1,151 @@
#include "cmf.h"
#ifdef NDEBUG
# error run the test in debug mode otherwise you wont get any results
#endif
#include <assert.h>
#include <stdio.h>
#include <string.h>
/*
 * Round-trip a positive int with a small tag (15), which fits in the header
 * byte, and check the exact bytes produced.
 */
void basic_test1()
{
unsigned char buf[100];
unsigned char *ptr = buf;
struct cmf_message_parser_token token;
enum cmf_parser_result found;
cmfbuilder_add_int(&ptr, 15, 6512);
assert(ptr > buf);
// printf( "size: %d\n", ptr - buf);
assert(ptr - buf == 3);
// header: (15 << 3) | CMFMT_POSITIVE_NUMBER
assert(buf[0] == 120);
// 6512 in variable-width encoding
assert((unsigned char) buf[1] == 177);
assert(buf[2] == 112);
// parse it back; 'ptr' now marks the end of the message
const unsigned char *parsePtr = buf;
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_FOUND_TOKEN);
assert(token.tag == 15);
assert(token.fmt == CMFMT_POSITIVE_NUMBER);
assert(token.big_num == 6512);
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_DOCUMENT_END);
}
/*
 * Round-trip a positive int with a large tag (129), which does not fit in the
 * header byte and is therefore stored in the bytes following it.
 */
void basic_test2()
{
unsigned char buf[100];
unsigned char *ptr = buf;
struct cmf_message_parser_token token;
enum cmf_parser_result found;
cmfbuilder_add_int(&ptr, 129, 6512);
assert(ptr > buf);
// printf( "size: %d\n", ptr - buf);
assert(ptr - buf == 5);
// header: all tag bits set (0xF8) | CMFMT_POSITIVE_NUMBER
assert((unsigned char) buf[0] == 248);
// tag 129 in variable-width encoding
assert((unsigned char) buf[1] == 128);
assert(buf[2] == 1);
// 6512 in variable-width encoding
assert((unsigned char) buf[3] == 177);
assert(buf[4] == 112);
// parse it back; 'ptr' now marks the end of the message
const unsigned char *parsePtr = buf;
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_FOUND_TOKEN);
assert(token.tag == 129);
assert(token.fmt == CMFMT_POSITIVE_NUMBER);
assert(token.big_num == 6512);
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_DOCUMENT_END);
}
/*
 * Build a message holding a utf-8 string, a byte array and two booleans,
 * check the exact bytes produced and then parse all four tokens back.
 */
void test_types()
{
unsigned char buf[100];
unsigned char *ptr = buf;
struct cmf_message_parser_token token;
enum cmf_parser_result found;
const char *foo = "Föo";
assert(strlen(foo) == 4); // someone changed encoding of this source file
cmfbuilder_add_bytes(&ptr, 1, foo, 4, CMFMT_STRING_UTF8);
const char *hihi = "hihi";
cmfbuilder_add_bytes(&ptr, 200, hihi, 4, CMFMT_BYTES);
cmfbuilder_add_bool(&ptr, 3, true);
cmfbuilder_add_bool(&ptr, 40, false);
assert(ptr > buf);
// printf( "size: %d\n", ptr - buf);
assert(ptr - buf == 17);
// string, tag 1: header is (1 << 3) | CMFMT_STRING_UTF8
assert((unsigned char) buf[0] == 10);
assert((unsigned char) buf[1] == 4); // serialized string length
assert((unsigned char) buf[2] == 70);
assert((unsigned char) buf[3] == 195);
assert((unsigned char) buf[4] == 182);
assert((unsigned char) buf[5] == 111);
// blob, tag 200: header is 0xF8 | CMFMT_BYTES, then the tag in 2 bytes
assert((unsigned char) buf[6] == 251);
assert((unsigned char) buf[7] == 128);
assert((unsigned char) buf[8] == 72);
assert((unsigned char) buf[9] == 4); // length of bytearray
assert((unsigned char) buf[10] == 104); // 'h'
assert((unsigned char) buf[11] == 105); // 'i'
assert((unsigned char) buf[12] == 104); // 'h'
assert((unsigned char) buf[13] == 105); // 'i'
// bool-true, tag 3: header is (3 << 3) | CMFMT_BOOL_TRUE, no value bytes
assert((unsigned char) buf[14] == 28);
// bool-false, tag 40: header is 0xF8 | CMFMT_BOOL_FALSE, then the tag
assert((unsigned char) buf[15] == 253);
assert((unsigned char) buf[16] == 40);
// parse the four tokens back in order; 'ptr' marks the end of the message
const unsigned char *parsePtr = buf;
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_FOUND_TOKEN);
assert(token.tag == 1);
assert(token.fmt == CMFMT_STRING_UTF8);
assert(token.begin > buf);
assert(token.begin < ptr);
assert(token.end > buf);
assert(token.end < ptr);
assert(token.end - token.begin == 4);
assert(memcmp(foo, token.begin, 4) == 0); // be careful, no trailing zero!
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_FOUND_TOKEN);
assert(token.tag == 200);
assert(token.fmt == CMFMT_BYTES);
assert(token.begin > buf);
assert(token.begin < ptr);
assert(token.end > buf);
assert(token.end < ptr);
assert(token.end - token.begin == 4);
assert(memcmp(hihi, token.begin, 4) == 0); // be careful, no trailing zero!
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_FOUND_TOKEN);
assert(token.tag == 3);
assert(token.fmt == CMFMT_BOOL_TRUE);
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_FOUND_TOKEN);
assert(token.tag == 40);
assert(token.fmt == CMFMT_BOOL_FALSE);
found = cmfparser_next(&parsePtr, ptr, &token);
assert(found == CMF_DOCUMENT_END);
}
/*
 * Run all tests. Each test reports failure through assert(), so reaching the
 * return statement means every check passed. argc and argv are unused.
 */
int main(int argc, char *argv[])
{
basic_test1();
basic_test2();
test_types();
return 0;
}