19

I'm trying to read a package from a buffer, decode it, and then return a package that might contain a requested info or call a specific function depending on the package I received. I'd like to know if there is any way I can simplify this block of code in order to reduce code repetition:

if (rx_buffer[0] == 0xAA && rx_buffer[1] == 0x88){
  tx_buffer[0] = 'O';
  tx_buffer[1] = 'K';
  change_light_intensity(1000);
}
else if (rx_buffer[0] == 0xBB && rx_buffer[1] == 0x89){
  tx_buffer[0] = 'O';
  tx_buffer[1] = 'K';
  tx_buffer[2] = 0x34;
  tx_buffer[3] = 0x12;
}

//...
//many many more else if statements
//...

else if (rx_buffer[0] == 0x3D && rx_buffer[1] == 0x76){
  tx_buffer[0] = 'O';
  tx_buffer[1] = 'K';
  change_volume(38);
}
else {
  tx_buffer[0] = 'N';
  tx_buffer[0] = 'O';
}

7
  • 12
    Question is on topic for Stack Overflow. To anyone who wants to close it, please resist the urge to do so. Commented Aug 14 at 7:36
  • 1
    In your example with "NO", did you mean tx_buffer[1] = 'O';? you wrote tx_buffer[0] = 'O'. Commented Aug 15 at 3:01
  • One thing worth investigating is whether there is any method to the madness: Do particular bits or value ranges in the input code for specific outputs? Then you could approach the reaction more algorithmically. Commented Aug 15 at 9:48
  • 1
    Also, if "OK" is the overwhelming response, you could set the tx_buffer bytes to those first thing and only change it when necessary; that reduces code. Commented Aug 15 at 9:52
  • 6
    @dumbass It was already established in now deleted comments that this question is not suitable for Code Review. The asker rather clearly wrote they want to avoid code repetition in if statements. the question was already opened and closed multiple times and moderators can step in in such situations. Commented Aug 15 at 19:14

7 Answers 7

28

Since you are dispatching based on the value of the first 2 bytes in the string, you can use a switch statement on the combination of these 2 bytes. Be careful to combine the bytes as unsigned char to avoid sign extension where char is signed if rx_buffer in an array of char.

Here is modified code:

#define COMBINE(a,b)  (((unsigned)(unsigned char)(a) << 8) | (unsigned char)(b)) 

void handle_request(const unsigned char *rx_buffer, unsigned char *tx_buffer) {
    switch (COMBINE(rx_buffer[0], rx_buffer[1])) {
    case COMBINE(0xAA, 0x88):
        tx_buffer[0] = 'O';
        tx_buffer[1] = 'K';
        change_light_intensity(1000);
        break;
    case COMBINE(0xBB, 0xB9):
        tx_buffer[0] = 'O';
        tx_buffer[1] = 'K';
        tx_buffer[2] = 0x34;
        tx_buffer[3] = 0x12;
        break;
    //...
    //many many more case clauses
    //...
    case COMBINE(0x3D, 0x76):
        tx_buffer[0] = 'O';
        tx_buffer[1] = 'K';
        change_volume(38);
        break;
    default:
        tx_buffer[0] = 'N';
        tx_buffer[1] = 'O';
        break;
    }
}

Regarding the repetitive code, you could use macros to improve readability and avoid cut+paste errors (such as the one on the posted code for the O in the else branch. Note however that this will not improve code generation and may be considered inappropriate in many places:

#define COMBINE(a,b)  (((unsigned)(unsigned char)(a) << 8) | (unsigned char)(b))
#define SET_OK(b)  ((b)[0] = 'O', (b)[1] = 'K')
#define SET_NO(b)  ((b)[0] = 'N', (b)[1] = 'O')

void handle_request(const unsigned char *rx_buffer, unsigned char *tx_buffer) {
    switch (COMBINE(rx_buffer[0], rx_buffer[1])) {
    case COMBINE(0xAA, 0x88):
        SET_OK(tx_buffer);
        change_light_intensity(1000);
        break;
    case COMBINE(0xBB, 0xB9):
        SET_OK(tx_buffer);
        tx_buffer[2] = 0x34;
        tx_buffer[3] = 0x12;
        break;
    //...
    //many many more case clauses
    //...
    case COMBINE(0x3D, 0x76):
        SET_OK(tx_buffer);
        change_volume(38);
        break;
    default:
        SET_NO(tx_buffer);
        break;
    }
}

Here are some further improvements:

  • As suggested by @Fe2O3, since all but one handler set rx_buffer to OK, this code should be factored out of the switch cases at a very small cost in the default handler, removing the need for the SET_xxx macros.
  • Changing the COMBINE macro to combine the bytes in the native order also gives the compiler the opportunity to generate a single read for the switch value (as tested on Godbolt's Compiler Explorer):
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define COMBINE(a,b)  (((unsigned)(unsigned char)(a) << 8) | (unsigned char)(b))
#else
#define COMBINE(a,b)  ((unsigned char)(a) | ((unsigned)(unsigned char)(b) << 8))
#endif

void handle_request(const unsigned char *rx_buffer, unsigned char *tx_buffer) {
    tx_buffer[0] = 'O';  // default response
    tx_buffer[1] = 'K';
    switch (COMBINE(rx_buffer[0], rx_buffer[1])) {
    case COMBINE(0xAA, 0x88):
        change_light_intensity(1000);
        break;
    case COMBINE(0xBB, 0xB9):
        tx_buffer[2] = 0x34;
        tx_buffer[3] = 0x12;
        break;
    //...
    //many many more case clauses
    //...
    case COMBINE(0x3D, 0x76):
        change_volume(38);
        break;
    default:
        tx_buffer[0] = 'N';
        tx_buffer[1] = 'O';
        break;
    }
}

This generates very compact code (latest gcc and clang):

handle_request:
        mov     WORD PTR [rsi], 19279
        movzx   eax, WORD PTR [rdi]
        cmp     eax, 34986
        je      .L2
        cmp     eax, 47547
        je      .L3
        cmp     eax, 30269
        jne     .L9
        mov     edi, 38
        jmp     change_volume
.L2:
        mov     edi, 1000
        jmp     change_light_intensity
.L3:
        mov     WORD PTR [rsi+2], 4660
        ret
.L9:
        mov     WORD PTR [rsi], 20302
        ret
Sign up to request clarification or add additional context in comments.

6 Comments

The nice part with this answer is that a good compiler can use binary search or even jump tables; as it deems optimal
@HansOlsson: yes modern compilers will produce a decision tree (a binary search using explicit tests) that tends to be more efficient than a jump table thanks of branch prediction hardware.
Until shown generality is needed, the two SET_?? macros could specify tx_buffer instead of using parameter b... Don't need a parameter if the target is ALWAYS the same thing into the same two bytes. In fact, perhaps SET_OK before switch() and overwrite when "NO" is the ("one of") response.
I suppose << 8 better as << CHAR_BIT for those dinosaur and unicorn platforms. IAC, on even stranger implementations where sizeof(int) == sizeof(char), we are in trouble. Hmmm, perhaps << 8 is better when sizeof(int) == sizeof(char)? Perhaps this is leading down the rabbit hole.
@chux: you are correct, assuming 8-bit bytes is bold, yet in my 42+ years of C programming on various environments, I have never encountered such mythical creatures, and shifting by 8 will be inefficient but correct as long as the byte values stay in the range 0..255
|
12

There would be three different reasons to improve this:

  • Increase readability by getting rid of magic numbers etc.
  • Make the code more maintainable by reducing code repetition.
  • Avoid needless comparisons/branches for performance reasons.

We can get rid of all of that by storing the data to look for in a separate array. These look like some flavour of "AT commands" so maybe each pair of bytes corresponds to a certain command? I'll assume it is something like that.

We can create an array like this:

const uint8_t commands [][2] =
{
  {0x3D, 0x76},
  {0xAA, 0x88},
  {0xBB, 0x89},
};

Note that these should be sorted at compile-time in a way that makes sense, I sorted them in ascending order based on the first byte. However this is still a bunch of unexplained "magic numbers". So lets also create an enum that gives meaningful names to each command:

typedef enum
{ // whatever names that may be meaningful:
  COMMAND_A,
  COMMAND_B,
  COMMAND_C,
  ...
  COMMAND_N // this one gives the number of supported commands
} command_t;

const uint8_t commands [][2] =
{
  // designated initializers guarantees data integrity in case the array is modified later on:
  [COMMAND_A] = {0x3D, 0x76},
  [COMMAND_B] = {0xAA, 0x88},
  [COMMAND_C] = {0xBB, 0x89},
};

// further data integrity checking:
static_assert( sizeof(commands)/sizeof(*commands) == COMMAND_N, 
               "commands array mismatch with command_t enum" );

With that sorted out, we will then use an efficient search method upon receiving new data. That is binary search, rather than naively checking all items one by one. We don't need to re-invent binary search either, we can use bsearch from stdlib.h. We would need to code a comparison callback for it, but actually memcmp is probably just what we are looking for in this case, so the callback would just be a wrapper around that:

int command_cmp (const void* obj1, const void* obj2)
{
  return memcmp(obj1, obj2, 2); 
}

The call will look something like:

const void* result;

result = bsearch(rx_buffer,           // search key
                 commands,            // data to search
                 COMMAND_N,           // number of items
                 sizeof(uint8_t[2]),  // size of each array item
                 command_cmp);        // comparison callback function

13 Comments

Does it improve readability as OP wanted?
@0___________ A single bsearch call is far more readable than some n lines of if-else with localized magic numbers, yeah?
@0___________ Depends--OP didn't specify what metrics they use for readability, beyond "fewer if-else statements". Given that this code doesn't include any if-else statements, it's reasonable to assume OP would be satisfied by it, but it's impossible to say with any certainty.
Minor: for size of each array item, sizeof rx_buffer[0] looks easier to review and maintain than sizeof(uint8_t[2]).
Depending on use-case, linear search with the most common commands first may be even better than binary search, especially with few total commands, and depending on sorting if binary search has to get all the way to down to a leaf "node" for the most common commands, rather than finding them in the first couple checks. Especially with C library bsearch with a callback function so there are actual function calls, tons of overhead vs. an inline decision-tree, manual or invented by a compiler for a switch.
|
6

Here's my approach, incorporating the switch method in Konstantin's answer as well as some further merging of logic. This doesn't necessarily increase the code quality, but it is maximally deduplicated.

char tmp[] = { 'O', 'K', 0x34, 0x12 };
int len = 2;

switch ((unsigned short) rx_buffer[0] << 8 | (unsigned short) rx_buffer[1]) {
    case 0xAA88:
        change_light_intensity(1000);
        break;
    case 0xBB89:
        len = 4;
        break;
    case 0x3D76:
        change_volume(38);
        break;
    default:
        tmp[0] = 'N';
        tmp[1] = 'O';
        break;
}

memcpy(tx_buffer, tmp, len);

1 Comment

(unsigned short) rx_buffer[1] is a bug when rx_buffer[1] is a char that is signed. Consider casting each rx_buffer[] access to unsigned char first.
4

I chose this method because switch .. case is usually well optimized:

#ifdef IS_BIG_ENDIAN
  #define CONST16(x) (((x & 0xFF00) >> 8) | ((x & 0x00FF) << 8))
#else
  #define CONST16(x) (x)
#endif

enum {
    CHANGE_LIGHT_INTENSITY = CONST16(0x88AA),
    CHANGE_VOLUME          = CONST16(0x763D),
};

union {
    unsigned char rx_buffer[100];
    unsigned short cmd;
} rx;
switch (rx.cmd) {

  case CHANGE_LIGHT_INTENSITY:
    tx_buffer[0] = 'O';
    tx_buffer[1] = 'K';
    change_light_intensity(1000);
    break;

  case CONST16(0x89BB):
    tx_buffer[0] = 'O';
    tx_buffer[1] = 'K';
    tx_buffer[2] = 0x34;
    tx_buffer[3] = 0x12;
    break;

  // many many cases ...

  case CHANGE_VOLUME:
    tx_buffer[0] = 'O';
    tx_buffer[1] = 'K';
    change_volume(38);
    break;

  default:
    tx_buffer[0] = 'N';
    tx_buffer[1] = 'O';
}

I want to take a closer look at switch ... case construct, because many novice programmers mistakenly believe that it is a replacement for if ... else if ... else construct.

For simplicity, let's look at swith ... case with other, more memorable values:

switch (...) {
  case     0: ...; break;
  case     5: ...; break;
  case    44: ...; break;
  case   333: ...; break;
  case  2222: ...; break;
  case 11111: ...; break;
}

Indeed, without optimization, it's just a regular sequential comparison (gcc 15.2 -O0):

        cmp     eax, 11111
        je      .L8             ; == 11111
        cmp     eax, 11111
        jg      .L9             ; >  11111
        cmp     eax, 2222
        je      .L10            ; == 2222
        cmp     eax, 2222
        jg      .L9             ; >  2222
        cmp     eax, 333
        je      .L11            ; == 333
        cmp     eax, 333
        jg      .L9             ; >  333
        cmp     eax, 44
        je      .L12            ; == 44
        cmp     eax, 44
        jg      .L9             ; >  44
        test    eax, eax
        je      .L13            ; == 0
        cmp     eax, 5
        je      .L14            ; == 5

But after optimal compilation, it turns into a binary search, which is embedded in the program code, which means it does not require any additional actions after loading, unlike a manual binary search (gcc 15.2 -O3):

        cmp     ax, 333
        je      .L9             ; == 333
        ja      .L10            ; >  333
        cmp     ax, 5
        je      .L11            ; == 5
        cmp     ax, 44
        je      .L12            ; == 44
        test    ax, ax
        jne     .L14            ; != 0
        ...
.L10:
        cmp     ax, 2222
        je      .L15            ; == 2222
        cmp     ax, 11111
        jne     .L14            ; != 11111

The more cases there are, the more the search will approach binary.

6 Comments

You've silently introduced an endianness dependency which you should mention in the answer. Furthermore, *(unsigned short *)rx_buffer is a strict aliasing violation and an undefined behavior bug.
@XavierPedraza No it shouldn't since it is full of bugs, see my comment above. Shifting the bytes while placing them in a larger integer does indeed solve that bug.
I'm afraid there is no guarantee that rx_buffer is properly aligned for *(unsigned short *)rx_buffer to be used safely.
@KonstantinMakarov The alignment of an array of char/uint8_t is 1 and so it may be misaligned for a unsigned short access. Furthermore as I already told, the cast + dereferencing as the wrong type is also a strict aliasing violation bug. What is the strict aliasing rule?
static const 2-byte arrays and memcmp which should inline for a constant count of 2 could make this endian-agnostic and still compile to the same efficient asm. But good idea with the union as a way to read the first 2 bytes, that also works.
|
3

I like to write such code like this:

bool done(uint8_t* tx_buffer) {
  tx_buffer[0] = 'O';
  tx_buffer[1] = 'K';
  return true;
}

bool fail(uint8_t* tx_buffer) {
  tx_buffer[0] = 'N';
  tx_buffer[1] = 'O';
  return true;
}

bool process_change_light_intensity(const uint8_t* rx_buffer, uint8_t* tx_buffer) {
  if (rx_buffer[0] == 0xAA && rx_buffer[1] == 0x88){
    change_light_intensity(1000);
    return done(tx_buffer);
  }
  return false;
}

bool process_change_volume(const uint8_t* rx_buffer, uint8_t* tx_buffer) {
  if (rx_buffer[0] == 0x3D && rx_buffer[1] == 0x76){
    change_volume(38);
    return done(tx_buffer);
  }
  return false;
}

// somewhere use short-circuit evaluation

  process_change_light_intensity(rx_buffer, tx_buffer) ||
  process_change_volume(rx_buffer, tx_buffer) ||
  fail(tx_buffer);

3 Comments

This provides a certain readability improvement by introducing handler names, but the OP also asked for performance improvement. The best this can hope to do is match the performance of OP's code. It has a good chance to perform worse.
The asker did not explicitly ask for a performance improvement. The lack of clarity permits this wide variety of useful answers. However I agree that the answerer could touch on performance implications (however minimal) as there's a good chance that the compiler uses the stack here.
@XavierPedraza, at the time I loaded the question its title and body specifically did ask about performance. That has since been edited out, and, I grant after reviewing the edit history, the OP was not originally specific about that.
2

Since you named your variables tx_* and rx_*, I assume they're directly hardware-interfacing. In this case, you may use word-based IO on them, with or without consideration for endianness.

e.g.:

...
else if( *(uint16_t *)rx_buffer == *(uint16_t *)("\xBB\x89") ) {
    *(uint32_t *)tx_buffer = *(uint32_t *)("OK\x34\x12");
}
...

For endianness consideration, NetBSD introduced some functions/macros that had been adopted into the POSIX standard, operating system vendors are expected to support them. The next revision of the C language standard is also expected to include endianness-aware memory load/store. So you can do something like this:

...
else if( be16toh(*(uint16_t *)rx_buffer) == 0xBB89 ) {
    ((uint16_t *)tx_buffer)[0] = *(uint16_t *)("OK");
    ((uint16_t *)tx_buffer)[1] = htobe16(0x3412);
}
...

Comments

1

Well, depends a bit on how you want to do it, but in some cases you could just create a table of the input values and corresponding actions, and have the code act on that.

As long as you only need to change some limited set of values, you can put those directly in the table, but if the actions can vary wildly, you might have to resort to callback functions which IMO would rather negate the point.

Having the actions specified as data would make it simpler to load them at runtime from some external source, but if this is an embedded system, that might not be a consideration. Instead, you might have reasons to prefer using code memory, but that's up to you to consider.

As a mock example:

#include<stdio.h>
#include<stdint.h>
#include<string.h>


struct action {
    uint8_t input[2];
    int outputlen;
    uint8_t output[16];
    int light_intensity;
    int volume;
};

static const struct action actions[] = {
  { { 0xAA, 0x88 }, 2, "OK",         1000, -1 },
  { { 0xBB, 0x89 }, 4, "OK\x34\x12", 1234, -1 },
};
static const int num_actions = sizeof(actions)/sizeof(actions[0]);

void change_light_intensity(int intensity) {
    printf("setting light intensity to: %d\n", intensity);
}

int main(void) {
    uint8_t rx_buffer[2] = {0xBB, 0x89};
    uint8_t tx_buffer[8] = {};

    for (int i = 0; i < num_actions; i++) {
        const struct action *a = &actions[i];
        printf("rx: %02x%02x a:%02x%02x\n", rx_buffer[0], rx_buffer[1], a->input[0], a->input[1]);
        if (rx_buffer[0] == a->input[0] && rx_buffer[1] == a->input[1]) {
            memcpy(tx_buffer, a->output, a->outputlen);
            if (a->light_intensity != -1) change_light_intensity(a->light_intensity);
            break;
        }
    }
    printf("%02x%02x%02x%02x\n", tx_buffer[0], tx_buffer[1], tx_buffer[2], tx_buffer[3]);
    return 0;
}

Of course, you could also use macros to generate the array lines, or generate them with an external script. (Then again, you could also use an external tool to generate the code with all the if-statements using an external script, if you wanted to go that way.)

1 Comment

I don't recommend doing it this way. Sure it looks cool and makes you feel smart. But try debugging it.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.