Error Handling

Comprehensive guide to error handling strategies and best practices in Nexus applications.

Overview

Robust error handling is essential for reliable embedded systems. This guide covers error detection, reporting, recovery, and prevention strategies.

Error Handling Goals:

  • Detect errors early

  • Report errors clearly

  • Recover gracefully

  • Prevent system failures

  • Maintain system integrity

Error Types

Hardware Errors

Common Hardware Errors:

  • Peripheral initialization failure

  • Communication timeouts

  • Invalid sensor readings

  • Memory access violations

  • Power supply issues

Example:

/**
 * Probe the sensor behind I2C device 0 and verify its identity register.
 *
 * The I2C handle obtained from nx_factory_i2c() is released on every path
 * that acquired it.
 *
 * @return NX_OK when the ID register matches EXPECTED_SENSOR_ID,
 *         NX_ERR_NO_DEVICE when no I2C handle could be obtained,
 *         NX_ERR_IO when the ID register could not be read,
 *         NX_ERR_INVALID_DEVICE when the ID does not match.
 */
nx_status_t init_sensor(void)
{
    nx_status_t outcome = NX_OK;
    uint8_t id;

    nx_i2c_t* bus = nx_factory_i2c(0);
    if (!bus) {
        LOG_ERROR("Failed to get I2C device");
        return NX_ERR_NO_DEVICE;
    }

    /* Read the identity register, then compare it with the expected ID */
    nx_status_t read_status = i2c_read_register(bus, SENSOR_ID_REG, &id);
    if (read_status != NX_OK) {
        LOG_ERROR("Failed to read sensor ID: %d", read_status);
        outcome = NX_ERR_IO;
    } else if (id != EXPECTED_SENSOR_ID) {
        LOG_ERROR("Invalid sensor ID: 0x%02X (expected 0x%02X)",
                  id, EXPECTED_SENSOR_ID);
        outcome = NX_ERR_INVALID_DEVICE;
    }

    /* Single release point for the success and failure paths */
    nx_factory_i2c_release(bus);
    return outcome;
}

Software Errors

Common Software Errors:

  • Invalid parameters

  • Buffer overflows

  • Null pointer dereferences

  • Resource exhaustion

  • Logic errors

Example:

/**
 * Validate a byte buffer and feed it to process_byte() one byte at a time.
 *
 * @param buffer  Bytes to process; must not be null.
 * @param len     Number of bytes; must be in 1..MAX_BUFFER_SIZE.
 * @return 0 on success, -EINVAL for a null pointer or out-of-range length,
 *         -EIO as soon as process_byte() reports a non-zero result.
 */
int process_buffer(const uint8_t* buffer, size_t len)
{
    /* Reject bad arguments before touching the data */
    if (!buffer) {
        LOG_ERROR("Null buffer pointer");
        return -EINVAL;
    }

    if (len == 0) {
        LOG_ERROR("Zero length buffer");
        return -EINVAL;
    } else if (len > MAX_BUFFER_SIZE) {
        LOG_ERROR("Buffer too large: %zu (max %d)", len, MAX_BUFFER_SIZE);
        return -EINVAL;
    }

    /* Stop at the first byte that fails */
    size_t pos = 0;
    while (pos < len) {
        if (process_byte(buffer[pos]) != 0) {
            LOG_ERROR("Failed to process byte at index %zu", pos);
            return -EIO;
        }
        pos++;
    }

    return 0;
}

System Errors

Common System Errors:

  • Out of memory

  • Stack overflow

  • Deadlock

  • Task starvation

  • Watchdog timeout

Example:

/**
 * Allocate memory through osal_malloc(), with one recovery attempt.
 *
 * If the first allocation fails, the free heap size is logged,
 * cleanup_unused_resources() is called and the allocation is retried once.
 * If the retry also fails, enter_safe_mode() is invoked.
 *
 * @param size  Number of bytes requested.
 * @return Pointer from osal_malloc(); null only if both attempts failed and
 *         enter_safe_mode() returned.
 */
void* allocate_buffer(size_t size)
{
    void* mem = osal_malloc(size);
    if (mem) {
        return mem;  /* Common case: first attempt succeeded */
    }

    LOG_ERROR("Out of memory: failed to allocate %zu bytes", size);

    /* Record how much heap is left to aid diagnosis */
    size_t heap_left = osal_get_free_heap_size();
    LOG_ERROR("Free heap: %zu bytes", heap_left);

    /* Free what can be freed, then try exactly once more */
    cleanup_unused_resources();
    mem = osal_malloc(size);

    if (!mem) {
        LOG_FATAL("Memory allocation failed after cleanup");
        enter_safe_mode();
    }

    return mem;
}

Error Codes

Standard Error Codes

Nexus Error Codes:

/**
 * Status codes returned by Nexus APIs.
 *
 * Zero means success; every failure is a distinct negative value.
 */
typedef enum {
    NX_OK                = 0,   /**< Operation succeeded */
    NX_ERR_FAIL          = -1,  /**< Unspecified failure */
    NX_ERR_PARAM         = -2,  /**< An argument was invalid */
    NX_ERR_STATE         = -3,  /**< Call not valid in the current state */
    NX_ERR_TIMEOUT       = -4,  /**< Operation timed out */
    NX_ERR_NO_MEM        = -5,  /**< Memory could not be allocated */
    NX_ERR_NO_DEVICE     = -6,  /**< Requested device was not found */
    NX_ERR_IO            = -7,  /**< Input/output error */
    NX_ERR_BUSY          = -8,  /**< Resource is busy */
    NX_ERR_NOT_SUPPORTED = -9,  /**< Feature is not supported */
} nx_status_t;

POSIX-Style Error Codes:

#include <errno.h>

/**
 * Skeleton showing the negative-errno return convention.
 *
 * Conditions are checked in order; the first one that holds decides the
 * result.
 *
 * @return 0 on success, otherwise -EINVAL, -ETIMEDOUT or -ENOMEM.
 */
int my_function(void)
{
    int rc = 0;  /* Success unless a condition below applies */

    if (error_condition) {
        rc = -EINVAL;     /* Invalid argument */
    } else if (timeout) {
        rc = -ETIMEDOUT;  /* Timeout */
    } else if (no_memory) {
        rc = -ENOMEM;     /* Out of memory */
    }

    return rc;
}

Error Code Conversion

Convert Between Error Systems:

/**
 * Translate an errno-style code into the matching Nexus status.
 *
 * The errno-style functions in this guide return negated codes (for example
 * -EINVAL), so both the positive errno value and its negation are accepted.
 *
 * @param err  0, an errno value (EINVAL), or its negation (-EINVAL).
 * @return NX_OK for 0, the corresponding NX_ERR_* for a recognised code,
 *         NX_ERR_FAIL for anything else.
 */
nx_status_t errno_to_nx_status(int err)
{
    /*
     * Each code is listed in both signs rather than negating err, which
     * would overflow for INT_MIN.
     */
    switch (err) {
    case 0:
        return NX_OK;
    case EINVAL:
    case -EINVAL:
        return NX_ERR_PARAM;
    case ETIMEDOUT:
    case -ETIMEDOUT:
        return NX_ERR_TIMEOUT;
    case ENOMEM:
    case -ENOMEM:
        return NX_ERR_NO_MEM;
    case EIO:
    case -EIO:
        return NX_ERR_IO;
    case EBUSY:
    case -EBUSY:
        return NX_ERR_BUSY;
    default:
        return NX_ERR_FAIL;
    }
}

/**
 * Map a Nexus status code to a short human-readable description.
 *
 * @param status  Status code to describe.
 * @return Pointer to a string literal; "Unknown error" for any value that
 *         is not one of the codes handled below.
 */
const char* nx_status_to_string(nx_status_t status)
{
    const char* text = "Unknown error";  /* Used for unrecognised codes */

    switch (status) {
    case NX_OK:                text = "Success";           break;
    case NX_ERR_FAIL:          text = "General failure";   break;
    case NX_ERR_PARAM:         text = "Invalid parameter"; break;
    case NX_ERR_STATE:         text = "Invalid state";     break;
    case NX_ERR_TIMEOUT:       text = "Timeout";           break;
    case NX_ERR_NO_MEM:        text = "Out of memory";     break;
    case NX_ERR_NO_DEVICE:     text = "Device not found";  break;
    case NX_ERR_IO:            text = "I/O error";         break;
    case NX_ERR_BUSY:          text = "Resource busy";     break;
    case NX_ERR_NOT_SUPPORTED: text = "Not supported";     break;
    default:
        break;
    }

    return text;
}

Error Detection

Parameter Validation

Validate All Inputs:

/**
 * Validate a configuration request before applying it to a device.
 *
 * Checks run in this order: device pointer, config pointer, speed range,
 * device state. The first failing check is logged and decides the result.
 *
 * @param device  Device to configure; must not be null and must be in
 *                DEVICE_STATE_IDLE.
 * @param config  Requested settings; must not be null, and its speed must
 *                lie within MIN_SPEED..MAX_SPEED inclusive.
 * @return 0 when all checks pass, -EINVAL for a null pointer or an
 *         out-of-range speed, -EBUSY when the device is not idle.
 */
int configure_device(device_t* device, const config_t* config)
{
    int rc = 0;

    if (!device) {
        LOG_ERROR("Null device pointer");
        rc = -EINVAL;
    } else if (!config) {
        LOG_ERROR("Null config pointer");
        rc = -EINVAL;
    } else if (MIN_SPEED > config->speed || MAX_SPEED < config->speed) {
        LOG_ERROR("Invalid speed: %d (range: %d-%d)",
                  config->speed, MIN_SPEED, MAX_SPEED);
        rc = -EINVAL;
    } else if (device->state != DEVICE_STATE_IDLE) {
        LOG_ERROR("Device not in idle state: %d", device->state);
        rc = -EBUSY;
    } else {
        /* Configure device */
    }

    return rc;
}

Assertions

Use Assertions for Programming Errors:

#include "hal/nx_assert.h"

/**
 * Pass every byte of a buffer to process_byte().
 *
 * Preconditions are enforced with NX_ASSERT because violating them is a
 * programming error in the caller, not a runtime condition.
 *
 * @param data  Bytes to process; asserted non-null.
 * @param len   Byte count; asserted to be in 1..MAX_SIZE.
 */
void process_data(const uint8_t* data, size_t len)
{
    /* Caller contract */
    NX_ASSERT(data != NULL);
    NX_ASSERT(len > 0);
    NX_ASSERT(len <= MAX_SIZE);

    size_t i = 0;
    while (i < len) {
        NX_ASSERT(i < len);  /* Index stays inside the buffer */
        process_byte(data[i]);
        i++;
    }
}

Custom Assert Handler:

/**
 * Handler invoked when an assertion fails; never returns.
 *
 * Logs the failing expression and its location, dumps system state, then
 * disables interrupts and spins forever.
 *
 * @param file  Source file containing the failed assertion.
 * @param line  Line number of the failed assertion.
 * @param expr  Text of the asserted expression.
 */
void nx_assert_failed(const char* file, int line, const char* expr)
{
    LOG_FATAL("Assertion failed: %s at %s:%d", expr, file, line);

    /* Capture diagnostics while the system is still running */
    dump_system_state();

    /* Mask interrupts and park the CPU */
    __disable_irq();
    for (;;) {
        /* Halt */
    }
}

Runtime Checks

Check Return Values:

/**
 * Send a buffer synchronously over UART 0 and report the outcome.
 *
 * The function returns a status so that callers can react to failures
 * (send_with_retry() in this guide relies on it). The UART handle is
 * released on every path that acquired it.
 *
 * @param data  Bytes to transmit.
 * @param len   Number of bytes to transmit.
 * @return 0 on success, -ENODEV when the UART device is unavailable,
 *         -EIO when the TX interface is missing or the send fails.
 */
int send_data(const uint8_t* data, size_t len)
{
    int rc = 0;

    nx_uart_t* uart = nx_factory_uart(0);
    if (!uart) {
        LOG_ERROR("Failed to get UART device");
        return -ENODEV;
    }

    nx_tx_sync_t* tx = uart->get_tx_sync(uart);
    if (!tx) {
        LOG_ERROR("Failed to get TX interface");
        rc = -EIO;
    } else {
        /* NOTE(review): the last argument is presumably a timeout in ms */
        nx_status_t status = tx->send(tx, data, len, 1000);
        if (status != NX_OK) {
            LOG_ERROR("Failed to send data: %d", status);
            rc = -EIO;
        }
    }

    /* Single release point for the success and failure paths */
    nx_factory_uart_release(uart);
    return rc;
}

Error Reporting

Logging Errors

Use Appropriate Log Levels:

#define LOG_MODULE "sensor"
#include "log/log.h"

/**
 * Read one sensor value, using a log level that matches each failure.
 *
 * @param value  Output location for the reading; must not be null.
 * @return 0 on success, -EINVAL for a null pointer or an out-of-range
 *         reading, the non-zero result of sensor_read_raw() if the read
 *         fails, -EIO when a hardware fault is detected.
 */
int read_sensor(float* value)
{
    /* The pointer is dereferenced below, so reject null up front */
    if (!value) {
        LOG_ERROR("Null value pointer");
        return -EINVAL;
    }

    /* Recoverable error - WARN (readiness is not re-checked after the delay) */
    if (sensor_not_ready()) {
        LOG_WARN("Sensor not ready, retrying...");
        osal_task_delay(100);
    }

    /* Failure - ERROR */
    int result = sensor_read_raw(value);
    if (result != 0) {
        LOG_ERROR("Sensor read failed: %d", result);
        return result;
    }

    /* Invalid data - ERROR */
    if (*value < MIN_VALUE || *value > MAX_VALUE) {
        /* Casts keep %.2f valid whatever type the limit macros have */
        LOG_ERROR("Invalid sensor value: %.2f (range: %.2f-%.2f)",
                  (double)*value, (double)MIN_VALUE, (double)MAX_VALUE);
        return -EINVAL;
    }

    /* Critical failure - FATAL */
    if (sensor_hardware_fault()) {
        LOG_FATAL("Sensor hardware fault detected");
        return -EIO;
    }

    return 0;
}

Error Context

Include Relevant Information:

/**
 * Validate a packet, logging enough context to diagnose a rejection.
 *
 * @param packet  Packet to validate; must not be null.
 * @return 0 when the packet passes the size and checksum checks,
 *         -EINVAL for a null pointer, an oversized packet or a checksum
 *         mismatch.
 */
int process_packet(const packet_t* packet)
{
    int rc = -EINVAL;  /* Every rejection below shares this code */

    if (!packet) {
        LOG_ERROR("Null packet pointer");
    } else if (packet->length > MAX_PACKET_SIZE) {
        /* Report type, length and sequence so the packet can be traced */
        LOG_ERROR("Packet too large: type=0x%02X, len=%d, max=%d, seq=%lu",
                  packet->type, packet->length, MAX_PACKET_SIZE,
                  packet->sequence);
    } else if (!validate_checksum(packet)) {
        LOG_ERROR("Checksum mismatch: type=0x%02X, len=%d, seq=%lu, "
                  "expected=0x%04X, actual=0x%04X",
                  packet->type, packet->length, packet->sequence,
                  packet->checksum, calculate_checksum(packet));
    } else {
        rc = 0;
    }

    return rc;
}

Error Callbacks

Register Error Handlers:

/** Signature of an error handler: receives the error code and its message. */
typedef void (*error_callback_t)(int error_code, const char* message);

/** Currently registered handler; NULL while none is registered. */
static error_callback_t error_callback = NULL;

/**
 * Install the handler that report_error() invokes.
 *
 * Only one handler is kept: each call replaces the previous one.
 *
 * @param callback  Handler to install; NULL leaves no handler registered.
 */
void register_error_callback(error_callback_t callback)
{
    error_callback = callback;
}

/**
 * Format an error message, log it, and forward it to the registered handler.
 *
 * The message is rendered into a 256-byte stack buffer with vsnprintf(), so
 * longer output is truncated.
 *
 * @param error_code  Code passed through unchanged to the handler.
 * @param format      printf-style format string for the message.
 * @param ...         Arguments consumed by @p format.
 */
void report_error(int error_code, const char* format, ...)
{
    char text[256];
    va_list ap;

    /* Render the caller's message */
    va_start(ap, format);
    vsnprintf(text, sizeof text, format, ap);
    va_end(ap);

    /* Always log, whether or not a handler is registered */
    LOG_ERROR("%s", text);

    if (!error_callback) {
        return;
    }
    error_callback(error_code, text);
}

Error Recovery

Retry Strategies

Implement Retry Logic:

/**
 * Call send_data() up to three times, pausing between attempts.
 *
 * @param data  Bytes to send.
 * @param len   Number of bytes to send.
 * @return 0 as soon as one attempt succeeds, -EIO when every attempt fails.
 */
int send_with_retry(const uint8_t* data, size_t len)
{
    const int MAX_RETRIES = 3;
    const uint32_t RETRY_DELAY_MS = 100;

    /* attempt is 1-based, so it is the number shown in the log messages */
    for (int attempt = 1; attempt <= MAX_RETRIES; attempt++) {
        int result = send_data(data, len);

        if (result != 0) {
            LOG_WARN("Send failed (attempt %d/%d): %d",
                     attempt, MAX_RETRIES, result);

            /* No point in waiting after the final attempt */
            if (attempt < MAX_RETRIES) {
                osal_task_delay(RETRY_DELAY_MS);
            }
            continue;
        }

        /* Success: mention earlier failures only if there were any */
        if (attempt > 1) {
            LOG_INFO("Send succeeded after %d retries", attempt - 1);
        }
        return 0;
    }

    LOG_ERROR("Send failed after %d retries", MAX_RETRIES);
    return -EIO;
}

Exponential Backoff:

/**
 * Try to connect up to five times with exponentially growing pauses.
 *
 * The pause starts at 100 ms and doubles after each failed attempt, capped
 * at 5000 ms. No pause follows the final attempt, since nothing is left to
 * wait for.
 *
 * @return 0 as soon as connect() succeeds, -ETIMEDOUT when every attempt
 *         fails.
 */
int connect_with_backoff(void)
{
    const int MAX_RETRIES = 5;
    const uint32_t MAX_DELAY_MS = 5000;  /* Cap at 5 seconds */
    uint32_t delay_ms = 100;             /* Start with 100ms */

    for (int retry = 0; retry < MAX_RETRIES; retry++) {
        if (connect() == 0) {
            return 0;
        }

        /* Last attempt: report the failure without a misleading "retrying" */
        if (retry == MAX_RETRIES - 1) {
            LOG_ERROR("Connection failed after %d attempts", MAX_RETRIES);
            break;
        }

        /* The cast makes the argument match %lu on every platform */
        LOG_WARN("Connection failed (attempt %d/%d), retrying in %lu ms",
                 retry + 1, MAX_RETRIES, (unsigned long)delay_ms);

        osal_task_delay(delay_ms);

        /* Exponential backoff: double delay each time */
        delay_ms *= 2;
        if (delay_ms > MAX_DELAY_MS) {
            delay_ms = MAX_DELAY_MS;
        }
    }

    return -ETIMEDOUT;
}

Fallback Mechanisms

Provide Alternatives:

/**
 * Read the temperature, falling back from primary sensor to backup sensor
 * to a fixed default.
 *
 * @return The primary reading if available, otherwise the backup reading,
 *         otherwise DEFAULT_TEMPERATURE.
 */
float read_temperature(void)
{
    float reading;

    if (read_primary_sensor(&reading) != 0) {
        LOG_WARN("Primary sensor failed, trying backup");

        if (read_backup_sensor(&reading) != 0) {
            /* Nothing left to try: fall back to the default value */
            LOG_ERROR("Both sensors failed, using default value");
            return DEFAULT_TEMPERATURE;
        }
    }

    /* Either the primary or the backup read filled in the value */
    return reading;
}

Graceful Degradation

Reduce Functionality:

/**
 * Operating modes, listed from full functionality down to safe mode.
 */
typedef enum {
    MODE_FULL    = 0,  /**< Starting mode */
    MODE_REDUCED = 1,  /**< Non-essential features disabled */
    MODE_MINIMAL = 2,  /**< All optional features disabled */
    MODE_SAFE    = 3,  /**< Safe mode */
} operation_mode_t;

/** Mode the system is currently operating in; starts at MODE_FULL. */
static operation_mode_t current_mode = MODE_FULL;

/**
 * Degrade the operating mode according to the severity of an error.
 *
 * The mode only ever moves towards MODE_SAFE; a less severe error never
 * restores functionality that a more severe one has already removed.
 *
 * @param error_severity  One of ERROR_MINOR, ERROR_MODERATE, ERROR_SEVERE
 *                        or ERROR_CRITICAL; other values are logged and
 *                        otherwise ignored.
 */
void handle_error(int error_severity)
{
    switch (error_severity) {
    case ERROR_MINOR:
        /* Continue normal operation */
        break;

    case ERROR_MODERATE:
        if (current_mode == MODE_FULL) {
            LOG_WARN("Entering reduced mode");
            current_mode = MODE_REDUCED;
            disable_non_essential_features();
        }
        break;

    case ERROR_SEVERE:
        /*
         * Escalate only from a less restrictive mode, so a severe error
         * arriving in MODE_SAFE cannot drop the system back to MODE_MINIMAL.
         */
        if (current_mode == MODE_FULL || current_mode == MODE_REDUCED) {
            LOG_ERROR("Entering minimal mode");
            current_mode = MODE_MINIMAL;
            disable_all_optional_features();
        }
        break;

    case ERROR_CRITICAL:
        LOG_FATAL("Entering safe mode");
        current_mode = MODE_SAFE;
        enter_safe_mode();
        break;

    default:
        /* Make unexpected severities visible instead of dropping them */
        LOG_ERROR("Unknown error severity: %d", error_severity);
        break;
    }
}

Safe Mode

Implement Safe Mode:

/**
 * Put the system into safe mode; never returns.
 *
 * Shuts down peripherals and non-critical tasks, re-enables the essential
 * functions, signals safe mode, then idles in a delay loop.
 */
void enter_safe_mode(void)
{
    LOG_FATAL("Entering safe mode");

    /* Shut everything down first... */
    disable_all_peripherals();
    stop_non_critical_tasks();

    /* ...then bring back only what is essential */
    enable_essential_functions();

    /* Make the state visible (LED pattern, etc.) */
    indicate_safe_mode();

    /* Idle until manual intervention or a watchdog reset */
    for (;;) {
        osal_task_delay(1000);
    }
}

Error Prevention

Defensive Programming

Check Assumptions:

/**
 * Drain a queue, handing each dequeued item to process_item().
 *
 * @param queue  Queue to drain; a null or uninitialized queue is logged and
 *               left untouched.
 */
void process_queue(queue_t* queue)
{
    /* Refuse to work on a queue that cannot be trusted */
    if (!queue) {
        LOG_ERROR("Null queue pointer");
        return;
    }
    if (!queue->initialized) {
        LOG_ERROR("Queue not initialized");
        return;
    }

    while (!queue_is_empty(queue)) {
        item_t* entry = queue_dequeue(queue);

        if (entry) {
            process_item(entry);
        } else {
            /* A non-empty queue should not yield null; log and move on */
            LOG_ERROR("Unexpected null item from queue");
        }
    }
}

Input Sanitization

Validate and Sanitize:

/**
 * Store a sanitized copy of a name in a device.
 *
 * Only alphanumeric characters, '_' and '-' are kept. The device name is
 * modified only when the function succeeds.
 *
 * @param device  Device whose name is set; must not be null.
 * @param name    Requested name; must not be null, and its length must be
 *                in 1..MAX_NAME_LENGTH-1.
 * @return 0 on success, -EINVAL for a null pointer, an invalid length, or a
 *         name with no valid characters left after sanitizing.
 */
int set_device_name(device_t* device, const char* name)
{
    if (!device || !name) {
        return -EINVAL;
    }

    /* Check length */
    size_t len = strlen(name);
    if (len == 0 || len >= MAX_NAME_LENGTH) {
        LOG_ERROR("Invalid name length: %zu", len);
        return -EINVAL;
    }

    /*
     * Sanitize input - drop invalid characters. kept never exceeds len,
     * which is below MAX_NAME_LENGTH, so the terminator always fits.
     */
    char sanitized[MAX_NAME_LENGTH];
    size_t kept = 0;
    for (size_t i = 0; i < len; i++) {
        /* <ctype.h> functions are undefined for negative char values */
        const unsigned char ch = (unsigned char)name[i];
        if (isalnum(ch) || ch == '_' || ch == '-') {
            sanitized[kept++] = name[i];
        }
    }
    sanitized[kept] = '\0';

    /* An empty name is rejected above; reject one that became empty too */
    if (kept == 0) {
        LOG_ERROR("Name contains no valid characters");
        return -EINVAL;
    }

    /* Copy sanitized name; strncpy also zero-fills the rest of the field */
    strncpy(device->name, sanitized, MAX_NAME_LENGTH - 1);
    device->name[MAX_NAME_LENGTH - 1] = '\0';

    return 0;
}

Resource Limits

Enforce Limits:

/** Upper bound on simultaneously tracked connections. */
#define MAX_CONNECTIONS 10

/** Fixed pool of connection slots; slots are handed out in index order. */
static connection_t connections[MAX_CONNECTIONS];

/** Number of slots handed out so far. */
static size_t connection_count = 0;

/**
 * Claim the next free connection slot and initialize it.
 *
 * @return 0 on success, -ENOMEM when all MAX_CONNECTIONS slots are in use.
 */
int create_connection(void)
{
    if (connection_count < MAX_CONNECTIONS) {
        /* Take the next slot and count it before initializing it */
        connection_t* slot = &connections[connection_count];
        connection_count++;
        initialize_connection(slot);
        return 0;
    }

    /* Pool exhausted */
    LOG_ERROR("Connection limit reached: %zu/%d",
              connection_count, MAX_CONNECTIONS);
    return -ENOMEM;
}

Best Practices

  1. Return Status Codes: always return an error status; use consistent error codes; document error conditions; check all return values.

  2. Log Errors Appropriately: use the correct log level; include context; log at the source of the error; don't spam the logs.

  3. Handle Errors Gracefully: implement retry logic; provide fallbacks; degrade gracefully; maintain system integrity.

  4. Validate Inputs: check all parameters; validate ranges; sanitize inputs; use assertions for programming errors.

  5. Prevent Errors: program defensively; enforce resource limits; validate input; review code.

  6. Test Error Paths: test failure scenarios; test recovery mechanisms; test edge cases; use fault injection.

  7. Document Errors: document error codes; document recovery procedures; document limitations; provide examples.

See Also