How is curl parsing integers in C?

Published on: 09 December 2025


Last month Daniel Stenberg (the creator of curl) wrote this post on his blog showing the drawbacks of the various functions to parse integers in libc and how they improved the situation in curl.

Here are the major issues shared by Daniel:

curl needs a function that handles all of these cases, so the team wrote a function called str_num_base() that addresses each of these issues. In this post I want to study its implementation. The function doesn't rely on any curl-specific internals, so it can easily be reused in other C code.

First of all, here is a list of errors that are used by the str_* functions.

#define STRE_OK       0
#define STRE_BIG      1
#define STRE_SHORT    2
#define STRE_BEGQUOTE 3
#define STRE_ENDQUOTE 4
#define STRE_BYTE     5
#define STRE_NEWLINE  6
#define STRE_OVERFLOW 7
#define STRE_NO_NUM   8

Not all of these are used in str_num_base(); it only returns STRE_OK, STRE_OVERFLOW and STRE_NO_NUM.

The next step is to make sure the code parses a 64-bit number. Curl supports an absurd number of compiler/architecture combinations, so it's not trivial to create a type that is going to be 64 bits wide on all of them, and this is what they came up with.

#ifdef __DJGPP__
#  define CURL_TYPEOF_CURL_OFF_T     long long

#elif defined(__BORLANDC__)
#  define CURL_TYPEOF_CURL_OFF_T     __int64

#elif defined(__POCC__)
#  ifdef _MSC_VER
#    define CURL_TYPEOF_CURL_OFF_T     __int64
#  else
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  endif
#  define CURL_TYPEOF_CURL_SOCKLEN_T int

#elif defined(__LCC__)
#  ifdef __MCST__ /* MCST eLbrus Compiler Collection */
#    define CURL_TYPEOF_CURL_OFF_T     long
#  else                /* Local (or Little) C Compiler */
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

#elif defined(macintosh)
#  include <ConditionalMacros.h>
#  if TYPE_LONGLONG
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  else
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

#elif defined(__TANDEM)
#  ifndef __LP64
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  else
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

#elif defined(UNDER_CE)
#  ifdef __MINGW32CE__
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  else
#    define CURL_TYPEOF_CURL_OFF_T     __int64
#  endif

#elif defined(__MINGW32__)
#  include <inttypes.h>
#  define CURL_TYPEOF_CURL_OFF_T     long long

#elif defined(__VMS)
#  ifdef __VAX
#    define CURL_TYPEOF_CURL_OFF_T     long
#  else
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  endif

#elif defined(__OS400__)
#  define CURL_TYPEOF_CURL_OFF_T     long long

#elif defined(__MVS__)
#  ifdef _LONG_LONG
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  else /* _LP64 and default */
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

#elif defined(__370__)
#  if defined(__IBMC__) || defined(__IBMCPP__)
#    ifdef _LONG_LONG
#      define CURL_TYPEOF_CURL_OFF_T     long long
#    else /* _LP64 and default */
#      define CURL_TYPEOF_CURL_OFF_T     long
#    endif
#  endif

#elif defined(TPF)
#  define CURL_TYPEOF_CURL_OFF_T     long

#elif defined(__TINYC__) /* also known as tcc */
#  define CURL_TYPEOF_CURL_OFF_T     long long

#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* Oracle Solaris Studio */
#  if !defined(__LP64) && (defined(__ILP32) ||                          \
                           defined(__i386) ||                           \
                           defined(__sparcv8) ||                        \
                           defined(__sparcv8plus))
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  elif defined(__LP64) || \
        defined(__amd64) || defined(__sparcv9)
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

#elif defined(__xlc__) /* IBM xlc compiler */
#  ifndef _LP64
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  else
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

#elif defined(__hpux) /* HP aCC compiler */
#  ifndef _LP64
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  else
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

/* ===================================== */
/*    KEEP MSVC THE PENULTIMATE ENTRY    */
/* ===================================== */

#elif defined(_MSC_VER)
#  define CURL_TYPEOF_CURL_OFF_T     __int64

/* ===================================== */
/*    KEEP GENERIC GCC THE LAST ENTRY    */
/* ===================================== */

#elif defined(__GNUC__) && !defined(_SCO_DS)
#  if !defined(__LP64__) &&                                             \
  (defined(__ILP32__) || defined(__i386__) || defined(__hppa__) ||      \
   defined(__ppc__) || defined(__powerpc__) || defined(__arm__) ||      \
   defined(__sparc__) || defined(__mips__) || defined(__sh__) ||        \
   defined(__XTENSA__) ||                                               \
   (defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 4) ||                \
   (defined(__LONG_MAX__) && __LONG_MAX__ == 2147483647L))
#    define CURL_TYPEOF_CURL_OFF_T     long long
#  elif defined(__LP64__) || \
        defined(__x86_64__) || defined(__ppc64__) || defined(__sparc64__) || \
        defined(__e2k__) || \
        (defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 8) || \
        (defined(__LONG_MAX__) && __LONG_MAX__ == 9223372036854775807L)
#    define CURL_TYPEOF_CURL_OFF_T     long
#  endif

#else
/* generic "safe guess" on old 32-bit style */
#  define CURL_TYPEOF_CURL_OFF_T     long long
#endif

/* curl_off_t is an alias for whatever type CURL_TYPEOF_CURL_OFF_T names.
   When that macro was never defined, no typedef is emitted at all. */

#if defined(CURL_TYPEOF_CURL_OFF_T)
typedef CURL_TYPEOF_CURL_OFF_T curl_off_t;
#endif

This is definitely the largest number of ways I have ever seen a 64-bit type defined.

Once the type is defined, there is an ASCII lookup table and a few helper macros to convert a character into a number.

/* Lookup table indexed by (character - '0'), covering '0' through 'f'.
   Hexadecimal digits map to their numeric value and every other slot holds
   zero. The slot for '0' stores 16 instead of 0 so that valid_digit() can
   treat any non-zero entry as "this is a digit"; Curl_hexval() masks with
   0x0f to turn that 16 back into 0. */
const unsigned char Curl_hexasciitable[] = {
  /* 0x30: '0' - '9' */
  16, 1, 2, 3, 4, 5, 6, 7, 8, 9,
  /* 0x3a: ':' - '@', seven non-digits */
  0, 0, 0, 0, 0, 0, 0,
  /* 0x41: 'A' - 'F' */
  10, 11, 12, 13, 14, 15,
  /* 0x47: 'G' - '`', twenty-six non-digits */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x61: 'a' - 'f' */
  10, 11, 12, 13, 14, 15
};

/* Given a hexadecimal digit character, return its numeric value: '0' gives
   0, 'a' and 'A' give 10. The character is used as an index into
   Curl_hexasciitable without any range check, so THIS ONLY WORKS ON VALID
   HEXADECIMAL LETTER INPUT. Verify before calling this!

   The 0x0f mask turns the 16 stored in the table for '0' back into 0. The
   whole expansion is parenthesized so the macro behaves as a single
   expression wherever it is used, e.g. as the operand of sizeof.
*/
#define Curl_hexval(x) \
  ((unsigned char)(Curl_hexasciitable[(x) - '0'] & 0x0f))

/* Given an ASCII character x and the largest acceptable digit character m,
   return non-zero if x is a valid digit. x must lie in the range '0'..m and
   have a non-zero entry in Curl_hexasciitable; the range test runs first, so
   the table is only indexed when x is within '0'..m. m must not be larger
   than 'f', the last character the table covers.

   Both parameters are parenthesized so that an expression passed as m (for
   example a conditional) is compared as a whole. x is evaluated more than
   once: do not pass an expression with side effects. */
#define valid_digit(x,m) \
  (((x) >= '0') && ((x) <= (m)) && Curl_hexasciitable[(x)-'0'])

Nothing crazy here; these macros make a lot of sense.

And now the full definition of str_num_base().

/* Parse an unsigned number written in base 8, 10 or 16, starting at *linep.
   There is no support for a 0x prefix nor for leading spaces.

   On success the value is stored in *nump, *linep is moved past the last
   digit consumed and STRE_OK is returned. STRE_NO_NUM is returned when the
   first character is not a digit of the base, STRE_OVERFLOW when the number
   would exceed 'max'. In both error cases *nump is zero and *linep is left
   where it was. */
static int str_num_base(const char **linep, curl_off_t *nump, curl_off_t max,
                        int base) /* 8, 10 or 16, nothing else */
{
  const char *cursor;
  curl_off_t acc = 0;
  int top; /* the largest digit character this base allows */

  if(base == 10)
    top = '9';
  else if(base == 16)
    top = 'f';
  else
    top = '7';

  DEBUGASSERT(linep && *linep && nump);
  DEBUGASSERT((base == 8) || (base == 10) || (base == 16));
  DEBUGASSERT(max >= 0); /* mostly to catch SIZE_MAX, which is too large */

  *nump = 0;
  cursor = *linep;
  if(!valid_digit(*cursor, top))
    return STRE_NO_NUM;

  while(valid_digit(*cursor, top)) {
    int digit = Curl_hexval(*cursor);
    cursor++;
    if(max < base) {
      /* Low max scenario: (max - digit) can be negative here, which would
         make the division based check below unreliable. The accumulator
         never exceeds max before the multiply, so the product stays small
         and can simply be computed first and compared afterwards. */
      acc = acc * base + digit;
      if(acc > max)
        return STRE_OVERFLOW;
    }
    else {
      /* (max - digit) / base is the largest accumulator that can still take
         this digit, so check before multiplying to avoid overflowing */
      if(acc > ((max - digit) / base))
        return STRE_OVERFLOW;
      acc = acc * base + digit;
    }
  }

  *linep = cursor;
  *nump = acc;
  return STRE_OK;
}

As arguments you need to provide a pointer to the string pointer (which is advanced past the parsed digits on success), a pointer to the 64-bit variable where the converted number is stored, the maximum accepted value (used for the overflow check) and the base (8, 10 or 16). It returns STRE_OK if everything went fine, or an error code otherwise.

The function is quite understandable, as long as there are valid digits and as long as it's not overflowing it does num = num * base + n.

On top of this main function there are a few helper functions so that you don't need to pass the base every time.

/* Parse an unsigned base-10 number via str_num_base(). A leading space or
   minus sign is not accepted, leading zeroes are. Returns non-zero on
   error. */
int curlx_str_number(const char **linep, curl_off_t *nump, curl_off_t max)
{
  const int base = 10;
  return str_num_base(linep, nump, max, base);
}

/* Parse an unsigned base-16 number via str_num_base(). A leading space,
   minus sign or "0x" prefix is not accepted, leading zeroes are. Returns
   non-zero on error. */
int curlx_str_hex(const char **linep, curl_off_t *nump, curl_off_t max)
{
  const int base = 16;
  return str_num_base(linep, nump, max, base);
}

/* Parse an unsigned base-8 number via str_num_base(). A leading space or
   minus sign is not accepted and a leading "0" is not treated as a prefix;
   leading zeroes are simply accepted as digits. Returns non-zero on error. */
int curlx_str_octal(const char **linep, curl_off_t *nump, curl_off_t max)
{
  const int base = 8;
  return str_num_base(linep, nump, max, base);
}

This function checks all the boxes that curl needs:

But there are a few things that I find missing to become a general-purpose function:

Since curl (most likely) doesn't care about these two cases, this function works great for them and for anyone else who thinks it's enough for their needs.