How is curl parsing integers in C?
Published on: 09 December 2025
Last month Daniel Stenberg (the creator of curl) wrote this post on his blog showing the drawbacks of the various functions to parse integers in libc and how they improved the situation in curl.
Here are the major issues shared by Daniel:
- No checks for underflow or overflow
- No clear error handling
- Not reliably using 64 bits
And curl requires a function that handles all these things so they
created a function called str_num_base() that solves all these
issues and I want to study its implementation. This function doesn't
use any curl-specific internals so it can be easily used by other C
code.
First of all, here is a list of errors that are used by the str_*
functions.
/* Error codes returned by the str_* functions. STRE_OK (zero) signals
   success; every other value identifies a distinct failure. Each directive
   must sit on its own line: a #define runs to the end of the line, so
   chaining several on one line only defines the first name. */
#define STRE_OK       0
#define STRE_BIG      1
#define STRE_SHORT    2
#define STRE_BEGQUOTE 3
#define STRE_ENDQUOTE 4
#define STRE_BYTE     5
#define STRE_NEWLINE  6
#define STRE_OVERFLOW 7
#define STRE_NO_NUM   8
Not all of these are used in str_num_base(), only STRE_OK,
STRE_OVERFLOW and STRE_NO_NUM.
The next step is to make sure the code parses a 64-bit number. Curl supports an absurd number of compiler/architecture combinations, so it's not trivial to create a type that is going to be 64 bits wide on all of them, and this is what they came up with.
/* Choose the underlying integer type for curl_off_t.
 *
 * One #if/#elif chain keyed on compiler- and platform-identifying macros
 * defines CURL_TYPEOF_CURL_OFF_T as `long`, `long long` or `__int64`. The
 * first matching branch wins, so the order of the entries matters.
 *
 * NOTE(review): the intent is presumably a 64-bit type on every supported
 * toolchain; nothing in this chain verifies the resulting width.
 *
 * Some branches (__370__, Sun Studio, generic GCC) only define the macro when
 * one of their inner conditions matches. If none does, the macro stays
 * undefined and the typedef at the bottom is skipped.
 */
#ifdef __DJGPP__
# define CURL_TYPEOF_CURL_OFF_T long long
#elif defined(__BORLANDC__)
# define CURL_TYPEOF_CURL_OFF_T __int64
#elif defined(__POCC__)
/* __POCC__ may be combined with _MSC_VER, hence the inner check */
# ifdef _MSC_VER
# define CURL_TYPEOF_CURL_OFF_T __int64
# else
# define CURL_TYPEOF_CURL_OFF_T long long
# endif
/* this branch also defines a second, unrelated type macro */
# define CURL_TYPEOF_CURL_SOCKLEN_T int
#elif defined(__LCC__)
/* both sub-branches currently pick the same type */
# ifdef __MCST__ /* MCST eLbrus Compiler Collection */
# define CURL_TYPEOF_CURL_OFF_T long
# else /* Local (or Little) C Compiler */
# define CURL_TYPEOF_CURL_OFF_T long
# endif
#elif defined(macintosh)
# include <ConditionalMacros.h>
# if TYPE_LONGLONG
# define CURL_TYPEOF_CURL_OFF_T long long
# else
# define CURL_TYPEOF_CURL_OFF_T long
# endif
#elif defined(__TANDEM)
# ifndef __LP64
# define CURL_TYPEOF_CURL_OFF_T long long
# else
# define CURL_TYPEOF_CURL_OFF_T long
# endif
#elif defined(UNDER_CE)
# ifdef __MINGW32CE__
# define CURL_TYPEOF_CURL_OFF_T long long
# else
# define CURL_TYPEOF_CURL_OFF_T __int64
# endif
#elif defined(__MINGW32__)
# include <inttypes.h>
# define CURL_TYPEOF_CURL_OFF_T long long
#elif defined(__VMS)
# ifdef __VAX
# define CURL_TYPEOF_CURL_OFF_T long
# else
# define CURL_TYPEOF_CURL_OFF_T long long
# endif
#elif defined(__OS400__)
# define CURL_TYPEOF_CURL_OFF_T long long
#elif defined(__MVS__)
# ifdef _LONG_LONG
# define CURL_TYPEOF_CURL_OFF_T long long
# else /* _LP64 and default */
# define CURL_TYPEOF_CURL_OFF_T long
# endif
#elif defined(__370__)
/* no type is chosen here unless an IBM C or C++ compiler is detected */
# if defined(__IBMC__) || defined(__IBMCPP__)
# ifdef _LONG_LONG
# define CURL_TYPEOF_CURL_OFF_T long long
# else /* _LP64 and default */
# define CURL_TYPEOF_CURL_OFF_T long
# endif
# endif
#elif defined(TPF)
# define CURL_TYPEOF_CURL_OFF_T long
#elif defined(__TINYC__) /* also known as tcc */
# define CURL_TYPEOF_CURL_OFF_T long long
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) /* Oracle Solaris Studio */
/* the first test selects `long long`, the second `long`; no type is chosen
   when neither matches */
# if !defined(__LP64) && (defined(__ILP32) || \
defined(__i386) || \
defined(__sparcv8) || \
defined(__sparcv8plus))
# define CURL_TYPEOF_CURL_OFF_T long long
# elif defined(__LP64) || \
defined(__amd64) || defined(__sparcv9)
# define CURL_TYPEOF_CURL_OFF_T long
# endif
#elif defined(__xlc__) /* IBM xlc compiler */
# ifndef _LP64
# define CURL_TYPEOF_CURL_OFF_T long long
# else
# define CURL_TYPEOF_CURL_OFF_T long
# endif
#elif defined(__hpux) /* HP aCC compiler */
# ifndef _LP64
# define CURL_TYPEOF_CURL_OFF_T long long
# else
# define CURL_TYPEOF_CURL_OFF_T long
# endif
/* ===================================== */
/* KEEP MSVC THE PENULTIMATE ENTRY */
/* ===================================== */
/* NOTE(review): presumably because other compilers can define _MSC_VER too
   (the __POCC__ branch above tests for it) and must be matched first */
#elif defined(_MSC_VER)
# define CURL_TYPEOF_CURL_OFF_T __int64
/* ===================================== */
/* KEEP GENERIC GCC THE LAST ENTRY */
/* ===================================== */
#elif defined(__GNUC__) && !defined(_SCO_DS)
/* the first test selects `long long`, using architecture macros or a `long`
   size/limit matching 4 bytes; the second selects `long`, using the
   corresponding 8-byte indicators; no type is chosen when neither matches */
# if !defined(__LP64__) && \
(defined(__ILP32__) || defined(__i386__) || defined(__hppa__) || \
defined(__ppc__) || defined(__powerpc__) || defined(__arm__) || \
defined(__sparc__) || defined(__mips__) || defined(__sh__) || \
defined(__XTENSA__) || \
(defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 4) || \
(defined(__LONG_MAX__) && __LONG_MAX__ == 2147483647L))
# define CURL_TYPEOF_CURL_OFF_T long long
# elif defined(__LP64__) || \
defined(__x86_64__) || defined(__ppc64__) || defined(__sparc64__) || \
defined(__e2k__) || \
(defined(__SIZEOF_LONG__) && __SIZEOF_LONG__ == 8) || \
(defined(__LONG_MAX__) && __LONG_MAX__ == 9223372036854775807L)
# define CURL_TYPEOF_CURL_OFF_T long
# endif
#else
/* generic "safe guess" on old 32-bit style */
# define CURL_TYPEOF_CURL_OFF_T long long
#endif
/* Data type definition of curl_off_t. */
/* Only emitted when a branch above picked a type; otherwise curl_off_t is
   not declared by this section. */
#ifdef CURL_TYPEOF_CURL_OFF_T
typedef CURL_TYPEOF_CURL_OFF_T curl_off_t;
#endif
This is definitely the largest number of ways I have seen a 64-bit type defined.
Once the type is defined, there is an ASCII lookup table and a couple of helper macros to convert a character into a number.
/* Maps an ASCII character to its hexadecimal digit value. The table is
   indexed by (character - '0') and spans '0' (0x30) up to and including 'f'
   (0x66); every character in that range that is not a hex digit maps to 0.

   The slot for '0' deliberately holds 16 instead of 0 so that valid_digit()
   can treat "non-zero" as "is a digit". Code reading the numeric value masks
   with 0x0f, which turns that 16 back into 0. */
const unsigned char Curl_hexasciitable[] = {
  /* 0x30: '0' - '7' */ 16, 1, 2, 3, 4, 5, 6, 7,
  /* 0x38: '8', '9'  */ 8, 9, 0, 0, 0, 0, 0, 0,
  /* 0x40: 'A' - 'F' */ 0, 10, 11, 12, 13, 14, 15, 0,
  /* 0x48            */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50            */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x58            */ 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60: 'a' - 'f' */ 0, 10, 11, 12, 13, 14, 15
};
/* Given a hexadecimal digit character, return its binary value: '0' returns
   0, 'a' and 'A' return 10. The argument is used unchecked as an index into
   Curl_hexasciitable, so THIS ONLY WORKS ON VALID HEXADECIMAL DIGIT INPUT.
   Verify with valid_digit() before calling this! The 0x0f mask strips the
   marker value 16 that the table stores for '0'. The argument is evaluated
   exactly once. */
#define Curl_hexval(x) \
  ((unsigned char)(Curl_hexasciitable[(x) - '0'] & 0x0f))

/* Given an ASCII character x and the highest acceptable digit character m
   (for example '7', '9' or 'f'), evaluate to non-zero if x is a valid digit.
   m must not be greater than 'f', the last entry of Curl_hexasciitable. The
   range tests run first, so the table is only indexed when x lies within
   '0'..m. Both parameters are parenthesized so that any expression can be
   passed; x is evaluated up to three times, so it must be free of side
   effects. */
#define valid_digit(x, m) \
  (((x) >= '0') && ((x) <= (m)) && Curl_hexasciitable[(x) - '0'])
Nothing crazy here, these macros make a lot of sense.
And now the full definition of str_num_base().
/* Parse an unsigned number written in base 8, 10 or 16 starting at *linep.

   There is no support for a "0x" prefix nor for leading spaces. On success
   the value is stored in *nump, *linep is moved to the first character after
   the digits and STRE_OK is returned. STRE_NO_NUM is returned when the text
   does not begin with a valid digit, STRE_OVERFLOW when the value would
   exceed max. On either error *nump is set to 0 and *linep is left alone. */
static int str_num_base(const char **linep, curl_off_t *nump, curl_off_t max,
                        int base) /* 8, 10 or 16, nothing else */
{
  const char *cursor;
  curl_off_t total = 0;
  int top; /* the largest digit character possible in this base */

  if(base == 10)
    top = '9';
  else if(base == 16)
    top = 'f';
  else
    top = '7';

  DEBUGASSERT(linep && *linep && nump);
  DEBUGASSERT((base == 8) || (base == 10) || (base == 16));
  DEBUGASSERT(max >= 0); /* mostly to catch SIZE_MAX, which is too large */

  *nump = 0;
  cursor = *linep;
  if(!valid_digit(*cursor, top))
    return STRE_NO_NUM;

  if(max < base) {
    /* Low max scenario: a single digit can be larger than max, which would
       make (max - digit) negative and defeat the division based check used
       below. The running value stays small here, so accumulate first and
       compare afterwards. */
    while(valid_digit(*cursor, top)) {
      int digit = Curl_hexval(*cursor);
      cursor++;
      total = total * base + digit;
      if(total > max)
        return STRE_OVERFLOW;
    }
  }
  else {
    while(valid_digit(*cursor, top)) {
      int digit = Curl_hexval(*cursor);
      cursor++;
      /* same as asking if total * base + digit > max, but phrased so the
         arithmetic itself cannot overflow */
      if(total > ((max - digit) / base))
        return STRE_OVERFLOW;
      total = total * base + digit;
    }
  }

  *nump = total;
  *linep = cursor;
  return STRE_OK;
}
As arguments you need to provide a pointer to the string pointer, the
64-bit variable where you want to store the converted number, the maximum
accepted value (to check for overflow) and the base (8, 10 or 16). It
returns STRE_OK if everything went fine, in which case the string pointer
is advanced past the parsed digits, or an error code otherwise.
The function is quite understandable, as long as there are valid
digits and as long as it's not overflowing it does num = num * base +
n.
On top of this main function there are a few helper functions so that you don't need to pass the base every time.
/* Parse an unsigned base-10 number. Leading zeroes are accepted; a leading
   space or minus sign is not. Returns non-zero on error. */
int curlx_str_number(const char **linep, curl_off_t *nump, curl_off_t max)
{
  const int decimal = 10;
  return str_num_base(linep, nump, max, decimal);
}
/* Parse an unsigned base-16 number. Leading zeroes are accepted; a leading
   space, a minus sign or a "0x" prefix is not. Returns non-zero on error. */
int curlx_str_hex(const char **linep, curl_off_t *nump, curl_off_t max)
{
  const int hexadecimal = 16;
  return str_num_base(linep, nump, max, hexadecimal);
}
/* Parse an unsigned base-8 number. Leading zeroes are accepted, but there is
   no special "0" prefix support, and a leading space or minus sign is not
   accepted. Returns non-zero on error. */
int curlx_str_octal(const char **linep, curl_off_t *nump, curl_off_t max)
{
  const int octal = 8;
  return str_num_base(linep, nump, max, octal);
}
This function checks all the boxes that curl needs:
- It fails if there is an overflow
- There is clear error handling
- The output variable is always 64bit
But there are a few things that I find missing to become a general-purpose function:
- It doesn't parse negative numbers, which may be a need for some other codebases.
- It allows invalid characters after the number: for example, "123a" is successfully parsed as 123, and the caller has to inspect the updated string pointer to notice the trailing "a".
Since curl (most likely) doesn't care about these two cases, this function works great for them and for anyone else who thinks it's enough for their needs.