Skip to content

Commit cc02b04

Browse files
committed
libfoundation: Re-use iconv conversion descriptors
Creating a new `iconv_t` conversion descriptor is an expensive operation. For UTF-16 encoding/decoding, `iconv_open()` requires at least one `open()` system call (and additional work) to load its UTF-16 support shared library. An `iconv_t` descriptor can be reused safely, provided it is reset to its initial conversion state before each use by calling `iconv()` with null input and output pointers. This patch eliminates about two thirds of the `open()` syscalls that occur during IDE startup on Linux.
1 parent 6af7a29 commit cc02b04

File tree

1 file changed

+77
-30
lines changed

1 file changed

+77
-30
lines changed

libfoundation/src/foundation-string.cpp

Lines changed: 77 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6239,6 +6239,65 @@ bool MCStringIsGraphemeClusterBoundary(MCStringRef self, uindex_t p_index)
62396239

62406240
////////////////////////////////////////////////////////////////////////////////
62416241

6242+
#if defined(__LINUX__)
6243+
/* Sentinel meaning "no conversion descriptor": the same (iconv_t)-1 value
 * that iconv_open() returns when it fails. */
static const iconv_t k_invalid_iconv_fd = reinterpret_cast<iconv_t>(-1);

/* Returns true if p_fd is anything other than the invalid sentinel.
 *
 * This is only a comparison against k_invalid_iconv_fd; it does not check
 * that the descriptor is still open. */
static bool is_valid_iconv_fd(iconv_t p_fd)
{
    const bool t_is_sentinel = (p_fd == k_invalid_iconv_fd);
    return !t_is_sentinel;
}
6249+
6250+
static iconv_t s_iconv_unicode_from_sys_fd = k_invalid_iconv_fd;
6251+
static iconv_t s_iconv_unicode_to_sys_fd = k_invalid_iconv_fd;
6252+
static iconv_t s_iconv_native_to_sys_fd = k_invalid_iconv_fd;
6253+
6254+
static bool
6255+
__MCStringInitializeIconv()
6256+
{
6257+
const char *k_internal_iconv_charset =
6258+
(kMCByteOrderHost == kMCByteOrderLittleEndian ? "UTF-16LE" : "UTF-16BE");
6259+
6260+
// What is the system character encoding?
6261+
//
6262+
// Doing this here is unpleasant but the MCString*SysString functions are
6263+
// needed before the libfoundation initialise call completes
6264+
if (__MCSysCharset == nil)
6265+
{
6266+
setlocale(LC_CTYPE, "");
6267+
__MCSysCharset = nl_langinfo(CODESET);
6268+
}
6269+
6270+
s_iconv_unicode_from_sys_fd = iconv_open(k_internal_iconv_charset, __MCSysCharset);
6271+
if (!is_valid_iconv_fd(s_iconv_unicode_from_sys_fd))
6272+
return false;
6273+
s_iconv_unicode_to_sys_fd = iconv_open(__MCSysCharset, k_internal_iconv_charset);
6274+
if (!is_valid_iconv_fd(s_iconv_unicode_to_sys_fd))
6275+
return false;
6276+
s_iconv_native_to_sys_fd = iconv_open(__MCSysCharset, "ISO-8859-1");
6277+
if (!is_valid_iconv_fd(s_iconv_native_to_sys_fd))
6278+
return false;
6279+
6280+
return true;
6281+
}
6282+
6283+
static void
6284+
__MCStringFinalizeIconv()
6285+
{
6286+
auto t_iconv_cleanup = [](iconv_t& p_fd) {
6287+
if (is_valid_iconv_fd(p_fd))
6288+
{
6289+
iconv_close(p_fd);
6290+
p_fd = k_invalid_iconv_fd;
6291+
}
6292+
};
6293+
6294+
t_iconv_cleanup(s_iconv_unicode_from_sys_fd);
6295+
t_iconv_cleanup(s_iconv_unicode_to_sys_fd);
6296+
t_iconv_cleanup(s_iconv_native_to_sys_fd);
6297+
}
6298+
6299+
#endif /* __LINUX__ */
6300+
62426301
MC_DLLEXPORT_DEF MCStringRef kMCEmptyString;
62436302
MC_DLLEXPORT_DEF MCStringRef kMCTrueString;
62446303
MC_DLLEXPORT_DEF MCStringRef kMCFalseString;
@@ -6249,6 +6308,11 @@ MC_DLLEXPORT_DEF MCStringRef kMCTabString;
62496308

62506309
bool __MCStringInitialize(void)
62516310
{
6311+
#if defined(__LINUX__)
6312+
if (!__MCStringInitializeIconv())
6313+
return false;
6314+
#endif
6315+
62526316
if (!MCStringCreateWithNativeChars((const char_t *)"", 0, kMCEmptyString))
62536317
return false;
62546318

@@ -6290,6 +6354,10 @@ void __MCStringFinalize(void)
62906354
kMCLineEndString = nil;
62916355
MCValueRelease(kMCTabString);
62926356
kMCTabString = nil;
6357+
6358+
#if defined(__LINUX__)
6359+
__MCStringFinalizeIconv();
6360+
#endif
62936361
}
62946362

62956363
////////////////////////////////////////////////////////////////////////////////
@@ -6308,6 +6376,9 @@ static bool do_iconv(iconv_t fd, const char *in, size_t in_len, char * &out, siz
63086376
char * t_out;
63096377
char * t_out_cursor;
63106378

6379+
/* Reset the iconv file descriptor */
6380+
iconv(fd, nullptr, nullptr, nullptr, nullptr);
6381+
63116382
t_out = (char*)malloc(in_len);
63126383
if (t_out == nil)
63136384
return false;
@@ -6380,27 +6451,8 @@ bool MCStringCreateWithSysString(const char *p_system_string, MCStringRef &r_str
63806451
return true;
63816452
}
63826453

6383-
6384-
// What is the system character encoding?
6385-
//
6386-
// Doing this here is unpleasant but the MCString*SysString functions are
6387-
// needed before the libfoundation initialise call is made
6388-
if (__MCSysCharset == nil)
6389-
{
6390-
setlocale(LC_CTYPE, "");
6391-
__MCSysCharset = nl_langinfo(CODESET);
6392-
}
6393-
6394-
// Create the pseudo-FD that iconv uses for character conversion. The most
6395-
// convenient form is UTF-16 as StringRefs can be constructed directly from that.
6396-
#ifdef __LITTLE_ENDIAN__
6397-
iconv_t t_fd = iconv_open("UTF-16LE", __MCSysCharset);
6398-
#else
6399-
iconv_t t_fd = iconv_open("UTF-16BE", __MCSysCharset);
6400-
#endif
6401-
64026454
// Was creation of the iconv FD successful?
6403-
if (t_fd == (iconv_t)-1)
6455+
if (!is_valid_iconv_fd(s_iconv_unicode_from_sys_fd))
64046456
return false;
64056457

64066458
// Measure the string
@@ -6411,8 +6463,8 @@ bool MCStringCreateWithSysString(const char *p_system_string, MCStringRef &r_str
64116463
char *t_utf16_bytes;
64126464
size_t t_utf16_byte_len;
64136465
bool t_success;
6414-
t_success = do_iconv(t_fd, p_system_string, t_len, t_utf16_bytes, t_utf16_byte_len);
6415-
iconv_close(t_fd);
6466+
t_success = do_iconv(s_iconv_unicode_from_sys_fd, p_system_string, t_len,
6467+
t_utf16_bytes, t_utf16_byte_len);
64166468

64176469
if (!t_success)
64186470
return false;
@@ -6450,31 +6502,26 @@ bool MCStringConvertToSysString(MCStringRef p_string, char *& r_system_string, s
64506502

64516503
if (MCStringIsNative(p_string) && MCStringGetNativeCharPtr(p_string) != nil)
64526504
{
6453-
t_fd = iconv_open(__MCSysCharset, "ISO-8859-1");
6505+
t_fd = s_iconv_native_to_sys_fd;
64546506
t_mc_string = (const char *)MCStringGetNativeCharPtr(p_string);
64556507
t_mc_len = MCStringGetLength(p_string);
64566508
}
64576509
else
64586510
{
6459-
#ifdef __LITTLE_ENDIAN__
6460-
t_fd = iconv_open(__MCSysCharset, "UTF-16LE");
6461-
#else
6462-
t_fd = iconv_open(__MCSysCharset, "UTF-16BE");
6463-
#endif
6511+
t_fd = s_iconv_unicode_to_sys_fd;
64646512
t_mc_string = (const char *)MCStringGetCharPtr(p_string);
64656513
t_mc_len = MCStringGetLength(p_string) * sizeof(unichar_t);
64666514
}
64676515

64686516
// Was creation of the iconv FD successful?
6469-
if (t_fd == (iconv_t)-1)
6517+
if (!is_valid_iconv_fd(t_fd))
64706518
return false;
64716519

64726520
// Perform the conversion
64736521
bool t_success;
64746522
char *t_sys_string;
64756523
size_t t_sys_len;
64766524
t_success = do_iconv(t_fd, t_mc_string, t_mc_len, t_sys_string, t_sys_len);
6477-
iconv_close(t_fd);
64786525

64796526
if (!t_success)
64806527
return false;

0 commit comments

Comments
 (0)