From a17d756543b7524f292c1585b160ded03ff59654 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 8 Feb 2015 01:18:54 -0500 Subject: [PATCH 01/16] replace brk and sbrk with stubs Pretend that there is never room to grow the heap in order to prevent usage of these unsafe legacy functions. There are likely no users of these in practice as it is inherently broken to use them outside of malloc. Signed-off-by: anupritaisno1 --- libc/bionic/brk.cpp | 48 ++++++++------------------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/libc/bionic/brk.cpp b/libc/bionic/brk.cpp index 566c33a7a..ef9305513 100644 --- a/libc/bionic/brk.cpp +++ b/libc/bionic/brk.cpp @@ -29,48 +29,16 @@ #include #include -#if defined(__LP64__) -static void* __bionic_brk; -#else -void* __bionic_brk; // Accidentally exported by the NDK. +#if !defined(__LP64__) +void* __bionic_brk = reinterpret_cast(-1); // Accidentally exported by the NDK. #endif -extern "C" void* __brk(void* __addr); - -int brk(void* end_data) { - __bionic_brk = __brk(end_data); - if (__bionic_brk < end_data) { - errno = ENOMEM; - return -1; - } - return 0; +int brk(void*) { + errno = ENOMEM; + return -1; } -void* sbrk(ptrdiff_t increment) { - // Initialize __bionic_brk if necessary. - if (__bionic_brk == nullptr) { - __bionic_brk = __brk(nullptr); - } - - // Don't ask the kernel if we already know the answer. - if (increment == 0) { - return __bionic_brk; - } - - // Avoid overflow. 
- uintptr_t old_brk = reinterpret_cast(__bionic_brk); - if ((increment > 0 && static_cast(increment) > (UINTPTR_MAX - old_brk)) || - (increment < 0 && static_cast(-increment) > old_brk)) { - errno = ENOMEM; - return reinterpret_cast(-1); - } - - void* desired_brk = reinterpret_cast(old_brk + increment); - __bionic_brk = __brk(desired_brk); - if (__bionic_brk < desired_brk) { - errno = ENOMEM; - return reinterpret_cast(-1); - } - - return reinterpret_cast(old_brk); +void* sbrk(ptrdiff_t) { + errno = ENOMEM; + return reinterpret_cast(-1); } From c82a662cec995e8c6815108278b53d2539497aab Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 19 Sep 2016 07:57:43 -0400 Subject: [PATCH 02/16] fix undefined out-of-bounds accesses in sched.h Signed-off-by: anupritaisno1 --- libc/include/sched.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libc/include/sched.h b/libc/include/sched.h index 3260231cf..00c2a4a45 100644 --- a/libc/include/sched.h +++ b/libc/include/sched.h @@ -70,7 +70,10 @@ int setns(int __fd, int __ns_type) __INTRODUCED_IN(21); #define __CPU_MASK(x) ((__CPU_BITTYPE)1 << ((x) & (__CPU_BITS - 1))) typedef struct { - __CPU_BITTYPE __bits[ CPU_SETSIZE / __CPU_BITS ]; + union { + __CPU_BITTYPE __bits_minimum[ CPU_SETSIZE / __CPU_BITS ]; + __CPU_BITTYPE __bits[0]; + }; } cpu_set_t; int sched_setaffinity(pid_t __pid, size_t __set_size, const cpu_set_t* __set); From 484b1734eefe54cbbe6617af7336f9640b746454 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 16 Jul 2016 23:55:16 -0400 Subject: [PATCH 03/16] replace VLA formatting with dprintf-like function Signed-off-by: anupritaisno1 --- libc/bionic/bionic_systrace.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/libc/bionic/bionic_systrace.cpp b/libc/bionic/bionic_systrace.cpp index fd9771298..acd9b7681 100644 --- a/libc/bionic/bionic_systrace.cpp +++ b/libc/bionic/bionic_systrace.cpp @@ -27,8 +27,6 @@ #include #include // For ATRACE_TAG_BIONIC. 
-#define WRITE_OFFSET 32 - static Lock g_lock; static CachedProperty g_debug_atrace_tags_enableflags("debug.atrace.tags.enableflags"); static uint64_t g_tags; @@ -65,15 +63,9 @@ void bionic_trace_begin(const char* message) { return; } - // If bionic tracing has been enabled, then write the message to the - // kernel trace_marker. - int length = strlen(message); - char buf[length + WRITE_OFFSET]; - size_t len = async_safe_format_buffer(buf, length + WRITE_OFFSET, "B|%d|%s", getpid(), message); - // Tracing may stop just after checking property and before writing the message. // So the write is acceptable to fail. See b/20666100. - TEMP_FAILURE_RETRY(write(trace_marker_fd, buf, len)); + async_safe_format_fd(trace_marker_fd, "B|%d|%s", getpid(), message); } void bionic_trace_end() { From 461e8a61823d69c83b7393b29109d543059f7821 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Thu, 22 Jul 2021 16:12:55 -0700 Subject: [PATCH 04/16] linker: Add support for opening zip files by fd paths In some cases, it can be useful to load libraries from zip files that are only available by fd reference. For example, file descriptors of APKs containing native libraries may be sent via Binder IPC for clients to use. Unfortunately, while this linker does support loading libraries from file descriptors using android_dlopen_ext, using that API is not an option because our dlopen calls originate from JNI loadLibrary requests in ART. This is necessary for compatibility with Google Play Services' dynamic module system (Dynamite) without weakening the SELinux sandbox to allow other apps to open module APKs from /data/user_de/0/com.google.android.gms/app_chimera/m. 
Change-Id: If44d5c3faf4f50e4704688b520b197ff151ae05a --- linker/linker.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/linker/linker.cpp b/linker/linker.cpp index 3488f5cc7..20c20debc 100644 --- a/linker/linker.cpp +++ b/linker/linker.cpp @@ -842,14 +842,14 @@ class ZipArchiveCache { ZipArchiveCache() {} ~ZipArchiveCache(); - bool get_or_open(const char* zip_path, ZipArchiveHandle* handle); + bool get_or_open(const char* zip_path, int zip_fd, ZipArchiveHandle* handle); private: DISALLOW_COPY_AND_ASSIGN(ZipArchiveCache); std::unordered_map cache_; }; -bool ZipArchiveCache::get_or_open(const char* zip_path, ZipArchiveHandle* handle) { +bool ZipArchiveCache::get_or_open(const char* zip_path, int zip_fd, ZipArchiveHandle* handle) { std::string key(zip_path); auto it = cache_.find(key); @@ -858,7 +858,7 @@ bool ZipArchiveCache::get_or_open(const char* zip_path, ZipArchiveHandle* handle return true; } - int fd = TEMP_FAILURE_RETRY(open(zip_path, O_RDONLY | O_CLOEXEC)); + int fd = zip_fd != -1 ? dup(zip_fd) : TEMP_FAILURE_RETRY(open(zip_path, O_RDONLY | O_CLOEXEC)); if (fd == -1) { return false; } @@ -909,13 +909,19 @@ static int open_library_in_zipfile(ZipArchiveCache* zip_archive_cache, const char* zip_path = buf; const char* file_path = &buf[separator - path + 2]; - int fd = TEMP_FAILURE_RETRY(open(zip_path, O_RDONLY | O_CLOEXEC)); + int fd; + if (!strncmp("/proc/self/fd/", zip_path, strlen("/proc/self/fd/")) && + sscanf(zip_path, "/proc/self/fd/%d", &fd) == 1) { + fd = dup(fd); + } else { + fd = TEMP_FAILURE_RETRY(open(zip_path, O_RDONLY | O_CLOEXEC)); + } if (fd == -1) { return -1; } ZipArchiveHandle handle; - if (!zip_archive_cache->get_or_open(zip_path, &handle)) { + if (!zip_archive_cache->get_or_open(zip_path, fd, &handle)) { // invalid zip-file (?) 
close(fd); return -1; From 902babbccafa41eb6916094200ff116fc8f29ab6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 12 Mar 2017 17:49:13 -0400 Subject: [PATCH 05/16] on 64-bit, zero the leading stack canary byte This reduces entropy of the canary from 64-bit to 56-bit in exchange for mitigating non-terminated C string overflows. Signed-off-by: anupritaisno1 --- libc/bionic/__libc_init_main_thread.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp index 95f46e9fa..dc9ab77c1 100644 --- a/libc/bionic/__libc_init_main_thread.cpp +++ b/libc/bionic/__libc_init_main_thread.cpp @@ -46,6 +46,12 @@ uintptr_t __stack_chk_guard = 0; static pthread_internal_t main_thread; +#if __LP64__ +static const uintptr_t canary_mask = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ ? + 0xffffffffffffff00UL : + 0x00ffffffffffffffUL; +#endif + // Setup for the main thread. For dynamic executables, this is called by the // linker _before_ libc is mapped in memory. This means that all writes to // globals from this function will apply to linker-private copies and will not @@ -111,6 +117,10 @@ extern "C" void android_reset_stack_guards() { // before we initialize the TLS. Dynamic executables will initialize their copy of the global // stack protector from the one in the main thread's TLS. __libc_safe_arc4random_buf(&__stack_chk_guard, sizeof(__stack_chk_guard)); +#if __LP64__ + // Sacrifice 8 bits of entropy on 64-bit to mitigate non-terminated C string overflows + __stack_chk_guard &= canary_mask; +#endif __init_tcb_stack_guard(__get_bionic_tcb()); } From f46646a2e6cfd58317d2204dcf8af3122aee8aad Mon Sep 17 00:00:00 2001 From: Tom Marshall Date: Fri, 17 Jun 2016 16:38:12 -0700 Subject: [PATCH 06/16] bionic: Sort and cache hosts file data for fast lookup The hosts file is normally searched linearly. This is very slow when the file is large. 
To mitigate this, read the hosts file and sort the entries in an in-memory cache. When an address is requested via gethostbyname or getaddrinfo, binary search the cache. In case where the cache is not available, return a suitable error code and fall back to the existing lookup code. This has been written to behave as much like the existing lookup code as possible. But note bionic and glibc differ in behavior for some corner cases. Choose the most standard compliant behavior for these where possible. Otherwise choose the behavior that seems most reasonable. RM-290 Change-Id: I3b322883cbc48b0d76a0ce9d149b59faaac1dc58 (cherry picked from commit ed4c3a6bd449a4ed70645071a440ae146f194116) --- libc/dns/net/getaddrinfo.c | 10 + libc/dns/net/hosts_cache.c | 520 +++++++++++++++++++++++++++++++++++++ libc/dns/net/hosts_cache.h | 23 ++ libc/dns/net/sethostent.c | 7 + 4 files changed, 560 insertions(+) create mode 100644 libc/dns/net/hosts_cache.c create mode 100644 libc/dns/net/hosts_cache.h diff --git a/libc/dns/net/getaddrinfo.c b/libc/dns/net/getaddrinfo.c index d0c11d2b0..cc94b21e2 100644 --- a/libc/dns/net/getaddrinfo.c +++ b/libc/dns/net/getaddrinfo.c @@ -109,6 +109,8 @@ #include "nsswitch.h" #include "private/bionic_defs.h" +#include "hosts_cache.h" + typedef union sockaddr_union { struct sockaddr generic; struct sockaddr_in in; @@ -2125,6 +2127,14 @@ _files_getaddrinfo(void *rv, void *cb_data, va_list ap) name = va_arg(ap, char *); pai = va_arg(ap, struct addrinfo *); + memset(&sentinel, 0, sizeof(sentinel)); + cur = &sentinel; + int gai_error = hc_getaddrinfo(name, NULL, pai, &cur); + if (gai_error != EAI_SYSTEM) { + *((struct addrinfo **)rv) = sentinel.ai_next; + return (gai_error == 0 ? 
NS_SUCCESS : NS_NOTFOUND); + } + // fprintf(stderr, "_files_getaddrinfo() name = '%s'\n", name); memset(&sentinel, 0, sizeof(sentinel)); cur = &sentinel; diff --git a/libc/dns/net/hosts_cache.c b/libc/dns/net/hosts_cache.c new file mode 100644 index 000000000..52d29e032 --- /dev/null +++ b/libc/dns/net/hosts_cache.c @@ -0,0 +1,520 @@ +/* + * Copyright (C) 2016 The CyanogenMod Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hostent.h" +#include "resolv_private.h" + +#define MAX_ADDRLEN (INET6_ADDRSTRLEN - (1 + 5)) +#define MAX_HOSTLEN MAXHOSTNAMELEN + +#define ESTIMATED_LINELEN 32 +#define HCFILE_ALLOC_SIZE 256 + +/* + * Host cache entry for hcfile.c_data. + * Offsets are into hcfile.h_data. + * Strings are *not* terminated by NULL, but by whitespace (isspace) or '#'. + * Use hstr* functions with these. + */ +struct hcent +{ + uint32_t addr; + uint32_t name; +}; + +/* + * Overall host cache file state. 
+ */ +struct hcfile +{ + int h_fd; + struct stat h_st; + char *h_data; + + uint32_t c_alloc; + uint32_t c_len; + struct hcent *c_data; +}; +static struct hcfile hcfile; +static pthread_mutex_t hclock = PTHREAD_MUTEX_INITIALIZER; + +static size_t hstrlen(const char *s) +{ + const char *p = s; + while (*p && *p != '#' && !isspace(*p)) + ++p; + return p - s; +} + +static int hstrcmp(const char *a, const char *b) +{ + size_t alen = hstrlen(a); + size_t blen = hstrlen(b); + int res = strncmp(a, b, MIN(alen, blen)); + if (res == 0) + res = alen - blen; + return res; +} + +static char *hstrcpy(char *dest, const char *src) +{ + size_t len = hstrlen(src); + memcpy(dest, src, len); + dest[len] = '\0'; + return dest; +} + +static char *hstrdup(const char *s) +{ + size_t len = hstrlen(s); + char *dest = (char *)malloc(len + 1); + if (!dest) + return NULL; + memcpy(dest, s, len); + dest[len] = '\0'; + return dest; +} + +static int cmp_hcent_name(const void *a, const void *b) +{ + struct hcent *ea = (struct hcent *)a; + const char *na = hcfile.h_data + ea->name; + struct hcent *eb = (struct hcent *)b; + const char *nb = hcfile.h_data + eb->name; + + return hstrcmp(na, nb); +} + +static struct hcent *_hcfindname(const char *name) +{ + size_t first, last, mid; + struct hcent *cur = NULL; + int cmp; + + if (hcfile.c_len == 0) + return NULL; + + first = 0; + last = hcfile.c_len - 1; + mid = (first + last) / 2; + while (first <= last) { + cur = hcfile.c_data + mid; + cmp = hstrcmp(hcfile.h_data + cur->name, name); + if (cmp == 0) + goto found; + if (cmp < 0) + first = mid + 1; + else { + if (mid > 0) + last = mid - 1; + else + return NULL; + } + mid = (first + last) / 2; + } + return NULL; + +found: + while (cur > hcfile.c_data) { + struct hcent *prev = cur - 1; + cmp = cmp_hcent_name(cur, prev); + if (cmp) + break; + cur = prev; + } + + return cur; +} + +/* + * Find next name on line, if any. + * + * Assumes that line is terminated by LF. 
+ */ +static const char *_hcnextname(const char *name) +{ + while (!isspace(*name)) { + if (*name == '#') + return NULL; + ++name; + } + while (isspace(*name)) { + if (*name == '\n') + return NULL; + ++name; + } + if (*name == '#') + return NULL; + return name; +} + +static int _hcfilemmap(void) +{ + struct stat st; + int h_fd; + char *h_addr; + const char *p, *pend; + uint32_t c_alloc; + + h_fd = open(_PATH_HOSTS, O_RDONLY | O_CLOEXEC); /* fd is cached in hcfile.h_fd; do not leak it across exec */ + if (h_fd < 0) + return -1; + if (flock(h_fd, LOCK_EX) != 0) { + close(h_fd); + return -1; + } + + if (hcfile.h_data) { + memset(&st, 0, sizeof(st)); + if (fstat(h_fd, &st) == 0) { + if (st.st_size == hcfile.h_st.st_size && + st.st_mtime == hcfile.h_st.st_mtime) { + flock(h_fd, LOCK_UN); + close(h_fd); + return 0; + } + } + free(hcfile.c_data); + munmap(hcfile.h_data, hcfile.h_st.st_size); + close(hcfile.h_fd); + memset(&hcfile, 0, sizeof(struct hcfile)); + } + + if (fstat(h_fd, &st) != 0) { + flock(h_fd, LOCK_UN); + close(h_fd); + return -1; + } + h_addr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, h_fd, 0); + if (h_addr == MAP_FAILED) { + flock(h_fd, LOCK_UN); + close(h_fd); + return -1; + } + + hcfile.h_fd = h_fd; + hcfile.h_st = st; + hcfile.h_data = h_addr; + + c_alloc = 0; + /* + * Do an initial allocation if the file is "large". Estimate + * 32 bytes per line and define "large" as more than half of + * the alloc growth size (256 entries).
+ */ + if (st.st_size >= ESTIMATED_LINELEN * HCFILE_ALLOC_SIZE / 2) { + c_alloc = st.st_size / ESTIMATED_LINELEN; + hcfile.c_data = malloc(c_alloc * sizeof(struct hcent)); + if (!hcfile.c_data) { + goto oom; + } + } + + p = (const char *)h_addr; + pend = p + st.st_size; + while (p < pend) { + const char *eol, *addr, *name; + size_t len; + addr = p; + eol = memchr(p, '\n', pend - p); + if (!eol) + break; + p = eol + 1; + if (*addr == '#' || *addr == '\n') + continue; + len = hstrlen(addr); + if (len > MAX_ADDRLEN) + continue; + name = addr + len; + while (name < eol && isspace(*name)) + ++name; + while (name < eol) { + len = hstrlen(name); + if (len == 0) + break; + if (len < MAX_HOSTLEN) { + struct hcent *ent; + if (c_alloc <= hcfile.c_len) { + struct hcent *c_data; + c_alloc += HCFILE_ALLOC_SIZE; + c_data = realloc(hcfile.c_data, c_alloc * sizeof(struct hcent)); + if (!c_data) { + goto oom; + } + hcfile.c_data = c_data; + } + ent = hcfile.c_data + hcfile.c_len; + ent->addr = addr - h_addr; + ent->name = name - h_addr; + ++hcfile.c_len; + } + name += len; + while (name < eol && isspace(*name)) + ++name; + } + } + + qsort(hcfile.c_data, hcfile.c_len, + sizeof(struct hcent), cmp_hcent_name); + + flock(h_fd, LOCK_UN); + + return 0; + +oom: + free(hcfile.c_data); + munmap(hcfile.h_data, hcfile.h_st.st_size); + flock(hcfile.h_fd, LOCK_UN); + close(hcfile.h_fd); + memset(&hcfile, 0, sizeof(struct hcfile)); + return -1; +} + +/* + * Caching version of getaddrinfo. + * + * If we find the requested host name in the cache, use getaddrinfo to + * populate the result for each address we find. + * + * Note glibc and bionic differ in the handling of ai_canonname. POSIX + * says that ai_canonname is only populated in the first result entry. + * glibc does this. bionic populates ai_canonname in all result entries. + * We choose the POSIX/glibc way here.
+ */ +int hc_getaddrinfo(const char *host, const char *service, + const struct addrinfo *hints, + struct addrinfo **result) +{ + int ret = 0; + struct hcent *ent, *cur; + struct addrinfo *ai; + struct addrinfo rhints; + struct addrinfo *last; + int canonname = 0; + int cmp; + + if (getenv("ANDROID_HOSTS_CACHE_DISABLE") != NULL) + return EAI_SYSTEM; + + /* Avoid needless work and recursion */ + if (hints && (hints->ai_flags & AI_NUMERICHOST)) + return EAI_SYSTEM; + if (!host) + return EAI_SYSTEM; + + pthread_mutex_lock(&hclock); + + if (_hcfilemmap() != 0) { + ret = EAI_SYSTEM; + goto out; + } + ent = _hcfindname(host); + if (!ent) { + ret = EAI_NONAME; + goto out; + } + + if (hints) { + canonname = (hints->ai_flags & AI_CANONNAME); + memcpy(&rhints, hints, sizeof(rhints)); + rhints.ai_flags &= ~AI_CANONNAME; + } + else { + memset(&rhints, 0, sizeof(rhints)); + } + rhints.ai_flags |= AI_NUMERICHOST; + + last = NULL; + cur = ent; + do { + char addrstr[MAX_ADDRLEN + 1]; /* parser accepts up to MAX_ADDRLEN chars; +1 for the NUL hstrcpy appends */ + struct addrinfo *res; + + hstrcpy(addrstr, hcfile.h_data + cur->addr); + + if (getaddrinfo(addrstr, service, &rhints, &res) == 0) { + if (!last) + (*result)->ai_next = res; + else + last->ai_next = res; + last = res; + while (last->ai_next) + last = last->ai_next; + } + + if(cur + 1 >= hcfile.c_data + hcfile.c_len) + break; + cmp = cmp_hcent_name(cur, cur + 1); + cur = cur + 1; + } + while (!cmp); + + if (last == NULL) { + /* This check is equivalent to (*result)->ai_next == NULL */ + ret = EAI_NODATA; + goto out; + } + + if (canonname) { + ai = (*result)->ai_next; + free(ai->ai_canonname); + ai->ai_canonname = hstrdup(hcfile.h_data + ent->name); + } + +out: + pthread_mutex_unlock(&hclock); + return ret; +} + +/* + * Caching version of gethtbyname. + * + * Note glibc and bionic differ in the handling of aliases. glibc returns + * all aliases for all entries, regardless of whether they match h_addrtype. + * bionic returns only the aliases for the first hosts entry.
We return all + * aliases for all IPv4 entries. + * + * Additionally, if an alias is IPv6 and the primary name for an alias also + * has an IPv4 entry, glibc will return the IPv4 address(es), but bionic + * will not. Neither do we. + */ +int hc_gethtbyname(const char *host, int af, struct getnamaddr *info) +{ + int ret = NETDB_SUCCESS; + struct hcent *ent, *cur; + int cmp; + size_t addrlen; + unsigned int naliases = 0; + char *aliases[MAXALIASES]; + unsigned int naddrs = 0; + char *addr_ptrs[MAXADDRS]; + unsigned int n; + + if (getenv("ANDROID_HOSTS_CACHE_DISABLE") != NULL) + return NETDB_INTERNAL; + + switch (af) { + case AF_INET: addrlen = NS_INADDRSZ; break; + case AF_INET6: addrlen = NS_IN6ADDRSZ; break; + default: + return NETDB_INTERNAL; + } + + pthread_mutex_lock(&hclock); + + if (_hcfilemmap() != 0) { + ret = NETDB_INTERNAL; + goto out; + } + + ent = _hcfindname(host); + if (!ent) { + ret = HOST_NOT_FOUND; + goto out; + } + + cur = ent; + do { + char addr[16]; + char addrstr[MAX_ADDRLEN + 1]; /* parser accepts up to MAX_ADDRLEN chars; +1 for the NUL hstrcpy appends */ + char namestr[MAX_HOSTLEN]; + const char *name; + + hstrcpy(addrstr, hcfile.h_data + cur->addr); + if (inet_pton(af, addrstr, &addr) == 1) { + char *aligned; + /* First match is considered the official hostname */ + if (naddrs == 0) { + hstrcpy(namestr, hcfile.h_data + cur->name); + HENT_SCOPY(info->hp->h_name, namestr, info->buf, info->buflen); + } + for (name = hcfile.h_data + cur->name; name; name = _hcnextname(name)) { + if (!hstrcmp(name, host) || hstrlen(name) >= sizeof(namestr)) + continue; /* skip the queried name itself and any name too long for namestr */ + hstrcpy(namestr, name); + HENT_SCOPY(aliases[naliases], namestr, info->buf, info->buflen); + ++naliases; + if (naliases >= MAXALIASES) + goto nospc; + } + aligned = (char *)ALIGN(info->buf); + if (info->buf != aligned) { + if ((ptrdiff_t)info->buflen < (aligned - info->buf)) + goto nospc; + info->buflen -= (aligned - info->buf); + info->buf = aligned; + } + HENT_COPY(addr_ptrs[naddrs], addr, addrlen, info->buf, info->buflen); + ++naddrs; + if (naddrs >= MAXADDRS) + goto nospc; + } + + if(cur + 1 >= hcfile.c_data
+ hcfile.c_len) + break; + cmp = cmp_hcent_name(cur, cur + 1); + cur = cur + 1; + } + while (!cmp); + + if (naddrs == 0) { + ret = HOST_NOT_FOUND; + goto out; + } + + addr_ptrs[naddrs++] = NULL; + aliases[naliases++] = NULL; + + /* hp->h_name already populated */ + HENT_ARRAY(info->hp->h_aliases, naliases, info->buf, info->buflen); + for (n = 0; n < naliases; ++n) { + info->hp->h_aliases[n] = aliases[n]; + } + info->hp->h_addrtype = af; + info->hp->h_length = addrlen; + HENT_ARRAY(info->hp->h_addr_list, naddrs, info->buf, info->buflen); + for (n = 0; n < naddrs; ++n) { + info->hp->h_addr_list[n] = addr_ptrs[n]; + } + +out: + pthread_mutex_unlock(&hclock); + *info->he = ret; + return ret; + +nospc: + ret = NETDB_INTERNAL; + goto out; +} diff --git a/libc/dns/net/hosts_cache.h b/libc/dns/net/hosts_cache.h new file mode 100644 index 000000000..fa5488f51 --- /dev/null +++ b/libc/dns/net/hosts_cache.h @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2016 The CyanogenMod Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +struct getnamaddr; + +int hc_getaddrinfo(const char *host, const char *service, + const struct addrinfo *hints, + struct addrinfo **result); + +int hc_gethtbyname(const char *host, int af, struct getnamaddr *info); diff --git a/libc/dns/net/sethostent.c b/libc/dns/net/sethostent.c index 483105a95..1399378cd 100644 --- a/libc/dns/net/sethostent.c +++ b/libc/dns/net/sethostent.c @@ -55,6 +55,8 @@ __RCSID("$NetBSD: sethostent.c,v 1.20 2014/03/17 13:24:23 christos Exp $"); #include "hostent.h" #include "resolv_private.h" +#include "hosts_cache.h" + #ifndef _REENTRANT void res_close(void); #endif @@ -109,6 +111,11 @@ _hf_gethtbyname(void *rv, void *cb_data, va_list ap) /* NOSTRICT skip string len */(void)va_arg(ap, int); af = va_arg(ap, int); + int rc = hc_gethtbyname(name, af, info); + if (rc != NETDB_INTERNAL) { + return (rc == NETDB_SUCCESS ? NS_SUCCESS : NS_NOTFOUND); + } + #if 0 { res_state res = __res_get_state(); From 61d8d453aea2d8b657f33d14c1013944c817e6ee Mon Sep 17 00:00:00 2001 From: Tom Marshall Date: Thu, 16 Jan 2020 13:07:04 -0800 Subject: [PATCH 07/16] bionic: Support wildcards in cached hosts file If an exact name is not found in the hosts file and the host name contains at least one dot, search for entries of the form "*.domain", where domain is the portion of the host name after the first dot. If that is not found, repeat using the domain. 
Example: a.b.c.example.com would search for the following in turn: a.b.c.example.com *.b.c.example.com *.c.example.com *.example.com *.com Change-Id: I4b0bb81699151d5b371850daebf785e35ec9b180 --- libc/dns/net/hosts_cache.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/libc/dns/net/hosts_cache.c b/libc/dns/net/hosts_cache.c index 52d29e032..fc6370d0c 100644 --- a/libc/dns/net/hosts_cache.c +++ b/libc/dns/net/hosts_cache.c @@ -117,7 +117,7 @@ static int cmp_hcent_name(const void *a, const void *b) return hstrcmp(na, nb); } -static struct hcent *_hcfindname(const char *name) +static struct hcent *_hcfindname_exact(const char *name) { size_t first, last, mid; struct hcent *cur = NULL; @@ -158,6 +158,33 @@ static struct hcent *_hcfindname(const char *name) return cur; } +static struct hcent *_hcfindname(const char *name) +{ + struct hcent *ent; + char namebuf[MAX_HOSTLEN]; + char *p; + char *dot; + + ent = _hcfindname_exact(name); + if (!ent && strlen(name) < sizeof(namebuf)) { + strcpy(namebuf, name); + p = namebuf; + do { + dot = strchr(p, '.'); + if (!dot) + break; + if (dot > p) { + *(dot - 1) = '*'; + ent = _hcfindname_exact(dot - 1); + } + p = dot + 1; + } + while (!ent); + } + + return ent; +} + /* * Find next name on line, if any. 
* From 74e8f30bc2352a33307dc515d72c6f681a353ce7 Mon Sep 17 00:00:00 2001 From: Bernhard Rosenkraenzer Date: Sat, 15 Feb 2014 20:43:47 +0100 Subject: [PATCH 08/16] [master] [DNM] libc: Import cortex-strings strlen for A7/A15/A53/A53.A57/Denver/Krait Benchmarked on Nextbit Robin (MSM8992) Before: iterations ns/op BM_string_strlen/8 50M 75 0.106 GiB/s BM_string_strlen/64 10M 159 0.400 GiB/s BM_string_strlen/512 2M 819 0.625 GiB/s BM_string_strlen/1024 1000k 1547 0.662 GiB/s BM_string_strlen/8Ki 200k 12327 0.665 GiB/s BM_string_strlen/16Ki 100k 24579 0.667 GiB/s BM_string_strlen/32Ki 50k 48950 0.669 GiB/s BM_string_strlen/64Ki 20k 97195 0.674 GiB/s After: iterations ns/op BM_string_strlen/8 50M 13 0.574 GiB/s BM_string_strlen/64 1000k 23 2.703 GiB/s BM_string_strlen/512 20M 115 4.414 GiB/s BM_string_strlen/1024 10M 206 4.954 GiB/s BM_string_strlen/8Ki 1000k 1528 5.359 GiB/s BM_string_strlen/16Ki 1000k 2946 5.561 GiB/s BM_string_strlen/32Ki 500k 5910 5.544 GiB/s BM_string_strlen/64Ki 200k 11842 5.534 GiB/s Signed-off-by: Bernhard Rosenkraenzer Signed-off-by: Jake Weinstein Signed-off-by: Vishalcj17 Change-Id: I1e74557046c901afd1356e8ebf3a6c39b0850b87 --- libc/arch-arm/cortex-a15/bionic/strlen.S | 282 +++++++++++------------ 1 file changed, 131 insertions(+), 151 deletions(-) diff --git a/libc/arch-arm/cortex-a15/bionic/strlen.S b/libc/arch-arm/cortex-a15/bionic/strlen.S index 9c5ed2911..c568ee05d 100644 --- a/libc/arch-arm/cortex-a15/bionic/strlen.S +++ b/libc/arch-arm/cortex-a15/bionic/strlen.S @@ -1,165 +1,145 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* Copyright (c) 2010-2011,2013 Linaro Limited + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Linaro Limited nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ + /* - * Copyright (c) 2013 ARM Ltd - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the company may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED - * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + Assumes: + ARMv6T2, AArch32 + + Adapted to Bionic by Bernhard Rosenkränzer */ #include - .syntax unified +#ifdef __ARMEB__ +#define S2LO lsl +#define S2HI lsr +#else +#define S2LO lsr +#define S2HI lsl +#endif + .text .thumb - .thumb_func - -ENTRY(strlen_a15) - pld [r0, #0] - mov r1, r0 - - ands r3, r0, #7 - beq .L_mainloop - - // Align to a double word (64 bits). - rsb r3, r3, #8 - lsls ip, r3, #31 - beq .L_align_to_32 - - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_return - -.L_align_to_32: - bcc .L_align_to_64 - ands ip, r3, #2 - beq .L_align_to_64 - - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_return - ldrb r2, [r1], #1 - cbz r2, .L_update_count_and_return - -.L_align_to_64: - tst r3, #4 - beq .L_mainloop - ldr r3, [r1], #4 - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - - .p2align 2 -.L_mainloop: - ldrd r2, r3, [r1], #8 - - pld [r1, #64] - - sub ip, r2, #0x01010101 - bic ip, ip, r2 - ands ip, ip, #0x80808080 - bne .L_zero_in_first_register - - sub ip, r3, #0x01010101 - bic ip, ip, r3 - ands ip, ip, #0x80808080 - bne .L_zero_in_second_register - b .L_mainloop - -.L_update_count_and_return: - sub r0, r1, r0 - sub r0, r0, #1 - bx lr - -.L_zero_in_first_register: - sub r0, r1, r0 - lsls r3, ip, #17 - bne .L_sub8_and_return - bcs .L_sub7_and_return - lsls ip, ip, #1 - bne .L_sub6_and_return - - sub r0, r0, #5 - bx lr - -.L_sub8_and_return: - sub r0, r0, #8 - bx lr - -.L_sub7_and_return: - sub 
r0, r0, #7 - bx lr - -.L_sub6_and_return: - sub r0, r0, #6 - bx lr - -.L_zero_in_second_register: - sub r0, r1, r0 - lsls r3, ip, #17 - bne .L_sub4_and_return - bcs .L_sub3_and_return - lsls ip, ip, #1 - bne .L_sub2_and_return + .syntax unified - sub r0, r0, #1 - bx lr +/* Parameters and result. */ +#define srcin r0 +#define result r0 -.L_sub4_and_return: - sub r0, r0, #4 - bx lr +/* Internal variables. */ +#define src r1 +#define data1a r2 +#define data1b r3 +#define const_m1 r12 +#define const_0 r4 +#define tmp1 r4 /* Overlaps const_0 */ +#define tmp2 r5 -.L_sub3_and_return: - sub r0, r0, #3 +ENTRY(strlen_a15) + .p2align 6 + pld [srcin, #0] + strd r4, r5, [sp, #-8]! + bic src, srcin, #7 + mvn const_m1, #0 + ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */ + pld [src, #32] + bne.w .L_misaligned8 + mov const_0, #0 + mov result, #-8 +.L_loop_aligned: + /* Bytes 0-7. */ + ldrd data1a, data1b, [src] + pld [src, #64] + add result, result, #8 +.L_start_realigned: + uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ + sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ + uadd8 data1b, data1b, const_m1 + sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ + cbnz data1b, .L_null_found + + /* Bytes 8-15. */ + ldrd data1a, data1b, [src, #8] + uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ + add result, result, #8 + sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ + uadd8 data1b, data1b, const_m1 + sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ + cbnz data1b, .L_null_found + + /* Bytes 16-23. */ + ldrd data1a, data1b, [src, #16] + uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ + add result, result, #8 + sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ + uadd8 data1b, data1b, const_m1 + sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ + cbnz data1b, .L_null_found + + /* Bytes 24-31. 
*/ + ldrd data1a, data1b, [src, #24] + add src, src, #32 + uadd8 data1a, data1a, const_m1 /* Saturating GE<0:3> set. */ + add result, result, #8 + sel data1a, const_0, const_m1 /* Select based on GE<0:3>. */ + uadd8 data1b, data1b, const_m1 + sel data1b, data1a, const_m1 /* Only used if d1a == 0. */ + cmp data1b, #0 + beq .L_loop_aligned + +.L_null_found: + cmp data1a, #0 + itt eq + addeq result, result, #4 + moveq data1a, data1b +#ifndef __ARMEB__ + rev data1a, data1a +#endif + clz data1a, data1a + ldrd r4, r5, [sp], #8 + add result, result, data1a, lsr #3 /* Bits -> Bytes. */ bx lr -.L_sub2_and_return: - sub r0, r0, #2 - bx lr +.L_misaligned8: + ldrd data1a, data1b, [src] + and tmp2, tmp1, #3 + rsb result, tmp1, #0 + lsl tmp2, tmp2, #3 /* Bytes -> bits. */ + tst tmp1, #4 + pld [src, #64] + S2HI tmp2, const_m1, tmp2 + orn data1a, data1a, tmp2 + itt ne + ornne data1b, data1b, tmp2 + movne data1a, const_m1 + mov const_0, #0 + b .L_start_realigned END(strlen_a15) From c89bba80da786ce048b9a4438d9b55418b0352d3 Mon Sep 17 00:00:00 2001 From: dkati Date: Sun, 14 Oct 2018 02:07:04 +0300 Subject: [PATCH 09/16] RELAND: bionic: Enable -O3 Change-Id: Ia82860326de114d48dea0e8cd93b836f1e826e15 Signed-off-by: Adithya R --- benchmarks/Android.bp | 3 ++- libc/Android.bp | 2 ++ libdl/Android.bp | 2 ++ libm/Android.bp | 1 + linker/Android.bp | 3 +++ 5 files changed, 10 insertions(+), 1 deletion(-) diff --git a/benchmarks/Android.bp b/benchmarks/Android.bp index 5dfc38f5c..727803fd8 100644 --- a/benchmarks/Android.bp +++ b/benchmarks/Android.bp @@ -33,7 +33,7 @@ license { cc_defaults { name: "bionic-benchmarks-defaults", cflags: [ - "-O2", + "-O3", "-fno-builtin", "-Wall", "-Wextra", @@ -86,6 +86,7 @@ cc_defaults { "-Wextra", "-Werror", "-Wunused", + "-O3", ], } diff --git a/libc/Android.bp b/libc/Android.bp index 0ce07dbc2..2d382ac44 100644 --- a/libc/Android.bp +++ b/libc/Android.bp @@ -55,6 +55,7 @@ libc_common_flags = [ "-Wno-deprecated-declarations", "-Wno-gcc-compat", 
"-Wframe-larger-than=2048", + "-O3", // Try to catch typical 32-bit assumptions that break with 64-bit pointers. "-Werror=pointer-to-int-cast", @@ -2099,6 +2100,7 @@ cc_defaults { "-Wno-gcc-compat", "-Wall", "-Werror", + "-O3", ], sanitize: { never: true, diff --git a/libdl/Android.bp b/libdl/Android.bp index 750a6e26f..19e1b5280 100644 --- a/libdl/Android.bp +++ b/libdl/Android.bp @@ -34,6 +34,7 @@ cc_library_static { "-Wextra", "-Wunused", "-Werror", + "-O3", ], // For private/CFIShadow.h. @@ -197,6 +198,7 @@ cc_library { "-Wextra", "-Wunused", "-Werror", + "-O3", ], stl: "none", diff --git a/libm/Android.bp b/libm/Android.bp index 6c3abd129..7b40a95ff 100644 --- a/libm/Android.bp +++ b/libm/Android.bp @@ -499,6 +499,7 @@ cc_library { "-Wno-unknown-pragmas", "-Wno-unused-const-variable", "-Wno-unused-variable", + "-O3", ], ldflags: [ diff --git a/linker/Android.bp b/linker/Android.bp index 4a5bf44a6..55c56f91c 100644 --- a/linker/Android.bp +++ b/linker/Android.bp @@ -48,6 +48,7 @@ cc_object { "-Wextra", "-Wno-unused", "-Werror", + "-O3", ], srcs: [ @@ -97,6 +98,7 @@ cc_defaults { "-Wextra", "-Wunused", "-Werror", + "-O3", ], // TODO: split out the asflags. 
@@ -462,6 +464,7 @@ cc_library { "-Wextra", "-Wunused", "-Werror", + "-O3", ], stl: "none", From 0c23b75f9602400736fad1850d784b1e071a3b80 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Sat, 20 Mar 2021 22:02:38 -0700 Subject: [PATCH 10/16] Switch to jemalloc memory allocator Overall, jemalloc performs significantly better than Scudo in Bionic's real-world memory_replay traces (all times in milliseconds): +----------------+-------+----------+ | Trace | Scudo | jemalloc | +----------------+-------+----------+ | SQL | 27 | 21 | | Angry Birds 2 | 2236 | 1501 | | Camera | 4251 | 979 | | Candy Crush | 2197 | 1325 | | Gmail | 594 | 463 | | Maps | 434 | 344 | | Photos | 1330 | 477 | | PUBG | 666 | 416 | | surfaceflinger | 221 | 192 | | system_server | 1921 | 1416 | | SystemUI | 102 | 79 | | YouTube | 363 | 294 | +----------------+-------+----------+ jemalloc also tends to use slightly less memory than Scudo for most traces. These tests were conducted on desktop x86 Linux with glibc and the latest stable version of each allocator, but they should still be relevant. 
RSS values in KiB: +----------------+--------+----------+ | Trace | Scudo | jemalloc | +----------------+--------+----------+ | Angry Birds 2 | 793948 | 746992 | | Camera | 219372 | 251888 | | Candy Crush | 548288 | 550148 | | Gmail | 195236 | 193048 | | Maps | 159860 | 159816 | | Photos | 175436 | 171872 | | PUBG | 233752 | 223572 | | surfaceflinger | 94736 | 107068 | | system_server | 471048 | 484392 | | SystemUI | 54432 | 60740 | | YouTube | 139376 | 142252 | +----------------+--------+----------+ While not representative of real-world usage, jemalloc also performs fairly well in synthetic benchmarks (all times in nanoseconds): +-----------------+---------+----------+ | Benchmark | Scudo | jemalloc | +-----------------+---------+----------+ | alloc 8 | 87.9 | 60.1 | | alloc 16 | 87.9 | 60 | | alloc 32 | 88.6 | 60.7 | | alloc 64 | 88.6 | 59.7 | | alloc 512 | 89.2 | 60 | | alloc 1024 | 89.4 | 59.8 | | alloc 8192 | 89.8 | 65.2 | | alloc 16384 | 92.7 | 69.1 | | alloc 32768 | 97.2 | 74 | | alloc 65536 | 109 | 83.8 | | alloc 131072 | 41536 | 42720 | | alloc40x 8 | 2156 | 2556 | | alloc40x 16 | 2155 | 2244 | | alloc40x 32 | 2234 | 2312 | | alloc40x 64 | 2234 | 2289 | | alloc40x 512 | 2274 | 8171 | | alloc40x 1024 | 2397 | 2162 | | alloc40x 8192 | 3550 | 78880 | | alloc40x 16384 | 3732 | 124454 | | alloc40x 32768 | 3849 | 275460 | | alloc40x 65536 | 4987 | 727598 | | alloc40x 131072 | 2745207 | 3067980 | | alloc8192 1x | 464 | 454 | | alloc8192 2x | 510 | 488 | | alloc8192 3x | 587 | 523 | | alloc8192 4x | 665 | 557 | | alloc8192 5x | 742 | 598 | | alloc8192 6x | 818 | 633 | | alloc8192 7x | 884 | 669 | | alloc8192 8x | 960 | 699 | | alloc8192 9x | 1045 | 734 | | alloc8192 10x | 1131 | 770 | | alloc8192 11x | 1207 | 806 | | alloc8192 12x | 1282 | 841 | | alloc8192 13x | 1363 | 877 | | alloc8192 14x | 1442 | 912 | | alloc8192 15x | 1512 | 944 | | alloc8192 16x | 1587 | 978 | | alloc8192 24x | 2256 | 21195 | | alloc8192 32x | 2867 | 45446 | | alloc8192 40x | 3522 | 71618 
| | alloc8192 48x | 4126 | 89740 | | alloc8192 56x | 4786 | 114990 | | alloc8192 64x | 5412 | 141082 | | alloc8192 72x | 6049 | 170742 | | alloc8192 80x | 6712 | 198480 | | alloc8192 88x | 7331 | 221557 | | alloc8192 96x | 7976 | 251462 | | alloc8192 104x | 8581 | 281626 | | alloc8192 112x | 9245 | 313164 | | alloc8192 120x | 9914 | 353147 | | alloc8192 128x | 10514 | 376625 | | alloc8192 136x | 11187 | 408194 | | alloc8192 144x | 11802 | 445694 | | alloc8192 160x | 13083 | 514547 | | alloc8192 176x | 14414 | 582501 | | alloc8192 192x | 15746 | 654346 | | alloc8192 208x | 17044 | 712620 | | alloc8192 224x | 18405 | 769963 | | alloc8192 240x | 19744 | 843969 | | alloc8192 256x | 21160 | 917803 | +-----------------+---------+----------+ Scudo performs fairly well for a hardened memory allocator, but we're optimizing for performance. Full benchmark data with graphs: https://docs.google.com/spreadsheets/d/1LG_kxaK5cI14gGtnyM-nNNmfpMdV9Vh-LtYoq7H5J4s/edit Change-Id: Ia4901eedfaa2c9779678c5b6532979de4919ee01 --- libc/Android.bp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/libc/Android.bp b/libc/Android.bp index 2d382ac44..bd439c94a 100644 --- a/libc/Android.bp +++ b/libc/Android.bp @@ -120,9 +120,6 @@ cc_defaults { malloc_pattern_fill_contents: { cflags: ["-DSCUDO_PATTERN_FILL_CONTENTS"], }, - malloc_not_svelte: { - cflags: ["-DUSE_SCUDO"], - }, }, } @@ -151,7 +148,6 @@ cc_defaults { "libc_jemalloc_wrapper", ], header_libs: ["gwp_asan_headers"], - product_variables: libc_scudo_product_variables, } // Functions not implemented by jemalloc directly, or that need to From 857a1fcada665d94a00cdb296d209e0c0a87cbc6 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Thu, 18 Nov 2021 17:57:38 -0800 Subject: [PATCH 11/16] [master] strftime: format small positive integers ourselves. A decent chunk of the logcat profile is spent formatting the timestamps for each line, and most of that time was going to snprintf(3). 
We should find all the places that could benefit from a lighter-weight "format an integer" and share something between them, but this is easy for now. Before: ----------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------- BM_time_strftime 781 ns 775 ns 893102 After: ----------------------------------------------------------- Benchmark Time CPU Iterations ----------------------------------------------------------- BM_time_strftime 170 ns 168 ns 4139487 Much of the remaining time is in tzset() which seems unfortunate. Test: treehugger Change-Id: Ie0f7ee462ff1b1abea6f87d4a9a996d768e51056 --- benchmarks/time_benchmark.cpp | 10 +++ libc/tzcode/strftime.c | 126 +++++++++++++++++++++------------- 2 files changed, 88 insertions(+), 48 deletions(-) diff --git a/benchmarks/time_benchmark.cpp b/benchmarks/time_benchmark.cpp index 437dc7812..a765e3efe 100644 --- a/benchmarks/time_benchmark.cpp +++ b/benchmarks/time_benchmark.cpp @@ -187,3 +187,13 @@ void BM_time_localtime_r(benchmark::State& state) { } } BIONIC_BENCHMARK(BM_time_localtime_r); + +void BM_time_strftime(benchmark::State& state) { + char buf[128]; + time_t t = 0; + struct tm* tm = gmtime(&t); + while (state.KeepRunning()) { + strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S", tm); + } +} +BIONIC_BENCHMARK(BM_time_strftime); diff --git a/libc/tzcode/strftime.c b/libc/tzcode/strftime.c index c05f6b5bd..2e2b7c62a 100644 --- a/libc/tzcode/strftime.c +++ b/libc/tzcode/strftime.c @@ -252,8 +252,8 @@ _fmt(const char *format, const struct tm *t, char *pt, pt = _fmt("%m/%d/%y", t, pt, ptlim, warnp); continue; case 'd': - pt = _conv(t->tm_mday, getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv(t->tm_mday, getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 'E': case 'O': /* @@ -274,22 +274,21 @@ _fmt(const char *format, const struct tm *t, char *pt, modifier = *format; goto label; 
case 'e': - pt = _conv(t->tm_mday, getformat(modifier, "%2d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv(t->tm_mday, getformat(modifier, " 2", " 2", " ", "02"), pt, ptlim); + continue; case 'F': pt = _fmt("%Y-%m-%d", t, pt, ptlim, warnp); continue; case 'H': - pt = _conv(t->tm_hour, getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv(t->tm_hour, getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 'I': - pt = _conv((t->tm_hour % 12) ? - (t->tm_hour % 12) : 12, - getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv((t->tm_hour % 12) ? (t->tm_hour % 12) : 12, + getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 'j': - pt = _conv(t->tm_yday + 1, getformat(modifier, "%03d", "%3d", "%d", "%03d"), pt, ptlim); - continue; + pt = _conv(t->tm_yday + 1, getformat(modifier, "03", " 3", " ", "03"), pt, ptlim); + continue; case 'k': /* ** This used to be... @@ -301,7 +300,7 @@ _fmt(const char *format, const struct tm *t, char *pt, ** "%l" have been swapped. ** (ado, 1993-05-24) */ - pt = _conv(t->tm_hour, getformat(modifier, "%2d", "%2d", "%d", "%02d"), pt, ptlim); + pt = _conv(t->tm_hour, getformat(modifier, " 2", " 2", " ", "02"), pt, ptlim); continue; #ifdef KITCHEN_SINK case 'K': @@ -321,16 +320,15 @@ _fmt(const char *format, const struct tm *t, char *pt, ** "%l" have been swapped. ** (ado, 1993-05-24) */ - pt = _conv((t->tm_hour % 12) ? - (t->tm_hour % 12) : 12, - getformat(modifier, "%2d", "%2d", "%d", "%02d"), pt, ptlim); + pt = _conv((t->tm_hour % 12) ? 
(t->tm_hour % 12) : 12, + getformat(modifier, " 2", " 2", " ", "02"), pt, ptlim); continue; case 'M': - pt = _conv(t->tm_min, getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv(t->tm_min, getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 'm': - pt = _conv(t->tm_mon + 1, getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv(t->tm_mon + 1, getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 'n': pt = _add("\n", pt, ptlim, modifier); continue; @@ -348,13 +346,12 @@ _fmt(const char *format, const struct tm *t, char *pt, pt = _fmt("%I:%M:%S %p", t, pt, ptlim, warnp); continue; case 'S': - pt = _conv(t->tm_sec, getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv(t->tm_sec, getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 's': { struct tm tm; - char buf[INT_STRLEN_MAXIMUM( - time64_t) + 1]; + char buf[INT_STRLEN_MAXIMUM(time64_t) + 1] __attribute__((__uninitialized__)); time64_t mkt; tm = *t; @@ -374,10 +371,9 @@ _fmt(const char *format, const struct tm *t, char *pt, pt = _add("\t", pt, ptlim, modifier); continue; case 'U': - pt = _conv((t->tm_yday + DAYSPERWEEK - - t->tm_wday) / DAYSPERWEEK, - getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv((t->tm_yday + DAYSPERWEEK - t->tm_wday) / DAYSPERWEEK, + getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 'u': /* ** From Arnold Robbins' strftime version 3.0: @@ -385,9 +381,7 @@ _fmt(const char *format, const struct tm *t, char *pt, ** [1 (Monday) - 7]" ** (ado, 1993-05-24) */ - pt = _conv((t->tm_wday == 0) ? - DAYSPERWEEK : t->tm_wday, - "%d", pt, ptlim); + pt = _conv((t->tm_wday == 0) ? 
DAYSPERWEEK : t->tm_wday, " ", pt, ptlim); continue; case 'V': /* ISO 8601 week number */ case 'G': /* ISO 8601 year (four digits) */ @@ -467,8 +461,7 @@ _fmt(const char *format, const struct tm *t, char *pt, w = 53; #endif /* defined XPG4_1994_04_09 */ if (*format == 'V') - pt = _conv(w, getformat(modifier, "%02d", "%2d", "%d", "%02d"), - pt, ptlim); + pt = _conv(w, getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); else if (*format == 'g') { *warnp = IN_ALL; pt = _yconv(year, base, @@ -488,15 +481,14 @@ _fmt(const char *format, const struct tm *t, char *pt, pt = _fmt("%e-%b-%Y", t, pt, ptlim, warnp); continue; case 'W': - pt = _conv((t->tm_yday + DAYSPERWEEK - - (t->tm_wday ? - (t->tm_wday - 1) : - (DAYSPERWEEK - 1))) / DAYSPERWEEK, - getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); - continue; + pt = _conv( + (t->tm_yday + DAYSPERWEEK - (t->tm_wday ? (t->tm_wday - 1) : (DAYSPERWEEK - 1))) / + DAYSPERWEEK, + getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); + continue; case 'w': - pt = _conv(t->tm_wday, "%d", pt, ptlim); - continue; + pt = _conv(t->tm_wday, " ", pt, ptlim); + continue; case 'X': pt = _fmt(Locale->X_fmt, t, pt, ptlim, warnp); continue; @@ -602,7 +594,7 @@ _fmt(const char *format, const struct tm *t, char *pt, diff /= SECSPERMIN; diff = (diff / MINSPERHOUR) * 100 + (diff % MINSPERHOUR); - pt = _conv(diff, getformat(modifier, "%04d", "%4d", "%d", "%04d"), pt, ptlim); + pt = _conv(diff, getformat(modifier, "04", " 4", " ", "04"), pt, ptlim); } continue; case '+': @@ -629,10 +621,46 @@ _fmt(const char *format, const struct tm *t, char *pt, static char * _conv(int n, const char *format, char *pt, const char *ptlim) { - char buf[INT_STRLEN_MAXIMUM(int) + 1]; - - snprintf(buf, sizeof(buf), format, n); - return _add(buf, pt, ptlim, 0); + // The original implementation used snprintf(3) here, but rolling our own is + // about 5x faster. 
Seems like a good trade-off for so little code, especially + // for users like logcat that have a habit of formatting 10k times all at + // once... + + // Format is '0' or ' ' for the fill character, followed by a single-digit + // width or ' ' for "whatever". + // %d -> " " + // %2d -> " 2" + // %02d -> "02" + char fill = format[0]; + int width = format[1] == ' ' ? 0 : format[1] - '0'; + + char buf[32] __attribute__((__uninitialized__)); + char* p = buf; + + // Output digits while we have them. + if (n == 0) *p++ = '0'; // Special-case zero. + while (n) { + unsigned d = n % 10; + n /= 10; + *p++ = d + '0'; + } + // Fill if required. + while ((p - buf) < width) { + *p++ = fill; + } + + // Reverse in-place. + size_t length = p - buf; + for (size_t i = 0, j = length - 1; i < j; ++i, --j) { + char ch = buf[i]; + buf[i] = buf[j]; + buf[j] = ch; + } + + // Terminate. + buf[length] = '\0'; + + return _add(buf, pt, ptlim, 0); } static char * @@ -704,9 +732,11 @@ _yconv(int a, int b, bool convert_top, bool convert_yy, if (convert_top) { if (lead == 0 && trail < 0) pt = _add("-0", pt, ptlim, modifier); - else pt = _conv(lead, getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); + else + pt = _conv(lead, getformat(modifier, "02", " 2", " ", "02"), pt, ptlim); } if (convert_yy) - pt = _conv(((trail < 0) ? -trail : trail), getformat(modifier, "%02d", "%2d", "%d", "%02d"), pt, ptlim); + pt = _conv(((trail < 0) ? -trail : trail), getformat(modifier, "02", " 2", " ", "02"), pt, + ptlim); return pt; } From 28604946c83e1317f61c44d91abb88260b7d70e8 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Thu, 2 Dec 2021 14:42:16 -0800 Subject: [PATCH 12/16] [master] Use -fno-builtin for libc and libm. clang was getting in the way of a strftime(3) optimization, and smaller hammers weren't working, and this seems like the right choice for libc anyway? If we have code that can usefully be optimized, we should do it in the source. 
In general, though, no libc/libm author should be ignorant of memset(3) or memcpy(3), and would have used it themselves if it made sense. (And the compiler isn't using profiling data or anything; it's just always assuming it should use the functions, and doesn't consider whether the cost of the calls can be amortized or not.) Test: treehugger Change-Id: Ia7e22623e47bfbfcfe46c1af0d95ef7e8669c0f6 --- libc/Android.bp | 7 +++++++ libm/Android.bp | 1 + 2 files changed, 8 insertions(+) diff --git a/libc/Android.bp b/libc/Android.bp index bd439c94a..cafd3f291 100644 --- a/libc/Android.bp +++ b/libc/Android.bp @@ -69,6 +69,13 @@ libc_common_flags = [ // GWP-ASan requires platform TLS. "-fno-emulated-tls", + + // We know clang does a lot of harm by rewriting what we've said, and sadly + // never see any good it does, so let's just ask it to do what we say... + // (The specific motivating example was clang turning a loop that would only + // ever touch 0, 1, or 2 bytes into a call to memset, which was never going + // to amortize.) + "-fno-builtin", ] // Define some common cflags diff --git a/libm/Android.bp b/libm/Android.bp index 7b40a95ff..03acb5204 100644 --- a/libm/Android.bp +++ b/libm/Android.bp @@ -489,6 +489,7 @@ cc_library { "-D_BSD_SOURCE", "-DFLT_EVAL_METHOD=0", "-include freebsd-compat.h", + "-fno-builtin", "-fno-math-errno", "-Wall", "-Werror", From 89623335ba45a991f71260c8102f8b8d0ccdae43 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Tue, 16 Nov 2021 11:03:19 -0800 Subject: [PATCH 13/16] [master] Optimize the mbs fast path slightly. From a logcat profile: ``` |--95.06%-- convertPrintable(char*, char const*, unsigned long) | |--13.95%-- [hit in function] | | | |--35.96%-- mbrtoc32 | | |--82.72%-- [hit in function] | | | | | |--11.07%-- mbsinit | | | | | |--5.96%-- @plt ``` I think we'd assumed that mbsinit() would be inlined, but since these functions aren't all in wchar.cpp it wasn't being. 
This change moves the implementation into a (more clearly named) inline function so we can trivially reclaim that 11%+6%. Benchmarks before: ``` ------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- BM_stdlib_mbrtowc_1 8.03 ns 7.95 ns 87144997 BM_stdlib_mbrtowc_2 22.0 ns 21.8 ns 32002437 BM_stdlib_mbrtowc_3 30.0 ns 29.7 ns 23517699 BM_stdlib_mbrtowc_4 37.4 ns 37.1 ns 18895204 BM_stdlib_mbstowcs_ascii 792373 ns 782484 ns 890 bytes_per_second=609.389M/s BM_stdlib_mbstowcs_wide 15836785 ns 15678316 ns 44 bytes_per_second=30.4138M/s ``` Benchmarks after: ``` ------------------------------------------------------------------- Benchmark Time CPU Iterations ------------------------------------------------------------------- BM_stdlib_mbrtowc_1 5.76 ns 5.72 ns 121863813 BM_stdlib_mbrtowc_2 17.1 ns 16.9 ns 41487260 BM_stdlib_mbrtowc_3 24.2 ns 24.0 ns 29141629 BM_stdlib_mbrtowc_4 30.3 ns 30.1 ns 23229291 BM_stdlib_mbstowcs_ascii 783506 ns 775389 ns 903 bytes_per_second=614.965M/s BM_stdlib_mbstowcs_wide 12787003 ns 12672642 ns 55 bytes_per_second=37.6273M/s ``` Bug: http://b/206523398 Test: treehugger Change-Id: If8c6c39880096ddd2cbd323c68dca82e9849ace6 Signed-off-by: Vishalcj17 --- libc/bionic/c16rtomb.cpp | 2 +- libc/bionic/c32rtomb.cpp | 2 +- libc/bionic/mbrtoc32.cpp | 4 ++-- libc/bionic/wchar.cpp | 4 ++-- libc/private/bionic_mbstate.h | 4 ++++ 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/libc/bionic/c16rtomb.cpp b/libc/bionic/c16rtomb.cpp index 2d6ae938c..e052d0470 100644 --- a/libc/bionic/c16rtomb.cpp +++ b/libc/bionic/c16rtomb.cpp @@ -43,7 +43,7 @@ static inline constexpr bool is_low_surrogate(char16_t c16) { size_t c16rtomb(char* s, char16_t c16, mbstate_t* ps) { static mbstate_t __private_state; mbstate_t* state = (ps == nullptr) ? 
&__private_state : ps; - if (mbsinit(state)) { + if (mbstate_is_initial(state)) { if (is_high_surrogate(c16)) { char32_t c32 = (c16 & ~0xd800) << 10; mbstate_set_byte(state, 3, (c32 & 0xff0000) >> 16); diff --git a/libc/bionic/c32rtomb.cpp b/libc/bionic/c32rtomb.cpp index 2909d8b36..d2519b96f 100644 --- a/libc/bionic/c32rtomb.cpp +++ b/libc/bionic/c32rtomb.cpp @@ -50,7 +50,7 @@ size_t c32rtomb(char* s, char32_t c32, mbstate_t* ps) { return mbstate_reset_and_return(1, state); } - if (!mbsinit(state)) { + if (!mbstate_is_initial(state)) { return mbstate_reset_and_return_illegal(EILSEQ, state); } diff --git a/libc/bionic/mbrtoc32.cpp b/libc/bionic/mbrtoc32.cpp index 644e54275..21603a120 100644 --- a/libc/bionic/mbrtoc32.cpp +++ b/libc/bionic/mbrtoc32.cpp @@ -55,7 +55,7 @@ size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps) { } uint8_t ch; - if (mbsinit(state) && (((ch = static_cast(*s)) & ~0x7f) == 0)) { + if (mbstate_is_initial(state) && (((ch = static_cast(*s)) & ~0x7f) == 0)) { // Fast path for plain ASCII characters. if (pc32 != nullptr) { *pc32 = ch; @@ -105,7 +105,7 @@ size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps) { size_t bytes_wanted = length - bytes_so_far; size_t i; for (i = 0; i < MIN(bytes_wanted, n); i++) { - if (!mbsinit(state) && ((*s & 0xc0) != 0x80)) { + if (!mbstate_is_initial(state) && ((*s & 0xc0) != 0x80)) { // Malformed input; bad characters in the middle of a character. 
return mbstate_reset_and_return_illegal(EILSEQ, state); } diff --git a/libc/bionic/wchar.cpp b/libc/bionic/wchar.cpp index dabe82485..bd9a45ee5 100644 --- a/libc/bionic/wchar.cpp +++ b/libc/bionic/wchar.cpp @@ -54,7 +54,7 @@ // int mbsinit(const mbstate_t* ps) { - return (ps == nullptr || (*(reinterpret_cast(ps->__seq)) == 0)); + return ps == nullptr || mbstate_is_initial(ps); } size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* ps) { @@ -148,7 +148,7 @@ size_t wcsnrtombs(char* dst, const wchar_t** src, size_t nwc, size_t len, mbstat static mbstate_t __private_state; mbstate_t* state = (ps == nullptr) ? &__private_state : ps; - if (!mbsinit(state)) { + if (!mbstate_is_initial(state)) { return mbstate_reset_and_return_illegal(EILSEQ, state); } diff --git a/libc/private/bionic_mbstate.h b/libc/private/bionic_mbstate.h index 352115aa7..243b2201b 100644 --- a/libc/private/bionic_mbstate.h +++ b/libc/private/bionic_mbstate.h @@ -44,6 +44,10 @@ __BEGIN_DECLS #define __MB_IS_ERR(rv) (rv == __MB_ERR_ILLEGAL_SEQUENCE || \ rv == __MB_ERR_INCOMPLETE_SEQUENCE) +static inline __wur bool mbstate_is_initial(const mbstate_t* ps) { + return *(reinterpret_cast(ps->__seq)) == 0; +} + static inline __wur size_t mbstate_bytes_so_far(const mbstate_t* ps) { return (ps->__seq[2] != 0) ? 3 : From 1294eb041746beab3d6056facf9708c6e39e5ec0 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Tue, 26 Oct 2021 17:31:03 -0700 Subject: [PATCH 14/16] [master] Don't open /dev/null until we need to. This saves a couple of syscalls in the common case, and also lets static binaries run in a chroot without /dev/null as long as stdin/stdout/stderr are actually connected to something (which the toybox maintainer tried to do). 
Test: manual with strace Change-Id: Ic9a28896a07304a3bd428acfd9ddca9d22015f6e Signed-off-by: Vishalcj17 --- libc/bionic/libc_init_common.cpp | 55 +++++++++----------------------- 1 file changed, 15 insertions(+), 40 deletions(-) diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp index dd623a529..8084e73d1 100644 --- a/libc/bionic/libc_init_common.cpp +++ b/libc/bionic/libc_init_common.cpp @@ -149,50 +149,25 @@ __noreturn static void __early_abort(int line) { _exit(EXIT_FAILURE); } -// Force any of the closed stdin, stdout and stderr to be associated with /dev/null. +// Force any of the stdin/stdout/stderr file descriptors that aren't +// open to be associated with /dev/null. static void __nullify_closed_stdio() { - int dev_null = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR)); - if (dev_null == -1) { - // init won't have /dev/null available, but SELinux provides an equivalent. - dev_null = TEMP_FAILURE_RETRY(open("/sys/fs/selinux/null", O_RDWR)); - } - if (dev_null == -1) { - __early_abort(__LINE__); - } - - // If any of the stdio file descriptors is valid and not associated - // with /dev/null, dup /dev/null to it. for (int i = 0; i < 3; i++) { - // If it is /dev/null already, we are done. - if (i == dev_null) { - continue; - } - - // Is this fd already open? - int status = TEMP_FAILURE_RETRY(fcntl(i, F_GETFL)); - if (status != -1) { - continue; - } - - // The only error we allow is that the file descriptor does not - // exist, in which case we dup /dev/null to it. - if (errno == EBADF) { - // Try dupping /dev/null to this stdio file descriptor and - // repeat if there is a signal. Note that any errors in closing - // the stdio descriptor are lost. - status = TEMP_FAILURE_RETRY(dup2(dev_null, i)); - if (status == -1) { + if (TEMP_FAILURE_RETRY(fcntl(i, F_GETFL)) == -1) { + // The only error we allow is that the file descriptor does not exist. 
+ if (errno != EBADF) __early_abort(__LINE__); + + // This file descriptor wasn't open, so open /dev/null. + // init won't have /dev/null available, but SELinux provides an equivalent. + // This takes advantage of the fact that open() will take the lowest free + // file descriptor, and we're iterating in order from 0, but we'll + // double-check we got the right fd anyway... + int fd; + if (((fd = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR))) == -1 && + (fd = TEMP_FAILURE_RETRY(open("/sys/fs/selinux/null", O_RDWR))) == -1) || + fd != i) { __early_abort(__LINE__); } - } else { - __early_abort(__LINE__); - } - } - - // If /dev/null is not one of the stdio file descriptors, close it. - if (dev_null > 2) { - if (close(dev_null) == -1) { - __early_abort(__LINE__); } } } From 75afb93e82c8d745857e83c8cec28bd1ef829d78 Mon Sep 17 00:00:00 2001 From: Jake Weinstein Date: Thu, 30 May 2019 11:43:07 -0500 Subject: [PATCH 15/16] [master] libc: Set __bionic_asm_align to 64 for arm and arm64 This way, critical string functions are always at the start of a cacheline. 
Change-Id: I049e88d88a043911093641f44d9846fa5f6f3982 Suggested-By: Wilco Dijkstra Test: TBD Signed-off-by: Vishalcj17 --- libc/private/bionic_asm_arm.h | 2 +- libc/private/bionic_asm_arm64.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/private/bionic_asm_arm.h b/libc/private/bionic_asm_arm.h index d8381d32b..9ca5f387d 100644 --- a/libc/private/bionic_asm_arm.h +++ b/libc/private/bionic_asm_arm.h @@ -37,7 +37,7 @@ #pragma once -#define __bionic_asm_align 0 +#define __bionic_asm_align 64 #undef __bionic_asm_custom_entry #undef __bionic_asm_custom_end diff --git a/libc/private/bionic_asm_arm64.h b/libc/private/bionic_asm_arm64.h index ee51a8e78..c0b969cbd 100644 --- a/libc/private/bionic_asm_arm64.h +++ b/libc/private/bionic_asm_arm64.h @@ -37,7 +37,7 @@ #pragma once -#define __bionic_asm_align 16 +#define __bionic_asm_align 64 #undef __bionic_asm_function_type #define __bionic_asm_function_type %function From 870d75d5a4b25295421776b7911b34a2b2c0d9f2 Mon Sep 17 00:00:00 2001 From: Prakhar Bahuguna Date: Mon, 10 Apr 2017 00:41:23 +0200 Subject: [PATCH 16/16] [master] libc: arm: Optimise memchr for NEON-enabled processors This optimization is extracted from cortex-strings and bionic-ized, and applied to arm-v7a cpus. 
Stringbench results https://android.git.linaro.org/gitweb/platform/external/stringbench.git ----------------------------------------------------------------------- Nexus 6P (MSM8994): Before: 15000 chars: 154.493394 seconds 5000 chars: 51.545608 seconds After: 15000 chars: 18.374188 seconds (88% improvement) 5000 chars: 7.494449 seconds (85% improvement) ------------------------------------------------------------------------ OnePlus 3 (MSM8996): Before: 15000 chars: 166.577121 seconds 5000 chars: 60.121731 seconds After: 15000 chars: 13.684960 seconds (91% improvement) 5000 chars: 5.413961 seconds (90% improvement) ------------------------------------------------------------------------ Razer Phone (MSM8998): Before: 15000 chars: 215.932986 seconds 5000 chars: 72.147830 seconds After: 15000 chars: 17.342402 seconds (92% improvement) 5000 chars: 4.397512 seconds (94% improvement) ------------------------------------------------------------------------- Change-Id: I1c3fb0c89ce2b3ee7e44f492367b6caf6db58ccf Signed-off-by: Yingshiuan Pan Signed-off-by: Vishalcj17 --- libc/Android.bp | 2 + libc/arch-arm/generic/bionic/memchr.S | 201 ++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 libc/arch-arm/generic/bionic/memchr.S diff --git a/libc/Android.bp b/libc/Android.bp index cafd3f291..25abbcade 100644 --- a/libc/Android.bp +++ b/libc/Android.bp @@ -643,6 +643,7 @@ cc_library_static { "upstream-openbsd/lib/libc/string/strcpy.c", "upstream-openbsd/lib/libc/string/stpcpy.c", "upstream-openbsd/lib/libc/string/strcat.c", + "upstream-openbsd/lib/libc/string/memchr.c", ], }, arm64: { @@ -835,6 +836,7 @@ cc_library_static { arm: { asflags: libc_common_flags + ["-mno-restrict-it"], srcs: [ + "arch-arm/generic/bionic/memchr.S", "arch-arm/generic/bionic/memcmp.S", "arch-arm/generic/bionic/memmove.S", "arch-arm/generic/bionic/memset.S", diff --git a/libc/arch-arm/generic/bionic/memchr.S b/libc/arch-arm/generic/bionic/memchr.S new file mode 100644 index
000000000..5a78da280 --- /dev/null +++ b/libc/arch-arm/generic/bionic/memchr.S @@ -0,0 +1,201 @@ +/* Copyright (c) 2015 ARM Ltd. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the Linaro nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ + + +#include + + .syntax unified + .arch armv7-a + .fpu neon + .text + .thumb + .thumb_func + .p2align 4,,15 + .align 4 + +/* Arguments: buffer start, byte value to find, number of bytes to search */ +#define srcin r0 +#define chrin r1 +#define cntin r2 + +/* Return value: address of the first match, or NULL (0) if there is none */ +#define result r0 /* Live range does not overlap with srcin */ + +/* Working registers */ +#define src r1 /* Live range does not overlap with chrin */ +#define tmp r3 +#define synd r0 /* No overlap with srcin or result */ +#define soff r12 + +/* Working NEON registers */ +#define vrepchr q0 +#define vdata0 q1 +#define vdata0_0 d2 /* Lower half of vdata0 */ +#define vdata0_1 d3 /* Upper half of vdata0 */ +#define vdata1 q2 +#define vdata1_0 d4 /* Lower half of vdata1 */ +#define vdata1_1 d5 /* Upper half of vdata1 */ +#define vrepmask q3 +#define vrepmask0 d6 +#define vrepmask1 d7 +#define vend q4 +#define vend0 d8 +#define vend1 d9 + +/* + * Core algorithm: + * + * For each 32-byte chunk we calculate a 32-bit syndrome value, with one bit per + * byte. Each bit is set if the relevant byte matched the requested character + * and cleared otherwise. Since the bits in the syndrome reflect exactly the + * order in which things occur in the original string, counting trailing zeros + * allows us to identify exactly which byte has matched. + */ + +ENTRY(memchr) + .cfi_sections .debug_frame + /* Use a simple loop if there are fewer than 8 bytes to search. */ + cmp cntin, #7 + bhi .Llargestr + and chrin, chrin, #0xff /* Only the low byte of chrin takes part in the comparison. */ + +.Lsmallstr: + subs cntin, cntin, #1 + blo .Lnotfound /* Return not found if reached end. */ + ldrb tmp, [srcin], #1 + cmp tmp, chrin + bne .Lsmallstr /* Loop again if not found. */ + /* Otherwise undo the post-increment of the load so result points at the match. */ + sub result, result, #1 + bx lr + +.Llargestr: + vdup.8 vrepchr, chrin /* Duplicate char across all lanes. */ + /* + * Magic constant 0x8040201008040201 allows us to identify which lane + * matches the requested byte.
+ */ + movw tmp, #0x0201 + movt tmp, #0x0804 + lsl soff, tmp, #4 /* soff = 0x80402010, the upper word of the constant. */ + vmov vrepmask0, tmp, soff + vmov vrepmask1, tmp, soff + /* Work with aligned 32-byte chunks */ + bic src, srcin, #31 + ands soff, srcin, #31 + beq .Lloopintro /* Go straight to main loop if it's aligned. */ + + /* + * Input string is not 32-byte aligned. We calculate the syndrome + * value for the aligned 32-byte block containing the first bytes + * and mask the irrelevant part. + */ + vld1.8 {vdata0, vdata1}, [src:256]! + sub tmp, soff, #32 + adds cntin, cntin, tmp /* cntin = bytes left after this block (cntin + soff - 32). */ + vceq.i8 vdata0, vdata0, vrepchr + vceq.i8 vdata1, vdata1, vrepchr + vand vdata0, vdata0, vrepmask + vand vdata1, vdata1, vrepmask + vpadd.i8 vdata0_0, vdata0_0, vdata0_1 + vpadd.i8 vdata1_0, vdata1_0, vdata1_1 + vpadd.i8 vdata0_0, vdata0_0, vdata1_0 + vpadd.i8 vdata0_0, vdata0_0, vdata0_0 + vmov.32 synd, vdata0_0[0] + + /* Clear the soff lower bits: they belong to the bytes that precede srcin */ + lsr synd, synd, soff + lsl synd, synd, soff + /* The first block can also be the last (the flags tested are still those of the adds above) */ + bls .Lmasklast + /* Have we found something already? */ + cbnz synd, .Ltail + +.Lloopintro: + vpush {vend} /* vend (q4) is saved here and restored by the vpop at .Lend. */ + /* 264/265 correspond to d8/d9 for q4 */ + .cfi_adjust_cfa_offset 16 + .cfi_rel_offset 264, 0 + .cfi_rel_offset 265, 8 + .p2align 3,,7 + +.Lloop: + vld1.8 {vdata0, vdata1}, [src:256]! + subs cntin, cntin, #32 + vceq.i8 vdata0, vdata0, vrepchr + vceq.i8 vdata1, vdata1, vrepchr + /* If we're out of data we finish regardless of the result. */ + bls .Lend + /* Use a fast check for the termination condition. */ + vorr vend, vdata0, vdata1 + vorr vend0, vend0, vend1 + vmov synd, tmp, vend0 + orrs synd, synd, tmp + /* We're not out of data, loop if we haven't found the character. */ + beq .Lloop + +.Lend: + vpop {vend} + .cfi_adjust_cfa_offset -16 + .cfi_restore 264 + .cfi_restore 265 + + /* Termination condition found, let's calculate the syndrome value.
+ */ + vand vdata0, vdata0, vrepmask + vand vdata1, vdata1, vrepmask + vpadd.i8 vdata0_0, vdata0_0, vdata0_1 + vpadd.i8 vdata1_0, vdata1_0, vdata1_1 + vpadd.i8 vdata0_0, vdata0_0, vdata1_0 + vpadd.i8 vdata0_0, vdata0_0, vdata0_0 + vmov.32 synd, vdata0_0[0] + cbz synd, .Lnotfound + bhi .Ltail /* HI: data remained beyond this block, so it was fully in bounds and needs no masking. */ + +.Lmasklast: + /* Clear the (-cntin) upper bits to avoid out-of-bounds matches. */ + neg cntin, cntin + lsl synd, synd, cntin + lsrs synd, synd, cntin + it eq + moveq src, #0 /* If no match, set src to 0: the clz of 0 below gives 32, cancelling the -32, so the retval is 0. */ + +.Ltail: + /* Count the trailing zeros using bit reversing */ + rbit synd, synd + /* Compensate the last post-increment */ + sub src, src, #32 + /* Count the leading zeros */ + clz synd, synd + /* Compute the potential result and return */ + add result, src, synd + bx lr + +.Lnotfound: + /* Set result to NULL if not found and return */ + mov result, #0 + bx lr + +END(memchr)