[PATCH] build: support app version script

13 views
Skip to first unread message

Waldemar Kozaczuk

unread,
Mar 11, 2022, 2:55:14 PM3/11/22
to osv...@googlegroups.com, Waldemar Kozaczuk
This patch introduces another new build mechanism that allows creating
a custom kernel that exports only the symbols required by a specific application.
Such a kernel benefits from smaller size and better security, as all unneeded
code is removed. This patch addresses the remaining part of the modularization/librarization
functionality as explained in issue #1110 and in this part of the roadmap -
https://github.com/cloudius-systems/osv/wiki/Roadmap#modularizationlibrarization.
This idea was also mentioned in the P99 OSv presentation - see slide 12.

In essence, this patch adds two new scripts that analyse the build
manifest, detect ELF files, identify the symbols required from the OSv kernel,
and finally produce an application-specific version script under
build/last/app_version_script:

- scripts/list_manifest_files.py - reads build/last/usr.manifest and
produces a list of file paths on the host filesystem
- scripts/generate_app_version_script.sh - iterates over the manifest files
produced by list_manifest_files.py, uses objdump to identify the undefined
symbols in the ELF files that are also exported by the OSv kernel, and
finally generates build/last/app_version_script

This patch also makes some modest changes to the main makefile to
support a new parameter - conf_version_script - intended to point to a
custom version script. Please note that this new functionality only
works when building the kernel with most symbols hidden
(conf_hide_symbols=1).

To take advantage of this new feature, one would follow these steps:
1. Build the image for the given application.
2. Run scripts/generate_app_version_script.sh to produce
app_version_script.
3. Re-build the image with a kernel exporting only the symbols needed by
the app, like so:

./scripts/build fs=rofs conf_hide_symbols=1 image=golang-pie-example \
conf_version_script=build/last/app_version_script

The version script generated for the golang ELF lists only 30 symbols.

My experiments show that for many apps this can reduce kernel size by
close to 0.5MB. For example, the size of the kernel tailored to the
golang app above is 3196K vs 3632K for the generic one. Obviously this
feature can be used together with the driver profile to further reduce
kernel size. The kernel produced with the build command below is only 2688K
in size:

./scripts/build fs=rofs conf_hide_symbols=1 image=golang-pie-example \
drivers_profile=virtio-mmio conf_version_script=build/last/app_version_script

Please note that some applications use dlsym() to dynamically resolve
symbols, which would be missed by this technique. In such scenarios,
those symbols would have to be manually added to app_version_script.

Fixes #1110

Signed-off-by: Waldemar Kozaczuk <jwkoz...@gmail.com>
---
Makefile | 31 +++++++---
scripts/generate_app_version_script.sh | 84 ++++++++++++++++++++++++++
scripts/generate_version_script.sh | 3 +
scripts/list_manifest_files.py | 50 +++++++++++++++
4 files changed, 160 insertions(+), 8 deletions(-)
create mode 100755 scripts/generate_app_version_script.sh
create mode 100755 scripts/list_manifest_files.py

diff --git a/Makefile b/Makefile
index c1c0eb84..82885016 100644
--- a/Makefile
+++ b/Makefile
@@ -2036,7 +2036,7 @@ $(out)/dummy-shlib.so: $(out)/dummy-shlib.o
$(call quiet, $(CXX) -nodefaultlibs -shared $(gcc-sysroot) -o $@ $^, LINK $@)

stage1_targets = $(out)/arch/$(arch)/boot.o $(out)/loader.o $(out)/runtime.o $(drivers:%=$(out)/%) $(objects:%=$(out)/%) $(out)/dummy-shlib.so
-stage1: $(stage1_targets) links $(out)/version_script
+stage1: $(stage1_targets) links $(out)/default_version_script
.PHONY: stage1

loader_options_dep = $(out)/arch/$(arch)/loader_options.ld
@@ -2047,20 +2047,35 @@ $(loader_options_dep): stage1
fi

ifeq ($(conf_hide_symbols),1)
+version_script_file:=$(out)/version_script
+#Detect which version script to be used and copy to $(out)/version_script
+#so that loader.elf/kernel.elf is rebuilt accordingly if version script has changed
+ifdef conf_version_script
+ifeq (,$(wildcard $(conf_version_script)))
+ $(error Missing version script: $(conf_version_script))
+endif
+ifneq ($(shell cmp $(out)/version_script $(conf_version_script)),)
+$(shell cp $(conf_version_script) $(out)/version_script)
+endif
+else
+ifneq ($(shell cmp $(out)/version_script $(out)/default_version_script),)
+$(shell cp $(out)/default_version_script $(out)/version_script)
+endif
+endif
linker_archives_options = --no-whole-archive $(libstdc++.a) $(libgcc.a) $(libgcc_eh.a) $(boost-libs) \
- --exclude-libs libstdc++.a --gc-sections --version-script=$(out)/version_script
+ --exclude-libs libstdc++.a --gc-sections
else
linker_archives_options = --whole-archive $(libstdc++.a) $(libgcc_eh.a) $(boost-libs) --no-whole-archive $(libgcc.a)
endif

-$(out)/version_script: exported_symbols/*.symbols exported_symbols/$(arch)/*.symbols
- $(call quiet, scripts/generate_version_script.sh $(out)/version_script, GEN version_script)
+$(out)/default_version_script: exported_symbols/*.symbols exported_symbols/$(arch)/*.symbols
+ $(call quiet, scripts/generate_version_script.sh $(out)/default_version_script, GEN default_version_script)

-$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep)
+$(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \
--defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \
- $(^:%.ld=-T %.ld) \
+ $(patsubst %version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
$(linker_archives_options) $(conf_linker_extra_options), \
LINK loader.elf)
@# Build libosv.so matching this loader.elf. This is not a separate
@@ -2069,11 +2084,11 @@ $(out)/loader.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/bootfs.o $(lo
@scripts/libosv.py $(out)/osv.syms $(out)/libosv.ld `scripts/osv-version.sh` | $(CC) -c -o $(out)/osv.o -x assembler -
$(call quiet, $(CC) $(out)/osv.o -nostdlib -shared -o $(out)/libosv.so -T $(out)/libosv.ld, LIBOSV.SO)

-$(out)/kernel.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/empty_bootfs.o $(loader_options_dep)
+$(out)/kernel.elf: $(stage1_targets) arch/$(arch)/loader.ld $(out)/empty_bootfs.o $(loader_options_dep) $(version_script_file)
$(call quiet, $(LD) -o $@ --defsym=OSV_KERNEL_BASE=$(kernel_base) \
--defsym=OSV_KERNEL_VM_BASE=$(kernel_vm_base) --defsym=OSV_KERNEL_VM_SHIFT=$(kernel_vm_shift) \
-Bdynamic --export-dynamic --eh-frame-hdr --enable-new-dtags -L$(out)/arch/$(arch) \
- $(^:%.ld=-T %.ld) \
+ $(patsubst %version_script,--version-script=%version_script,$(patsubst %.ld,-T %.ld,$^)) \
$(linker_archives_options) $(conf_linker_extra_options), \
LINK kernel.elf)
$(call quiet, $(STRIP) $(out)/kernel.elf -o $(out)/kernel-stripped.elf, STRIP kernel.elf -> kernel-stripped.elf )
diff --git a/scripts/generate_app_version_script.sh b/scripts/generate_app_version_script.sh
new file mode 100755
index 00000000..b1cffe84
--- /dev/null
+++ b/scripts/generate_app_version_script.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+if [[ "$1" == "--help" || "$1" == "-h" ]]; then
+ cat <<-EOF
+Produce version script file under build/last/app_version_script intended
+to build custom kernel exporting only symbols listed in this file.
+
+The script reads default user manifest file - build/last/usr.manifest
+to identify all ELF files - executables and shared libraries - and
+extract names of all symbols required to be exported by OSv kernel.
+
+You can override location of the source manifest and pass its path
+as 1st argument.
+
+Usage: ${0} [<manifest_file_path>]
+
+NOTE: Given that some executables and libraries may dynamically resolve
+symbols using dlsym(), this script would miss to identify those. In this
+case one would have to manually add those symbols to build/last/app_version_script.
+EOF
+ exit 0
+fi
+
+MACHINE=$(uname -m)
+if [ "${MACHINE}" == "x86_64" ]; then
+ ARCH="x64"
+else
+ ARCH="aarch64"
+fi
+
+VERSION_SCRIPT_START=$(cat <<"EOF"
+{
+ global:
+EOF
+)
+
+VERSION_SCRIPT_END=$(cat <<"EOF"
+ local:
+ *;
+};
+EOF
+)
+
+BUILD_DIR=$(dirname $0)/../build/last
+VERSION_SCRIPT_FILE=$(dirname $0)/../build/last/app_version_script
+
+ALL_SYMBOLS_FILE=$BUILD_DIR/all.symbols
+if [[ ! -f $ALL_SYMBOLS_FILE ]]; then
+ echo "Could not find $ALL_SYMBOLS_FILE. Please run build first!"
+ exit 1
+fi
+
+USR_MANIFEST=$1
+if [[ "$USR_MANIFEST" == "" ]]; then
+ USR_MANIFEST=$BUILD_DIR/usr.manifest
+fi
+if [[ ! -f $USR_MANIFEST ]]; then
+ echo "Could not find $USR_MANIFEST. Please run build first!"
+ exit 1
+fi
+
+MANIFEST_FILES=$BUILD_DIR/usr.manifest.files
+echo "Extracting list of files on host from $USR_MANIFEST"
+scripts/list_manifest_files.py > $MANIFEST_FILES
+
+extract_symbols_from_elf()
+{
+ local ELF_PATH=$1
+ echo "/*------- $ELF_PATH */"
+ objdump -wT ${ELF_PATH} | grep UND | cut -c 62- | \
+ sort -d | uniq | comm - ${ALL_SYMBOLS_FILE} -12 | \
+ awk '// { printf(" %s;\n", $0) }' | tee /tmp/generate_app_version_script_symbols
+ if [[ $(grep dlsym /tmp/generate_app_version_script_symbols) != "" ]]; then
+ echo "WARNING: the $ELF_PATH may use dlsym() to dynamically reference symbols!" 1>&2
+ fi
+}
+
+echo "Writing to $VERSION_SCRIPT_FILE ..."
+echo "$VERSION_SCRIPT_START" > $VERSION_SCRIPT_FILE
+
+cat $MANIFEST_FILES | xargs file | grep "ELF 64-bit" | cut --delimiter=: -f 1 | \
+while read file; do extract_symbols_from_elf "$file"; done >> $VERSION_SCRIPT_FILE
+
+echo "$VERSION_SCRIPT_END" >> $VERSION_SCRIPT_FILE
diff --git a/scripts/generate_version_script.sh b/scripts/generate_version_script.sh
index 8072afa8..7653f6e4 100755
--- a/scripts/generate_version_script.sh
+++ b/scripts/generate_version_script.sh
@@ -22,6 +22,9 @@ VERSION_SCRIPT_END=$(cat <<"EOF"
EOF
)

+ALL_SYMBOLS_FILE=$(dirname $VERSION_SCRIPT_FILE)/all.symbols
+cat exported_symbols/*.symbols exported_symbols/$ARCH/*.symbols | sort -d | uniq > $ALL_SYMBOLS_FILE
+
echo "$VERSION_SCRIPT_START" > $VERSION_SCRIPT_FILE

#Firstly output list of symbols from files common to all architectures
diff --git a/scripts/list_manifest_files.py b/scripts/list_manifest_files.py
new file mode 100755
index 00000000..683bdf1e
--- /dev/null
+++ b/scripts/list_manifest_files.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python3
+
+import optparse, os, sys, subprocess
+from manifest_common import add_var, expand, unsymlink, read_manifest, defines
+
+def list_files(manifest,manifest_dir):
+ manifest = [(x, y % defines) for (x, y) in manifest]
+ files = list(expand(manifest))
+ files = [(x, unsymlink(y)) for (x, y) in files]
+
+ for name, hostname in files:
+ if not hostname.startswith("->"):
+ if os.path.islink(hostname):
+ link = os.readlink(hostname)
+ print(link)
+ elif not os.path.isdir(hostname):
+ if not os.path.isabs(hostname):
+ hostname = os.path.join(manifest_dir,hostname)
+ print(hostname)
+
+def main():
+ make_option = optparse.make_option
+
+ opt = optparse.OptionParser(option_list=[
+ make_option('-m',
+ dest='manifest',
+ help='read manifest from FILE',
+ metavar='FILE'),
+ make_option('-D',
+ type='string',
+ help='define VAR=DATA',
+ metavar='VAR=DATA',
+ action='callback',
+ callback=add_var)
+ ])
+
+ (options, args) = opt.parse_args()
+
+ if not 'libgcc_s_dir' in defines:
+ libgcc_s_path = subprocess.check_output(['gcc', '-print-file-name=libgcc_s.so.1']).decode('utf-8')
+ defines['libgcc_s_dir'] = os.path.dirname(libgcc_s_path)
+
+ manifest_path = options.manifest or 'build/last/usr.manifest'
+ manifest_dir = os.path.abspath(os.path.dirname(manifest_path))
+
+ manifest = read_manifest(manifest_path)
+ list_files(manifest,manifest_dir)
+
+if __name__ == "__main__":
+ main()
--
2.31.1

Commit Bot

unread,
Mar 16, 2022, 9:22:46 PM3/16/22
to osv...@googlegroups.com, Waldemar Kozaczuk
From: Waldemar Kozaczuk <jwkoz...@gmail.com>
Committer: Waldemar Kozaczuk <jwkoz...@gmail.com>
Branch: master

build: support app version script
diff --git a/Makefile b/Makefile
--- a/scripts/generate_app_version_script.sh
--- a/scripts/generate_version_script.sh
+++ b/scripts/generate_version_script.sh
@@ -22,6 +22,9 @@ VERSION_SCRIPT_END=$(cat <<"EOF"
EOF
)

+ALL_SYMBOLS_FILE=$(dirname $VERSION_SCRIPT_FILE)/all.symbols
+cat exported_symbols/*.symbols exported_symbols/$ARCH/*.symbols | sort -d | uniq > $ALL_SYMBOLS_FILE
+
echo "$VERSION_SCRIPT_START" > $VERSION_SCRIPT_FILE

#Firstly output list of symbols from files common to all architectures
diff --git a/scripts/list_manifest_files.py b/scripts/list_manifest_files.py
--- a/scripts/list_manifest_files.py
Reply all
Reply to author
Forward
0 new messages