diff --git a/.gitignore b/.gitignore index 905a2dc77..8cecb72a1 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ templates/lxc-alpine templates/lxc-altlinux templates/lxc-archlinux templates/lxc-busybox +templates/lxc-cirros templates/lxc-debian templates/lxc-fedora templates/lxc-opensuse @@ -43,6 +44,7 @@ src/lxc/lxc-checkconfig src/lxc/lxc-checkpoint src/lxc/lxc-clone src/lxc/lxc-console +src/lxc/lxc-config src/lxc/lxc-create src/lxc/lxc-destroy src/lxc/lxc-execute @@ -52,6 +54,7 @@ src/lxc/lxc-info src/lxc/lxc-init src/lxc/lxc-kill src/lxc/lxc-monitor +src/lxc/lxc-monitord src/lxc/lxc-netstat src/lxc/lxc-ps src/lxc/lxc-restart @@ -64,20 +67,25 @@ src/lxc/lxc-unshare src/lxc/lxc-version src/lxc/lxc-wait src/lxc/legacy/lxc-ls +src/lxc/lxc-user-nic src/python-lxc/build/ src/python-lxc/lxc/__pycache__/ +src/tests/lxc-test-cgpath +src/tests/lxc-test-clonetest +src/tests/lxc-test-console src/tests/lxc-test-containertests src/tests/lxc-test-createtest src/tests/lxc-test-destroytest src/tests/lxc-test-get_item src/tests/lxc-test-getkeys src/tests/lxc-test-locktests +src/tests/lxc-test-lxcpath src/tests/lxc-test-saveconfig src/tests/lxc-test-shutdowntest src/tests/lxc-test-startone - +src/tests/lxc-usernic-test config/compile config/config.guess @@ -104,3 +112,5 @@ src/stamp-h1 .pc patches +*.orig +*.rej diff --git a/CONTRIBUTING b/CONTRIBUTING index f6447c12c..81edd3bda 100644 --- a/CONTRIBUTING +++ b/CONTRIBUTING @@ -40,6 +40,30 @@ You can submit your patches to the lxc-devel@lists.sourceforge.net mailing list. Use https://lists.sourceforge.net/lists/listinfo/lxc-devel to subscribe to the list. + +Licensing for new files: +------------------------ + +LXC is made of files shipped under a few different licenses. + +Anything that ends up being part of the LXC library needs to be released +under LGPLv2.1+ or a license compatible with it (though the latter will +only be accepted for cases where the code originated elsewhere and was +imported into LXC). 
+ +Language bindings for the libraries need to be released under LGPLv2.1+. + +Anything else (non-libraries) needs to be Free Software and needs to be +allowed to link with LGPLv2.1+ code (if needed). LXC upstream prefers +LGPLv2.1+ or GPLv2 for those. + + +When introducing a new file into the project, please make sure it has a +copyright header making clear under which license it's being released +and if it doesn't match the criteria described above, please explain +your decision on the lxc-devel mailing-list when submitting your patch. + + Developer Certificate of Origin: -------------------------------- diff --git a/autogen.sh b/autogen.sh index 29cd4247a..b75579af2 100755 --- a/autogen.sh +++ b/autogen.sh @@ -1,4 +1,25 @@ #!/bin/sh +# +# lxc: linux Container library +# +# (C) Copyright IBM Corp. 2007, 2008 +# +# Authors: +# Daniel Lezcano +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details.
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA set -x diff --git a/config/Makefile.am b/config/Makefile.am index 47f27505e..81d770921 100644 --- a/config/Makefile.am +++ b/config/Makefile.am @@ -1,11 +1,11 @@ configdir = $(sysconfdir)/lxc config_DATA = default.conf -defaultconf = @LXC_DEFAULT_CONF@ +distroconf = @LXC_DISTRO_CONF@ EXTRA_DIST = default.conf.ubuntu default.conf.libvirt default.conf.unknown default.conf: - cp $(defaultconf) $@ + cp $(distroconf) $@ clean-local: @$(RM) -f default.conf diff --git a/configure.ac b/configure.ac index 7dc82cfbc..4eaf329b0 100644 --- a/configure.ac +++ b/configure.ac @@ -6,7 +6,7 @@ AC_INIT([lxc], [0.9.0]) AC_CONFIG_SRCDIR([configure.ac]) AC_CONFIG_AUX_DIR([config]) AC_CONFIG_HEADERS([src/config.h]) -AM_INIT_AUTOMAKE([-Wall -Werror -Wno-portability]) +AM_INIT_AUTOMAKE([-Wall -Werror -Wno-portability subdir-objects]) AC_CANONICAL_HOST AM_PROG_CC_C_O AC_GNU_SOURCE @@ -39,19 +39,22 @@ if test "z$with_distro" = "z"; then fi case $with_distro in ubuntu) - defaultconf=default.conf.ubuntu + distroconf=default.conf.ubuntu ;; redhat|fedora|oracle|oracleserver) - defaultconf=default.conf.libvirt + distroconf=default.conf.libvirt ;; *) echo -n "Linux distribution network config unknown, defaulting to lxc.network.type = empty" - defaultconf=default.conf.unknown + distroconf=default.conf.unknown ;; esac AC_MSG_RESULT([$with_distro]) AM_CONDITIONAL([HAVE_DEBIAN], [test x"$with_distro" = "xdebian" -o x"$with_distro" = "xubuntu"]) +AC_CHECK_PROG([NEWUIDMAP], [newuidmap], [newuidmap]) +AM_CONDITIONAL([HAVE_NEWUIDMAP], [test -n "$NEWUIDMAP"]) + # Allow disabling rpath AC_ARG_ENABLE([rpath], [AC_HELP_STRING([--disable-rpath], [do not set rpath in executables])], @@ -60,14 +63,15 @@ AM_CONDITIONAL([ENABLE_RPATH], [test "x$enable_rpath" = "xyes"]) # Documentation (manpages) 
AC_ARG_ENABLE([doc], - [AC_HELP_STRING([--enable-doc], [make mans (require docbook2x-man installed) [default=auto]])], + [AC_HELP_STRING([--enable-doc], [make mans (requires docbook2man or docbook2x-man to be installed) [default=auto]])], [], [enable_doc=auto]) if test "x$enable_doc" = "xyes" -o "x$enable_doc" = "xauto"; then db2xman="" + dbparsers="docbook2x-man db2x_docbook2man docbook2man" AC_MSG_CHECKING(for docbook2x-man) - for name in docbook2x-man db2x_docbook2man; do + for name in ${dbparsers}; do if "$name" --help >/dev/null 2>&1; then db2xman="$name" break; @@ -87,6 +91,13 @@ if test "x$enable_doc" = "xyes" -o "x$enable_doc" = "xauto"; then fi AM_CONDITIONAL([ENABLE_DOCBOOK], [test "x$db2xman" != "x"]) +if test "x$db2xman" = "xdocbook2man"; then + docdtd="\"-//Davenport//DTD DocBook V3.0//EN\"" +else + docdtd="\"-//OASIS//DTD DocBook XML\" \"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd\"" +fi +AC_SUBST(docdtd) + # Apparmor AC_ARG_ENABLE([apparmor], [AC_HELP_STRING([--enable-apparmor], [enable apparmor])], @@ -97,6 +108,8 @@ if test "$enable_apparmor" = "check" ; then fi AM_CONDITIONAL([ENABLE_APPARMOR], [test "x$enable_apparmor" = "xyes"]) +AC_CHECK_LIB([gnutls], [gnutls_hash_fast]) + AM_COND_IF([ENABLE_APPARMOR], [AC_CHECK_HEADER([sys/apparmor.h],[],[AC_MSG_ERROR([You must install the AppArmor development package in order to compile lxc])]) AC_CHECK_LIB([apparmor], [aa_change_profile],[],[AC_MSG_ERROR([You must install the AppArmor development package in order to compile lxc])]) @@ -137,22 +150,75 @@ AM_COND_IF([ENABLE_PYTHON], PKG_CHECK_MODULES([PYTHONDEV], [python3 >= 3.2],[],[AC_MSG_ERROR([You must install python3-dev])]) AC_DEFINE_UNQUOTED([ENABLE_PYTHON], 1, [Python3 is available])]) -# Lua module and scripts -if test x"$with_distro" = "xdebian" -o x"$with_distro" = "xubuntu" ; then - LUAPKGCONFIG=lua5.1 -else - LUAPKGCONFIG=lua -fi +# Not in older autoconf versions +# AS_VAR_COPY(DEST, SOURCE) +# ------------------------- +# Set the 
polymorphic shell variable DEST to the contents of the polymorphic +# shell variable SOURCE. +m4_ifdef([AS_VAR_COPY], [], +[AC_DEFUN([AS_VAR_COPY], + [AS_LITERAL_IF([$1[]$2], [$1=$$2], [eval $1=\$$2])]) +]) +dnl PKG_CHECK_VAR was introduced with pkg-config 0.28 +m4_ifdef([PKG_CHECK_VAR], [], +[AC_DEFUN([PKG_CHECK_VAR], + [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl + AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl + _PKG_CONFIG([$1], [variable="][$3]["], [$2]) + AS_VAR_COPY([$1], [pkg_cv_][$1]) + AS_VAR_IF([$1], [""], [$5], [$4])dnl + ])# PKG_CHECK_VAR +]) + +# Lua module and scripts AC_ARG_ENABLE([lua], [AC_HELP_STRING([--enable-lua], [enable lua binding])], - [enable_lua=yes], [enable_lua=no]) + [], [enable_lua=check]) -AM_CONDITIONAL([ENABLE_LUA], [test "x$enable_lua" = "xyes"]) +AC_ARG_WITH([lua-pc], + [AS_HELP_STRING( + [--with-lua-pc=PKG], + [Specify pkg-config package name for lua] + )], [], [with_lua_pc=no]) + +if test "x$enable_lua" = "xyes" -a "x$with_lua_pc" != "xno"; then + # exit with error if not found + PKG_CHECK_MODULES([LUA], [$with_lua_pc], [LUAPKGCONFIG=$with_lua_pc]) +fi + +if test "x$enable_lua" = "xcheck" -a "x$with_lua_pc" != "xno"; then + PKG_CHECK_MODULES([LUA], [$with_lua_pc], + [LUAPKGCONFIG=$with_lua_pc + enable_lua=yes], + [enable_lua=no]) +fi + +if test "x$enable_lua" != "xno"; then + PKG_CHECK_MODULES([LUA], [lua], [LUAPKGCONFIG=lua], + [PKG_CHECK_MODULES([LUA], [lua5.2], [LUAPKGCONFIG=lua5.2], + [PKG_CHECK_MODULES([LUA], [lua5.1], [LUAPKGCONFIG=lua5.1], + [AS_IF([test "x$enable_lua" = "xyes"], + [AC_MSG_ERROR([Lua not found. 
Please use --with-lua-pc=PKG])], + [enable_lua=no])] + )] + )]) + AS_IF([test "x$LUAPKGCONFIG" != "x"], [enable_lua=yes]) +fi + +AM_CONDITIONAL([ENABLE_LUA], + [test "x$enable_lua" = "xyes"]) AM_COND_IF([ENABLE_LUA], - [PKG_CHECK_MODULES([LUA], [$LUAPKGCONFIG >= 5.1],[],[AC_MSG_ERROR([You must install lua-devel for lua 5.1])]) - AC_DEFINE_UNQUOTED([ENABLE_LUA], 1, [Lua is available])]) + [AC_MSG_CHECKING([Lua version]) + PKG_CHECK_VAR([LUA_VERSION], [$LUAPKGCONFIG], [V],, + [PKG_CHECK_VAR([LUA_VERSION], [$LUAPKGCONFIG], [major_version])]) + AC_MSG_RESULT([$LUA_VERSION]) + PKG_CHECK_VAR([LUA_INSTALL_CMOD], [$LUAPKGCONFIG], [INSTALL_CMOD],, + [LUA_INSTALL_CMOD=$libdir/lua/$LUA_VERSION]) + PKG_CHECK_VAR([LUA_INSTALL_LMOD], [$LUAPKGCONFIG], [INSTALL_LMOD],, + [LUA_INSTALL_LMOD=$datadir/lua/$LUA_VERSION]) + ]) # Optional test binaries AC_ARG_ENABLE([tests], @@ -176,6 +242,18 @@ AC_ARG_WITH([global-conf], [global lxc configuration file] )], [], [with_global_conf=['${sysconfdir}/lxc/lxc.conf']]) +AC_ARG_WITH([usernic-conf], + [AC_HELP_STRING( + [--with-usernic-conf], + [user network interface configuration file] + )], [], [with_usernic_conf=['${sysconfdir}/lxc/lxc-usernet']]) + +AC_ARG_WITH([usernic-db], + [AC_HELP_STRING( + [--with-usernic-db], + [lxc user nic database] + )], [], [with_usernic_db=['/run/lxc/nics']]) + # Rootfs path, where the container mount structure is assembled AC_ARG_WITH([rootfs-path], [AC_HELP_STRING( @@ -210,13 +288,16 @@ AS_AC_EXPAND(BINDIR, "$bindir") AS_AC_EXPAND(LIBEXECDIR, "$libexecdir") AS_AC_EXPAND(INCLUDEDIR, "$includedir") AS_AC_EXPAND(SYSCONFDIR, "$sysconfdir") +AS_AC_EXPAND(LXC_DEFAULT_CONFIG, "$sysconfdir/lxc/default.conf") AS_AC_EXPAND(DATADIR, "$datadir") AS_AC_EXPAND(LOCALSTATEDIR, "$localstatedir") AS_AC_EXPAND(DOCDIR, "$docdir") -AS_AC_EXPAND(LXC_DEFAULT_CONF, "$defaultconf") +AS_AC_EXPAND(LXC_DISTRO_CONF, "$distroconf") AS_AC_EXPAND(LXC_GENERATE_DATE, "$(date)") AS_AC_EXPAND(LXCPATH, "$with_config_path") 
AS_AC_EXPAND(LXC_GLOBAL_CONF, "$with_global_conf") +AS_AC_EXPAND(LXC_USERNIC_CONF, "$with_usernic_conf") +AS_AC_EXPAND(LXC_USERNIC_DB, "$with_usernic_db") AS_AC_EXPAND(LXCROOTFSMOUNT, "$with_rootfs_path") AS_AC_EXPAND(LXCTEMPLATEDIR, "$datadir/lxc/templates") AS_AC_EXPAND(LXCHOOKDIR, "$datadir/lxc/hooks") @@ -270,10 +351,10 @@ AM_CONDITIONAL([IS_BIONIC], [test "x$is_bionic" = "xyes"]) AC_CHECK_DECLS([PR_CAPBSET_DROP], [], [], [#include ]) # Check for some headers -AC_CHECK_HEADERS([sys/signalfd.h pty.h sys/capability.h sys/personality.h utmpx.h sys/timerfd.h]) +AC_CHECK_HEADERS([sys/signalfd.h pty.h ifaddrs.h sys/capability.h sys/personality.h utmpx.h sys/timerfd.h]) # Check for some syscalls functions -AC_CHECK_FUNCS([setns pivot_root sethostname unshare]) +AC_CHECK_FUNCS([setns pivot_root sethostname unshare rand_r confstr]) # Check for some functions AC_CHECK_LIB(util, openpty) @@ -326,7 +407,6 @@ AC_CONFIG_FILES([ doc/lxc-netstat.sgml doc/lxc-ps.sgml doc/lxc-restart.sgml - doc/lxc-shutdown.sgml doc/lxc-start-ephemeral.sgml doc/lxc-start.sgml doc/lxc-stop.sgml @@ -355,6 +435,7 @@ AC_CONFIG_FILES([ hooks/Makefile templates/Makefile + templates/lxc-cirros templates/lxc-debian templates/lxc-ubuntu templates/lxc-ubuntu-cloud @@ -373,11 +454,7 @@ AC_CONFIG_FILES([ src/lxc/lxc-netstat src/lxc/lxc-checkconfig src/lxc/lxc-version - src/lxc/lxc-create - src/lxc/lxc-clone - src/lxc/lxc-shutdown src/lxc/lxc-start-ephemeral - src/lxc/lxc-destroy src/lxc/legacy/lxc-ls src/lxc/lxc.functions diff --git a/doc/Makefile.am b/doc/Makefile.am index 1a2469548..a00036a39 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -22,7 +22,6 @@ man_MANS = \ lxc-netstat.1 \ lxc-ps.1 \ lxc-restart.1 \ - lxc-shutdown.1 \ lxc-start.1 \ lxc-stop.1 \ lxc-unfreeze.1 \ diff --git a/doc/common_options.sgml.in b/doc/common_options.sgml.in index 0dfd5b95c..929e4f7f6 100644 --- a/doc/common_options.sgml.in +++ b/doc/common_options.sgml.in @@ -19,7 +19,7 @@ Lesser General Public License for more 
details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> diff --git a/doc/legacy/lxc-ls.sgml.in b/doc/legacy/lxc-ls.sgml.in index c04a4a4c2..bed9b8a93 100644 --- a/doc/legacy/lxc-ls.sgml.in +++ b/doc/legacy/lxc-ls.sgml.in @@ -20,11 +20,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> @@ -50,7 +50,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA lxc-ls --active - ls option + ls options @@ -79,7 +79,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - + diff --git a/doc/lxc-attach.sgml.in b/doc/lxc-attach.sgml.in index a3bdb4499..0791c0ff0 100644 --- a/doc/lxc-attach.sgml.in +++ b/doc/lxc-attach.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-cgroup.sgml.in b/doc/lxc-cgroup.sgml.in index 5dcd61962..4546574d0 100644 --- a/doc/lxc-cgroup.sgml.in +++ b/doc/lxc-cgroup.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-checkconfig.sgml.in b/doc/lxc-checkconfig.sgml.in index ec7a4c1b2..e171ad117 100644 --- a/doc/lxc-checkconfig.sgml.in +++ b/doc/lxc-checkconfig.sgml.in @@ -17,11 +17,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/lxc-checkpoint.sgml.in b/doc/lxc-checkpoint.sgml.in index 6cd7169f7..6d3b75745 100644 --- a/doc/lxc-checkpoint.sgml.in +++ b/doc/lxc-checkpoint.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-clone.sgml.in b/doc/lxc-clone.sgml.in index d00b57b01..a2efe8504 100644 --- a/doc/lxc-clone.sgml.in +++ b/doc/lxc-clone.sgml.in @@ -20,11 +20,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -50,13 +50,31 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA lxc-clone + -s + -K + -M + -H + -B backingstore + -L fssize + -p lxcpath + -P newlxcpath -o orig -n new + -- hook arguments + + + lxc-clone -s + -K + -M + -H + -B backingstore -L fssize - -v vgname - -p lxc_lv_prefix - -t fstype + -p lxcpath + -P newlxcpath + orig + new + -- hook arguments @@ -64,10 +82,29 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Description - lxc-clone Creates a new container as a copy of an existing - container. When the original container's rootfs is an LVM block device or - is on a btrfs filesystem, then a snapshotted clone can be created, taking up - very little initial disk space. + lxc-clone Creates a new container as a clone of an existing + container. Two types of clones are supported: copy and snapshot. A copy + clone copies the root filesystem from the original container to the new. A + snapshot filesystem uses the backing store's snapshot functionality to create + a very small copy-on-write snapshot of the original container. Snapshot + clones require the new container backing store to support snapshotting. Currently + this includes only btrfs, lvm, overlayfs and zfs. LVM devices do not support + snapshots of snapshots. + + + + The backing store of the new container will be the same type as the + original container, + with one exception: overlayfs snapshots can be created of directory backed + containers. This can be requested by using the -B overlayfs + arguments.
+ + + + The names of the original and new container can be given (in that order) + after all options, or can be specified with the + -o and -n options, + respectively. @@ -78,6 +115,108 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + + + + + The new container's rootfs should be a LVM or btrfs snapshot of the original. + + + + + + + + + + + Do not change the hostname of the container (in the root + filesystem). + + + + + + + + + + + Use the same MAC address as the original container, rather than + generating a new random one. + + + + + + + + + + + Copy all mount hooks into the new container's directory, and + update any lxcpaths and container names as needed. + + + + + + + + + + + In the case of a block device backed container, a size for the new + block device. By default, the new device will be made the + same size as the original. + + + + + + + + + + + The lxcpath of the original container. By default, the system + wide configured lxcpath will be used. + + + + + + + + + + + The lxcpath for the new container. By default the same lxcpath + as the original will be used. Note that with btrfs snapshots, + changing lxcpaths may not be possible, as subvolume snapshots + must be in the same btrfs filesystem. + + + + + + + + + + + Select a different backing store for the new container. By + default the same as the original container's is used. Note that + currently changing the backingstore is only supported for + overlayfs snapshots of directory backed containers. Valid + backing stores include dir (directory), btrfs, lvm, zfs, loop + and overlayfs. + + + + @@ -100,73 +239,28 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - - - - - - The new container's rootfs should be a LVM or btrfs snapshot of the original. - - - - - - - - - - - In the case of a LVM-backed container, a size for the new - block device. By default, the new device will be made the - same size as the original.
- - - - - - - - - - - For an LVM-backed container, the volume group name to use. By - default it is 'lxc'. - - - - - - - - - - - For an LVM-backed container, a string to prefix to the container name to - form the logical volume name. For instance, specifying - -n c1 -p lxc_ will cause the container rootfs to - be on a logical volume called lxc_c1. - - - - - - - - - - - For a non-snapshot LVM clone, the file system to use for the new - container. Note this option is ignored when requesting a - snapshotted container. - - - + + Clone hook + + If the container being cloned has one or more lxc.hook.clone + specified, then the specified hooks will be called for the new container. The + first 3 arguments passed to the clone hook will be the container name, a section + ('lxc'), and the hook type ('clone'). Extra arguments passed + lxc-clone will be passed to the hook program starting at + argument 4. The LXC_ROOTFS_MOUNT environment variable gives + the path under which the container's root filesystem is mounted. The + configuration file pathname is stored in LXC_CONFIG_FILE, the + new container name in LXC_NAME, the old container name in + LXC_SRC_NAME, and the path or device on which + the rootfs is located is in LXC_ROOTFS_PATH. + + + &seealso; diff --git a/doc/lxc-console.sgml.in b/doc/lxc-console.sgml.in index 9298a5234..a08a6a209 100644 --- a/doc/lxc-console.sgml.in +++ b/doc/lxc-console.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -77,6 +77,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA the state it was before the disconnection. 
+ + A ttynum of 0 may be given to attach + to the container's /dev/console instead of its + /dev/tty<ttynum>. + + A keyboard escape sequence may be used to disconnect from the tty and quit lxc-console. The default escape sequence is <Ctrl+a q>. @@ -107,8 +113,9 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Specify the tty number to connect, if not specified a tty - number will be automatically choosen by the container. + Specify the tty number to connect to or 0 for the console. If not + specified the next available tty number will be automatically + chosen by the container. diff --git a/doc/lxc-create.sgml.in b/doc/lxc-create.sgml.in index 90864e067..b11ff30be 100644 --- a/doc/lxc-create.sgml.in +++ b/doc/lxc-create.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -125,16 +125,18 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - 'backingstore' is one of 'none', 'dir', 'lvm', or 'btrfs'. The + 'backingstore' is one of 'none', 'dir', 'lvm', 'loop', or 'btrfs'. The default is 'none', meaning that the container root filesystem will be a directory under @LXCPATH@/container/rootfs. 'dir' has the same meaning as 'none', but also allows the optional --dir ROOTFS to be specified, meaning that the container rootfs should be placed under the specified path, - rather than the default. - The option 'btrfs' need not be specified as it will be used - automatically if the @LXCPATH@ filesystem is found to - be btrfs. If backingstore is 'lvm', then an lvm block device will be + rather than the default.
If 'btrfs' is specified, then the + target filesystem must be btrfs, and the container rootfs will be + created as a new subvolume. This allows snapshotted clones to be + created, but also causes rsync --one-filesystem to treat it as a + separate filesystem. + If backingstore is 'lvm', then an lvm block device will be used and the following further options are available: --lvname lvname1 will create an LV named lvname1 rather than the default, which diff --git a/doc/lxc-destroy.sgml.in b/doc/lxc-destroy.sgml.in index 366dc9a10..7f346c64a 100644 --- a/doc/lxc-destroy.sgml.in +++ b/doc/lxc-destroy.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -81,17 +81,16 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + + + Use an alternate container path. The default is @LXCPATH@. + + + - - - - - Use an alternate container path. The default is @LXCPATH@. - - - - diff --git a/doc/lxc-device.sgml.in b/doc/lxc-device.sgml.in index e7773823e..954d17760 100644 --- a/doc/lxc-device.sgml.in +++ b/doc/lxc-device.sgml.in @@ -18,11 +18,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/lxc-execute.sgml.in b/doc/lxc-execute.sgml.in index c83a5eb53..a47fa3544 100644 --- a/doc/lxc-execute.sgml.in +++ b/doc/lxc-execute.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -132,12 +132,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Signal the end of options and disables further option processing. Any arguments after the -- are treated as - arguments. + arguments to command. - This option is useful when you want to execute, with the - command lxc-execute, a command line - with its own options. + This option is useful when you want to specify options + to command and don't want + lxc-execute to interpret them. diff --git a/doc/lxc-freeze.sgml.in b/doc/lxc-freeze.sgml.in index fba139b59..5e569237a 100644 --- a/doc/lxc-freeze.sgml.in +++ b/doc/lxc-freeze.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-info.sgml.in b/doc/lxc-info.sgml.in index 03212b01a..c6fc624ba 100644 --- a/doc/lxc-info.sgml.in +++ b/doc/lxc-info.sgml.in @@ -17,11 +17,11 @@ Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -48,9 +48,10 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA lxc-info -n name - -s - -p - -t state + -c KEY + -s + -p + -t state @@ -77,6 +78,18 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + + + + + Print a configuration key from the running container. This option + may be given multiple times to print out multiple key = value pairs. + + + + @@ -135,6 +148,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + lxc-info -n foo -c lxc.network.0.veth.pair + + + prints the veth pair name of foo. + + + + diff --git a/doc/lxc-kill.sgml.in b/doc/lxc-kill.sgml.in index 8d58db923..1a9f10ed8 100644 --- a/doc/lxc-kill.sgml.in +++ b/doc/lxc-kill.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-ls.sgml.in b/doc/lxc-ls.sgml.in index 4af2596f9..2c8d5ae9a 100644 --- a/doc/lxc-ls.sgml.in +++ b/doc/lxc-ls.sgml.in @@ -20,11 +20,11 @@ Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/lxc-monitor.sgml.in b/doc/lxc-monitor.sgml.in index f9760a5a8..a0aa7df57 100644 --- a/doc/lxc-monitor.sgml.in +++ b/doc/lxc-monitor.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -63,22 +63,17 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA to monitor all the containers, several of them or just one. + + The =PATH option may be specified multiple + times to monitor more than one container path. Note however that + containers with the same name in multiple paths will be + indistinguishable in the output. + + &commonoptions; - - Bugs - - - Only one lxc-monitor can run at a time. Other - invocations will fail with the following error: - - - lxc-monitor: bind : Address already in use - - - Examples diff --git a/doc/lxc-netstat.sgml.in b/doc/lxc-netstat.sgml.in index ab475f3ea..b4539d277 100644 --- a/doc/lxc-netstat.sgml.in +++ b/doc/lxc-netstat.sgml.in @@ -17,11 +17,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/lxc-ps.sgml.in b/doc/lxc-ps.sgml.in index f20bb0293..16902a44f 100644 --- a/doc/lxc-ps.sgml.in +++ b/doc/lxc-ps.sgml.in @@ -20,11 +20,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> @@ -52,7 +52,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA --name name --lxc --host - -- ps option + -- ps options @@ -69,7 +69,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA the container associated to processes. - The additionnal specified ps options must not + The additional specified ps options must not remove the default ps header and the pid information, to be able to have the lxc-ps to find the container associated to processes. @@ -119,7 +119,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - + diff --git a/doc/lxc-restart.sgml.in b/doc/lxc-restart.sgml.in index 9be025703..ceb4a9d89 100644 --- a/doc/lxc-restart.sgml.in +++ b/doc/lxc-restart.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-shutdown.sgml.in b/doc/lxc-shutdown.sgml.in deleted file mode 100644 index a4f3accfd..000000000 --- a/doc/lxc-shutdown.sgml.in +++ /dev/null @@ -1,98 +0,0 @@ - - - - -]> - - - - @LXC_GENERATE_DATE@ - - - lxc-shutdown - 1 - - - - lxc-shutdown - - - externally shut down or reboot a container - - - - - - lxc-shutdown - -n name - -w - -r - - - - - Description - - - lxc-shutdown sends a SIGPWR signal to the - specified container to request it to cleanly shut down. If - -w is specified, then lxc-shutdown - will wait until the container has shut down before exiting. - If -r is specified, the container will be - asked to reboot (using a SIGINT signal), and -w - will be ignored. If the container ignore these signals, then - nothing will happen. In that case, you can use lxc-stop - to force the container to stop. - - - - - &commonoptions; - - &seealso; - - - Author - Serge Hallyn serge.hallyn@canonical.com - - - - - diff --git a/doc/lxc-start-ephemeral.sgml.in b/doc/lxc-start-ephemeral.sgml.in index b753a9bf7..46e0592c0 100644 --- a/doc/lxc-start-ephemeral.sgml.in +++ b/doc/lxc-start-ephemeral.sgml.in @@ -18,11 +18,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/lxc-start.sgml.in b/doc/lxc-start.sgml.in index d501636a6..09f917164 100644 --- a/doc/lxc-start.sgml.in +++ b/doc/lxc-start.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-stop.sgml.in b/doc/lxc-stop.sgml.in index 33e3064b9..777be2b39 100644 --- a/doc/lxc-stop.sgml.in +++ b/doc/lxc-stop.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -50,6 +50,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA lxc-stop -n name + -W + -r + -t timeout + -k + -s @@ -57,14 +62,90 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Description - lxc-stop kills all the processes inside the - container. This command should be used if the processes are no - longer accessible and can no be exited normally. + lxc-stop reboots, cleanly shuts down, or kills + all the processes inside the container. By default, it will + request a clean shutdown of the container (by sending SIGPWR to + the container), wait 60 seconds for the container to exit, and + return. If the container fails to cleanly exit, then after 60 + seconds the container will be sent the + lxc.stopsignal to force it to shut down. - + + The -W, -r, -s + and -k options specify the action to perform. + -W indicates that after performing the specified + action, lxc-stop should immediately exit, while + -t TIMEOUT specifies the maximum amount of time + to wait for the container to complete the shutdown or reboot. + - &commonoptions; + + Options + + + + + + + + + Request a reboot of the container.
+ + + + + + + + + + + Only request a clean shutdown, do not kill the container tasks if the + clean shutdown fails. + + + + + + + + + + + Rather than requesting a clean shutdown of the container, explicitly + kill all tasks in the container. This is the legacy + lxc-stop behavior. + + + + + + + + + + + Simply perform the requested action (reboot, shutdown, or hard + kill) and exit. + + + + + + + + + + + Wait TIMEOUT seconds before hard-stopping the container or (in + the reboot case) returning failure. + + + + + + Diagnostic @@ -92,7 +173,6 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - diff --git a/doc/lxc-top.sgml.in b/doc/lxc-top.sgml.in index 2a4f83541..002115495 100644 --- a/doc/lxc-top.sgml.in +++ b/doc/lxc-top.sgml.in @@ -17,11 +17,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/lxc-unfreeze.sgml.in b/doc/lxc-unfreeze.sgml.in index 34a2cf402..4e1a5865d 100644 --- a/doc/lxc-unfreeze.sgml.in +++ b/doc/lxc-unfreeze.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc-unshare.sgml.in b/doc/lxc-unshare.sgml.in index 1fbaf0bba..a812eb965 100644 --- a/doc/lxc-unshare.sgml.in +++ b/doc/lxc-unshare.sgml.in @@ -20,11 +20,11 @@ Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - @@ -49,7 +49,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - lxc-clone + lxc-unshare -s namespaces -u user command @@ -115,7 +115,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA To spawn a new shell with its own UTS (hostname) namespace, - lxc-clone -s UTSNAME /bin/bash + lxc-unshare -s UTSNAME /bin/bash If the hostname is changed in that shell, the change will not be reflected on the host. @@ -123,7 +123,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA To spawn a shell in a new network, pid, and mount namespace, - lxc-clone -s "NETWORK|PID|MOUNT" /bin/bash + lxc-unshare -s "NETWORK|PID|MOUNT" /bin/bash The resulting shell will have pid 1 and will see no network interfaces. After re-mounting /proc in that shell, diff --git a/doc/lxc-version.sgml.in b/doc/lxc-version.sgml.in index 3833e9fc7..f3a7b6377 100644 --- a/doc/lxc-version.sgml.in +++ b/doc/lxc-version.sgml.in @@ -17,11 +17,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/lxc-wait.sgml.in b/doc/lxc-wait.sgml.in index b43061f6d..5c5c9581b 100644 --- a/doc/lxc-wait.sgml.in +++ b/doc/lxc-wait.sgml.in @@ -20,11 +20,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - diff --git a/doc/lxc.conf.sgml.in b/doc/lxc.conf.sgml.in index 226e36e2b..257c13467 100644 --- a/doc/lxc.conf.sgml.in +++ b/doc/lxc.conf.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> @@ -65,8 +65,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA The configuration file defines the different system resources to be assigned for the container. At present, the utsname, the - network, the mount points, the root file system and the control - groups are supported. + network, the mount points, the root file system, the user namespace, + and the control groups are supported. @@ -75,12 +75,38 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA the line is a comment. + + Configuration + + In order to ease administration of multiple related containers, it + is possible to have a container configuration file cause another + file to be loaded. For instance, network configuration + can be defined in one common file which is included by multiple + containers. Then, if the containers are moved to another host, + only one file may need to be updated. + + + + + + + + + + Specify the file to be included. The included file must be + in the same valid lxc configuration file format. + + + + + + Architecture Allows to set the architecture for the container. 
For example, set a 32bits architecture for a container running 32bits - binaries on a 64bits host. That fix the container scripts + binaries on a 64bits host. This fixes the container scripts which rely on the architecture to do some work like downloading the packages. @@ -229,7 +255,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA bridge is set up as a reflective relay. Broadcast frames coming in from the upper_dev get flooded to all macvlan interfaces in VEPA mode, local frames are not - delivered locallay, or , it + delivered locally, or , it provides the behavior of a simple bridge between different macvlan interfaces on the same port. Frames from one interface to another one get delivered directly @@ -258,7 +284,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA specify an action to do for the network. - + activates the interface. @@ -395,6 +421,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA type, other arguments may be passed: veth/macvlan/phys. And finally (host-sided) device name. + + Standard output from the script is logged at debug level. + Standard error is not logged, but can be captured by the + hook redirecting its standard error to standard output. + @@ -415,6 +446,11 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA type, other arguments may be passed: veth/macvlan/phys. And finally (host-sided) device name. + + Standard output from the script is logged at debug level. + Standard error is not logged, but can be captured by the + hook redirecting its standard error to standard output. + @@ -448,7 +484,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA If the container is configured with a root filesystem and the inittab file is setup to use the console, you may want to specify - where goes the output of this console. + where the output of this console goes. 
@@ -471,14 +507,14 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Console through the ttys - If the container is configured with a root filesystem and the - inittab file is setup to launch a getty on the ttys. This - option will specify the number of ttys to be available for the - container. The number of getty in the inittab file of the - container should not be greater than the number of ttys - specified in this configuration file, otherwise the excess - getty sessions will die and respawn indefinitly giving - annoying messages on the console. + This option is useful if the container is configured with a root + filesystem and the inittab file is setup to launch a getty on the + ttys. The option specifies the number of ttys to be available for + the container. The number of gettys in the inittab file of the + container should not be greater than the number of ttys specified + in this option, otherwise the excess getty sessions will die and + respawn indefinitely giving annoying messages on the console or in + /var/log/messages. @@ -534,7 +570,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA rootfs LXC will mount a fresh tmpfs under /dev (limited to 100k) and fill in a minimal set of initial devices. This is generally required when starting a container containing - a "systemd" based "init" but may be optional at other times. Addional + a "systemd" based "init" but may be optional at other times. Additional devices in the containers /dev directory may be created through the use of the hook. @@ -590,13 +626,20 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA specify a file location in the fstab format, containing the - mount informations. If the rootfs is an image file or a - device block and the fstab is used to mount a point + mount information. 
If the rootfs is an image file or a + block device and the fstab is used to mount a point somewhere in this rootfs, the path of the rootfs mount point should be prefixed with the @LXCROOTFSMOUNT@ default path or the value of if - specified. + specified. Note that when mounting a filesystem from an + image file or block device the third field (fs_vfstype) + cannot be auto as with + + mount + 8 + + but must be explicitly specified. @@ -728,6 +771,67 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + + + + + Specify the capability to be kept in the container. All other + capabilities will be dropped. + + + + + + + + Apparmor profile + + If lxc was compiled and installed with apparmor support, and the host + system has apparmor enabled, then the apparmor profile under which the + container should be run can be specified in the container + configuration. The default is lxc-container-default. + + + + + + + + + Specify the apparmor profile under which the container should + be run. To specify that the container should be unconfined, + use + + lxc.aa_profile = unconfined + + + + + + + Seccomp configuration + + A container can be started with a reduced set of available + system calls by loading a seccomp profile at startup. The + seccomp configuration file should begin with a version number + (which currently must be 1) on the first line, a policy type + (which must be 'whitelist') on the second line, followed by a + list of allowed system call numbers, one per line. + + + + + + + + + Specify a file containing the seccomp configuration to + load before the container starts. + + + @@ -763,11 +867,37 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Startup hooks + Container hooks - Startup hooks are programs or scripts which can be executed + Container hooks are programs or scripts which can be executed at various times in a container's lifetime. 
+ + When a container hook is executed, information is passed both + as command line arguments and through environment variables. + The arguments are: + + Container name. + Section (always 'lxc'). + The hook type (e.g. 'clone' or 'pre-mount'). + Additional arguments In the + case of the clone hook, any extra arguments passed to + lxc-clone will appear as further arguments to the hook. + + The following environment variables are set: + + LXC_NAME: is the container's name. + LXC_ROOTFS_MOUNT: the path to the mounted root filesystem. + LXC_CONFIG_FILE: the path to the container configuration file. + LXC_SRC_NAME: in the case of the clone hook, this is the original container's name. + LXC_ROOTFS_PATH: this is the lxc.rootfs entry for the container. Note this is likely not where the mounted rootfs is to be found, use LXC_ROOTFS_MOUNT for that. + + + + Standard output from the hooks is logged at debug level. + Standard error is not logged, but can be captured by the + hook redirecting its standard error to standard output. + @@ -859,10 +989,24 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + + + + + + A hook to be run when the container is cloned to a new one. + See lxc-clone + 1 for more information. + + + + - Startup hooks Environment Variables + Container hooks Environment Variables A number of environment variables are made available to the startup hooks to provide configuration information and assist in the @@ -893,7 +1037,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Host relative path to the container configuration file. This gives the container to reference the original, top level, configuration file for the container in order to locate any - addotional configuration information not otherwise made + additional configuration information not otherwise made available.
[] @@ -957,6 +1101,54 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Logging + + Logging can be configured on a per-container basis. By default, + depending upon how the lxc package was compiled, container startup + is logged only at the ERROR level, and logged to a file named after + the container (with '.log' appended) either under the container path, + or under @LOGPATH@. + + + Both the default log level and the log file can be specified in the + container configuration file, overriding the default behavior. Note + that the configuration file entries can in turn be overridden by the + command line options to lxc-start. + + + + + + + + + The level at which to log. The log level is an integer in + the range of 0..8 inclusive, where a lower number means more + verbose debugging. In particular 0 = trace, 1 = debug, 2 = + info, 3 = notice, 4 = warn, 5 = error, 6 = critical, 7 = + alert, and 8 = fatal. If unspecified, the level defaults + to 5 (error), so that only errors and above are logged. + + + Note that when a script (such as either a hook script or a + network interface up or down script) is called, the script's + standard output is logged at level 1, debug. + + + + + + + + + + The file to which logging info should be written. + + + + + @@ -1069,8 +1261,12 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA fstab 5 - + , + + capabilities + 7 + diff --git a/doc/lxc.sgml.in b/doc/lxc.sgml.in index a333c49cd..0e1f9121d 100644 --- a/doc/lxc.sgml.in +++ b/doc/lxc.sgml.in @@ -19,11 +19,11 @@ Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> - ]> diff --git a/doc/see_also.sgml.in b/doc/see_also.sgml.in index 341cd4d00..1e86a534a 100644 --- a/doc/see_also.sgml.in +++ b/doc/see_also.sgml.in @@ -19,7 +19,7 @@ Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA --> diff --git a/hooks/Makefile.am b/hooks/Makefile.am index 196e632f4..a050ccb8a 100644 --- a/hooks/Makefile.am +++ b/hooks/Makefile.am @@ -1,7 +1,9 @@ hooksdir=@LXCHOOKDIR@ hooks_SCRIPTS = \ + clonehostname \ mountcgroups \ - mountecryptfsroot + mountecryptfsroot \ + ubuntu-cloud-prep EXTRA_DIST=$(hooks_SCRIPTS) diff --git a/hooks/clonehostname b/hooks/clonehostname new file mode 100755 index 000000000..8865c2df8 --- /dev/null +++ b/hooks/clonehostname @@ -0,0 +1,29 @@ +#!/bin/sh +# +# Update the hostname in the cloned container's scripts +# +# Copyright © 2013 Oracle. +# +# This library is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# Note that /etc/hostname is updated by lxc itself +for file in \ + $LXC_ROOTFS_PATH/etc/sysconfig/network \ + $LXC_ROOTFS_PATH/etc/sysconfig/network-scripts/ifcfg-* ; +do + if [ -f $file ]; then + sed -i "s|$LXC_SRC_NAME|$LXC_NAME|" $file + fi +done +exit 0 diff --git a/hooks/mountcgroups b/hooks/mountcgroups index 879fa3c7a..a63c8db82 100755 --- a/hooks/mountcgroups +++ b/hooks/mountcgroups @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # This is an example hook to mount all mounted cgroups in the @@ -26,17 +26,40 @@ set -e c=$1 +configfile=$LXC_CONFIG_FILE d=/sys/fs/cgroup d2=$LXC_ROOTFS_MOUNT/${d} +# name lxc hook lxcpath +lxcpath=$4 if [ ! -d "$d" ]; then exit 0 fi mount -n -t tmpfs tmpfs ${d2} +do_devices_setup() { + local devdir="$1" + local c="$2" + local line + local w # which (allow or deny) + local v # value + egrep "^lxc.cgroup.devices.(allow|deny)[ \t]*=" ${configfile} | while read line; do + w=`echo $line | awk -F. '{ print $4 }' | awk '{ print $1 }'` + v=`echo $line | awk -F= '{ print $2 }'` + echo "$v" >> "$devdir"/devices.$w + done +} + # XXX TODO - we'll need to account for other cgroup groups beside 'lxc', # i.e. 'build' or 'users/joe'. for dir in `/bin/ls $d`; do + if [ "$dir" = "devices" ]; then + devicesdir="${d}/${dir}/lxc/${c}" + mkdir -p "$devicesdir" + # set the devices cgroup perms now - we can't change from blacklist to + # whitelist, or add perms, once we have children. 
+ do_devices_setup "$devicesdir" "${c}" + fi mkdir -p "${d}/${dir}/lxc/${c}/${c}.real" echo 1 > "${d}/${dir}/lxc/${c}/${c}.real/tasks" mkdir -p ${d2}/${dir} diff --git a/hooks/mountecryptfsroot b/hooks/mountecryptfsroot index f3cb0fbbe..15bb2ea17 100755 --- a/hooks/mountecryptfsroot +++ b/hooks/mountecryptfsroot @@ -14,7 +14,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # This hook can be used to mount an ecryptfs filesystem as a container's # rootfs. diff --git a/hooks/ubuntu-cloud-prep b/hooks/ubuntu-cloud-prep new file mode 100755 index 000000000..f0b30ea60 --- /dev/null +++ b/hooks/ubuntu-cloud-prep @@ -0,0 +1,184 @@ +#!/bin/bash +## If the container being cloned has one or more lxc.hook.clone +## specified, then the specified hooks will be called for the new +## container. The arguments passed to the clone hook are: +## 1. the container name +## 2. a section ('lxc') +## 3. hook type ('clone') +## 4. .. additional arguments to lxc-clone +## Environment variables: +## LXC_ROOTFS_MOUNT: path to the root filesystem +## LXC_CONFIG_FILE: path to config file +## LXC_SRC_NAME: old container name +## LXC_ROOTFS_PATH: path or device on which the root fs is located + +set -f +VERBOSITY="0" + +error() { echo "$@" 1>&2; } +debug() { [ "$VERBOSITY" -ge "$1" ] || return; shift; error "$@"; } +fail() { [ $# -eq 0 ] || error "$@"; exit 1; } + +prep_usage() { +cat </dev/null) || + : + getopt_ret=$? 
+ if [ $getopt_ret -eq 0 ]; then + eval set -- "${getopt_out}" || + { error "Unexpected error reading usage"; return 1; } + fi + + local cur="" next="" + local userdata="" hostid="" authkey="" locales=1 cloud=0 + local create_etc_init=0 name="ubuntucloud-lxc" + + while [ $# -ne 0 ]; do + cur="$1"; next="$2"; + case "$cur" in + -C|--cloud) cloud=1;; + -h|--help) prep_usage; return 0;; + --name) name="$next";; + -i|--hostid) hostid="$next";; + -L|--nolocales) locales=0;; + --create-etc-init) create_etc_init=1;; + -S|--auth-key) + [ -f "$next" ] || + { error "--auth-key: '$next' not a file"; return 1; } + authkey="$next";; + -u|--userdata) + [ -f "$next" ] || + { error "--userdata: '$next' not a file"; return 1; } + userdata="$next";; + -v|--verbose) VERBOSITY=$((${VERBOSITY}+1));; + --) shift; break;; + esac + shift; + done + + [ $# -eq 1 ] || { + prep_usage 1>&2; + error "expected 1 arguments, got ($_LXC_HOOK) $#: $*"; + return 1; + } + + local root_d="$1"; + + if [ $getopt_ret -ne 0 -a "$_LXC_HOOK" = "clone" ]; then + # getopt above failed, but we were called from lxc clone. there might + # be multiple clone hooks and the args provided here not for us. This + # seems like not the greatest interface, so all we'll do is mention it. + error "${0##*}: usage failed, continuing with defaults" + fi + + [ "$create_etc_init" -eq 0 ] || + echo "#upstart needs help for overlayfs (LP: #1213925)." 
> \ + "$root_d/etc/init/.overlayfs-upstart-helper" || + { error "failed to create /etc/init in overlay"; return 1; } + + local seed_d="" + seed_d="$root_d/var/lib/cloud/seed/nocloud-net" + + echo "$name" > "$root_d/etc/hostname" || + { error "failed to write /etc/hostname"; return 1; } + + if [ $cloud -eq 1 ]; then + debug 1 "--cloud provided, not modifying seed in '$seed_d'" + else + if [ -z "$hostid" ]; then + hostid=$(uuidgen | cut -c -8) && [ -n "$hostid" ] || + { error "failed to get hostid"; return 1; } + fi + mkdir -p "$seed_d" || + { error "failed to create '$seed_d'"; return 1; } + + echo "instance-id: lxc-$hostid" > "$seed_d/meta-data" || + { error "failed to write to $seed_d/meta-data"; return 1; } + + echo "local-hostname: $name" >> "$seed_d/meta-data" || + { error "failed to write to $seed_d/meta-data"; return 1; } + + if [ -n "$authkey" ]; then + { + echo "public-keys:" && + sed -e '/^$/d' -e 's,^,- ,' "$authkey" + } >> "$seed_d/meta-data" + [ $? -eq 0 ] || + { error "failed to write public keys to metadata"; return 1; } + fi + + local larch="usr/lib/locale/locale-archive" + if [ $locales -eq 1 ]; then + cp "/$larch" "$root_d/$larch" || { + error "failed to cp '/$larch' '$root_d/$larch'"; + return 1; + } + fi + + if [ -z "$MIRROR" ]; then + MIRROR="http://archive.ubuntu.com/ubuntu" + fi + + if [ -n "$userdata" ]; then + cp "$userdata" "$seed_d/user-data" + else + { + local lc=$(locale | awk -F= '/LANG=/ {print $NF; }') + echo "#cloud-config" + echo "output: {all: '| tee -a /var/log/cloud-init-output.log'}" + echo "apt_mirror: $MIRROR" + echo "manage_etc_hosts: localhost" + [ -z "$LANG" ] || echo "locale: $LANG"; + echo "password: ubuntu" + echo "chpasswd: { expire: false; }" + } > "$seed_d/user-data" + fi + [ $? -eq 0 ] || { + error "failed to write user-data write to '$seed_d/user-data'"; + return 1; + } + fi + +} + +main() { + # main just joins 2 modes of being called. 
from user one from lxc clone + local _LXC_HOOK + if [ -n "$LXC_ROOTFS_MOUNT" -a "$3" = "clone" ]; then + _LXC_HOOK="clone" + local name="$1" create_etc_init="" + shift 3 + # if mountpoint is overlayfs then add '--create-etc-init' + [ "${LXC_ROOTFS_PATH#overlayfs}" != "${LXC_ROOTFS_PATH}" ] && + create_etc_init="--create-etc-init" + debug 1 prep "--name=$name" $create_etc_init "$LXC_ROOTFS_MOUNT" "$@" + prep "--name=$name" $create_etc_init "$LXC_ROOTFS_MOUNT" "$@" + else + _LXC_HOOK="" + prep "$@" + fi + return $? +} + +main "$@" + +# vi: ts=4 expandtab diff --git a/lxc.spec.in b/lxc.spec.in index 058393a10..995d455a8 100644 --- a/lxc.spec.in +++ b/lxc.spec.in @@ -18,7 +18,7 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Name: @PACKAGE@ Version: @VERSION@ @@ -100,7 +100,6 @@ rm -rf %{buildroot} %attr(4111,root,root) %{_bindir}/lxc-attach %attr(4111,root,root) %{_bindir}/lxc-create %attr(4111,root,root) %{_bindir}/lxc-clone -%attr(4111,root,root) %{_bindir}/lxc-shutdown %attr(4111,root,root) %{_bindir}/lxc-start %attr(4111,root,root) %{_bindir}/lxc-netstat %attr(4111,root,root) %{_bindir}/lxc-unshare diff --git a/runapitests.sh b/runapitests.sh index 116938719..c1d90eb2a 100644 --- a/runapitests.sh +++ b/runapitests.sh @@ -1,4 +1,22 @@ #!/bin/sh +# liblxcapi +# +# Copyright © 2012 Serge Hallyn . +# Copyright © 2012 Canonical Ltd. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+ +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA cleanup() { rm -f /etc/lxc/test-busybox.conf diff --git a/src/include/getline.c b/src/include/getline.c index d4117cbba..b1dd8924f 100644 --- a/src/include/getline.c +++ b/src/include/getline.c @@ -1,3 +1,32 @@ +/* + * Copyright (c) 2006 SPARTA, Inc. + * All rights reserved. + * + * This software was developed by SPARTA ISSO under SPAWAR contract + * N66001-04-C-6019 ("SEFOS"). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #include #include #include diff --git a/src/include/getline.h b/src/include/getline.h index b030d7a1c..ddf4f157f 100644 --- a/src/include/getline.h +++ b/src/include/getline.h @@ -1,6 +1,37 @@ +/* + * Copyright (c) 2006 SPARTA, Inc. + * All rights reserved. + * + * This software was developed by SPARTA ISSO under SPAWAR contract + * N66001-04-C-6019 ("SEFOS"). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #ifndef _getline_h #define _getline_h +#include + extern ssize_t getline(char **outbuf, size_t *outsize, FILE *fp); #endif diff --git a/src/include/ifaddrs.c b/src/include/ifaddrs.c new file mode 100644 index 000000000..109f32297 --- /dev/null +++ b/src/include/ifaddrs.c @@ -0,0 +1,597 @@ +/* +Copyright (c) 2013, Kenneth MacKay +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "ifaddrs.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef struct NetlinkList +{ + struct NetlinkList *m_next; + struct nlmsghdr *m_data; + unsigned int m_size; +} NetlinkList; + +static int netlink_socket(void) +{ + int l_socket = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if(l_socket < 0) + { + return -1; + } + + struct sockaddr_nl l_addr; + memset(&l_addr, 0, sizeof(l_addr)); + l_addr.nl_family = AF_NETLINK; + if(bind(l_socket, (struct sockaddr *)&l_addr, sizeof(l_addr)) < 0) + { + close(l_socket); + return -1; + } + + return l_socket; +} + +static int netlink_send(int p_socket, int p_request) +{ + char l_buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))]; + memset(l_buffer, 0, sizeof(l_buffer)); + struct nlmsghdr *l_hdr = (struct nlmsghdr *)l_buffer; + struct rtgenmsg *l_msg = (struct rtgenmsg *)NLMSG_DATA(l_hdr); + + l_hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*l_msg)); + l_hdr->nlmsg_type = p_request; + l_hdr->nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; + l_hdr->nlmsg_pid = 0; + l_hdr->nlmsg_seq = p_socket; + l_msg->rtgen_family = AF_UNSPEC; + + struct sockaddr_nl l_addr; + memset(&l_addr, 0, sizeof(l_addr)); + l_addr.nl_family = AF_NETLINK; + return (sendto(p_socket, l_hdr, l_hdr->nlmsg_len, 0, (struct sockaddr *)&l_addr, sizeof(l_addr))); +} + +static int netlink_recv(int p_socket, void *p_buffer, size_t p_len) +{ + struct msghdr 
l_msg; + struct iovec l_iov = { p_buffer, p_len }; + struct sockaddr_nl l_addr; + + for(;;) + { + l_msg.msg_name = (void *)&l_addr; + l_msg.msg_namelen = sizeof(l_addr); + l_msg.msg_iov = &l_iov; + l_msg.msg_iovlen = 1; + l_msg.msg_control = NULL; + l_msg.msg_controllen = 0; + l_msg.msg_flags = 0; + int l_result = recvmsg(p_socket, &l_msg, 0); + + if(l_result < 0) + { + if(errno == EINTR) + { + continue; + } + return -2; + } + + if(l_msg.msg_flags & MSG_TRUNC) + { // buffer was too small + return -1; + } + return l_result; + } +} + +static struct nlmsghdr *getNetlinkResponse(int p_socket, int *p_size, int *p_done) +{ + size_t l_size = 4096; + void *l_buffer = NULL; + + for(;;) + { + free(l_buffer); + l_buffer = malloc(l_size); + + int l_read = netlink_recv(p_socket, l_buffer, l_size); + *p_size = l_read; + if(l_read == -2) + { + free(l_buffer); + return NULL; + } + if(l_read >= 0) + { + pid_t l_pid = getpid(); + struct nlmsghdr *l_hdr; + for(l_hdr = (struct nlmsghdr *)l_buffer; NLMSG_OK(l_hdr, (unsigned int)l_read); l_hdr = (struct nlmsghdr *)NLMSG_NEXT(l_hdr, l_read)) + { + if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) + { + continue; + } + + if(l_hdr->nlmsg_type == NLMSG_DONE) + { + *p_done = 1; + break; + } + + if(l_hdr->nlmsg_type == NLMSG_ERROR) + { + free(l_buffer); + return NULL; + } + } + return l_buffer; + } + + l_size *= 2; + } +} + +static NetlinkList *newListItem(struct nlmsghdr *p_data, unsigned int p_size) +{ + NetlinkList *l_item = malloc(sizeof(NetlinkList)); + l_item->m_next = NULL; + l_item->m_data = p_data; + l_item->m_size = p_size; + return l_item; +} + +static void freeResultList(NetlinkList *p_list) +{ + NetlinkList *l_cur; + while(p_list) + { + l_cur = p_list; + p_list = p_list->m_next; + free(l_cur->m_data); + free(l_cur); + } +} + +static NetlinkList *getResultList(int p_socket, int p_request) +{ + if(netlink_send(p_socket, p_request) < 0) + { + return NULL; + } + + NetlinkList *l_list = NULL; + NetlinkList 
*l_end = NULL; + int l_size; + int l_done = 0; + while(!l_done) + { + struct nlmsghdr *l_hdr = getNetlinkResponse(p_socket, &l_size, &l_done); + if(!l_hdr) + { // error + freeResultList(l_list); + return NULL; + } + + NetlinkList *l_item = newListItem(l_hdr, l_size); + if(!l_list) + { + l_list = l_item; + } + else + { + l_end->m_next = l_item; + } + l_end = l_item; + } + return l_list; +} + +static size_t maxSize(size_t a, size_t b) +{ + return (a > b ? a : b); +} + +static size_t calcAddrLen(sa_family_t p_family, int p_dataSize) +{ + switch(p_family) + { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_PACKET: + return maxSize(sizeof(struct sockaddr_ll), offsetof(struct sockaddr_ll, sll_addr) + p_dataSize); + default: + return maxSize(sizeof(struct sockaddr), offsetof(struct sockaddr, sa_data) + p_dataSize); + } +} + +static void makeSockaddr(sa_family_t p_family, struct sockaddr *p_dest, void *p_data, size_t p_size) +{ + switch(p_family) + { + case AF_INET: + memcpy(&((struct sockaddr_in*)p_dest)->sin_addr, p_data, p_size); + break; + case AF_INET6: + memcpy(&((struct sockaddr_in6*)p_dest)->sin6_addr, p_data, p_size); + break; + case AF_PACKET: + memcpy(((struct sockaddr_ll*)p_dest)->sll_addr, p_data, p_size); + ((struct sockaddr_ll*)p_dest)->sll_halen = p_size; + break; + default: + memcpy(p_dest->sa_data, p_data, p_size); + break; + } + p_dest->sa_family = p_family; +} + +static void addToEnd(struct ifaddrs **p_resultList, struct ifaddrs *p_entry) +{ + if(!*p_resultList) + { + *p_resultList = p_entry; + } + else + { + struct ifaddrs *l_cur = *p_resultList; + while(l_cur->ifa_next) + { + l_cur = l_cur->ifa_next; + } + l_cur->ifa_next = p_entry; + } +} + +static void interpretLink(struct nlmsghdr *p_hdr, struct ifaddrs **p_links, struct ifaddrs **p_resultList) +{ + struct ifinfomsg *l_info = (struct ifinfomsg *)NLMSG_DATA(p_hdr); + + size_t l_nameSize = 0; + size_t l_addrSize = 0; + size_t 
l_dataSize = 0; + + size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg)); + struct rtattr *l_rta; + for(l_rta = (struct rtattr *)(((char *)l_info) + NLMSG_ALIGN(sizeof(struct ifinfomsg))); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); + switch(l_rta->rta_type) + { + case IFLA_ADDRESS: + case IFLA_BROADCAST: + l_addrSize += NLMSG_ALIGN(calcAddrLen(AF_PACKET, l_rtaDataSize)); + break; + case IFLA_IFNAME: + l_nameSize += NLMSG_ALIGN(l_rtaSize + 1); + break; + case IFLA_STATS: + l_dataSize += NLMSG_ALIGN(l_rtaSize); + break; + default: + break; + } + } + + struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + l_nameSize + l_addrSize + l_dataSize); + memset(l_entry, 0, sizeof(struct ifaddrs)); + l_entry->ifa_name = ""; + + char *l_name = ((char *)l_entry) + sizeof(struct ifaddrs); + char *l_addr = l_name + l_nameSize; + char *l_data = l_addr + l_addrSize; + + l_entry->ifa_flags = l_info->ifi_flags; + + l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifinfomsg)); + for(l_rta = (struct rtattr *)(((char *)l_info) + NLMSG_ALIGN(sizeof(struct ifinfomsg))); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + void *l_rtaData = RTA_DATA(l_rta); + size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); + switch(l_rta->rta_type) + { + case IFLA_ADDRESS: + case IFLA_BROADCAST: + { + size_t l_addrLen = calcAddrLen(AF_PACKET, l_rtaDataSize); + makeSockaddr(AF_PACKET, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize); + ((struct sockaddr_ll *)l_addr)->sll_ifindex = l_info->ifi_index; + ((struct sockaddr_ll *)l_addr)->sll_hatype = l_info->ifi_type; + if(l_rta->rta_type == IFLA_ADDRESS) + { + l_entry->ifa_addr = (struct sockaddr *)l_addr; + } + else + { + l_entry->ifa_broadaddr = (struct sockaddr *)l_addr; + } + l_addr += NLMSG_ALIGN(l_addrLen); + break; + } + case IFLA_IFNAME: + strncpy(l_name, l_rtaData, l_rtaDataSize); + l_name[l_rtaDataSize] = '\0'; + l_entry->ifa_name = l_name; + break; + case 
IFLA_STATS: + memcpy(l_data, l_rtaData, l_rtaDataSize); + l_entry->ifa_data = l_data; + break; + default: + break; + } + } + + addToEnd(p_resultList, l_entry); + p_links[l_info->ifi_index - 1] = l_entry; +} + +static void interpretAddr(struct nlmsghdr *p_hdr, struct ifaddrs **p_links, struct ifaddrs **p_resultList) +{ + struct ifaddrmsg *l_info = (struct ifaddrmsg *)NLMSG_DATA(p_hdr); + + size_t l_nameSize = 0; + size_t l_addrSize = 0; + + int l_addedNetmask = 0; + + size_t l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg)); + struct rtattr *l_rta; + for(l_rta = (struct rtattr *)(((char *)l_info) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + size_t l_rtaDataSize = RTA_PAYLOAD(l_rta); + if(l_info->ifa_family == AF_PACKET) + { + continue; + } + + switch(l_rta->rta_type) + { + case IFA_ADDRESS: + case IFA_LOCAL: + if((l_info->ifa_family == AF_INET || l_info->ifa_family == AF_INET6) && !l_addedNetmask) + { // make room for netmask + l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize)); + l_addedNetmask = 1; + } + case IFA_BROADCAST: + l_addrSize += NLMSG_ALIGN(calcAddrLen(l_info->ifa_family, l_rtaDataSize)); + break; + case IFA_LABEL: + l_nameSize += NLMSG_ALIGN(l_rtaSize + 1); + break; + default: + break; + } + } + + struct ifaddrs *l_entry = malloc(sizeof(struct ifaddrs) + l_nameSize + l_addrSize); + memset(l_entry, 0, sizeof(struct ifaddrs)); + l_entry->ifa_name = p_links[l_info->ifa_index - 1]->ifa_name; + + char *l_name = ((char *)l_entry) + sizeof(struct ifaddrs); + char *l_addr = l_name + l_nameSize; + + l_entry->ifa_flags = l_info->ifa_flags | p_links[l_info->ifa_index - 1]->ifa_flags; + + l_rtaSize = NLMSG_PAYLOAD(p_hdr, sizeof(struct ifaddrmsg)); + for(l_rta = (struct rtattr *)(((char *)l_info) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))); RTA_OK(l_rta, l_rtaSize); l_rta = RTA_NEXT(l_rta, l_rtaSize)) + { + void *l_rtaData = RTA_DATA(l_rta); + size_t l_rtaDataSize = 
RTA_PAYLOAD(l_rta); + switch(l_rta->rta_type) + { + case IFA_ADDRESS: + case IFA_BROADCAST: + case IFA_LOCAL: + { + size_t l_addrLen = calcAddrLen(l_info->ifa_family, l_rtaDataSize); + makeSockaddr(l_info->ifa_family, (struct sockaddr *)l_addr, l_rtaData, l_rtaDataSize); + if(l_info->ifa_family == AF_INET6) + { + if(IN6_IS_ADDR_LINKLOCAL((struct in6_addr *)l_rtaData) || IN6_IS_ADDR_MC_LINKLOCAL((struct in6_addr *)l_rtaData)) + { + ((struct sockaddr_in6 *)l_addr)->sin6_scope_id = l_info->ifa_index; + } + } + + if(l_rta->rta_type == IFA_ADDRESS) + { // apparently in a point-to-point network IFA_ADDRESS contains the dest address and IFA_LOCAL contains the local address + if(l_entry->ifa_addr) + { + l_entry->ifa_dstaddr = (struct sockaddr *)l_addr; + } + else + { + l_entry->ifa_addr = (struct sockaddr *)l_addr; + } + } + else if(l_rta->rta_type == IFA_LOCAL) + { + if(l_entry->ifa_addr) + { + l_entry->ifa_dstaddr = l_entry->ifa_addr; + } + l_entry->ifa_addr = (struct sockaddr *)l_addr; + } + else + { + l_entry->ifa_broadaddr = (struct sockaddr *)l_addr; + } + l_addr += NLMSG_ALIGN(l_addrLen); + break; + } + case IFA_LABEL: + strncpy(l_name, l_rtaData, l_rtaDataSize); + l_name[l_rtaDataSize] = '\0'; + l_entry->ifa_name = l_name; + break; + default: + break; + } + } + + if(l_entry->ifa_addr && (l_entry->ifa_addr->sa_family == AF_INET || l_entry->ifa_addr->sa_family == AF_INET6)) + { + unsigned l_maxPrefix = (l_entry->ifa_addr->sa_family == AF_INET ? 32 : 128); + unsigned l_prefix = (l_info->ifa_prefixlen > l_maxPrefix ? 
l_maxPrefix : l_info->ifa_prefixlen); + char l_mask[16] = {0}; + unsigned i; + for(i=0; i<(l_prefix/8); ++i) + { + l_mask[i] = 0xff; + } + l_mask[i] = 0xff << (8 - (l_prefix % 8)); + + makeSockaddr(l_entry->ifa_addr->sa_family, (struct sockaddr *)l_addr, l_mask, l_maxPrefix / 8); + l_entry->ifa_netmask = (struct sockaddr *)l_addr; + } + + addToEnd(p_resultList, l_entry); +} + +static void interpret(int p_socket, NetlinkList *p_netlinkList, struct ifaddrs **p_links, struct ifaddrs **p_resultList) +{ + pid_t l_pid = getpid(); + for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next) + { + unsigned int l_nlsize = p_netlinkList->m_size; + struct nlmsghdr *l_hdr; + for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize)) + { + if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) + { + continue; + } + + if(l_hdr->nlmsg_type == NLMSG_DONE) + { + break; + } + + if(l_hdr->nlmsg_type == RTM_NEWLINK) + { + interpretLink(l_hdr, p_links, p_resultList); + } + else if(l_hdr->nlmsg_type == RTM_NEWADDR) + { + interpretAddr(l_hdr, p_links, p_resultList); + } + } + } +} + +static unsigned countLinks(int p_socket, NetlinkList *p_netlinkList) +{ + unsigned l_links = 0; + pid_t l_pid = getpid(); + for(; p_netlinkList; p_netlinkList = p_netlinkList->m_next) + { + unsigned int l_nlsize = p_netlinkList->m_size; + struct nlmsghdr *l_hdr; + for(l_hdr = p_netlinkList->m_data; NLMSG_OK(l_hdr, l_nlsize); l_hdr = NLMSG_NEXT(l_hdr, l_nlsize)) + { + if((pid_t)l_hdr->nlmsg_pid != l_pid || (int)l_hdr->nlmsg_seq != p_socket) + { + continue; + } + + if(l_hdr->nlmsg_type == NLMSG_DONE) + { + break; + } + + if(l_hdr->nlmsg_type == RTM_NEWLINK) + { + ++l_links; + } + } + } + + return l_links; +} + +int getifaddrs(struct ifaddrs **ifap) +{ + if(!ifap) + { + return -1; + } + *ifap = NULL; + + int l_socket = netlink_socket(); + if(l_socket < 0) + { + return -1; + } + + NetlinkList *l_linkResults = getResultList(l_socket, RTM_GETLINK); + 
if(!l_linkResults) + { + close(l_socket); + return -1; + } + + NetlinkList *l_addrResults = getResultList(l_socket, RTM_GETADDR); + if(!l_addrResults) + { + close(l_socket); + freeResultList(l_linkResults); + return -1; + } + + unsigned l_numLinks = countLinks(l_socket, l_linkResults) + countLinks(l_socket, l_addrResults); + struct ifaddrs *l_links[l_numLinks]; + memset(l_links, 0, l_numLinks * sizeof(struct ifaddrs *)); + + interpret(l_socket, l_linkResults, l_links, ifap); + interpret(l_socket, l_addrResults, l_links, ifap); + + freeResultList(l_linkResults); + freeResultList(l_addrResults); + close(l_socket); + return 0; +} + +void freeifaddrs(struct ifaddrs *ifa) +{ + struct ifaddrs *l_cur; + while(ifa) + { + l_cur = ifa; + ifa = ifa->ifa_next; + free(l_cur); + } +} diff --git a/src/include/ifaddrs.h b/src/include/ifaddrs.h new file mode 100644 index 000000000..9cd19fec1 --- /dev/null +++ b/src/include/ifaddrs.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 1995, 1999 + * Berkeley Software Design, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY Berkeley Software Design, Inc. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL Berkeley Software Design, Inc. 
BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * BSDI ifaddrs.h,v 2.5 2000/02/23 14:51:59 dab Exp + */ + +#ifndef _IFADDRS_H_ +#define _IFADDRS_H_ + +struct ifaddrs { + struct ifaddrs *ifa_next; + char *ifa_name; + unsigned int ifa_flags; + struct sockaddr *ifa_addr; + struct sockaddr *ifa_netmask; + struct sockaddr *ifa_dstaddr; + void *ifa_data; +}; + +/* + * This may have been defined in . Note that if is + * to be included it must be included before this header file. + */ +#ifndef ifa_broadaddr +#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ +#endif + +#include + +__BEGIN_DECLS +extern int getifaddrs(struct ifaddrs **ifap); +extern void freeifaddrs(struct ifaddrs *ifa); +__END_DECLS + +#endif diff --git a/src/include/lxcmntent.c b/src/include/lxcmntent.c index 22b0c123f..dcb34a2f6 100644 --- a/src/include/lxcmntent.c +++ b/src/include/lxcmntent.c @@ -1,3 +1,23 @@ +/* Utilities for reading/writing fstab, mtab, etc. + Copyright (C) 1995-2000, 2001, 2002, 2003, 2006 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #include #include #include diff --git a/src/include/lxcmntent.h b/src/include/lxcmntent.h index 528e8bc05..418322c0c 100644 --- a/src/include/lxcmntent.h +++ b/src/include/lxcmntent.h @@ -1,3 +1,23 @@ +/* Utilities for reading/writing fstab, mtab, etc. + Copyright (C) 1995-2000, 2001, 2002, 2003, 2006 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #ifndef _lxcmntent_h #define _lxcmntent_h @@ -15,6 +35,7 @@ struct mntent }; extern struct mntent *getmntent (FILE *stream); +extern struct mntent *getmntent_r (FILE *stream, struct mntent *mp, char *buffer, int bufsiz); #endif #ifndef HAVE_SETMNTENT diff --git a/src/include/openpty.c b/src/include/openpty.c index 0c1fecc14..01579c517 100644 --- a/src/include/openpty.c +++ b/src/include/openpty.c @@ -1,21 +1,25 @@ -/* Copyright (C) 1998, 1999, 2004 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Zack Weinberg , 1998. 
+ /* + * openpty: glibc implementation + * + * Copyright (C) 1998, 1999, 2004 Free Software Foundation, Inc. + * + * Authors: + * Zack Weinberg , 1998. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ #define _XOPEN_SOURCE /* See feature_test_macros(7) */ #include diff --git a/src/include/openpty.h b/src/include/openpty.h index f5fa152ec..ddc9c8113 100644 --- a/src/include/openpty.h +++ b/src/include/openpty.h @@ -1,3 +1,26 @@ +/* + * openpty: glibc implementation + * + * Copyright (C) 1998, 1999, 2004 Free Software Foundation, Inc. 
+ * + * Authors: + * Zack Weinberg , 1998. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #ifndef _openpty_h #define _openpty_h diff --git a/src/lua-lxc/Makefile.am b/src/lua-lxc/Makefile.am index 82dbae843..11eabfdc8 100644 --- a/src/lua-lxc/Makefile.am +++ b/src/lua-lxc/Makefile.am @@ -1,7 +1,7 @@ if ENABLE_LUA -luadir=$(datadir)/lua/5.1 -sodir=$(libdir)/lua/5.1/lxc +luadir=$(LUA_INSTALL_LMOD) +sodir=$(LUA_INSTALL_CMOD)/lxc lua_SCRIPTS=lxc.lua diff --git a/src/lua-lxc/core.c b/src/lua-lxc/core.c index 364178685..002e8bf66 100644 --- a/src/lua-lxc/core.c +++ b/src/lua-lxc/core.c @@ -6,27 +6,37 @@ * Authors: * Dwight Engen * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define LUA_LIB #define _GNU_SOURCE #include #include +#include #include +#include +#include #include +#if LUA_VERSION_NUM < 502 +#define luaL_newlib(L,l) (lua_newtable(L), luaL_register(L,NULL,l)) +#define luaL_setfuncs(L,l,n) (assert(n==0), luaL_register(L,NULL,l)) +#define luaL_checkunsigned(L,n) luaL_checknumber(L,n) +#endif + #ifdef NO_CHECK_UDATA #define checkudata(L,i,tname) lua_touserdata(L, i) #else @@ -111,7 +121,7 @@ static int container_create(lua_State *L) argv[i] = strdupa(luaL_checkstring(L, i+3)); argv[i] = NULL; - lua_pushboolean(L, !!c->create(c, template_name, argv)); + lua_pushboolean(L, !!c->create(c, template_name, NULL, NULL, 0, argv)); return 1; } @@ -372,10 +382,24 @@ static int lxc_default_config_path_get(lua_State *L) { return 1; } +/* utility functions */ +static int lxc_util_usleep(lua_State *L) { + usleep((useconds_t)luaL_checkunsigned(L, 1)); + return 0; +} + +static int lxc_util_dirname(lua_State *L) { + char *path = strdupa(luaL_checkstring(L, 1)); + lua_pushstring(L, 
dirname(path)); + return 1; +} + static luaL_Reg lxc_lib_methods[] = { {"version_get", lxc_version_get}, {"default_config_path_get", lxc_default_config_path_get}, {"container_new", container_new}, + {"usleep", lxc_util_usleep}, + {"dirname", lxc_util_dirname}, {NULL, NULL} }; @@ -388,7 +412,7 @@ static int lxc_lib_uninit(lua_State *L) { LUALIB_API int luaopen_lxc_core(lua_State *L) { /* this is where we would initialize liblxc.so if we needed to */ - luaL_register(L, "lxc", lxc_lib_methods); + luaL_newlib(L, lxc_lib_methods); lua_newuserdata(L, 0); lua_newtable(L); /* metatable */ @@ -400,12 +424,12 @@ LUALIB_API int luaopen_lxc_core(lua_State *L) { lua_rawset(L, -3); luaL_newmetatable(L, CONTAINER_TYPENAME); + luaL_setfuncs(L, lxc_container_methods, 0); lua_pushvalue(L, -1); /* push metatable */ lua_pushstring(L, "__gc"); lua_pushcfunction(L, container_gc); lua_settable(L, -3); lua_setfield(L, -2, "__index"); /* metatable.__index = metatable */ - luaL_register(L, NULL, lxc_container_methods); lua_pop(L, 1); return 1; } diff --git a/src/lua-lxc/lxc.lua b/src/lua-lxc/lxc.lua index b6bc344d9..aa80a95c9 100755 --- a/src/lua-lxc/lxc.lua +++ b/src/lua-lxc/lxc.lua @@ -6,18 +6,19 @@ -- Authors: -- Dwight Engen -- --- This library is free software; you can redistribute it and/or modify --- it under the terms of the GNU General Public License version 2, as --- published by the Free Software Foundation. +-- This library is free software; you can redistribute it and/or +-- modify it under the terms of the GNU Lesser General Public +-- License as published by the Free Software Foundation; either +-- version 2.1 of the License, or (at your option) any later version. -- --- This program is distributed in the hope that it will be useful, --- but WITHOUT ANY WARRANTY; without even the implied warranty of --- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the --- GNU General Public License for more details. 
+-- This library is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +-- Lesser General Public License for more details. -- --- You should have received a copy of the GNU General Public License along --- with this program; if not, write to the Free Software Foundation, Inc., --- 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +-- You should have received a copy of the GNU Lesser General Public +-- License along with this library; if not, write to the Free Software +-- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -- local core = require("lxc.core") @@ -31,6 +32,11 @@ local lxc_path local cgroup_path local log_level = 3 +-- lua 5.1 compat +if table.unpack == nil then + table.unpack = unpack +end + -- the following two functions can be useful for debugging function printf(...) local function wrapper(...) io.write(string.format(...)) end @@ -64,22 +70,6 @@ function string:split(delim, max_cols) return cols end -function dirname(path) - local f,output - f = io.popen("dirname " .. path) - output = f:read('*all') - f:close() - return output:sub(1,-2) -end - -function basename(path, suffix) - local f,output - f = io.popen("basename " .. path .. " " .. 
(suffix or "")) - output = f:read('*all') - f:close() - return output:sub(1,-2) -end - function cgroup_path_get() local f,line,cgroup_path @@ -88,9 +78,12 @@ function cgroup_path_get() while true do local c line = f:read() + if line == nil then + break + end c = line:split(" ", 6) if (c[1] == "cgroup") then - cgroup_path = dirname(c[2]) + cgroup_path = core.dirname(c[2]) break end end @@ -261,7 +254,7 @@ end -- methods for stats collection from various cgroup files -- read integers at given coordinates from a cgroup file function container:stat_get_ints(controller, item, coords) - local f = io.open(cgroup_path.."/"..controller.."/lxc/"..self.ctname.."/"..item, "r") + local f = io.open(cgroup_path.."/"..controller.."/"..self.ctname.."/"..item, "r") local lines = {} local result = {} @@ -282,12 +275,12 @@ function container:stat_get_ints(controller, item, coords) table.insert(result, val) end end - return unpack(result) + return table.unpack(result) end -- read an integer from a cgroup file function container:stat_get_int(controller, item) - local f = io.open(cgroup_path.."/"..controller.."/lxc/"..self.ctname.."/"..item, "r") + local f = io.open(cgroup_path.."/"..controller.."/"..self.ctname.."/"..item, "r") if (not f) then return 0 end @@ -301,36 +294,23 @@ end function container:stat_match_get_int(controller, item, match, column) local val - local f = io.open(cgroup_path.."/"..controller.."/lxc/"..self.ctname.."/"..item, "r") + local f = io.open(cgroup_path.."/"..controller.."/"..self.ctname.."/"..item, "r") if (not f) then return 0 end for line in f:lines() do - printf("matching line:%s with match:%s\n", line, match) if (string.find(line, match)) then local col col = line:split(" ", 80) val = tonumber(col[column]) or 0 - printf("found line!! 
val:%d\n", val) end end f:close() return val end -function stats_clear(stat) - stat.mem_used = 0 - stat.mem_limit = 0 - stat.memsw_used = 0 - stat.memsw_limit = 0 - stat.cpu_use_nanos = 0 - stat.cpu_use_user = 0 - stat.cpu_use_sys = 0 - stat.blkio = 0 -end - function container:stats_get(total) local stat = {} stat.mem_used = self:stat_get_int("memory", "memory.usage_in_bytes") @@ -355,10 +335,32 @@ function container:stats_get(total) return stat end +local M = { container = container } +function M.stats_clear(stat) + stat.mem_used = 0 + stat.mem_limit = 0 + stat.memsw_used = 0 + stat.memsw_limit = 0 + stat.cpu_use_nanos = 0 + stat.cpu_use_user = 0 + stat.cpu_use_sys = 0 + stat.blkio = 0 +end + +function M.stats_clear(stat) + stat.mem_used = 0 + stat.mem_limit = 0 + stat.memsw_used = 0 + stat.memsw_limit = 0 + stat.cpu_use_nanos = 0 + stat.cpu_use_user = 0 + stat.cpu_use_sys = 0 + stat.blkio = 0 +end -- return configured containers found in LXC_PATH directory -function containers_configured(names_only) +function M.containers_configured(names_only) local containers = {} for dir in lfs.dir(lxc_path) do @@ -386,39 +388,29 @@ function containers_configured(names_only) end -- return running containers found in cgroup fs -function containers_running(names_only) +function M.containers_running(names_only) local containers = {} - local attr + local names = M.containers_configured(true) - -- the lxc directory won't exist if no containers has ever been started - attr = lfs.attributes(cgroup_path .. "/cpu/lxc") - if (not attr) then - return containers - end - - for file in lfs.dir(cgroup_path .. "/cpu/lxc") do - if (file ~= "." and file ~= "..") - then - local pathfile = cgroup_path .. "/cpu/lxc/" .. 
file - local attr = lfs.attributes(pathfile) - - if (attr.mode == "directory") then + for _,name in ipairs(names) do + local ct = container:new(name) + if ct:running() then + -- note, this is a "mixed" table, ie both dictionary and list + table.insert(containers, name) if (names_only) then - -- note, this is a "mixed" table, ie both dictionary and list - containers[file] = true - table.insert(containers, file) + containers[name] = true + ct = nil else - local ct = container:new(file) - -- note, this is a "mixed" table, ie both dictionary and list - containers[file] = ct - table.insert(containers, file) + containers[name] = ct end - end end end + table.sort(containers, function (a,b) return (a < b) end) return containers end lxc_path = core.default_config_path_get() cgroup_path = cgroup_path_get() + +return M diff --git a/src/lxc/Makefile.am b/src/lxc/Makefile.am index ebeca4663..da5ff159a 100644 --- a/src/lxc/Makefile.am +++ b/src/lxc/Makefile.am @@ -1,25 +1,28 @@ pkginclude_HEADERS = \ arguments.h \ - start.h \ - console.h \ - error.h \ - monitor.h \ - utils.h \ - namespace.h \ + attach.h \ + attach_options.h \ + bdev.h \ caps.h \ - lxc.h \ cgroup.h \ conf.h \ + console.h \ + error.h \ list.h \ log.h \ - state.h \ - attach.h \ lxccontainer.h \ + lxc.h \ lxclock.h \ + monitor.h \ + namespace.h \ + start.h \ + state.h \ + utils.h \ version.h if IS_BIONIC pkginclude_HEADERS += \ + ../include/ifaddrs.h \ ../include/openpty.h \ ../include/lxcmntent.h endif @@ -36,9 +39,9 @@ so_PROGRAMS = liblxc.so liblxc_so_SOURCES = \ arguments.c arguments.h \ + bdev.c bdev.h \ commands.c commands.h \ start.c start.h \ - stop.c \ execute.c \ monitor.c monitor.h \ console.c \ @@ -60,9 +63,9 @@ liblxc_so_SOURCES = \ attach.c attach.h \ \ network.c network.h \ - nl.c nl.h \ - rtnl.c rtnl.h \ - genl.c genl.h \ + nl.c nl.h \ + rtnl.c rtnl.h \ + genl.c genl.h \ \ caps.c caps.h \ lxcseccomp.h \ @@ -77,6 +80,7 @@ liblxc_so_SOURCES = \ if IS_BIONIC liblxc_so_SOURCES += \ + ../include/ifaddrs.c 
../include/ifaddrs.h \ ../include/openpty.c ../include/openpty.h \ ../include/lxcmntent.c ../include/lxcmntent.h endif @@ -93,12 +97,19 @@ AM_CFLAGS=-I$(top_srcdir)/src \ -DLXC_GLOBAL_CONF=\"$(LXC_GLOBAL_CONF)\" \ -DLXCINITDIR=\"$(LXCINITDIR)\" \ -DLXCTEMPLATEDIR=\"$(LXCTEMPLATEDIR)\" \ - -DLOGPATH=\"$(LOGPATH)\" + -DLOGPATH=\"$(LOGPATH)\" \ + -DLXC_DEFAULT_CONFIG=\"$(LXC_DEFAULT_CONFIG)\" \ + -DLXC_USERNIC_DB=\"$(LXC_USERNIC_DB)\" \ + -DLXC_USERNIC_CONF=\"$(LXC_USERNIC_CONF)\" if ENABLE_APPARMOR AM_CFLAGS += -DHAVE_APPARMOR endif +if HAVE_NEWUIDMAP +AM_CFLAGS += -DHAVE_NEWUIDMAP +endif + if USE_CONFIGPATH_LOGS AM_CFLAGS += -DUSE_CONFIGPATH_LOGS endif @@ -120,11 +131,7 @@ bin_SCRIPTS = \ lxc-ps \ lxc-netstat \ lxc-checkconfig \ - lxc-version \ - lxc-create \ - lxc-clone \ - lxc-shutdown \ - lxc-destroy + lxc-version EXTRA_DIST = \ lxc-device \ @@ -132,15 +139,15 @@ EXTRA_DIST = \ lxc-top if ENABLE_PYTHON - bin_SCRIPTS += lxc-device - bin_SCRIPTS += lxc-ls - bin_SCRIPTS += lxc-start-ephemeral +bin_SCRIPTS += lxc-device +bin_SCRIPTS += lxc-ls +bin_SCRIPTS += lxc-start-ephemeral else - bin_SCRIPTS += legacy/lxc-ls +bin_SCRIPTS += legacy/lxc-ls endif if ENABLE_LUA - bin_SCRIPTS += lxc-top +bin_SCRIPTS += lxc-top endif bin_PROGRAMS = \ @@ -148,8 +155,10 @@ bin_PROGRAMS = \ lxc-unshare \ lxc-stop \ lxc-start \ + lxc-clone \ lxc-execute \ lxc-monitor \ + lxc-monitord \ lxc-wait \ lxc-console \ lxc-freeze \ @@ -158,14 +167,19 @@ bin_PROGRAMS = \ lxc-unfreeze \ lxc-checkpoint \ lxc-restart \ - lxc-kill + lxc-kill \ + lxc-config \ + lxc-destroy \ + lxc-create \ + lxc-user-nic + +if HAVE_NEWUIDMAP +bin_PROGRAMS += lxc-usernsexec +endif pkglibexec_PROGRAMS = \ lxc-init -#pkglibexec_SCRIPTS = \ -# lxc.functions - AM_LDFLAGS = -Wl,-E if ENABLE_RPATH AM_LDFLAGS += -Wl,-rpath -Wl,$(libdir) @@ -175,19 +189,26 @@ LDADD=liblxc.so @CAP_LIBS@ @APPARMOR_LIBS@ @SECCOMP_LIBS@ lxc_attach_SOURCES = lxc_attach.c lxc_cgroup_SOURCES = lxc_cgroup.c lxc_checkpoint_SOURCES = lxc_checkpoint.c 
+lxc_config_SOURCES = lxc_config.c lxc_console_SOURCES = lxc_console.c +lxc_destroy_SOURCES = lxc_destroy.c lxc_execute_SOURCES = lxc_execute.c lxc_freeze_SOURCES = lxc_freeze.c lxc_info_SOURCES = lxc_info.c lxc_init_SOURCES = lxc_init.c lxc_monitor_SOURCES = lxc_monitor.c +lxc_monitord_SOURCES = lxc_monitord.c lxc_restart_SOURCES = lxc_restart.c +lxc_clone_SOURCES = lxc_clone.c lxc_start_SOURCES = lxc_start.c lxc_stop_SOURCES = lxc_stop.c lxc_unfreeze_SOURCES = lxc_unfreeze.c lxc_unshare_SOURCES = lxc_unshare.c lxc_wait_SOURCES = lxc_wait.c lxc_kill_SOURCES = lxc_kill.c +lxc_create_SOURCES = lxc_create.c +lxc_usernsexec_SOURCES = lxc_usernsexec.c +lxc_user_nic_SOURCES = lxc_user_nic.c install-exec-local: install-soPROGRAMS mkdir -p $(DESTDIR)$(datadir)/lxc diff --git a/src/lxc/af_unix.c b/src/lxc/af_unix.c index 6c393359e..204658d32 100644 --- a/src/lxc/af_unix.c +++ b/src/lxc/af_unix.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -36,6 +36,7 @@ lxc_log_define(lxc_af_unix, lxc); int lxc_af_unix_open(const char *path, int type, int flags) { int fd; + size_t len; struct sockaddr_un addr; if (flags & O_TRUNC) @@ -52,8 +53,16 @@ int lxc_af_unix_open(const char *path, int type, int flags) addr.sun_family = AF_UNIX; /* copy entire buffer in case of abstract socket */ - memcpy(addr.sun_path, path, - path[0]?strlen(path):sizeof(addr.sun_path)); + len = sizeof(addr.sun_path); + if (path[0]) { + len = strlen(path); + if (len >= sizeof(addr.sun_path)) { + close(fd); + errno = ENAMETOOLONG; + return -1; + } + } + memcpy(addr.sun_path, path, len); if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) { int tmp = errno; @@ -61,7 +70,7 @@ int lxc_af_unix_open(const char *path, int type, int 
flags) errno = tmp; return -1; } - + if (type == SOCK_STREAM && listen(fd, 100)) { int tmp = errno; close(fd); @@ -75,8 +84,8 @@ int lxc_af_unix_open(const char *path, int type, int flags) int lxc_af_unix_close(int fd) { struct sockaddr_un addr; - socklen_t addrlen; - + socklen_t addrlen = sizeof(addr); + if (!getsockname(fd, (struct sockaddr *)&addr, &addrlen) && addr.sun_path[0]) unlink(addr.sun_path); diff --git a/src/lxc/af_unix.h b/src/lxc/af_unix.h index ff21c7be5..6bc253dd0 100644 --- a/src/lxc/af_unix.h +++ b/src/lxc/af_unix.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ extern int lxc_af_unix_open(const char *path, int type, int flags); diff --git a/src/lxc/apparmor.c b/src/lxc/apparmor.c index 11f32d550..3941d3f75 100644 --- a/src/lxc/apparmor.c +++ b/src/lxc/apparmor.c @@ -1,3 +1,23 @@ +/* apparmor + * + * Copyright © 2012 Serge Hallyn . + * Copyright © 2012 Canonical Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #include #include #include @@ -36,16 +56,19 @@ again: f = fopen(path, "r"); if (!f) { SYSERROR("opening %s\n", path); + if (buf) + free(buf); return NULL; } sz += 1024; buf = realloc(buf, sz); + memset(buf, 0, sz); if (!buf) { ERROR("out of memory"); fclose(f); return NULL; } - ret = fread(buf, 1, sz, f); + ret = fread(buf, 1, sz - 1, f); fclose(f); if (ret >= sz) goto again; diff --git a/src/lxc/apparmor.h b/src/lxc/apparmor.h index d71447864..e27a72859 100644 --- a/src/lxc/apparmor.h +++ b/src/lxc/apparmor.h @@ -1,3 +1,23 @@ +/* apparmor + * + * Copyright © 2012 Serge Hallyn . + * Copyright © 2012 Canonical Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #include /* for lxc_handler */ #include diff --git a/src/lxc/arguments.c b/src/lxc/arguments.c index f61c6eb84..c35dfd896 100644 --- a/src/lxc/arguments.c +++ b/src/lxc/arguments.c @@ -19,7 +19,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -147,16 +147,37 @@ for any corresponding short options.\n\ See the %s man page for further information.\n\n", args->progname, args->help, args->progname); + if (args->helpfn) + args->helpfn(args); exit(code); } +static int lxc_arguments_lxcpath_add(struct lxc_arguments *args, + const char *lxcpath) +{ + if (args->lxcpath_additional != -1 && + args->lxcpath_cnt > args->lxcpath_additional) { + fprintf(stderr, "This command only accepts %d -P,--lxcpath arguments\n", + args->lxcpath_additional + 1); + exit(EXIT_FAILURE); + } + + args->lxcpath = realloc(args->lxcpath, (args->lxcpath_cnt + 1) * + sizeof(args->lxcpath[0])); + if (args->lxcpath == NULL) { + lxc_error(args, "no memory"); + return -ENOMEM; + } + args->lxcpath[args->lxcpath_cnt++] = lxcpath; + return 0; +} + extern int lxc_arguments_parse(struct lxc_arguments *args, int argc, char * const argv[]) { char shortopts[256]; int ret = 0; - args->lxcpath = default_lxc_path(); ret = build_shortopts(args->options, shortopts, sizeof(shortopts)); if (ret < 0) { lxc_error(args, "build_shortopts() failed : %s", @@ -174,9 +195,12 @@ extern int lxc_arguments_parse(struct lxc_arguments *args, case 'n': args->name = optarg; break; case 'o': args->log_file = optarg; break; case 'l': 
args->log_priority = optarg; break; - case 'c': args->console = optarg; break; case 'q': args->quiet = 1; break; - case 'P': args->lxcpath = optarg; break; + case 'P': + ret = lxc_arguments_lxcpath_add(args, optarg); + if (ret < 0) + return ret; + break; case OPT_USAGE: print_usage(args->options, args); case '?': print_help(args, 1); case 'h': print_help(args, 0); @@ -195,6 +219,13 @@ extern int lxc_arguments_parse(struct lxc_arguments *args, args->argv = &argv[optind]; args->argc = argc - optind; + /* If no lxcpaths were given, use default */ + if (!args->lxcpath_cnt) { + ret = lxc_arguments_lxcpath_add(args, default_lxc_path()); + if (ret < 0) + return ret; + } + /* Check the command options */ if (!args->name) { diff --git a/src/lxc/arguments.h b/src/lxc/arguments.h index 6f6826b75..7da7d5fdc 100644 --- a/src/lxc/arguments.h +++ b/src/lxc/arguments.h @@ -19,7 +19,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __arguments_h #define __arguments_h @@ -33,6 +33,7 @@ typedef int (*lxc_arguments_checker_t) (const struct lxc_arguments *); struct lxc_arguments { const char *help; + void(*helpfn)(const struct lxc_arguments *); const char *progname; const struct option* options; lxc_arguments_parser_t parser; @@ -47,7 +48,10 @@ struct lxc_arguments { const char *console; const char *console_log; const char *pidfile; - const char *lxcpath; + const char **lxcpath; + int lxcpath_cnt; + /* set to 0 to accept only 1 lxcpath, -1 for unlimited */ + int lxcpath_additional; /* for lxc-checkpoint/restart */ const char *statefile; @@ -58,13 +62,27 @@ struct lxc_arguments { int ttynum; char escape; - /* for lxc-wait */ + /* for lxc-wait and lxc-shutdown */ char *states; long timeout; + int nowait; + int reboot; + int 
hardstop; + int shutdown; + + /* for lxc-destroy */ + int force; /* close fds from parent? */ int close_all_fds; + /* lxc-create */ + char *bdevtype, *configfile, *template; + char *fstype; + unsigned long fssize; + char *lvname, *vgname; + char *zfsroot, *lowerdir, *dir; + /* remaining arguments */ char *const *argv; int argc; diff --git a/src/lxc/attach.c b/src/lxc/attach.c index 5b3ee4fc1..413b78bc5 100644 --- a/src/lxc/attach.c +++ b/src/lxc/attach.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -46,42 +47,20 @@ #include "caps.h" #include "config.h" #include "apparmor.h" +#include "utils.h" +#include "commands.h" +#include "cgroup.h" + +#if HAVE_SYS_PERSONALITY_H +#include +#endif + +#ifndef SOCK_CLOEXEC +# define SOCK_CLOEXEC 02000000 +#endif lxc_log_define(lxc_attach, lxc); -/* Define setns() if missing from the C library */ -#ifndef HAVE_SETNS -static int setns(int fd, int nstype) -{ -#ifdef __NR_setns -return syscall(__NR_setns, fd, nstype); -#else -errno = ENOSYS; -return -1; -#endif -} -#endif - -/* Define unshare() if missing from the C library */ -#ifndef HAVE_UNSHARE -static int unshare(int flags) -{ -#ifdef __NR_unshare -return syscall(__NR_unshare, flags); -#else -errno = ENOSYS; -return -1; -#endif -} -#endif - -/* Define getline() if missing from the C library */ -#ifndef HAVE_GETLINE -#ifdef HAVE_FGETLN -#include <../include/getline.h> -#endif -#endif - struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid) { struct lxc_proc_context_info *info = calloc(1, sizeof(*info)); @@ -114,6 +93,8 @@ struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid) } } + if 
(line) + free(line); fclose(proc_file); if (!found) { @@ -145,7 +126,6 @@ struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid) out_error: free(info); - free(line); return NULL; } @@ -182,7 +162,7 @@ int lxc_attach_to_ns(pid_t pid, int which) } snprintf(path, MAXPATHLEN, "/proc/%d/ns/%s", pid, ns[i]); - fd[i] = open(path, O_RDONLY); + fd[i] = open(path, O_RDONLY | O_CLOEXEC); if (fd[i] < 0) { saved_errno = errno; @@ -279,23 +259,83 @@ int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx) int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra_env, char** extra_keep) { - /* TODO: implement extra_env, extra_keep - * Rationale: - * - extra_env is an array of strings of the form - * "VAR=VALUE", which are to be set (after clearing or not, - * depending on the value of the policy variable) - * - extra_keep is an array of strings of the form - * "VAR", which are extra environment variables to be kept - * around after clearing (if that is done, otherwise, the - * remain anyway) - */ - (void) extra_env; - (void) extra_keep; - if (policy == LXC_ATTACH_CLEAR_ENV) { + char **extra_keep_store = NULL; + int path_kept = 0; + + if (extra_keep) { + size_t count, i; + + for (count = 0; extra_keep[count]; count++); + + extra_keep_store = calloc(count, sizeof(char *)); + if (!extra_keep_store) { + SYSERROR("failed to allocate memory for storing current " + "environment variable values that will be kept"); + return -1; + } + for (i = 0; i < count; i++) { + char *v = getenv(extra_keep[i]); + if (v) { + extra_keep_store[i] = strdup(v); + if (!extra_keep_store[i]) { + SYSERROR("failed to allocate memory for storing current " + "environment variable values that will be kept"); + while (i > 0) + free(extra_keep_store[--i]); + free(extra_keep_store); + return -1; + } + if (strcmp(extra_keep[i], "PATH") == 0) + path_kept = 1; + } + /* calloc sets entire array to zero, so we don't + * need an else */ + } + } + if (clearenv()) { + char **p; 
SYSERROR("failed to clear environment"); - /* don't error out though */ + if (extra_keep_store) { + for (p = extra_keep_store; *p; p++) + free(*p); + free(extra_keep_store); + } + return -1; + } + + if (extra_keep_store) { + size_t i; + for (i = 0; extra_keep[i]; i++) { + if (extra_keep_store[i]) + setenv(extra_keep[i], extra_keep_store[i], 1); + free(extra_keep_store[i]); + } + free(extra_keep_store); + } + + /* always set a default path; shells and execlp tend + * to be fine without it, but there is a disturbing + * number of C programs out there that just assume + * that getenv("PATH") is never NULL and then die a + * painful segfault death. */ + if (!path_kept) { +#ifdef HAVE_CONFSTR + size_t n; + char *path_env; + + n = confstr(_CS_PATH, NULL, 0); + path_env = malloc(n); + if (path_env) { + confstr(_CS_PATH, path_env, n); + setenv("PATH", path_env, 1); + free(path_env); + } + /* don't error out, this is just an extra service */ +#else + setenv("PATH", "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", 1); +#endif } } @@ -304,6 +344,24 @@ int lxc_attach_set_environment(enum lxc_attach_env_policy_t policy, char** extra return -1; } + /* set extra environment variables */ + if (extra_env) { + for (; *extra_env; extra_env++) { + /* duplicate the string, just to be on + * the safe side, because putenv does not + * do it for us */ + char *p = strdup(*extra_env); + /* we just assume the user knows what they + * are doing, so we don't do any checks */ + if (!p) { + SYSERROR("failed to allocate memory for additional environment " + "variables"); + return -1; + } + putenv(p); + } + } + return 0; } @@ -358,7 +416,7 @@ char *lxc_attach_getpwshell(uid_t uid) continue; /* trim line on the right hand side */ - for (i = strlen(line); line && i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i) + for (i = strlen(line); i > 0 && (line[i - 1] == '\n' || line[i - 1] == '\r'); --i) line[i - 1] = '\0'; /* split into tokens: first user name */ @@ -507,3 +565,492 
@@ void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid) /* TODO: we should also parse supplementary groups and use * setgroups() to set them */ } + +struct attach_clone_payload { + int ipc_socket; + lxc_attach_options_t* options; + struct lxc_proc_context_info* init_ctx; + lxc_attach_exec_t exec_function; + void* exec_payload; +}; + +static int attach_child_main(void* data); + +/* help the optimizer along if it doesn't know that exit always exits */ +#define rexit(c) do { int __c = (c); exit(__c); return __c; } while(0) + +/* define default options if no options are supplied by the user */ +static lxc_attach_options_t attach_static_default_options = LXC_ATTACH_OPTIONS_DEFAULT; + +int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process) +{ + int ret, status; + pid_t init_pid, pid, attached_pid; + struct lxc_proc_context_info *init_ctx; + char* cwd; + char* new_cwd; + int ipc_sockets[2]; + + if (!options) + options = &attach_static_default_options; + + init_pid = lxc_cmd_get_init_pid(name, lxcpath); + if (init_pid < 0) { + ERROR("failed to get the init pid"); + return -1; + } + + init_ctx = lxc_proc_get_context_info(init_pid); + if (!init_ctx) { + ERROR("failed to get context of the init process, pid = %ld", (long)init_pid); + return -1; + } + + cwd = getcwd(NULL, 0); + + /* determine which namespaces the container was created with + * by asking lxc-start, if necessary + */ + if (options->namespaces == -1) { + options->namespaces = lxc_cmd_get_clone_flags(name, lxcpath); + /* call failed */ + if (options->namespaces == -1) { + ERROR("failed to automatically determine the " + "namespaces which the container unshared"); + free(cwd); + free(init_ctx->aa_profile); + free(init_ctx); + return -1; + } + } + + /* create a socket pair for IPC communication; set SOCK_CLOEXEC in order + * to make sure we don't irritate other threads that want to fork+exec away 
+ * + * IMPORTANT: if the initial process is multithreaded and another call + * just fork()s away without exec'ing directly after, the socket fd will + * exist in the forked process from the other thread and any close() in + * our own child process will not really cause the socket to close properly, + * potentially causing the parent to hang. + * + * For this reason, while IPC is still active, we have to use shutdown() + * if the child exits prematurely in order to signal that the socket + * is closed and cannot assume that the child exiting will automatically + * do that. + * + * IPC mechanism: (X is receiver) + * initial process intermediate attached + * X <--- send pid of + * attached proc, + * then exit + * send 0 ------------------------------------> X + * [do initialization] + * X <------------------------------------ send 1 + * [add to cgroup, ...] + * send 2 ------------------------------------> X + * close socket close socket + * run program + */ + ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + if (ret < 0) { + SYSERROR("could not set up required IPC mechanism for attaching"); + free(cwd); + free(init_ctx->aa_profile); + free(init_ctx); + return -1; + } + + /* create intermediate subprocess, three reasons: + * 1. runs all pthread_atfork handlers and the + * child will no longer be threaded + * (we can't properly setns() in a threaded process) + * 2. we can't setns() in the child itself, since + * we want to make sure we are properly attached to + * the pidns + * 3. 
also, the initial thread has to put the attached + * process into the cgroup, which we can only do if + * we didn't already setns() (otherwise, user + * namespaces will hate us) + */ + pid = fork(); + + if (pid < 0) { + SYSERROR("failed to create first subprocess"); + free(cwd); + free(init_ctx->aa_profile); + free(init_ctx); + return -1; + } + + if (pid) { + pid_t to_cleanup_pid = pid; + int expected = 0; + + /* initial thread, we close the socket that is for the + * subprocesses + */ + close(ipc_sockets[1]); + free(cwd); + + /* get pid from intermediate process */ + ret = lxc_read_nointr_expect(ipc_sockets[0], &attached_pid, sizeof(attached_pid), NULL); + if (ret <= 0) { + if (ret != 0) + ERROR("error using IPC to receive pid of attached process"); + goto cleanup_error; + } + + /* reap intermediate process */ + ret = wait_for_pid(pid); + if (ret < 0) + goto cleanup_error; + + /* we will always have to reap the grandchild now */ + to_cleanup_pid = attached_pid; + + /* tell attached process it may start initializing */ + status = 0; + ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status)); + if (ret <= 0) { + ERROR("error using IPC to notify attached process for initialization (0)"); + goto cleanup_error; + } + + /* wait for the attached process to finish initializing */ + expected = 1; + ret = lxc_read_nointr_expect(ipc_sockets[0], &status, sizeof(status), &expected); + if (ret <= 0) { + if (ret != 0) + ERROR("error using IPC to receive notification from attached process (1)"); + goto cleanup_error; + } + + /* attach to cgroup, if requested */ + if (options->attach_flags & LXC_ATTACH_MOVE_TO_CGROUP) { + ret = lxc_cgroup_attach(attached_pid, name, lxcpath); + if (ret < 0) { + ERROR("could not move attached process %ld to cgroup of container", (long)attached_pid); + goto cleanup_error; + } + } + + /* tell attached process we're done */ + status = 2; + ret = lxc_write_nointr(ipc_sockets[0], &status, sizeof(status)); + if (ret <= 0) { + ERROR("error using IPC 
to notify attached process for initialization (2)"); + goto cleanup_error; + } + + /* now shut down communication with child, we're done */ + shutdown(ipc_sockets[0], SHUT_RDWR); + close(ipc_sockets[0]); + free(init_ctx->aa_profile); + free(init_ctx); + + /* we're done, the child process should now execute whatever + * it is that the user requested. The parent can now track it + * with waitpid() or similar. + */ + + *attached_process = attached_pid; + return 0; + + cleanup_error: + /* first shut down the socket, then wait for the pid, + * otherwise the pid we're waiting for may never exit + */ + shutdown(ipc_sockets[0], SHUT_RDWR); + close(ipc_sockets[0]); + if (to_cleanup_pid) + (void) wait_for_pid(to_cleanup_pid); + free(init_ctx->aa_profile); + free(init_ctx); + return -1; + } + + /* first subprocess begins here, we close the socket that is for the + * initial thread + */ + close(ipc_sockets[0]); + + /* attach now, create another subprocess later, since pid namespaces + * only really affect the children of the current process + */ + ret = lxc_attach_to_ns(init_pid, options->namespaces); + if (ret < 0) { + ERROR("failed to enter the namespace"); + shutdown(ipc_sockets[1], SHUT_RDWR); + rexit(-1); + } + + /* attach succeeded, try to cwd */ + if (options->initial_cwd) + new_cwd = options->initial_cwd; + else + new_cwd = cwd; + ret = chdir(new_cwd); + if (ret < 0) + WARN("could not change directory to '%s'", new_cwd); + free(cwd); + + /* now create the real child process */ + { + struct attach_clone_payload payload = { + .ipc_socket = ipc_sockets[1], + .options = options, + .init_ctx = init_ctx, + .exec_function = exec_function, + .exec_payload = exec_payload + }; + /* We use clone_parent here to make this subprocess a direct child of + * the initial process. Then this intermediate process can exit and + * the parent can directly track the attached process. 
+ */ + pid = lxc_clone(attach_child_main, &payload, CLONE_PARENT); + } + + /* shouldn't happen, clone() should always return positive pid */ + if (pid <= 0) { + SYSERROR("failed to create subprocess"); + shutdown(ipc_sockets[1], SHUT_RDWR); + rexit(-1); + } + + /* tell grandparent the pid of the newly created child */ + ret = lxc_write_nointr(ipc_sockets[1], &pid, sizeof(pid)); + if (ret != sizeof(pid)) { + /* if this really happens here, this is very unfortunate, since the + * parent will not know the pid of the attached process and will + * not be able to wait for it (and we won't either due to CLONE_PARENT) + * so the parent won't be able to reap it and the attached process + * will remain a zombie + */ + ERROR("error using IPC to notify main process of pid of the attached process"); + shutdown(ipc_sockets[1], SHUT_RDWR); + rexit(-1); + } + + /* the rest is in the hands of the initial and the attached process */ + rexit(0); +} + +int attach_child_main(void* data) +{ + struct attach_clone_payload* payload = (struct attach_clone_payload*)data; + int ipc_socket = payload->ipc_socket; + lxc_attach_options_t* options = payload->options; + struct lxc_proc_context_info* init_ctx = payload->init_ctx; +#if HAVE_SYS_PERSONALITY_H + long new_personality; +#endif + int ret; + int status; + int expected; + long flags; + int fd; + uid_t new_uid; + gid_t new_gid; + + /* wait for the initial thread to signal us that it's ready + * for us to start initializing + */ + expected = 0; + status = -1; + ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected); + if (ret <= 0) { + ERROR("error using IPC to receive notification from initial process (0)"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + + /* load apparmor profile */ + if ((options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_APPARMOR)) { + ret = attach_apparmor(init_ctx->aa_profile); + if (ret < 0) { + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + } + + /* A 
description of the purpose of this functionality is + * provided in the lxc-attach(1) manual page. We have to + * remount here and not in the parent process, otherwise + * /proc may not properly reflect the new pid namespace. + */ + if (!(options->namespaces & CLONE_NEWNS) && (options->attach_flags & LXC_ATTACH_REMOUNT_PROC_SYS)) { + ret = lxc_attach_remount_sys_proc(); + if (ret < 0) { + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + } + + /* now perform additional attachments*/ +#if HAVE_SYS_PERSONALITY_H + if (options->personality < 0) + new_personality = init_ctx->personality; + else + new_personality = options->personality; + + if (options->attach_flags & LXC_ATTACH_SET_PERSONALITY) { + ret = personality(new_personality); + if (ret < 0) { + SYSERROR("could not ensure correct architecture"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + } +#endif + + if (options->attach_flags & LXC_ATTACH_DROP_CAPABILITIES) { + ret = lxc_attach_drop_privs(init_ctx); + if (ret < 0) { + ERROR("could not drop privileges"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + } + + /* always set the environment (specify (LXC_ATTACH_KEEP_ENV, NULL, NULL) if you want this to be a no-op) */ + ret = lxc_attach_set_environment(options->env_policy, options->extra_env_vars, options->extra_keep_env); + if (ret < 0) { + ERROR("could not set initial environment for attached process"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + + /* set user / group id */ + new_uid = 0; + new_gid = 0; + /* ignore errors, we will fall back to root in that case + * (/proc was not mounted etc.) 
+ */ + if (options->namespaces & CLONE_NEWUSER) + lxc_attach_get_init_uidgid(&new_uid, &new_gid); + + if (options->uid != (uid_t)-1) + new_uid = options->uid; + if (options->gid != (gid_t)-1) + new_gid = options->gid; + + /* try to set the uid/gid combination */ + if ((new_gid != 0 || options->namespaces & CLONE_NEWUSER) && setgid(new_gid)) { + SYSERROR("switching to container gid"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + if ((new_uid != 0 || options->namespaces & CLONE_NEWUSER) && setuid(new_uid)) { + SYSERROR("switching to container uid"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + + /* tell initial process it may now put us into the cgroups */ + status = 1; + ret = lxc_write_nointr(ipc_socket, &status, sizeof(status)); + if (ret != sizeof(status)) { + ERROR("error using IPC to notify initial process for initialization (1)"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + + /* wait for the initial thread to signal us that it has done + * everything for us when it comes to cgroups etc. + */ + expected = 2; + status = -1; + ret = lxc_read_nointr_expect(ipc_socket, &status, sizeof(status), &expected); + if (ret <= 0) { + ERROR("error using IPC to receive final notification from initial process (2)"); + shutdown(ipc_socket, SHUT_RDWR); + rexit(-1); + } + + shutdown(ipc_socket, SHUT_RDWR); + close(ipc_socket); + free(init_ctx->aa_profile); + free(init_ctx); + + /* The following is done after the communication socket is + * shut down. That way, all errors that might (though + * unlikely) occur up until this point will have their messages + * printed to the original stderr (if logging is so configured) + * and not the fd the user supplied, if any. 
+ */ + + /* fd handling for stdin, stdout and stderr; + * ignore errors here, user may want to make sure + * the fds are closed, for example */ + if (options->stdin_fd >= 0 && options->stdin_fd != 0) + dup2(options->stdin_fd, 0); + if (options->stdout_fd >= 0 && options->stdout_fd != 1) + dup2(options->stdout_fd, 1); + if (options->stderr_fd >= 0 && options->stderr_fd != 2) + dup2(options->stderr_fd, 2); + + /* close the old fds */ + if (options->stdin_fd > 2) + close(options->stdin_fd); + if (options->stdout_fd > 2) + close(options->stdout_fd); + if (options->stderr_fd > 2) + close(options->stderr_fd); + + /* try to remove CLOEXEC flag from stdin/stdout/stderr, + * but also here, ignore errors */ + for (fd = 0; fd <= 2; fd++) { + flags = fcntl(fd, F_GETFL); + if (flags < 0) + continue; + if (flags & FD_CLOEXEC) + fcntl(fd, F_SETFL, flags & ~FD_CLOEXEC); + } + + /* we're done, so we can now do whatever the user intended us to do */ + rexit(payload->exec_function(payload->exec_payload)); +} + +int lxc_attach_run_command(void* payload) +{ + lxc_attach_command_t* cmd = (lxc_attach_command_t*)payload; + + execvp(cmd->program, cmd->argv); + SYSERROR("failed to exec '%s'", cmd->program); + return -1; +} + +int lxc_attach_run_shell(void* payload) +{ + uid_t uid; + struct passwd *passwd; + char *user_shell; + + /* ignore payload parameter */ + (void)payload; + + uid = getuid(); + passwd = getpwuid(uid); + + /* this probably happens because of incompatible nss + * implementations in host and container (remember, this + * code is still using the host's glibc but our mount + * namespace is in the container) + * we may try to get the information by spawning a + * [getent passwd uid] process and parsing the result + */ + if (!passwd) + user_shell = lxc_attach_getpwshell(uid); + else + user_shell = passwd->pw_shell; + + if (user_shell) + execlp(user_shell, user_shell, NULL); + + /* executed if either no passwd entry or execvp fails, + * we will fall back on /bin/sh as a default 
shell + */ + execlp("/bin/sh", "/bin/sh", NULL); + SYSERROR("failed to exec shell"); + return -1; +} diff --git a/src/lxc/attach.h b/src/lxc/attach.h index 151445ae8..518d08662 100644 --- a/src/lxc/attach.h +++ b/src/lxc/attach.h @@ -18,13 +18,14 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _attach_h #define _attach_h #include +#include "attach_options.h" struct lxc_proc_context_info { char *aa_profile; @@ -34,11 +35,6 @@ struct lxc_proc_context_info { extern struct lxc_proc_context_info *lxc_proc_get_context_info(pid_t pid); -typedef enum lxc_attach_env_policy_t { - LXC_ATTACH_KEEP_ENV, - LXC_ATTACH_CLEAR_ENV -} lxc_attach_env_policy_t; - extern int lxc_attach_to_ns(pid_t other_pid, int which); extern int lxc_attach_remount_sys_proc(); extern int lxc_attach_drop_privs(struct lxc_proc_context_info *ctx); @@ -48,4 +44,6 @@ extern char *lxc_attach_getpwshell(uid_t uid); extern void lxc_attach_get_init_uidgid(uid_t* init_uid, gid_t* init_gid); +extern int lxc_attach(const char* name, const char* lxcpath, lxc_attach_exec_t exec_function, void* exec_payload, lxc_attach_options_t* options, pid_t* attached_process); + #endif diff --git a/src/lxc/attach_options.h b/src/lxc/attach_options.h new file mode 100644 index 000000000..5291e4f55 --- /dev/null +++ b/src/lxc/attach_options.h @@ -0,0 +1,120 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _LXC_ATTACH_OPTIONS_H +#define _LXC_ATTACH_OPTIONS_H + +#include + +typedef enum lxc_attach_env_policy_t { + LXC_ATTACH_KEEP_ENV, + LXC_ATTACH_CLEAR_ENV +} lxc_attach_env_policy_t; + +enum { + /* the following are on by default: */ + LXC_ATTACH_MOVE_TO_CGROUP = 0x00000001, + LXC_ATTACH_DROP_CAPABILITIES = 0x00000002, + LXC_ATTACH_SET_PERSONALITY = 0x00000004, + LXC_ATTACH_APPARMOR = 0x00000008, + + /* the following are off by default */ + LXC_ATTACH_REMOUNT_PROC_SYS = 0x00010000, + + /* we have 16 bits for things that are on by default + * and 16 bits that are off by default, that should + * be sufficient to keep binary compatibility for + * a while + */ + LXC_ATTACH_DEFAULT = 0x0000FFFF +}; + +typedef struct lxc_attach_options_t lxc_attach_options_t; +typedef int (*lxc_attach_exec_t)(void* payload); + +struct lxc_attach_options_t { + /* any combination of the above enum */ + int attach_flags; + /* the namespaces to attach to (CLONE_NEW... 
flags) */ + int namespaces; + /* initial personality, -1 to autodetect + * (may be ignored if lxc is compiled w/o personality support) */ + long personality; + + /* inital current directory, use NULL to use cwd + * (might not exist in container, then / will be + * used because of kernel defaults) + */ + char* initial_cwd; + + /* the uid and gid to attach to, + * -1 for default (init uid/gid for userns containers, + * otherwise or if detection fails 0/0) + */ + uid_t uid; + gid_t gid; + + /* environment handling */ + lxc_attach_env_policy_t env_policy; + char** extra_env_vars; + char** extra_keep_env; + + /* file descriptors for stdin, stdout and stderr, + * dup2() will be used before calling exec_function, + * (assuming not 0, 1 and 2 are specified) and the + * original fds are closed before passing control + * over. Any O_CLOEXEC flag will be removed after + * that + */ + int stdin_fd; + int stdout_fd; + int stderr_fd; +}; + +#define LXC_ATTACH_OPTIONS_DEFAULT \ + { \ + /* .attach_flags = */ LXC_ATTACH_DEFAULT, \ + /* .namespaces = */ -1, \ + /* .personality = */ -1, \ + /* .initial_cwd = */ NULL, \ + /* .uid = */ (uid_t)-1, \ + /* .gid = */ (gid_t)-1, \ + /* .env_policy = */ LXC_ATTACH_KEEP_ENV, \ + /* .extra_env_vars = */ NULL, \ + /* .extra_keep_env = */ NULL, \ + /* .stdin_fd = */ 0, 1, 2 \ + } + +typedef struct lxc_attach_command_t { + char* program; /* the program to run (passed to execvp) */ + char** argv; /* the argv pointer of that program, including the program itself in argv[0] */ +} lxc_attach_command_t; + +/* default execution functions: + * run_command: pointer to lxc_attach_command_t + * run_shell: no payload, will be ignored + */ +extern int lxc_attach_run_command(void* payload); +extern int lxc_attach_run_shell(void* payload); + +#endif diff --git a/src/lxc/bdev.c b/src/lxc/bdev.c new file mode 100644 index 000000000..b45f2cb06 --- /dev/null +++ b/src/lxc/bdev.c @@ -0,0 +1,2070 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM 
Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * this is all just a first shot for experiment. If we go this route, much + * shoudl change. bdev should be a directory with per-bdev file. Things which + * I'm doing by calling out to userspace should sometimes be done through + * libraries like liblvm2 + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lxc.h" +#include "config.h" +#include "conf.h" +#include "bdev.h" +#include "log.h" +#include "error.h" +#include "utils.h" +#include "namespace.h" +#include "parse.h" +#include "utils.h" + +#ifndef BLKGETSIZE64 +#define BLKGETSIZE64 _IOR(0x12,114,size_t) +#endif + +#ifndef LO_FLAGS_AUTOCLEAR +#define LO_FLAGS_AUTOCLEAR 4 +#endif + +lxc_log_define(bdev, lxc); + +static int do_rsync(const char *src, const char *dest) +{ + // call out to rsync + pid_t pid; + char *s; + size_t l; + + pid = fork(); + if (pid < 0) + return -1; + if (pid > 0) + return wait_for_pid(pid); + l = strlen(src) + 2; + s = malloc(l); + if (!s) + exit(1); + strcpy(s, src); + s[l-2] = '/'; + s[l-1] = '\0'; + + execlp("rsync", "rsync", "-a", s, dest, (char *)NULL); + exit(1); +} + +/* + * return block size of dev->src 
+ */ +static int blk_getsize(struct bdev *bdev, unsigned long *size) +{ + int fd, ret; + char *path = bdev->src; + + if (strcmp(bdev->type, "loop") == 0) + path = bdev->src + 5; + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + ret = ioctl(fd, BLKGETSIZE64, size); + close(fd); + return ret; +} + +/* + * These are copied from conf.c. However as conf.c will be moved to using + * the callback system, they can be pulled from there eventually, so we + * don't need to pollute utils.c with these low level functions + */ +static int find_fstype_cb(char* buffer, void *data) +{ + struct cbarg { + const char *rootfs; + const char *target; + int mntopt; + } *cbarg = data; + + char *fstype; + + /* we don't try 'nodev' entries */ + if (strstr(buffer, "nodev")) + return 0; + + fstype = buffer; + fstype += lxc_char_left_gc(fstype, strlen(fstype)); + fstype[lxc_char_right_gc(fstype, strlen(fstype))] = '\0'; + + DEBUG("trying to mount '%s'->'%s' with fstype '%s'", + cbarg->rootfs, cbarg->target, fstype); + + if (mount(cbarg->rootfs, cbarg->target, fstype, cbarg->mntopt, NULL)) { + DEBUG("mount failed with error: %s", strerror(errno)); + return 0; + } + + INFO("mounted '%s' on '%s', with fstype '%s'", + cbarg->rootfs, cbarg->target, fstype); + + return 1; +} + +static int mount_unknow_fs(const char *rootfs, const char *target, int mntopt) +{ + int i; + + struct cbarg { + const char *rootfs; + const char *target; + int mntopt; + } cbarg = { + .rootfs = rootfs, + .target = target, + .mntopt = mntopt, + }; + + /* + * find the filesystem type with brute force: + * first we check with /etc/filesystems, in case the modules + * are auto-loaded and fall back to the supported kernel fs + */ + char *fsfile[] = { + "/etc/filesystems", + "/proc/filesystems", + }; + + for (i = 0; i < sizeof(fsfile)/sizeof(fsfile[0]); i++) { + + int ret; + + if (access(fsfile[i], F_OK)) + continue; + + ret = lxc_file_for_each_line(fsfile[i], find_fstype_cb, &cbarg); + if (ret < 0) { + ERROR("failed to 
parse '%s'", fsfile[i]); + return -1; + } + + if (ret) + return 0; + } + + ERROR("failed to determine fs type for '%s'", rootfs); + return -1; +} + +static int do_mkfs(const char *path, const char *fstype) +{ + pid_t pid; + + if ((pid = fork()) < 0) { + ERROR("error forking"); + return -1; + } + if (pid > 0) + return wait_for_pid(pid); + + // If the file is not a block device, we don't want mkfs to ask + // us about whether to proceed. + close(0); + close(1); + close(2); + open("/dev/zero", O_RDONLY); + open("/dev/null", O_RDWR); + open("/dev/null", O_RDWR); + execlp("mkfs", "mkfs", "-t", fstype, path, NULL); + exit(1); +} + +static char *linkderef(char *path, char *dest) +{ + struct stat sbuf; + ssize_t ret; + + ret = stat(path, &sbuf); + if (ret < 0) + return NULL; + if (!S_ISLNK(sbuf.st_mode)) + return path; + ret = readlink(path, dest, MAXPATHLEN); + if (ret < 0) { + SYSERROR("error reading link %s", path); + return NULL; + } else if (ret >= MAXPATHLEN) { + ERROR("link in %s too long", path); + return NULL; + } + dest[ret] = '\0'; + return dest; +} + +/* + * Given a bdev (presumably blockdev-based), detect the fstype + * by trying mounting (in a private mntns) it. 
+ * @bdev: bdev to investigate + * @type: preallocated char* in which to write the fstype + * @len: length of passed in char* + * Returns length of fstype, of -1 on error + */ +static int detect_fs(struct bdev *bdev, char *type, int len) +{ + int p[2], ret; + size_t linelen; + pid_t pid; + FILE *f; + char *sp1, *sp2, *sp3, *line = NULL; + char *srcdev; + + if (!bdev || !bdev->src || !bdev->dest) + return -1; + + srcdev = bdev->src; + if (strcmp(bdev->type, "loop") == 0) + srcdev = bdev->src + 5; + + if (pipe(p) < 0) + return -1; + if ((pid = fork()) < 0) + return -1; + if (pid > 0) { + int status; + close(p[1]); + memset(type, 0, len); + ret = read(p[0], type, len-1); + close(p[0]); + if (ret < 0) { + SYSERROR("error reading from pipe"); + wait(&status); + return -1; + } else if (ret == 0) { + ERROR("child exited early - fstype not found"); + wait(&status); + return -1; + } + wait(&status); + type[len-1] = '\0'; + INFO("detected fstype %s for %s", type, srcdev); + return ret; + } + + if (unshare(CLONE_NEWNS) < 0) + exit(1); + + ret = mount_unknow_fs(srcdev, bdev->dest, 0); + if (ret < 0) { + ERROR("failed mounting %s onto %s to detect fstype", srcdev, bdev->dest); + exit(1); + } + // if symlink, get the real dev name + char devpath[MAXPATHLEN]; + char *l = linkderef(srcdev, devpath); + if (!l) + exit(1); + f = fopen("/proc/self/mounts", "r"); + if (!f) + exit(1); + while (getline(&line, &linelen, f) != -1) { + sp1 = index(line, ' '); + if (!sp1) + exit(1); + *sp1 = '\0'; + if (strcmp(line, l)) + continue; + sp2 = index(sp1+1, ' '); + if (!sp2) + exit(1); + *sp2 = '\0'; + sp3 = index(sp2+1, ' '); + if (!sp3) + exit(1); + *sp3 = '\0'; + sp2++; + if (write(p[1], sp2, strlen(sp2)) != strlen(sp2)) + exit(1); + exit(0); + } + exit(1); +} + +struct bdev_type { + char *name; + struct bdev_ops *ops; +}; + +static int is_dir(const char *path) +{ + struct stat statbuf; + int ret = stat(path, &statbuf); + if (ret == 0 && S_ISDIR(statbuf.st_mode)) + return 1; + return 0; +} + 
+static int dir_detect(const char *path) +{ + if (strncmp(path, "dir:", 4) == 0) + return 1; // take their word for it + if (is_dir(path)) + return 1; + return 0; +} + +// +// XXXXXXX plain directory bind mount ops +// +static int dir_mount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "dir")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return mount(bdev->src, bdev->dest, "bind", MS_BIND | MS_REC, NULL); +} + +static int dir_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "dir")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +/* the bulk of this needs to become a common helper */ +static char *dir_new_path(char *src, const char *oldname, const char *name, + const char *oldpath, const char *lxcpath) +{ + char *ret, *p, *p2; + int l1, l2, nlen; + + nlen = strlen(src) + 1; + l1 = strlen(oldpath); + p = src; + /* if src starts with oldpath, look for oldname only after + * that path */ + if (strncmp(src, oldpath, l1) == 0) { + p += l1; + nlen += (strlen(lxcpath) - l1); + } + l2 = strlen(oldname); + while ((p = strstr(p, oldname)) != NULL) { + p += l2; + nlen += strlen(name) - l2; + } + + ret = malloc(nlen); + if (!ret) + return NULL; + + p = ret; + if (strncmp(src, oldpath, l1) == 0) { + p += sprintf(p, "%s", lxcpath); + src += l1; + } + + while ((p2 = strstr(src, oldname)) != NULL) { + strncpy(p, src, p2-src); // copy text up to oldname + p += p2-src; // move target pointer (p) + p += sprintf(p, "%s", name); // print new name in place of oldname + src = p2 + l2; // move src to end of oldname + } + sprintf(p, "%s", src); // copy the rest of src + return ret; +} + +/* + * for a simple directory bind mount, we substitute the old container + * name and paths for the new + */ +static int dir_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + int len, ret; + + if (snap) { + 
ERROR("directories cannot be snapshotted. Try overlayfs."); + return -1; + } + + if (!orig->dest || !orig->src) + return -1; + + len = strlen(lxcpath) + strlen(cname) + strlen("rootfs") + 3; + new->src = malloc(len); + if (!new->src) + return -1; + ret = snprintf(new->src, len, "%s/%s/rootfs", lxcpath, cname); + if (ret < 0 || ret >= len) + return -1; + if ((new->dest = strdup(new->src)) == NULL) + return -1; + + return 0; +} + +static int dir_destroy(struct bdev *orig) +{ + if (!lxc_rmdir_onedev(orig->src)) + return -1; + return 0; +} + +static int dir_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + bdev->src = strdup(dest); + bdev->dest = strdup(dest); + if (!bdev->src || !bdev->dest) { + ERROR("Out of memory"); + return -1; + } + + if (mkdir_p(bdev->src, 0755) < 0) { + ERROR("Error creating %s\n", bdev->src); + return -1; + } + if (mkdir_p(bdev->dest, 0755) < 0) { + ERROR("Error creating %s\n", bdev->dest); + return -1; + } + + return 0; +} + +struct bdev_ops dir_ops = { + .detect = &dir_detect, + .mount = &dir_mount, + .umount = &dir_umount, + .clone_paths = &dir_clonepaths, + .destroy = &dir_destroy, + .create = &dir_create, +}; + + +// +// XXXXXXX zfs ops +// There are two ways we could do this. We could always specify the +// 'zfs device' (i.e. tank/lxc lxc/container) as rootfs. But instead +// (at least right now) we have lxc-create specify $lxcpath/$lxcname/rootfs +// as the mountpoint, so that it is always mounted. +// +// That means 'mount' is really never needed and could be noop, but for the +// sake of flexibility let's always bind-mount. 
+// + +static int zfs_list_entry(const char *path, char *output, size_t inlen) +{ + FILE *f; + int found=0; + + if ((f = popen("zfs list 2> /dev/null", "r")) == NULL) { + SYSERROR("popen failed"); + return 0; + } + while (fgets(output, inlen, f)) { + if (strstr(output, path)) { + found = 1; + break; + } + } + (void) pclose(f); + + return found; +} + +static int zfs_detect(const char *path) +{ + char *output = malloc(LXC_LOG_BUFFER_SIZE); + int found; + + if (!output) { + ERROR("out of memory"); + return 0; + } + found = zfs_list_entry(path, output, LXC_LOG_BUFFER_SIZE); + free(output); + return found; +} + +static int zfs_mount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "zfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return mount(bdev->src, bdev->dest, "bind", MS_BIND | MS_REC, NULL); +} + +static int zfs_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "zfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +static int zfs_clone(const char *opath, const char *npath, const char *oname, + const char *nname, const char *lxcpath, int snapshot) +{ + // use the 'zfs list | grep opath' entry to get the zfsroot + char output[MAXPATHLEN], option[MAXPATHLEN], *p; + const char *zfsroot = output; + int ret; + pid_t pid; + + if (zfs_list_entry(opath, output, MAXPATHLEN)) { + // zfsroot is output up to ' ' + if ((p = index(output, ' ')) == NULL) + return -1; + *p = '\0'; + if ((p = strrchr(output, '/')) == NULL) + return -1; + *p = '\0'; + } else + zfsroot = default_zfs_root(); + + ret = snprintf(option, MAXPATHLEN, "-omountpoint=%s/%s/rootfs", + lxcpath, nname); + if (ret < 0 || ret >= MAXPATHLEN) + return -1; + + // zfs create -omountpoint=$lxcpath/$lxcname $zfsroot/$nname + if (!snapshot) { + if ((pid = fork()) < 0) + return -1; + if (!pid) { + char dev[MAXPATHLEN]; + ret = snprintf(dev, MAXPATHLEN, "%s/%s", zfsroot, nname); + if (ret < 0 || ret >= MAXPATHLEN) + exit(1); + execlp("zfs", "zfs", 
"create", option, dev, NULL); + exit(1); + } + return wait_for_pid(pid); + } else { + // if snapshot, do + // 'zfs snapshot zfsroot/oname@nname + // zfs clone zfsroot/oname@nname zfsroot/nname + char path1[MAXPATHLEN], path2[MAXPATHLEN]; + + ret = snprintf(path1, MAXPATHLEN, "%s/%s@%s", zfsroot, + oname, nname); + if (ret < 0 || ret >= MAXPATHLEN) + return -1; + (void) snprintf(path2, MAXPATHLEN, "%s/%s", zfsroot, nname); + + // if the snapshot exists, delete it + if ((pid = fork()) < 0) + return -1; + if (!pid) { + execlp("zfs", "zfs", "destroy", path1, NULL); + exit(1); + } + // it probably doesn't exist so destroy probably will fail. + (void) wait_for_pid(pid); + + // run first (snapshot) command + if ((pid = fork()) < 0) + return -1; + if (!pid) { + execlp("zfs", "zfs", "snapshot", path1, NULL); + exit(1); + } + if (wait_for_pid(pid) < 0) + return -1; + + // run second (clone) command + if ((pid = fork()) < 0) + return -1; + if (!pid) { + execlp("zfs", "zfs", "clone", option, path1, path2, NULL); + exit(1); + } + return wait_for_pid(pid); + } +} + +static int zfs_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + int len, ret; + + if (!orig->src || !orig->dest) + return -1; + + if (snap && strcmp(orig->type, "zfs")) { + ERROR("zfs snapshot from %s backing store is not supported", + orig->type); + return -1; + } + + len = strlen(lxcpath) + strlen(cname) + strlen("rootfs") + 3; + new->src = malloc(len); + if (!new->src) + return -1; + ret = snprintf(new->src, len, "%s/%s/rootfs", lxcpath, cname); + if (ret < 0 || ret >= len) + return -1; + if ((new->dest = strdup(new->src)) == NULL) + return -1; + + return zfs_clone(orig->src, new->src, oldname, cname, lxcpath, snap); +} + +/* + * TODO: detect whether this was a clone, and if so then also delete the + * snapshot it was based on, so that we don't hold the original + * container busy. 
+ */ +static int zfs_destroy(struct bdev *orig) +{ + pid_t pid; + char output[MAXPATHLEN], *p; + + if ((pid = fork()) < 0) + return -1; + if (pid) + return wait_for_pid(pid); + + if (!zfs_list_entry(orig->src, output, MAXPATHLEN)) { + ERROR("Error: zfs entry for %s not found", orig->src); + return -1; + } + + // zfs mount is output up to ' ' + if ((p = index(output, ' ')) == NULL) + return -1; + *p = '\0'; + + execlp("zfs", "zfs", "destroy", output, NULL); + exit(1); +} + +static int zfs_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + const char *zfsroot; + char option[MAXPATHLEN]; + int ret; + pid_t pid; + + if (!specs || !specs->u.zfs.zfsroot) + zfsroot = default_zfs_root(); + else + zfsroot = specs->u.zfs.zfsroot; + + if (!(bdev->dest = strdup(dest))) { + ERROR("No mount target specified or out of memory"); + return -1; + } + if (!(bdev->src = strdup(bdev->dest))) { + ERROR("out of memory"); + return -1; + } + + ret = snprintf(option, MAXPATHLEN, "-omountpoint=%s", bdev->dest); + if (ret < 0 || ret >= MAXPATHLEN) + return -1; + if ((pid = fork()) < 0) + return -1; + if (pid) + return wait_for_pid(pid); + + char dev[MAXPATHLEN]; + ret = snprintf(dev, MAXPATHLEN, "%s/%s", zfsroot, n); + if (ret < 0 || ret >= MAXPATHLEN) + exit(1); + execlp("zfs", "zfs", "create", option, dev, NULL); + exit(1); +} + +struct bdev_ops zfs_ops = { + .detect = &zfs_detect, + .mount = &zfs_mount, + .umount = &zfs_umount, + .clone_paths = &zfs_clonepaths, + .destroy = &zfs_destroy, + .create = &zfs_create, +}; + +// +// LVM ops +// + +/* + * Look at /sys/dev/block/maj:min/dm/uuid. 
If it contains the hardcoded LVM + * prefix "LVM-", then this is an lvm2 LV + */ +static int lvm_detect(const char *path) +{ + char devp[MAXPATHLEN], buf[4]; + FILE *fout; + int ret; + struct stat statbuf; + + if (strncmp(path, "lvm:", 4) == 0) + return 1; // take their word for it + + ret = stat(path, &statbuf); + if (ret != 0) + return 0; + if (!S_ISBLK(statbuf.st_mode)) + return 0; + + ret = snprintf(devp, MAXPATHLEN, "/sys/dev/block/%d:%d/dm/uuid", + major(statbuf.st_rdev), minor(statbuf.st_rdev)); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("lvm uuid pathname too long"); + return 0; + } + fout = fopen(devp, "r"); + if (!fout) + return 0; + ret = fread(buf, 1, 4, fout); + fclose(fout); + if (ret != 4 || strncmp(buf, "LVM-", 4) != 0) + return 0; + return 1; +} + +static int lvm_mount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "lvm")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + /* if we might pass in data sometime, then we'll have to enrich + * mount_unknow_fs */ + return mount_unknow_fs(bdev->src, bdev->dest, 0); +} + +static int lvm_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "lvm")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +/* + * path must be '/dev/$vg/$lv', $vg must be an existing VG, and $lv must + * not yet exist. This function will attempt to create /dev/$vg/$lv of + * size $size. + */ +static int do_lvm_create(const char *path, unsigned long size) +{ + int ret, pid; + char sz[24], *pathdup, *vg, *lv; + + if ((pid = fork()) < 0) { + SYSERROR("failed fork"); + return -1; + } + if (pid > 0) + return wait_for_pid(pid); + + // lvcreate default size is in M, not bytes. 
+ ret = snprintf(sz, 24, "%lu", size/1000000); + if (ret < 0 || ret >= 24) + exit(1); + + pathdup = strdup(path); + if (!pathdup) + exit(1); + lv = strrchr(pathdup, '/'); + if (!lv) { + free(pathdup); + exit(1); + } + *lv = '\0'; + lv++; + vg = strrchr(pathdup, '/'); + if (!vg) + exit(1); + vg++; + execlp("lvcreate", "lvcreate", "-L", sz, vg, "-n", lv, (char *)NULL); + free(pathdup); + exit(1); +} + +static int lvm_snapshot(const char *orig, const char *path, unsigned long size) +{ + int ret, pid; + char sz[24], *pathdup, *lv; + + if ((pid = fork()) < 0) { + SYSERROR("failed fork"); + return -1; + } + if (pid > 0) + return wait_for_pid(pid); + // lvcreate default size is in M, not bytes. + ret = snprintf(sz, 24, "%lu", size/1000000); + if (ret < 0 || ret >= 24) + exit(1); + + pathdup = strdup(path); + if (!pathdup) + exit(1); + lv = strrchr(pathdup, '/'); + if (!lv) { + free(pathdup); + exit(1); + } + *lv = '\0'; + lv++; + + ret = execlp("lvcreate", "lvcreate", "-s", "-L", sz, "-n", lv, orig, (char *)NULL); + free(pathdup); + exit(1); +} + +// this will return 1 for physical disks, qemu-nbd, loop, etc +// right now only lvm is a block device +static int is_blktype(struct bdev *b) +{ + if (strcmp(b->type, "lvm") == 0) + return 1; + return 0; +} + +static int lvm_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + char fstype[100]; + unsigned long size = newsize; + int len, ret; + + if (!orig->src || !orig->dest) + return -1; + + if (strcmp(orig->type, "lvm")) { + const char *vg; + + if (snap) { + ERROR("LVM snapshot from %s backing store is not supported", + orig->type); + return -1; + } + vg = default_lvm_vg(); + len = strlen("/dev/") + strlen(vg) + strlen(cname) + 2; + if ((new->src = malloc(len)) == NULL) + return -1; + ret = snprintf(new->src, len, "/dev/%s/%s", vg, cname); + if (ret < 0 || ret >= len) + return -1; + } else { + new->src = 
dir_new_path(orig->src, oldname, cname, oldpath, lxcpath); + if (!new->src) + return -1; + } + + if (orig->data) { + new->data = strdup(orig->data); + if (!new->data) + return -1; + } + + len = strlen(lxcpath) + strlen(cname) + strlen("rootfs") + 3; + new->dest = malloc(len); + if (!new->dest) + return -1; + ret = snprintf(new->dest, len, "%s/%s/rootfs", lxcpath, cname); + if (ret < 0 || ret >= len) + return -1; + if (mkdir_p(new->dest, 0755) < 0) + return -1; + + if (is_blktype(orig)) { + if (!newsize && blk_getsize(orig, &size) < 0) { + ERROR("Error getting size of %s", orig->src); + return -1; + } + if (detect_fs(orig, fstype, 100) < 0) { + INFO("could not find fstype for %s, using ext3", orig->src); + return -1; + } + } else { + sprintf(fstype, "ext3"); + if (!newsize) + size = 1000000000; // default to 1G + } + + if (snap) { + if (lvm_snapshot(orig->src, new->src, size) < 0) { + ERROR("could not create %s snapshot of %s", new->src, orig->src); + return -1; + } + } else { + if (do_lvm_create(new->src, size) < 0) { + ERROR("Error creating new lvm blockdev"); + return -1; + } + if (do_mkfs(new->src, fstype) < 0) { + ERROR("Error creating filesystem type %s on %s", fstype, + new->src); + return -1; + } + } + + return 0; +} + +static int lvm_destroy(struct bdev *orig) +{ + pid_t pid; + + if ((pid = fork()) < 0) + return -1; + if (!pid) { + execlp("lvremove", "lvremove", "-f", orig->src, NULL); + exit(1); + } + return wait_for_pid(pid); +} + +#define DEFAULT_FS_SIZE 1024000000 +#define DEFAULT_FSTYPE "ext3" +static int lvm_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + const char *vg, *fstype, *lv = n; + unsigned long sz; + int ret, len; + + if (!specs) + return -1; + + vg = specs->u.lvm.vg; + if (!vg) + vg = default_lvm_vg(); + + /* /dev/$vg/$lv */ + if (specs->u.lvm.lv) + lv = specs->u.lvm.lv; + len = strlen(vg) + strlen(lv) + 7; + bdev->src = malloc(len); + if (!bdev->src) + return -1; + + ret = snprintf(bdev->src, 
len, "/dev/%s/%s", vg, lv); + if (ret < 0 || ret >= len) + return -1; + + // lvm.fssize is in bytes. + sz = specs->u.lvm.fssize; + if (!sz) + sz = DEFAULT_FS_SIZE; + + INFO("Error creating new lvm blockdev %s size %lu", bdev->src, sz); + if (do_lvm_create(bdev->src, sz) < 0) { + ERROR("Error creating new lvm blockdev %s size %lu", bdev->src, sz); + return -1; + } + + fstype = specs->u.lvm.fstype; + if (!fstype) + fstype = DEFAULT_FSTYPE; + if (do_mkfs(bdev->src, fstype) < 0) { + ERROR("Error creating filesystem type %s on %s", fstype, + bdev->src); + return -1; + } + if (!(bdev->dest = strdup(dest))) + return -1; + + if (mkdir_p(bdev->dest, 0755) < 0) { + ERROR("Error creating %s\n", bdev->dest); + return -1; + } + + return 0; +} + +struct bdev_ops lvm_ops = { + .detect = &lvm_detect, + .mount = &lvm_mount, + .umount = &lvm_umount, + .clone_paths = &lvm_clonepaths, + .destroy = &lvm_destroy, + .create = &lvm_create, +}; + +// +// btrfs ops +// + +struct btrfs_ioctl_space_info { + unsigned long long flags; + unsigned long long total_bytes; + unsigned long long used_bytes; +}; + +struct btrfs_ioctl_space_args { + unsigned long long space_slots; + unsigned long long total_spaces; + struct btrfs_ioctl_space_info spaces[0]; +}; + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, unsigned long long) +#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ + struct btrfs_ioctl_space_args) + +static bool is_btrfs_fs(const char *path) +{ + int fd, ret; + struct btrfs_ioctl_space_args sargs; + + // make sure this is a btrfs filesystem + fd = open(path, O_RDONLY); + if (fd < 0) + return false; + sargs.space_slots = 0; + sargs.total_spaces = 0; + ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, &sargs); + close(fd); + if (ret < 0) + return false; + + return true; +} + +static int btrfs_detect(const char *path) +{ + struct stat st; + int ret; + + if (!is_btrfs_fs(path)) + return 0; + + // and make sure it's a subvolume. 
+ ret = stat(path, &st); + if (ret < 0) + return 0; + + if (st.st_ino == 256 && S_ISDIR(st.st_mode)) + return 1; + + return 0; +} + +static int btrfs_mount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "btrfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return mount(bdev->src, bdev->dest, "bind", MS_BIND | MS_REC, NULL); +} + +static int btrfs_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "btrfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +#define BTRFS_SUBVOL_NAME_MAX 4039 +#define BTRFS_PATH_NAME_MAX 4087 + +struct btrfs_ioctl_vol_args { + signed long long fd; + char name[BTRFS_PATH_NAME_MAX + 1]; +}; + +#define BTRFS_IOCTL_MAGIC 0x94 +#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ + struct btrfs_ioctl_vol_args_v2) +#define BTRFS_IOC_SUBVOL_CREATE _IOW(BTRFS_IOCTL_MAGIC, 14, \ + struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ + struct btrfs_ioctl_vol_args) + +#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0) + +struct btrfs_ioctl_vol_args_v2 { + signed long long fd; + unsigned long long transid; + unsigned long long flags; + union { + struct { + unsigned long long size; + //struct btrfs_qgroup_inherit *qgroup_inherit; + void *qgroup_inherit; + }; + unsigned long long unused[4]; + }; + char name[BTRFS_SUBVOL_NAME_MAX + 1]; +}; + +static int btrfs_subvolume_create(const char *path) +{ + int ret, fd = -1; + struct btrfs_ioctl_vol_args args; + char *p, *newfull = strdup(path); + + if (!newfull) { + ERROR("Error: out of memory"); + return -1; + } + + p = strrchr(newfull, '/'); + if (!p) { + ERROR("bad path: %s", path); + return -1; + } + *p = '\0'; + + if ((fd = open(newfull, O_RDONLY)) < 0) { + ERROR("Error opening %s", newfull); + free(newfull); + return -1; + } + + memset(&args, 0, sizeof(args)); + strncpy(args.name, p+1, 
BTRFS_SUBVOL_NAME_MAX); + args.name[BTRFS_SUBVOL_NAME_MAX-1] = 0; + ret = ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args); + INFO("btrfs: snapshot create ioctl returned %d", ret); + + free(newfull); + close(fd); + return ret; +} + +static int btrfs_snapshot(const char *orig, const char *new) +{ + int fd = -1, fddst = -1, ret = -1; + struct btrfs_ioctl_vol_args_v2 args; + char *newdir, *newname, *newfull = NULL; + + newfull = strdup(new); + if (!newfull) { + ERROR("Error: out of memory"); + goto out; + } + // make sure the directory doesn't already exist + if (rmdir(newfull) < 0 && errno != -ENOENT) { + SYSERROR("Error removing empty new rootfs"); + goto out; + } + newname = basename(newfull); + newdir = dirname(newfull); + fd = open(orig, O_RDONLY); + if (fd < 0) { + SYSERROR("Error opening original rootfs %s", orig); + goto out; + } + fddst = open(newdir, O_RDONLY); + if (fddst < 0) { + SYSERROR("Error opening new container dir %s", newdir); + goto out; + } + + memset(&args, 0, sizeof(args)); + args.fd = fd; + strncpy(args.name, newname, BTRFS_SUBVOL_NAME_MAX); + args.name[BTRFS_SUBVOL_NAME_MAX-1] = 0; + ret = ioctl(fddst, BTRFS_IOC_SNAP_CREATE_V2, &args); + INFO("btrfs: snapshot create ioctl returned %d", ret); + +out: + if (fddst != -1) + close(fddst); + if (fd != -1) + close(fd); + if (newfull) + free(newfull); + return ret; +} + +static int btrfs_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + if (!orig->dest || !orig->src) + return -1; + + if (strcmp(orig->type, "btrfs")) { + int len, ret; + if (snap) { + ERROR("btrfs snapshot from %s backing store is not supported", + orig->type); + return -1; + } + len = strlen(lxcpath) + strlen(cname) + strlen("rootfs") + 3; + new->src = malloc(len); + if (!new->src) + return -1; + ret = snprintf(new->src, len, "%s/%s/rootfs", lxcpath, cname); + if (ret < 0 || ret >= len) + return -1; + } else { + // in 
case rootfs is in custom path, reuse it + if ((new->src = dir_new_path(orig->src, oldname, cname, oldpath, lxcpath)) == NULL) + return -1; + + } + + if ((new->dest = strdup(new->src)) == NULL) + return -1; + + if (orig->data && (new->data = strdup(orig->data)) == NULL) + return -1; + + if (snap) + return btrfs_snapshot(orig->dest, new->dest); + + if (rmdir(new->dest) < 0 && errno != -ENOENT) { + SYSERROR("removing %s\n", new->dest); + return -1; + } + + return btrfs_subvolume_create(new->dest); +} + +static int btrfs_destroy(struct bdev *orig) +{ + int ret, fd = -1; + struct btrfs_ioctl_vol_args args; + char *path = orig->src; + char *p, *newfull = strdup(path); + + if (!newfull) { + ERROR("Error: out of memory"); + return -1; + } + + p = strrchr(newfull, '/'); + if (!p) { + ERROR("bad path: %s", path); + return -1; + } + *p = '\0'; + + if ((fd = open(newfull, O_RDONLY)) < 0) { + ERROR("Error opening %s", newfull); + free(newfull); + return -1; + } + + memset(&args, 0, sizeof(args)); + strncpy(args.name, p+1, BTRFS_SUBVOL_NAME_MAX); + args.name[BTRFS_SUBVOL_NAME_MAX-1] = 0; + ret = ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &args); + INFO("btrfs: snapshot create ioctl returned %d", ret); + + free(newfull); + close(fd); + return ret; +} + +static int btrfs_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + bdev->src = strdup(dest); + bdev->dest = strdup(dest); + if (!bdev->src || !bdev->dest) + return -1; + return btrfs_subvolume_create(bdev->dest); +} + +struct bdev_ops btrfs_ops = { + .detect = &btrfs_detect, + .mount = &btrfs_mount, + .umount = &btrfs_umount, + .clone_paths = &btrfs_clonepaths, + .destroy = &btrfs_destroy, + .create = &btrfs_create, +}; + +// +// loopback dev ops +// +static int loop_detect(const char *path) +{ + if (strncmp(path, "loop:", 5) == 0) + return 1; + return 0; +} + +static int find_free_loopdev(int *retfd, char *namep) +{ + struct dirent dirent, *direntp; + struct loop_info64 lo; + DIR *dir; + int fd = 
-1; + + if (!(dir = opendir("/dev"))) { + SYSERROR("Error opening /dev"); + return -1; + } + while (!readdir_r(dir, &dirent, &direntp)) { + + if (!direntp) + break; + if (strncmp(direntp->d_name, "loop", 4) != 0) + continue; + if ((fd = openat(dirfd(dir), direntp->d_name, O_RDWR)) < 0) + continue; + if (ioctl(fd, LOOP_GET_STATUS64, &lo) == 0 || errno != ENXIO) { + close(fd); + fd = -1; + continue; + } + // We can use this fd + snprintf(namep, 100, "/dev/%s", direntp->d_name); + break; + } + closedir(dir); + if (fd == -1) { + ERROR("No loop device found"); + return -1; + } + + *retfd = fd; + return 0; +} + +static int loop_mount(struct bdev *bdev) +{ + int lfd, ffd = -1, ret = -1; + struct loop_info64 lo; + char loname[100]; + + if (strcmp(bdev->type, "loop")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + if (find_free_loopdev(&lfd, loname) < 0) + return -22; + + if ((ffd = open(bdev->src + 5, O_RDWR)) < 0) { + SYSERROR("Error opening backing file %s\n", bdev->src); + goto out; + } + + if (ioctl(lfd, LOOP_SET_FD, ffd) < 0) { + SYSERROR("Error attaching backing file to loop dev"); + goto out; + } + memset(&lo, 0, sizeof(lo)); + lo.lo_flags = LO_FLAGS_AUTOCLEAR; + if (ioctl(lfd, LOOP_SET_STATUS64, &lo) < 0) { + SYSERROR("Error setting autoclear on loop dev\n"); + goto out; + } + + ret = mount_unknow_fs(loname, bdev->dest, 0); + if (ret < 0) + ERROR("Error mounting %s\n", bdev->src); + else + bdev->lofd = lfd; + +out: + if (ffd > -1) + close(ffd); + if (ret < 0) { + close(lfd); + bdev->lofd = -1; + } + return ret; +} + +static int loop_umount(struct bdev *bdev) +{ + int ret; + + if (strcmp(bdev->type, "loop")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + ret = umount(bdev->dest); + if (bdev->lofd >= 0) { + close(bdev->lofd); + bdev->lofd = -1; + } + return ret; +} + +static int do_loop_create(const char *path, unsigned long size, const char *fstype) +{ + int fd; + // create the new loopback file. 
+ fd = creat(path, S_IRUSR|S_IWUSR); + if (fd < 0) + return -1; + if (lseek(fd, size, SEEK_SET) < 0) { + SYSERROR("Error seeking to set new loop file size"); + close(fd); + return -1; + } + if (write(fd, "1", 1) != 1) { + SYSERROR("Error creating new loop file"); + close(fd); + return -1; + } + if (close(fd) < 0) { + SYSERROR("Error closing new loop file"); + return -1; + } + + // create an fs in the loopback file + if (do_mkfs(path, fstype) < 0) { + ERROR("Error creating filesystem type %s on %s", fstype, + path); + return -1; + } + + return 0; +} + +/* + * No idea what the original blockdev will be called, but the copy will be + * called $lxcpath/$lxcname/rootdev + */ +static int loop_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + char fstype[100]; + unsigned long size = newsize; + int len, ret; + char *srcdev; + + if (snap) { + ERROR("loop devices cannot be snapshotted."); + return -1; + } + + if (!orig->dest || !orig->src) + return -1; + + len = strlen(lxcpath) + strlen(cname) + strlen("rootdev") + 3; + srcdev = alloca(len); + ret = snprintf(srcdev, len, "%s/%s/rootdev", lxcpath, cname); + if (ret < 0 || ret >= len) + return -1; + + new->src = malloc(len + 5); + if (!new->src) + return -1; + ret = snprintf(new->src, len + 5, "loop:%s", srcdev); + if (ret < 0 || ret >= len + 5) + return -1; + + new->dest = malloc(len); + if (!new->dest) + return -1; + ret = snprintf(new->dest, len, "%s/%s/rootfs", lxcpath, cname); + if (ret < 0 || ret >= len) + return -1; + + // it's tempting to say: if orig->src == loopback and !newsize, then + // copy the loopback file. However, we'd have to make sure to + // correctly keep holes! So punt for now. 
+ + if (is_blktype(orig)) { + if (!newsize && blk_getsize(orig, &size) < 0) { + ERROR("Error getting size of %s", orig->src); + return -1; + } + if (detect_fs(orig, fstype, 100) < 0) { + INFO("could not find fstype for %s, using %s", orig->src, + DEFAULT_FSTYPE); + return -1; + } + } else { + sprintf(fstype, "%s", DEFAULT_FSTYPE); + if (!newsize) + size = DEFAULT_FS_SIZE; // default to 1G + } + return do_loop_create(srcdev, size, fstype); +} + +static int loop_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + const char *fstype; + unsigned long sz; + int ret, len; + char *srcdev; + + if (!specs) + return -1; + + // dest is passed in as $lxcpath / $lxcname / rootfs + // srcdev will be: $lxcpath / $lxcname / rootdev + // src will be 'loop:$srcdev' + len = strlen(dest) + 2; + srcdev = alloca(len); + + ret = snprintf(srcdev, len, "%s", dest); + if (ret < 0 || ret >= len) + return -1; + sprintf(srcdev + len - 4, "dev"); + + bdev->src = malloc(len + 5); + if (!bdev->src) + return -1; + ret = snprintf(bdev->src, len + 5, "loop:%s", srcdev); + if (ret < 0 || ret >= len + 5) + return -1; + + sz = specs->u.loop.fssize; + if (!sz) + sz = DEFAULT_FS_SIZE; + + fstype = specs->u.loop.fstype; + if (!fstype) + fstype = DEFAULT_FSTYPE; + + if (!(bdev->dest = strdup(dest))) + return -1; + + if (mkdir_p(bdev->dest, 0755) < 0) { + ERROR("Error creating %s\n", bdev->dest); + return -1; + } + + return do_loop_create(srcdev, sz, fstype); +} + +static int loop_destroy(struct bdev *orig) +{ + return unlink(orig->src + 5); +} + +struct bdev_ops loop_ops = { + .detect = &loop_detect, + .mount = &loop_mount, + .umount = &loop_umount, + .clone_paths = &loop_clonepaths, + .destroy = &loop_destroy, + .create = &loop_create, +}; + +// +// overlayfs ops +// + +static int overlayfs_detect(const char *path) +{ + if (strncmp(path, "overlayfs:", 10) == 0) + return 1; // take their word for it + return 0; +} + +// +// XXXXXXX plain directory bind mount ops +// 
+static int overlayfs_mount(struct bdev *bdev) +{ + char *options, *dup, *lower, *upper; + int len; + int ret; + + if (strcmp(bdev->type, "overlayfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + + // separately mount it first + // mount -t overlayfs -oupperdir=${upper},lowerdir=${lower} lower dest + dup = alloca(strlen(bdev->src)+1); + strcpy(dup, bdev->src); + if (!(lower = index(dup, ':'))) + return -22; + if (!(upper = index(++lower, ':'))) + return -22; + *upper = '\0'; + upper++; + + // TODO We should check whether bdev->src is a blockdev, and if so + // but for now, only support overlays of a basic directory + + len = strlen(lower) + strlen(upper) + strlen("upperdir=,lowerdir=") + 1; + options = alloca(len); + ret = snprintf(options, len, "upperdir=%s,lowerdir=%s", upper, lower); + if (ret < 0 || ret >= len) + return -1; + ret = mount(lower, bdev->dest, "overlayfs", MS_MGC_VAL, options); + if (ret < 0) + SYSERROR("overlayfs: error mounting %s onto %s options %s", + lower, bdev->dest, options); + else + INFO("overlayfs: mounted %s onto %s options %s", + lower, bdev->dest, options); + return ret; +} + +static int overlayfs_umount(struct bdev *bdev) +{ + if (strcmp(bdev->type, "overlayfs")) + return -22; + if (!bdev->src || !bdev->dest) + return -22; + return umount(bdev->dest); +} + +static int overlayfs_clonepaths(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, int snap, + unsigned long newsize) +{ + if (!snap) { + ERROR("overlayfs is only for snapshot clones"); + return -22; + } + + if (!orig->src || !orig->dest) + return -1; + + new->dest = dir_new_path(orig->dest, oldname, cname, oldpath, lxcpath); + if (!new->dest) + return -1; + if (mkdir_p(new->dest, 0755) < 0) + return -1; + + if (strcmp(orig->type, "dir") == 0) { + char *delta; + int ret, len; + + // if we have /var/lib/lxc/c2/rootfs, then delta will be + // /var/lib/lxc/c2/delta0 + delta = strdup(new->dest); + 
if (!delta) { + return -1; + } + if (strlen(delta) < 6) { + free(delta); + return -22; + } + strcpy(&delta[strlen(delta)-6], "delta0"); + if ((ret = mkdir(delta, 0755)) < 0) { + SYSERROR("error: mkdir %s", delta); + free(delta); + return -1; + } + + // the src will be 'overlayfs:lowerdir:upperdir' + len = strlen(delta) + strlen(orig->src) + 12; + new->src = malloc(len); + if (!new->src) { + free(delta); + return -ENOMEM; + } + ret = snprintf(new->src, len, "overlayfs:%s:%s", orig->src, delta); + free(delta); + if (ret < 0 || ret >= len) + return -ENOMEM; + } else if (strcmp(orig->type, "overlayfs") == 0) { + // What exactly do we want to do here? + // I think we want to use the original lowerdir, with a + // private delta which is originally rsynced from the + // original delta + char *osrc, *odelta, *nsrc, *ndelta; + int len, ret; + if (!(osrc = strdup(orig->src))) + return -22; + nsrc = index(osrc, ':') + 1; + if (nsrc != osrc + 10 || (odelta = index(nsrc, ':')) == NULL) { + free(osrc); + return -22; + } + *odelta = '\0'; + odelta++; + ndelta = dir_new_path(odelta, oldname, cname, oldpath, lxcpath); + if (!ndelta) { + free(osrc); + return -ENOMEM; + } + if (do_rsync(odelta, ndelta) < 0) { + free(osrc); + free(ndelta); + ERROR("copying overlayfs delta"); + return -1; + } + len = strlen(nsrc) + strlen(ndelta) + 12; + new->src = malloc(len); + if (!new->src) { + free(osrc); + free(ndelta); + return -ENOMEM; + } + ret = snprintf(new->src, len, "overlayfs:%s:%s", nsrc, ndelta); + free(osrc); + free(ndelta); + if (ret < 0 || ret >= len) + return -ENOMEM; + } else { + ERROR("overlayfs clone of %s container is not yet supported", + orig->type); + // Note, supporting this will require overlayfs_mount supporting + // mounting of the underlay. No big deal, just needs to be done. 
+ return -1; + } + + return 0; +} + +int overlayfs_destroy(struct bdev *orig) +{ + char *upper; + + if (strncmp(orig->src, "overlayfs:", 10) != 0) + return -22; + upper = index(orig->src + 10, ':'); + if (!upper) + return -22; + upper++; + return lxc_rmdir_onedev(upper); +} + +/* + * to say 'lxc-create -t ubuntu -n o1 -B overlayfs' means you want + * $lxcpath/$lxcname/rootfs to have the created container, while all + * changes after starting the container are written to + * $lxcpath/$lxcname/delta0 + */ +static int overlayfs_create(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs) +{ + char *delta; + int ret, len = strlen(dest), newlen; + + if (len < 8 || strcmp(dest+len-7, "/rootfs") != 0) + return -1; + + if (!(bdev->dest = strdup(dest))) { + ERROR("Out of memory"); + return -1; + } + + delta = alloca(strlen(dest)+1); + strcpy(delta, dest); + strcpy(delta+len-6, "delta0"); + + if (mkdir_p(delta, 0755) < 0) { + ERROR("Error creating %s\n", delta); + return -1; + } + + /* overlayfs:lower:upper */ + newlen = (2 * len) + strlen("overlayfs:") + 2; + bdev->src = malloc(newlen); + if (!bdev->src) { + ERROR("Out of memory"); + return -1; + } + ret = snprintf(bdev->src, newlen, "overlayfs:%s:%s", dest, delta); + if (ret < 0 || ret >= newlen) + return -1; + + if (mkdir_p(bdev->dest, 0755) < 0) { + ERROR("Error creating %s\n", bdev->dest); + return -1; + } + + return 0; +} + +struct bdev_ops overlayfs_ops = { + .detect = &overlayfs_detect, + .mount = &overlayfs_mount, + .umount = &overlayfs_umount, + .clone_paths = &overlayfs_clonepaths, + .destroy = &overlayfs_destroy, + .create = &overlayfs_create, +}; + +struct bdev_type bdevs[] = { + {.name = "zfs", .ops = &zfs_ops,}, + {.name = "lvm", .ops = &lvm_ops,}, + {.name = "btrfs", .ops = &btrfs_ops,}, + {.name = "dir", .ops = &dir_ops,}, + {.name = "overlayfs", .ops = &overlayfs_ops,}, + {.name = "loop", .ops = &loop_ops,}, +}; + +static const size_t numbdevs = sizeof(bdevs) / sizeof(struct 
bdev_type); + +void bdev_put(struct bdev *bdev) +{ + if (bdev->data) + free(bdev->data); + if (bdev->src) + free(bdev->src); + if (bdev->dest) + free(bdev->dest); + free(bdev); +} + +struct bdev *bdev_get(const char *type) +{ + int i; + struct bdev *bdev; + + for (i=0; iops = bdevs[i].ops; + bdev->type = bdevs[i].name; + return bdev; +} + +struct bdev *bdev_init(const char *src, const char *dst, const char *data) +{ + int i; + struct bdev *bdev; + + for (i=0; idetect(src); + if (r) + break; + } + + if (i == numbdevs) + return NULL; + bdev = malloc(sizeof(struct bdev)); + if (!bdev) + return NULL; + memset(bdev, 0, sizeof(struct bdev)); + bdev->ops = bdevs[i].ops; + bdev->type = bdevs[i].name; + if (data) + bdev->data = strdup(data); + if (src) + bdev->src = strdup(src); + if (dst) + bdev->dest = strdup(dst); + + return bdev; +} + +/* + * If we're not snaphotting, then bdev_copy becomes a simple case of mount + * the original, mount the new, and rsync the contents. + */ +struct bdev *bdev_copy(const char *src, const char *oldname, const char *cname, + const char *oldpath, const char *lxcpath, const char *bdevtype, + int snap, const char *bdevdata, unsigned long newsize, + int *needs_rdep) +{ + struct bdev *orig, *new; + pid_t pid; + + /* if the container name doesn't show up in the rootfs path, then + * we don't know how to come up with a new name + */ + if (strstr(src, oldname) == NULL) { + ERROR("original rootfs path %s doesn't include container name %s", + src, oldname); + return NULL; + } + + orig = bdev_init(src, NULL, NULL); + if (!orig) { + ERROR("failed to detect blockdev type for %s\n", src); + return NULL; + } + + if (!orig->dest) { + int ret; + orig->dest = malloc(MAXPATHLEN); + if (!orig->dest) { + ERROR("out of memory"); + bdev_put(orig); + return NULL; + } + ret = snprintf(orig->dest, MAXPATHLEN, "%s/%s/rootfs", oldpath, oldname); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("rootfs path too long"); + bdev_put(orig); + return NULL; + } + } + + /* + * 
If newtype is NULL and snapshot is set, then use overlayfs + */ + if (!bdevtype && snap && strcmp(orig->type , "dir") == 0) + bdevtype = "overlayfs"; + + *needs_rdep = 0; + if (bdevtype && strcmp(orig->type, "dir") == 0 && + strcmp(bdevtype, "overlayfs") == 0) + *needs_rdep = 1; + + new = bdev_get(bdevtype ? bdevtype : orig->type); + if (!new) { + ERROR("no such block device type: %s", bdevtype ? bdevtype : orig->type); + bdev_put(orig); + return NULL; + } + + if (new->ops->clone_paths(orig, new, oldname, cname, oldpath, lxcpath, snap, newsize) < 0) { + ERROR("failed getting pathnames for cloned storage: %s\n", src); + bdev_put(orig); + bdev_put(new); + return NULL; + } + + pid = fork(); + if (pid < 0) { + SYSERROR("fork"); + bdev_put(orig); + bdev_put(new); + return NULL; + } + + if (pid > 0) { + int ret = wait_for_pid(pid); + bdev_put(orig); + if (ret < 0) { + bdev_put(new); + return NULL; + } + return new; + } + + if (unshare(CLONE_NEWNS) < 0) { + SYSERROR("unshare CLONE_NEWNS"); + exit(1); + } + if (snap) + exit(0); + + // If not a snapshot, copy the fs. + if (orig->ops->mount(orig) < 0) { + ERROR("failed mounting %s onto %s\n", src, orig->dest); + exit(1); + } + if (new->ops->mount(new) < 0) { + ERROR("failed mounting %s onto %s\n", new->src, new->dest); + exit(1); + } + if (do_rsync(orig->dest, new->dest) < 0) { + ERROR("rsyncing %s to %s\n", orig->src, new->src); + exit(1); + } + // don't bother umounting, ns exit will do that + + exit(0); +} + +static struct bdev * do_bdev_create(const char *dest, const char *type, + const char *cname, struct bdev_specs *specs) +{ + struct bdev *bdev = bdev_get(type); + if (!bdev) { + return NULL; + } + + if (bdev->ops->create(bdev, dest, cname, specs) < 0) { + bdev_put(bdev); + return NULL; + } + + return bdev; +} + +/* + * bdev_create: + * Create a backing store for a container. + * If successfull, return a struct bdev *, with the bdev mounted and ready + * for use. 
Before completing, the caller will need to call the + * umount operation and bdev_put(). + * @dest: the mountpoint (i.e. /var/lib/lxc/$name/rootfs) + * @type: the bdevtype (dir, btrfs, zfs, etc) + * @cname: the container name + * @specs: details about the backing store to create, like fstype + */ +struct bdev *bdev_create(const char *dest, const char *type, + const char *cname, struct bdev_specs *specs) +{ + struct bdev *bdev; + char *best_options[] = {"btrfs", "zfs", "lvm", "dir", NULL}; + + if (!type) + return do_bdev_create(dest, "dir", cname, specs); + + if (strcmp(type, "best") == 0) { + int i; + // try for the best backing store type, according to our + // opinionated preferences + for (i=0; best_options[i]; i++) { + if ((bdev = do_bdev_create(dest, best_options[i], cname, specs))) + return bdev; + } + return NULL; // 'dir' should never fail, so this shouldn't happen + } + + // -B lvm,dir + if (index(type, ',') != NULL) { + char *dup = alloca(strlen(type)+1), *saveptr, *token; + strcpy(dup, type); + for (token = strtok_r(dup, ",", &saveptr); token; + token = strtok_r(NULL, ",", &saveptr)) { + if ((bdev = do_bdev_create(dest, token, cname, specs))) + return bdev; + } + } + + return do_bdev_create(dest, type, cname, specs); +} + +char *overlayfs_getlower(char *p) +{ + char *p1 = index(p, ':'); + if (p1) + *p1 = '\0'; + return p; +} diff --git a/src/lxc/bdev.h b/src/lxc/bdev.h new file mode 100644 index 000000000..f9d9a0cb3 --- /dev/null +++ b/src/lxc/bdev.h @@ -0,0 +1,138 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __LXC_BDEV_H +#define __LXC_BDEV_H +/* blockdev operations for: + * dir, raw, btrfs, overlayfs, aufs, lvm, loop, zfs + * someday: qemu-nbd, qcow2, qed + */ + +#include "config.h" +#include "lxccontainer.h" + +struct bdev; + +/* + * specifications for how to create a new backing store + */ +struct bdev_specs { + union { + struct { + char *zfsroot; + } zfs; + struct { + char *vg; + char *lv; + char *fstype; + unsigned long fssize; // fs size in bytes + } lvm; + struct { + char *fstype; + unsigned long fssize; // fs size in bytes + } loop; + } u; +}; + +struct bdev_ops { + /* detect whether path is of this bdev type */ + int (*detect)(const char *path); + // mount requires src and dest to be set. + int (*mount)(struct bdev *bdev); + int (*umount)(struct bdev *bdev); + int (*destroy)(struct bdev *bdev); + int (*create)(struct bdev *bdev, const char *dest, const char *n, + struct bdev_specs *specs); + /* given original mount, rename the paths for cloned container */ + int (*clone_paths)(struct bdev *orig, struct bdev *new, const char *oldname, + const char *cname, const char *oldpath, const char *lxcpath, + int snap, unsigned long newsize); +}; + +/* + * When lxc-start (conf.c) is mounting a rootfs, then src will be the + * 'lxc.rootfs' value, dest will be mount dir (i.e. $libdir/lxc) When clone + * or create is doing so, then dest will be $lxcpath/$lxcname/rootfs, since + * we may need to rsync from one to the other. + * data is so far unused. 
+ */ +struct bdev { + struct bdev_ops *ops; + char *type; + char *src; + char *dest; + char *data; + // turn the following into a union if need be + // lofd is the open fd for the mounted loopback file + int lofd; +}; + +char *overlayfs_getlower(char *p); + +/* + * Instantiate a bdev object. The src is used to determine which blockdev + * type this should be. The dst and data are optional, and will be used + * in case of mount/umount. + * + * Optionally, src can be 'dir:/var/lib/lxc/c1' or 'lvm:/dev/lxc/c1'. For + * other backing stores, this will allow additional options. In particular, + * "overlayfs:/var/lib/lxc/canonical/rootfs:/var/lib/lxc/c1/delta" will mean + * use /var/lib/lxc/canonical/rootfs as lower dir, and /var/lib/lxc/c1/delta + * as the upper, writeable layer. + */ +struct bdev *bdev_init(const char *src, const char *dst, const char *data); + +struct bdev *bdev_copy(const char *src, const char *oldname, const char *cname, + const char *oldpath, const char *lxcpath, const char *bdevtype, + int snap, const char *bdevdata, unsigned long newsize, + int *needs_rdep); +struct bdev *bdev_create(const char *dest, const char *type, + const char *cname, struct bdev_specs *specs); +void bdev_put(struct bdev *bdev); + +/* define constants if the kernel/glibc headers don't define them */ +#ifndef MS_DIRSYNC +#define MS_DIRSYNC 128 +#endif + +#ifndef MS_REC +#define MS_REC 16384 +#endif + +#ifndef MNT_DETACH +#define MNT_DETACH 2 +#endif + +#ifndef MS_SLAVE +#define MS_SLAVE (1<<19) +#endif + +#ifndef MS_RELATIME +#define MS_RELATIME (1 << 21) +#endif + +#ifndef MS_STRICTATIME +#define MS_STRICTATIME (1 << 24) +#endif + +#endif diff --git a/src/lxc/caps.c b/src/lxc/caps.c index 05444514d..006172d1d 100644 --- a/src/lxc/caps.c +++ b/src/lxc/caps.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 
02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE @@ -227,42 +227,4 @@ int lxc_caps_last_cap(void) return last_cap; } -/* - * check if we have the caps needed to start a container. returns 1 on - * success, 0 on error. (I'd prefer this be a bool, but am afraid that - * might fail to build on some distros). - */ -int lxc_caps_check(void) -{ - uid_t uid = getuid(); - cap_t caps; - cap_flag_value_t value; - int i, ret; - - cap_value_t needed_caps[] = { CAP_SYS_ADMIN, CAP_NET_ADMIN, CAP_SETUID, CAP_SETGID }; - -#define NUMCAPS ((int) (sizeof(needed_caps) / sizeof(cap_t))) - - if (!uid) - return 1; - - caps = cap_get_proc(); - if (!caps) { - ERROR("failed to cap_get_proc: %m"); - return 0; - } - - for (i=0; i @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,8 @@ #include "error.h" #include "config.h" #include "commands.h" +#include "list.h" +#include "conf.h" #include #include @@ -50,10 +53,21 @@ #include #endif +#ifndef HAVE_GETLINE +#ifdef HAVE_FGETLN +#include <../include/getline.h> +#endif +#endif + lxc_log_define(lxc_cgroup, lxc); #define MTAB "/proc/mounts" +/* In the case of a bind mount, there could be two long pathnames in the + * mntent plus options so use large enough buffer size + */ +#define LARGE_MAXPATHLEN 4 * MAXPATHLEN + /* Check if a mount is a cgroup hierarchy for any subsystem. * Return the first subsystem found (or NULL if none). */ @@ -69,8 +83,10 @@ static char *mount_has_subsystem(const struct mntent *mntent) return 0; /* skip the first line, which contains column headings */ - if (!fgets(line, MAXPATHLEN, f)) + if (!fgets(line, MAXPATHLEN, f)) { + fclose(f); return 0; + } while (fgets(line, MAXPATHLEN, f)) { c = strchr(line, '\t'); @@ -89,18 +105,19 @@ static char *mount_has_subsystem(const struct mntent *mntent) /* * Determine mountpoint for a cgroup subsystem. - * @subsystem: cgroup subsystem (i.e. freezer). 
If this is NULL, the first - * cgroup mountpoint with any subsystems is used. - * @mnt: a passed-in buffer of at least size MAXPATHLEN into which the path + * @dest: a passed-in buffer of at least size MAXPATHLEN into which the path * is copied. + * @subsystem: cgroup subsystem (i.e. freezer) * - * Returns 0 on success, -1 on error. + * Returns true on success, false on error. */ -static int get_cgroup_mount(const char *subsystem, char *mnt) +bool get_subsys_mount(char *dest, const char *subsystem) { - struct mntent *mntent; + struct mntent mntent_r; FILE *file = NULL; - int ret, err = -1; + int ret; + bool retv = false; + char buf[LARGE_MAXPATHLEN] = {0}; file = setmntent(MTAB, "r"); if (!file) { @@ -108,24 +125,23 @@ static int get_cgroup_mount(const char *subsystem, char *mnt) return -1; } - while ((mntent = getmntent(file))) { - if (strcmp(mntent->mnt_type, "cgroup")) + while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { + if (strcmp(mntent_r.mnt_type, "cgroup")) continue; if (subsystem) { - if (!hasmntopt(mntent, subsystem)) + if (!hasmntopt(&mntent_r, subsystem)) continue; } else { - if (!mount_has_subsystem(mntent)) + if (!mount_has_subsystem(&mntent_r)) continue; } - ret = snprintf(mnt, MAXPATHLEN, "%s", mntent->mnt_dir); + ret = snprintf(dest, MAXPATHLEN, "%s", mntent_r.mnt_dir); if (ret < 0 || ret >= MAXPATHLEN) goto fail; - DEBUG("using cgroup mounted at '%s'", mnt); - err = 0; + retv = true; goto out; }; @@ -134,128 +150,120 @@ fail: subsystem ? subsystem : "(NULL)"); out: endmntent(file); - return err; + return retv; } /* - * cgroup_path_get: Calculate the full path for a particular subsystem, plus - * a passed-in (to be appended) relative cgpath for a container. - * @path: a char** into which a pointer to the answer is copied - * @subsystem: subsystem of interest (i.e. freezer). - * @cgpath: a container's (relative) cgroup path, i.e. "/lxc/c1". - * - * Returns 0 on success, -1 on error. 
- * - * The answer is written in a static char[MAXPATHLEN] in this function and - * should not be freed. + * is_in_cgroup: check whether pid is found in the passed-in cgroup tasks + * file. + * @path: in full path to a cgroup tasks file + * Note that in most cases the file will simply not exist, which is ok - it + * just means that's not our cgroup. */ -extern int cgroup_path_get(char **path, const char *subsystem, const char *cgpath) +static bool is_in_cgroup(pid_t pid, char *path) { - static char buf[MAXPATHLEN]; - static char retbuf[MAXPATHLEN]; - int rc; + int cmppid; + FILE *f = fopen(path, "r"); + char *line = NULL; + size_t sz = 0; - /* lxc_cgroup_set passes a state object for the subsystem, - * so trim it to just the subsystem part */ - if (subsystem) { - rc = snprintf(retbuf, MAXPATHLEN, "%s", subsystem); - if (rc < 0 || rc >= MAXPATHLEN) { - ERROR("subsystem name too long"); - return -1; + if (!f) + return false; + while (getline(&line, &sz, f) != -1) { + if (sscanf(line, "%d", &cmppid) == 1 && cmppid == pid) { + fclose(f); + free(line); + return true; } - char *s = index(retbuf, '.'); - if (s) - *s = '\0'; - DEBUG("%s: called for subsys %s name %s\n", __func__, retbuf, cgpath); } - if (get_cgroup_mount(subsystem ? retbuf : NULL, buf)) { - ERROR("cgroup is not mounted"); - return -1; - } - - rc = snprintf(retbuf, MAXPATHLEN, "%s/%s", buf, cgpath); - if (rc < 0 || rc >= MAXPATHLEN) { - ERROR("name too long"); - return -1; - } - - DEBUG("%s: returning %s for subsystem %s", __func__, retbuf, subsystem); - - *path = retbuf; - return 0; + fclose(f); + if (line) + free(line); + return false; } /* - * Calculate a container's cgroup path for a particular subsystem. This - * is the cgroup path relative to the root of the cgroup filesystem. - * @path: A char ** into which we copy the char* containing the answer - * @subsystem: the cgroup subsystem of interest (i.e. freezer) - * @name: container name - * @lxcpath: the lxcpath in which the container is running. 
+ * lxc_cgroup_path_get: Get the absolute pathname for a cgroup + * file for a running container. * - * Returns 0 on success, -1 on error. - * - * Note that the char* copied into *path is a static char[MAXPATHLEN] in - * commands.c:receive_answer(). It should not be freed. - */ -extern int lxc_get_cgpath(const char **path, const char *subsystem, const char *name, const char *lxcpath) -{ - struct lxc_command command = { - .request = { .type = LXC_COMMAND_CGROUP }, - }; - - int ret, stopped = 0; - - ret = lxc_command(name, &command, &stopped, lxcpath); - if (ret < 0) { - if (!stopped) - ERROR("failed to send command"); - return -1; - } - - if (!ret) { - WARN("'%s' has stopped before sending its state", name); - return -1; - } - - if (command.answer.ret < 0 || command.answer.pathlen < 0) { - ERROR("failed to get state for '%s': %s", - name, strerror(-command.answer.ret)); - return -1; - } - - *path = command.answer.path; - - return 0; -} - -/* - * lxc_cgroup_path_get: determine full pathname for a cgroup - * file for a specific container. - * @path: char ** used to return the answer. The char * will point - * into the static char* retuf from cgroup_path_get() (so no need - * to free it). - * @subsystem: cgroup subsystem (i.e. "freezer") for which to - * return an answer. If NULL, then the first cgroup entry in - * mtab will be used. + * @subsystem : subsystem of interest (e.g. "freezer"). If NULL, then + * the first cgroup entry in mtab will be used. + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running * * This is the exported function, which determines cgpath from the - * monitor running in lxcpath. + * lxc-start of the @name container running in @lxcpath. * - * Returns 0 on success, < 0 on error. + * Returns path on success, NULL on error. The caller must free() + * the returned path. 
*/ -int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name, const char *lxcpath) +char *lxc_cgroup_path_get(const char *subsystem, const char *name, + const char *lxcpath) { - const char *cgpath; + char *cgpath, *cgp, path[MAXPATHLEN], *pathp, *p; + pid_t initpid = lxc_cmd_get_init_pid(name, lxcpath); + int ret; - if (lxc_get_cgpath(&cgpath, subsystem, name, lxcpath) < 0) - return -1; + if (initpid < 0) + return NULL; - return cgroup_path_get(path, subsystem, cgpath); + cgpath = lxc_cmd_get_cgroup_path(name, lxcpath, subsystem); + if (!cgpath) + return NULL; + + if (!get_subsys_mount(path, subsystem)) + return NULL; + + pathp = path + strlen(path); + /* + * find a mntpt where i have the subsystem mounted, then find + * a subset cgpath under that which has pid in it. + * + * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z, + * then look for ourselves in: + * /x/y/z/a/b/c/d/tasks + * /x/y/z/b/c/d/tasks + * /x/y/z/c/d/tasks + * /x/y/z/d/tasks + * /x/y/z/tasks + */ + cgp = cgpath; + while (cgp[0]) { + ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "%s/tasks", cgp); + if (ret < 0 || ret >= MAXPATHLEN) + return NULL; + if (!is_in_cgroup(initpid, path)) { + // does not exist, try the next one + cgp = index(cgp+1, '/'); + if (!cgp) + break; + continue; + } + break; + } + if (!cgp || !*cgp) { + // try just the path + ret = snprintf(pathp, MAXPATHLEN - (pathp - path), "/tasks"); + if (ret < 0 || ret >= MAXPATHLEN) + return NULL; + if (!is_in_cgroup(initpid, path)) { + return NULL; + } + return strdup("/"); + } + // path still has 'tasks' on the end, drop it + if ((p = strrchr(path, '/')) != NULL) + *p = '\0'; + return strdup(path); } /* - * small helper which simply write a value into a (cgroup) file + * do_cgroup_set: Write a value into a cgroup file + * + * @path : absolute path to cgroup file + * @value : value to write into file + * + * Returns 0 on success, < 0 on error. 
*/ static int do_cgroup_set(const char *path, const char *value) { @@ -279,27 +287,147 @@ static int do_cgroup_set(const char *path, const char *value) return 0; } +static int in_subsys_list(const char *s, const char *list) +{ + char *token, *str, *saveptr = NULL; + + if (!list || !s) + return 0; + + str = alloca(strlen(list)+1); + strcpy(str, list); + for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) { + if (strcmp(s, token) == 0) + return 1; + } + + return 0; +} + +static char *cgroup_get_subsys_abspath(struct lxc_handler *handler, const char *subsys) +{ + struct cgroup_desc *d; + + for (d = handler->cgroup; d; d = d->next) { + if (in_subsys_list(subsys, d->subsystems)) + return d->curcgroup; + } + + return NULL; +} + +static bool cgroup_devices_has_deny(struct lxc_handler *h, char *v) +{ + char *cgabspath, path[MAXPATHLEN]; + FILE *f; + char *line = NULL; + size_t len = 0; + bool ret = true; + int r; + + // XXX FIXME if users could use something other than 'lxc.devices.deny = a'. 
+ // not sure they ever do, but they *could* + // right now, I'm assuming they do NOT + if (strcmp(v, "a") && strcmp(v, "a *:* rwm")) + return false; + cgabspath = cgroup_get_subsys_abspath(h, "devices"); + if (!cgabspath) + return false; + + r = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath); + if (r < 0 || r >= MAXPATHLEN) { + ERROR("pathname too long for devices.list"); + return false; + } + + if (!(f = fopen(path, "r"))) + return false; + + while (getline(&line, &len, f) != -1) { + size_t len = strlen(line); + if (len > 0 && line[len-1] == '\n') + line[len-1] = '\0'; + if (strcmp(line, "a *:* rwm") == 0) { + ret = false; + goto out; + } + } + +out: + fclose(f); + if (line) + free(line); + return ret; +} + +static bool cgroup_devices_has_allow(struct lxc_handler *h, char *v) +{ + char *cgabspath, path[MAXPATHLEN]; + int r; + bool ret = false; + FILE *f; + char *line = NULL; + size_t len = 0; + + cgabspath = cgroup_get_subsys_abspath(h, "devices"); + if (!cgabspath) + return false; + + r = snprintf(path, MAXPATHLEN, "%s/devices.list", cgabspath); + if (r < 0 || r >= MAXPATHLEN) { + ERROR("pathname too long to for devices.list"); + return false; + } + + if (!(f = fopen(path, "r"))) + return false; + + while (getline(&line, &len, f) != -1) { + if (len < 1) + goto out; + if (line[len-1] == '\n') + line[len-1] = '\0'; + if (strcmp(line, "a *:* rwm") == 0 || strcmp(line, v) == 0) { + ret = true; + goto out; + } + } + +out: + if (line) + free(line); + fclose(f); + return ret; +} + /* - * small helper to write a value into a file in a particular directory. - * @cgpath: the directory in which to find the file - * @filename: the file (under cgpath) to which to write - * @value: what to write + * lxc_cgroup_set_bypath: Write a value into a cgroup file + * + * @cgrelpath : a container's relative cgroup path (e.g. "lxc/c1") + * @filename : the cgroup file to write (e.g. "freezer.state") + * @value : value to write into file * * Returns 0 on success, < 0 on error. 
*/ -int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value) +int lxc_cgroup_set_value(struct lxc_handler *handler, const char *filename, + const char *value) { + char *cgabspath, path[MAXPATHLEN], *p; int ret; - char *dirpath; - char path[MAXPATHLEN]; - ret = cgroup_path_get(&dirpath, filename, cgpath); - if (ret) + ret = snprintf(path, MAXPATHLEN, "%s", filename); + if (ret < 0 || ret >= MAXPATHLEN) + return -1; + if ((p = index(path, '.')) != NULL) + *p = '\0'; + cgabspath = cgroup_get_subsys_abspath(handler, path); + if (!cgabspath) return -1; - ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename); + ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename); if (ret < 0 || ret >= MAXPATHLEN) { - ERROR("pathname too long"); + ERROR("pathname too long to set cgroup value %s to %s", + filename, value); return -1; } @@ -307,50 +435,58 @@ int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char * } /* - * set a cgroup value for a container + * lxc_cgroup_set: Write a value into a cgroup file * - * @name: name of the container - * @filename: the cgroup file (i.e. freezer.state) whose value to change - * @value: the value to write to the file - * @lxcpath: the lxcpath under which the container is running. + * @name : name of container to connect to + * @filename : the cgroup file to write (e.g. "freezer.state") + * @value : value to write into file + * @lxcpath : the lxcpath in which the container is running * * Returns 0 on success, < 0 on error. 
*/ - int lxc_cgroup_set(const char *name, const char *filename, const char *value, const char *lxcpath) { int ret; - char *dirpath; + char *cgabspath; char path[MAXPATHLEN]; + char *subsystem = alloca(strlen(filename)+1), *p; + strcpy(subsystem, filename); - ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath); - if (ret) + if ((p = index(subsystem, '.')) != NULL) + *p = '\0'; + + cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath); + if (!cgabspath) return -1; - ret = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename); + ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename); if (ret < 0 || ret >= MAXPATHLEN) { ERROR("pathname too long"); - return -1; + ret = -1; + goto out; } - return do_cgroup_set(path, value); + ret = do_cgroup_set(path, value); + +out: + free(cgabspath); + return ret; } /* - * Get value of a cgroup setting for a container. + * lxc_cgroup_get: Read value from a cgroup file * - * @name: name of the container - * @filename: the cgroup file to read (i.e. 'freezer.state') - * @value: a preallocated char* into which to copy the answer - * @len: the length of pre-allocated @value - * @lxcpath: the lxcpath in which the container is running (i.e. - * /var/lib/lxc) + * @name : name of container to connect to + * @filename : the cgroup file to read (e.g. "freezer.state") + * @value : a pre-allocated buffer to copy the answer into + * @len : the length of pre-allocated @value + * @lxcpath : the lxcpath in which the container is running * - * Returns < 0 on error, or the number of bytes read. + * Returns the number of bytes read on success, < 0 on error * - * If you pass in NULL value or 0 len, then you are asking for the size of the - * file. + * If you pass in NULL value or 0 len, the return value will be the size of + * the file, and @value will not contain the contents. * * Note that we can't get the file size quickly through stat or lseek. 
* Therefore if you pass in len > 0 but less than the file size, your only @@ -360,25 +496,32 @@ int lxc_cgroup_set(const char *name, const char *filename, const char *value, int lxc_cgroup_get(const char *name, const char *filename, char *value, size_t len, const char *lxcpath) { - int fd, ret = -1; - char *dirpath; + int fd, ret; + char *cgabspath; char path[MAXPATHLEN]; - int rc; + char *subsystem = alloca(strlen(filename)+1), *p; - ret = lxc_cgroup_path_get(&dirpath, filename, name, lxcpath); - if (ret) + strcpy(subsystem, filename); + + if ((p = index(subsystem, '.')) != NULL) + *p = '\0'; + + cgabspath = lxc_cgroup_path_get(subsystem, name, lxcpath); + if (!cgabspath) return -1; - rc = snprintf(path, MAXPATHLEN, "%s/%s", dirpath, filename); - if (rc < 0 || rc >= MAXPATHLEN) { + ret = snprintf(path, MAXPATHLEN, "%s/%s", cgabspath, filename); + if (ret < 0 || ret >= MAXPATHLEN) { ERROR("pathname too long"); - return -1; + ret = -1; + goto out; } fd = open(path, O_RDONLY); if (fd < 0) { ERROR("open %s : %s", path, strerror(errno)); - return -1; + ret = -1; + goto out; } if (!len || !value) { @@ -397,23 +540,24 @@ int lxc_cgroup_get(const char *name, const char *filename, char *value, ERROR("read %s : %s", path, strerror(errno)); close(fd); +out: + free(cgabspath); return ret; } -int lxc_cgroup_nrtasks(const char *cgpath) +int lxc_cgroup_nrtasks(struct lxc_handler *handler) { - char *dpath; char path[MAXPATHLEN]; - int pid, ret, count = 0; + int pid, ret; FILE *file; - int rc; - ret = cgroup_path_get(&dpath, NULL, cgpath); - if (ret) + if (!handler->cgroup) return -1; - rc = snprintf(path, MAXPATHLEN, "%s/tasks", dpath); - if (rc < 0 || rc >= MAXPATHLEN) { + /* XXX Should we use a specific subsystem rather than the first one we + * found (handler->cgroup->curcgroup)? 
*/ + ret = snprintf(path, MAXPATHLEN, "%s/tasks", handler->cgroup->curcgroup); + if (ret < 0 || ret >= MAXPATHLEN) { ERROR("pathname too long"); return -1; } @@ -424,30 +568,43 @@ int lxc_cgroup_nrtasks(const char *cgpath) return -1; } + ret = 0; while (fscanf(file, "%d", &pid) != EOF) - count++; + ret++; fclose(file); - - return count; + return ret; } -/* - * If first creating the /sys/fs/cgroup/$subsys/lxc container, then - * try to set clone_children to 1. Some kernels don't support - * clone_children, and cgroup maintainer wants to deprecate it. So - * XXX TODO we should instead after each cgroup mkdir (here and in - * hooks/mountcgroup) check if cpuset is in the subsystems, and if so - * manually copy over mems and cpus. - */ -static void set_clone_children(const char *mntdir) +static int subsys_lists_match(const char *list1, const char *list2) +{ + char *token, *str, *saveptr = NULL; + + if (!list1 || !list2) + return 0; + + if (strlen(list1) != strlen(list2)) + return 0; + + str = alloca(strlen(list1)+1); + strcpy(str, list1); + for (; (token = strtok_r(str, ",", &saveptr)); str = NULL) { + if (in_subsys_list(token, list2) == 0) + return 0; + } + + return 1; +} + +static void set_clone_children(struct mntent *m) { char path[MAXPATHLEN]; FILE *fout; int ret; - ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", mntdir); - INFO("writing to %s\n", path); + if (!in_subsys_list("cpuset", m->mnt_opts)) + return; + ret = snprintf(path, MAXPATHLEN, "%s/cgroup.clone_children", m->mnt_dir); if (ret < 0 || ret > MAXPATHLEN) return; fout = fopen(path, "w"); @@ -457,59 +614,320 @@ static void set_clone_children(const char *mntdir) fclose(fout); } -/* - * Make sure the 'cgroup group' exists, so that we don't have to worry about - * that later. - * - * @lxcgroup: the cgroup group, i.e. 'lxc' by default. - * - * See detailed comments at lxc_cgroup_path_create for more information. - * - * Returns 0 on success, -1 on error. 
- */ -static int create_lxcgroups(const char *lxcgroup) +static bool have_visited(char *opts, char *visited, char *all_subsystems) { - FILE *file = NULL; - struct mntent *mntent; - int ret, retv = -1; - char path[MAXPATHLEN]; + char *str, *s = NULL, *token; - file = setmntent(MTAB, "r"); - if (!file) { - SYSERROR("failed to open %s", MTAB); - return -1; + str = alloca(strlen(opts)+1); + strcpy(str, opts); + for (; (token = strtok_r(str, ",", &s)); str = NULL) { + if (!in_subsys_list(token, all_subsystems)) + continue; + if (visited && in_subsys_list(token, visited)) + return true; } - while ((mntent = getmntent(file))) { + return false; +} - if (strcmp(mntent->mnt_type, "cgroup")) - continue; - if (!mount_has_subsystem(mntent)) - continue; +static bool is_in_desclist(struct cgroup_desc *d, char *opts, char *all_subsystems) +{ + while (d) { + if (have_visited(opts, d->subsystems, all_subsystems)) + return true; + d = d->next; + } + return false; +} - /* - * TODO - handle case where lxcgroup has subdirs? (i.e. build/l1) - * We probably only want to support that for /users/joe - */ - ret = snprintf(path, MAXPATHLEN, "%s/%s", - mntent->mnt_dir, lxcgroup ? 
lxcgroup : "lxc"); - if (ret < 0 || ret >= MAXPATHLEN) - goto fail; - if (access(path, F_OK)) { - set_clone_children(mntent->mnt_dir); - ret = mkdir(path, 0755); - if (ret == -1 && errno != EEXIST) { - SYSERROR("failed to create '%s' directory", path); - goto fail; - } +static char *record_visited(char *opts, char *all_subsystems) +{ + char *s = NULL, *token, *str; + int oldlen = 0, newlen, toklen; + char *visited = NULL; + + str = alloca(strlen(opts)+1); + strcpy(str, opts); + for (; (token = strtok_r(str, ",", &s)); str = NULL) { + if (!in_subsys_list(token, all_subsystems)) + continue; + toklen = strlen(token); + newlen = oldlen + toklen + 1; // ',' + token or token + '\0' + visited = realloc(visited, newlen); + if (!visited) + return (char *)-ENOMEM; + if (oldlen) + strcat(visited, ","); + else + *visited = '\0'; + strcat(visited, token); + oldlen = newlen; + } + + return visited; +} + +static char *get_all_subsystems(void) +{ + FILE *f; + char *line = NULL, *ret = NULL; + size_t len; + int first = 1; + + /* read the list of subsystems from the kernel */ + f = fopen("/proc/cgroups", "r"); + if (!f) + return NULL; + + while (getline(&line, &len, f) != -1) { + char *c; + int oldlen, newlen, inc; + + /* skip the first line */ + if (first) { + first=0; + continue; } + c = strchr(line, '\t'); + if (!c) + continue; + *c = '\0'; + + oldlen = ret ? strlen(ret) : 0; + newlen = oldlen + strlen(line) + 2; + ret = realloc(ret, newlen); + if (!ret) + goto out; + inc = snprintf(ret + oldlen, newlen, ",%s", line); + if (inc < 0 || inc >= newlen) { + free(ret); + ret = NULL; + goto out; + } } - retv = 0; -fail: - endmntent(file); - return retv; +out: + if (line) + free(line); + fclose(f); + return ret; +} + +/* + * /etc/lxc/lxc.conf can contain lxc.cgroup.use = entries. + * If any of those are present, then lxc will ONLY consider + * cgroup filesystems mounted at one of the listed entries. 
+ */ +static char *get_cgroup_uselist() +{ + FILE *f; + char *line = NULL, *ret = NULL; + size_t sz = 0, retsz = 0, newsz; + + if ((f = fopen(LXC_GLOBAL_CONF, "r")) == NULL) + return NULL; + while (getline(&line, &sz, f) != -1) { + char *p = line; + while (*p && isblank(*p)) + p++; + if (strncmp(p, "lxc.cgroup.use", 14) != 0) + continue; + p = index(p, '='); + if (!p) + continue; + p++; + while (*p && isblank(*p)) + p++; + if (strlen(p) < 1) + continue; + newsz = retsz + strlen(p); + if (retsz == 0) + newsz += 1; // for trailing \0 + // the last line in the file could lack \n + if (p[strlen(p)-1] != '\n') + newsz += 1; + ret = realloc(ret, newsz); + if (!ret) { + ERROR("Out of memory reading cgroup uselist"); + fclose(f); + free(line); + return (char *)-ENOMEM; + } + if (retsz == 0) + strcpy(ret, p); + else + strcat(ret, p); + if (p[strlen(p)-1] != '\n') + ret[newsz-2] = '\0'; + ret[newsz-1] = '\0'; + retsz = newsz; + } + + if (line) + free(line); + return ret; +} + +static bool is_in_uselist(char *uselist, struct mntent *m) +{ + char *p; + if (!uselist) + return true; + if (!*uselist) + return false; + while (*uselist) { + p = index(uselist, '\n'); + if (strncmp(m->mnt_dir, uselist, p - uselist) == 0) + return true; + uselist = p+1; + } + return false; +} + +static bool find_real_cgroup(struct cgroup_desc *d, char *path) +{ + FILE *f; + char *line = NULL, *p, *p2; + int ret = 0; + size_t len; + + if ((f = fopen("/proc/self/cgroup", "r")) == NULL) { + SYSERROR("Error opening /proc/self/cgroups"); + return false; + } + + // If there is no subsystem, ignore the mount. Note we may want + // to change this, so that unprivileged users can use a unbound + // cgroup mount to arrange their container tasks. 
+ if (!d->subsystems) { + fclose(f); + return false; + } + while (getline(&line, &len, f) != -1) { + if (!(p = index(line, ':'))) + continue; + if (!(p2 = index(++p, ':'))) + continue; + *p2 = '\0'; + // remove trailing newlines + if (*(p2 + 1) && p2[strlen(p2 + 1)] == '\n') + p2[strlen(p2 + 1)] = '\0'; + // in case of multiple mounts it may be more correct to + // insist all subsystems be the same + if (subsys_lists_match(p, d->subsystems)) + goto found; + } + + if (line) + free(line); + fclose(f); + return false;; + +found: + fclose(f); + ret = snprintf(path, MAXPATHLEN, "%s", p2+1); + if (ret < 0 || ret >= MAXPATHLEN) { + free(line); + return false; + } + free(line); + return true; +} + + +/* + * for a given cgroup mount entry, and a to-be-created container, + * 1. Figure out full path of the cgroup we are currently in, + * 2. Find a new free cgroup which is $path / $lxc_name with an + * optional '-$n' where n is an ever-increasing integer. + */ +static char *find_free_cgroup(struct cgroup_desc *d, const char *lxc_name) +{ + char tail[20], cgpath[MAXPATHLEN], *cgp, path[MAXPATHLEN]; + int i = 0, ret; + size_t l; + + if (!find_real_cgroup(d, cgpath)) { + ERROR("Failed to find current cgroup"); + return NULL; + } + + /* + * If d->mntpt is '/a/b/c/d', and the mountpoint is /x/y/z, + * then look for ourselves in: + * /x/y/z/a/b/c/d/tasks + * /x/y/z/b/c/d/tasks + * /x/y/z/c/d/tasks + * /x/y/z/d/tasks + * /x/y/z/tasks + */ + cgp = cgpath; + while (cgp[0]) { + ret = snprintf(path, MAXPATHLEN, "%s%s/tasks", d->mntpt, cgp); + if (ret < 0 || ret >= MAXPATHLEN) + return NULL; + if (!is_in_cgroup(getpid(), path)) { + // does not exist, try the next one + cgp = index(cgp+1, '/'); + if (!cgp) + break; + continue; + } + break; + } + if (!cgp || !*cgp) { + // try just the path + ret = snprintf(path, MAXPATHLEN, "%s/tasks", d->mntpt); + if (ret < 0 || ret >= MAXPATHLEN) + return NULL; + if (!is_in_cgroup(getpid(), path)) + return NULL; + } + // found it + // path has '/tasks' at 
end, drop that + if (!(cgp = strrchr(path, '/'))) { + ERROR("Got nonsensical path name %s\n", path); + return NULL; + } + *cgp = '\0'; + + if (strlen(path) + strlen(lxc_name) + 20 > MAXPATHLEN) { + ERROR("Error: cgroup path too long"); + return NULL; + } + tail[0] = '\0'; + while (1) { + struct stat sb; + int freebytes = MAXPATHLEN - (cgp - path); + + if (i) { + ret = snprintf(tail, 20, "-%d", i); + if (ret < 0 || ret >= 20) + return NULL; + } + ret = snprintf(cgp, freebytes, "/%s%s", lxc_name, tail); + if (ret < 0 || ret >= freebytes) + return NULL; + if (stat(path, &sb) == -1) + break; + i++; + } + + l = strlen(cgpath); + ret = snprintf(cgpath + l, MAXPATHLEN - l, "/%s%s", lxc_name, tail); + if (ret < 0 || ret >= (MAXPATHLEN - l)) { + ERROR("Out of memory"); + return NULL; + } + if ((d->realcgroup = strdup(cgpath)) == NULL) { + ERROR("Out of memory"); + return NULL; + } + l = strlen(d->realcgroup); + if (l > 0 && d->realcgroup[l-1] == '\n') + d->realcgroup[l-1] = '\0'; + return strdup(path); } /* @@ -532,117 +950,166 @@ fail: * is at /sys/fs/cgroup/freezer, and this fn returns '/lxc/r1', then the * freezer cgroup's full path will be /sys/fs/cgroup/freezer/lxc/r1/. 
* - * XXX This should probably be locked globally - * * Races won't be determintal, you'll just end up with leftover unused cgroups */ -char *lxc_cgroup_path_create(const char *lxcgroup, const char *name) +struct cgroup_desc *lxc_cgroup_path_create(const char *name) { - int i = 0, ret; - char *retpath, path[MAXPATHLEN]; - char tail[12]; + struct cgroup_desc *retdesc = NULL, *newdesc = NULL; FILE *file = NULL; - struct mntent *mntent; + struct mntent mntent_r; + char buf[LARGE_MAXPATHLEN] = {0}; + char *all_subsystems = get_all_subsystems(); + char *cgroup_uselist = get_cgroup_uselist(); - if (create_lxcgroups(lxcgroup) < 0) + if (cgroup_uselist == (char *)-ENOMEM) { + if (all_subsystems) + free(all_subsystems); return NULL; - -again: + } + if (!all_subsystems) { + ERROR("failed to get a list of all cgroup subsystems"); + if (cgroup_uselist) + free(cgroup_uselist); + return NULL; + } file = setmntent(MTAB, "r"); if (!file) { SYSERROR("failed to open %s", MTAB); + free(all_subsystems); + if (cgroup_uselist) + free(cgroup_uselist); return NULL; } - if (i) - snprintf(tail, 12, "-%d", i); - else - *tail = '\0'; + while ((getmntent_r(file, &mntent_r, buf, sizeof(buf)))) { - while ((mntent = getmntent(file))) { - - if (strcmp(mntent->mnt_type, "cgroup")) - continue; - if (!mount_has_subsystem(mntent)) + if (strcmp(mntent_r.mnt_type, "cgroup")) continue; - /* find unused mnt_dir + lxcgroup + name + -$i */ - ret = snprintf(path, MAXPATHLEN, "%s/%s/%s%s", mntent->mnt_dir, - lxcgroup ? 
lxcgroup : "lxc", name, tail); - if (ret < 0 || ret >= MAXPATHLEN) + if (cgroup_uselist && !is_in_uselist(cgroup_uselist, &mntent_r)) + continue; + + /* make sure we haven't checked this subsystem already */ + if (is_in_desclist(retdesc, mntent_r.mnt_opts, all_subsystems)) + continue; + + if (!(newdesc = malloc(sizeof(struct cgroup_desc)))) { + ERROR("Out of memory reading cgroups"); goto fail; - - if (access(path, F_OK) == 0) goto next; - - if (mkdir(path, 0755)) { - ERROR("Error creating cgroups"); + } + newdesc->subsystems = record_visited(mntent_r.mnt_opts, all_subsystems); + if (newdesc->subsystems == (char *)-ENOMEM) { + ERROR("Out of memory recording cgroup subsystems"); + free(newdesc); + newdesc = NULL; + goto fail; + } + if (!newdesc->subsystems) { + free(newdesc); + newdesc = NULL; + continue; + } + newdesc->mntpt = strdup(mntent_r.mnt_dir); + newdesc->realcgroup = NULL; + newdesc->curcgroup = find_free_cgroup(newdesc, name); + if (!newdesc->mntpt || !newdesc->curcgroup) { + ERROR("Out of memory reading cgroups"); goto fail; } + set_clone_children(&mntent_r); + + if (mkdir(newdesc->curcgroup, 0755)) { + ERROR("Error creating cgroup %s", newdesc->curcgroup); + goto fail; + } + newdesc->next = retdesc; + retdesc = newdesc; } endmntent(file); - - // print out the cgpath part - ret = snprintf(path, MAXPATHLEN, "%s/%s%s", - lxcgroup ? 
lxcgroup : "lxc", name, tail); - if (ret < 0 || ret >= MAXPATHLEN) // can't happen - goto fail; - - retpath = strdup(path); - - return retpath; - -next: - endmntent(file); - i++; - goto again; + free(all_subsystems); + if (cgroup_uselist) + free(cgroup_uselist); + return retdesc; fail: endmntent(file); + free(all_subsystems); + if (cgroup_uselist) + free(cgroup_uselist); + if (newdesc) { + if (newdesc->mntpt) + free(newdesc->mntpt); + if (newdesc->subsystems) + free(newdesc->subsystems); + if (newdesc->curcgroup) + free(newdesc->curcgroup); + if (newdesc->realcgroup) + free(newdesc->realcgroup); + free(newdesc); + } + while (retdesc) { + struct cgroup_desc *t = retdesc; + retdesc = retdesc->next; + if (t->mntpt) + free(t->mntpt); + if (t->subsystems) + free(t->subsystems); + if (t->curcgroup) + free(t->curcgroup); + if (t->realcgroup) + free(t->realcgroup); + free(t); + + } return NULL; } -int lxc_cgroup_enter(const char *cgpath, pid_t pid) +static bool lxc_cgroup_enter_one(const char *dir, int pid) { char path[MAXPATHLEN]; - FILE *file = NULL, *fout; - struct mntent *mntent; - int ret, retv = -1; + int ret; + FILE *fout; - file = setmntent(MTAB, "r"); - if (!file) { - SYSERROR("failed to open %s", MTAB); - return -1; + ret = snprintf(path, MAXPATHLEN, "%s/tasks", dir); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("Error entering cgroup"); + return false; } - - while ((mntent = getmntent(file))) { - if (strcmp(mntent->mnt_type, "cgroup")) - continue; - if (!mount_has_subsystem(mntent)) - continue; - ret = snprintf(path, MAXPATHLEN, "%s/%s/tasks", - mntent->mnt_dir, cgpath); - if (ret < 0 || ret >= MAXPATHLEN) { - ERROR("entering cgroup"); - goto out; - } - fout = fopen(path, "w"); - if (!fout) { - ERROR("entering cgroup"); - goto out; - } - fprintf(fout, "%d\n", (int)pid); + fout = fopen(path, "w"); + if (!fout) { + SYSERROR("Error entering cgroup"); + return false; + } + if (fprintf(fout, "%d\n", (int)pid) < 0) { + ERROR("Error writing pid to %s to enter cgroup", 
path); fclose(fout); + return false; + } + if (fclose(fout) < 0) { + SYSERROR("Error writing pid to %s to enter cgroup", path); + return false; } - retv = 0; -out: - endmntent(file); - return retv; + return true; } -int recursive_rmdir(char *dirname) +int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid) +{ + while (cgroups) { + if (!cgroups->subsystems) + goto next; + + if (!lxc_cgroup_enter_one(cgroups->curcgroup, pid)) + return -1; +next: + cgroups = cgroups->next; + } + return 0; +} + +static int cgroup_rmdir(char *dirname) { struct dirent dirent, *direntp; DIR *dir; @@ -663,7 +1130,7 @@ int recursive_rmdir(char *dirname) break; if (!strcmp(direntp->d_name, ".") || - !strcmp(direntp->d_name, "..")) + !strcmp(direntp->d_name, "..")) continue; rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name); @@ -675,7 +1142,7 @@ int recursive_rmdir(char *dirname) if (ret) continue; if (S_ISDIR(mystat.st_mode)) - recursive_rmdir(pathname); + cgroup_rmdir(pathname); } ret = rmdir(dirname); @@ -683,71 +1150,177 @@ int recursive_rmdir(char *dirname) if (closedir(dir)) ERROR("failed to close directory"); return ret; - - -} - -static int lxc_one_cgroup_destroy(struct mntent *mntent, const char *cgpath) -{ - char cgname[MAXPATHLEN]; - char *cgmnt = mntent->mnt_dir; - int rc; - - rc = snprintf(cgname, MAXPATHLEN, "%s/%s", cgmnt, cgpath); - if (rc < 0 || rc >= MAXPATHLEN) { - ERROR("name too long"); - return -1; - } - DEBUG("destroying %s\n", cgname); - if (recursive_rmdir(cgname)) { - SYSERROR("failed to remove cgroup '%s'", cgname); - return -1; - } - - DEBUG("'%s' unlinked", cgname); - - return 0; } /* * for each mounted cgroup, destroy the cgroup for the container */ -int lxc_cgroup_destroy(const char *cgpath) +void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups) { - struct mntent *mntent; - FILE *file = NULL; - int err, retv = 0; - - file = setmntent(MTAB, "r"); - if (!file) { - SYSERROR("failed to open %s", MTAB); - return -1; + while (cgroups) { + 
struct cgroup_desc *next = cgroups->next; + if (cgroup_rmdir(cgroups->curcgroup) < 0) + SYSERROR("Error removing cgroup directory %s", cgroups->curcgroup); + free(cgroups->mntpt); + free(cgroups->subsystems); + free(cgroups->curcgroup); + free(cgroups->realcgroup); + free(cgroups); + cgroups = next; } - - while ((mntent = getmntent(file))) { - if (strcmp(mntent->mnt_type, "cgroup")) - continue; - if (!mount_has_subsystem(mntent)) - continue; - - err = lxc_one_cgroup_destroy(mntent, cgpath); - if (err) // keep trying to clean up the others - retv = -1; - } - - endmntent(file); - return retv; } int lxc_cgroup_attach(pid_t pid, const char *name, const char *lxcpath) { - const char *dirpath; + FILE *f; + char *line = NULL, ret = 0; + size_t len = 0; + int first = 1; + char *dirpath; - if (lxc_get_cgpath(&dirpath, NULL, name, lxcpath) < 0) { - ERROR("Error getting cgroup for container %s: %s", lxcpath, name); + /* read the list of subsystems from the kernel */ + f = fopen("/proc/cgroups", "r"); + if (!f) return -1; - } - INFO("joining pid %d to cgroup %s", pid, dirpath); - return lxc_cgroup_enter(dirpath, pid); + while (getline(&line, &len, f) != -1) { + char *c; + + /* skip the first line */ + if (first) { + first=0; + continue; + } + + c = strchr(line, '\t'); + if (!c) + continue; + *c = '\0'; + dirpath = lxc_cgroup_path_get(line, name, lxcpath); + if (!dirpath) + continue; + + INFO("joining pid %d to cgroup %s", pid, dirpath); + if (!lxc_cgroup_enter_one(dirpath, pid)) { + ERROR("Failed joining %d to %s\n", pid, dirpath); + ret = -1; + continue; + } + } + + if (line) + free(line); + fclose(f); + return ret; +} + +bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d) +{ + char filepath[MAXPATHLEN], *line = NULL, v1[MAXPATHLEN], v2[MAXPATHLEN]; + FILE *f; + int ret, junk; + size_t sz = 0, l1, l2; + char *end = index(subsystem, '.'); + int len = end ? 
(end - subsystem) : strlen(subsystem); + const char *cgpath = NULL; + + while (d) { + if (in_subsys_list("devices", d->subsystems)) { + cgpath = d->realcgroup; + l1 = strlen(cgpath); + break; + } + d = d->next; + } + if (!d) + return false; + + ret = snprintf(filepath, MAXPATHLEN, "/proc/%d/cgroup", pid); + if (ret < 0 || ret >= MAXPATHLEN) + return false; + if ((f = fopen(filepath, "r")) == NULL) + return false; + while (getline(&line, &sz, f) != -1) { + // nr:subsystem:path + v2[0] = v2[1] = '\0'; + ret = sscanf(line, "%d:%[^:]:%s", &junk, v1, v2); + if (ret != 3) { + fclose(f); + free(line); + return false; + } + len = end ? end - subsystem : strlen(subsystem); + if (strncmp(v1, subsystem, len) != 0) + continue; + // v2 will start with '/', skip it by using v2+1 + // we must be in SUBcgroup, so make sure l2 > l1 + l2 = strlen(v2+1); + if (l2 > l1 && strncmp(v2+1, cgpath, l1) == 0) { + fclose(f); + free(line); + return true; + } + } + fclose(f); + if (line) + free(line); + return false; +} + +char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys) +{ + struct cgroup_desc *d; + + for (d = handler->cgroup; d; d = d->next) { + if (in_subsys_list(subsys, d->subsystems)) + return d->realcgroup; + } + + return NULL; +} + +static int _setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups, + int devices) +{ + struct lxc_list *iterator; + struct lxc_cgroup *cg; + int ret = -1; + + if (lxc_list_empty(cgroups)) + return 0; + + lxc_list_for_each(iterator, cgroups) { + cg = iterator->elem; + + if (devices == !strncmp("devices", cg->subsystem, 7)) { + if (strcmp(cg->subsystem, "devices.deny") == 0 && + cgroup_devices_has_deny(h, cg->value)) + continue; + if (strcmp(cg->subsystem, "devices.allow") == 0 && + cgroup_devices_has_allow(h, cg->value)) + continue; + if (lxc_cgroup_set_value(h, cg->subsystem, cg->value)) { + ERROR("Error setting %s to %s for %s\n", + cg->subsystem, cg->value, h->name); + goto out; + } + } + + DEBUG("cgroup '%s' set to 
'%s'", cg->subsystem, cg->value); + } + + ret = 0; + INFO("cgroup has been setup"); +out: + return ret; +} + +int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups) +{ + return _setup_cgroup(h, cgroups, 1); +} + +int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups) +{ + return _setup_cgroup(h, cgroups, 0); } diff --git a/src/lxc/cgroup.h b/src/lxc/cgroup.h index 937b9c92f..01ee9319a 100644 --- a/src/lxc/cgroup.h +++ b/src/lxc/cgroup.h @@ -18,21 +18,40 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _cgroup_h #define _cgroup_h +#include -#define MAXPRIOLEN 24 +/* + * cgroup_desc: describe a container's cgroup membership + */ +struct cgroup_desc { + char *mntpt; /* where this is mounted */ + char *subsystems; /* comma-separated list of subsystems, or NULL */ + char *curcgroup; /* task's current cgroup, full pathanme */ + char *realcgroup; /* the cgroup as known in /proc/self/cgroup */ + struct cgroup_desc *next; +}; struct lxc_handler; -extern int lxc_cgroup_destroy(const char *cgpath); -extern int lxc_cgroup_path_get(char **path, const char *subsystem, const char *name, +extern void lxc_cgroup_destroy_desc(struct cgroup_desc *cgroups); +extern char *lxc_cgroup_path_get(const char *subsystem, const char *name, const char *lxcpath); -extern int lxc_cgroup_nrtasks(const char *cgpath); -extern char *lxc_cgroup_path_create(const char *lxcgroup, const char *name); -extern int lxc_cgroup_enter(const char *cgpath, pid_t pid); +extern int lxc_cgroup_nrtasks(struct lxc_handler *handler); +struct cgroup_desc *lxc_cgroup_path_create(const char *name); +extern int lxc_cgroup_enter(struct cgroup_desc *cgroups, pid_t pid); extern int lxc_cgroup_attach(pid_t pid, const char *name, const char 
*lxcpath); -extern int cgroup_path_get(char **path, const char *subsystem, const char *cgpath); -extern int lxc_get_cgpath(const char **path, const char *subsystem, const char *name, const char *lxcpath); +extern char *cgroup_path_get(const char *subsystem, const char *cgpath); +extern bool get_subsys_mount(char *dest, const char *subsystem); +extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d); +/* + * Called by commands.c by a container's monitor to find out the + * container's cgroup path in a specific subsystem + */ +extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys); +struct lxc_list; +extern int setup_cgroup(struct lxc_handler *h, struct lxc_list *cgroups); +extern int setup_cgroup_devices(struct lxc_handler *h, struct lxc_list *cgroups); #endif diff --git a/src/lxc/checkpoint.c b/src/lxc/checkpoint.c index 1f6b471e3..75834c98f 100644 --- a/src/lxc/checkpoint.c +++ b/src/lxc/checkpoint.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include diff --git a/src/lxc/commands.c b/src/lxc/commands.c index d45ae2112..0c058104a 100644 --- a/src/lxc/commands.c +++ b/src/lxc/commands.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include @@ -34,27 +34,41 @@ #include #include +#include #include #include /* for struct lxc_handler */ #include #include "commands.h" +#include "console.h" +#include "confile.h" #include "mainloop.h" #include "af_unix.h" #include 
"config.h" /* - * This file provides the different functions to have the client - * and the server to communicate + * This file provides the different functions for clients to + * query/command the server. The client is typically some lxc + * tool and the server is typically the container (ie. lxc-start). * - * Each command is transactional, the client send a request to - * the server and the server answer the request with a message + * Each command is transactional, the clients send a request to + * the server and the server answers the request with a message * giving the request's status (zero or a negative errno value). + * Both the request and response may contain addtional data. * * Each command is wrapped in a ancillary message in order to pass * a credential making possible to the server to check if the client * is allowed to ask for this command or not. * + * IMPORTANTLY: Note that semantics for current commands are fixed. If you + * wish to make any changes to how, say, LXC_CMD_GET_CONFIG_ITEM works by + * adding information to the end of cmd.data, then you must introduce a new + * LXC_CMD_GET_CONFIG_ITEM_V2 define with a new number. You may wish to + * also mark LXC_CMD_GET_CONFIG_ITEM deprecated in commands.h. + * + * This is necessary in order to avoid having a newly compiled lxc command + * communicating with a running (old) monitor from crashing the running + * container. 
*/ lxc_log_define(lxc_commands, lxc); @@ -81,181 +95,642 @@ static int fill_sock_name(char *path, int len, const char *name, return 0; } -static int receive_answer(int sock, struct lxc_answer *answer) +static const char *lxc_cmd_str(lxc_cmd_t cmd) { - int ret; - static char answerpath[MAXPATHLEN]; + static const char *cmdname[LXC_CMD_MAX] = { + [LXC_CMD_CONSOLE] = "console", + [LXC_CMD_STOP] = "stop", + [LXC_CMD_GET_STATE] = "get_state", + [LXC_CMD_GET_INIT_PID] = "get_init_pid", + [LXC_CMD_GET_CLONE_FLAGS] = "get_clone_flags", + [LXC_CMD_GET_CGROUP] = "get_cgroup", + [LXC_CMD_GET_CONFIG_ITEM] = "get_config_item", + }; - ret = lxc_af_unix_recv_fd(sock, &answer->fd, answer, sizeof(*answer)); - if (ret < 0) - ERROR("failed to receive answer for the command"); - if (answer->pathlen == 0) - return ret; - if (answer->pathlen >= MAXPATHLEN) { - ERROR("cgroup path was too long"); + if (cmd < 0 || cmd >= LXC_CMD_MAX) + return "Unknown cmd"; + return cmdname[cmd]; +} + +/* + * lxc_cmd_rsp_recv: Receive a response to a command + * + * @sock : the socket connected to the container + * @cmd : command to put response in + * + * Returns the size of the response message or < 0 on failure + * + * Note that if the command response datalen > 0, then data is + * a malloc()ed buffer and should be free()ed by the caller. If + * the response data is <= a void * worth of data, it will be + * stored directly in data and datalen will be 0. + * + * As a special case, the response for LXC_CMD_CONSOLE is created + * here as it contains an fd for the master pty passed through the + * unix socket. 
+ */ +static int lxc_cmd_rsp_recv(int sock, struct lxc_cmd_rr *cmd) +{ + int ret,rspfd; + struct lxc_cmd_rsp *rsp = &cmd->rsp; + + ret = lxc_af_unix_recv_fd(sock, &rspfd, rsp, sizeof(*rsp)); + if (ret < 0) { + ERROR("command %s failed to receive response", + lxc_cmd_str(cmd->req.cmd)); return -1; } - ret = recv(sock, answerpath, answer->pathlen, 0); - if (ret != answer->pathlen) { - ERROR("failed to receive answer for the command"); - ret = 0; - } else - answer->path = answerpath; + + if (cmd->req.cmd == LXC_CMD_CONSOLE) { + struct lxc_cmd_console_rsp_data *rspdata; + + /* recv() returns 0 bytes when a tty cannot be allocated, + * rsp->ret is < 0 when the peer permission check failed + */ + if (ret == 0 || rsp->ret < 0) + return 0; + + rspdata = malloc(sizeof(*rspdata)); + if (!rspdata) { + ERROR("command %s couldn't allocate response buffer", + lxc_cmd_str(cmd->req.cmd)); + return -1; + } + rspdata->masterfd = rspfd; + rspdata->ttynum = PTR_TO_INT(rsp->data); + rsp->data = rspdata; + } + + if (rsp->datalen == 0) + return ret; + if (rsp->datalen > LXC_CMD_DATA_MAX) { + ERROR("command %s response data %d too long", + lxc_cmd_str(cmd->req.cmd), rsp->datalen); + errno = EFBIG; + return -1; + } + + rsp->data = malloc(rsp->datalen); + if (!rsp->data) { + ERROR("command %s unable to allocate response buffer", + lxc_cmd_str(cmd->req.cmd)); + return -1; + } + ret = recv(sock, rsp->data, rsp->datalen, 0); + if (ret != rsp->datalen) { + ERROR("command %s failed to receive response data", + lxc_cmd_str(cmd->req.cmd)); + if (ret >= 0) + ret = -1; + } return ret; } -static int __lxc_command(const char *name, struct lxc_command *command, - int *stopped, int stay_connected, const char *lxcpath) +/* + * lxc_cmd_rsp_send: Send a command response + * + * @fd : file descriptor of socket to send response on + * @rsp : response to send + * + * Returns 0 on success, < 0 on failure + */ +static int lxc_cmd_rsp_send(int fd, struct lxc_cmd_rsp *rsp) +{ + int ret; + + ret = send(fd, rsp, 
sizeof(*rsp), 0); + if (ret != sizeof(*rsp)) { + ERROR("failed to send command response %d %s", ret, + strerror(errno)); + return -1; + } + + if (rsp->datalen > 0) { + ret = send(fd, rsp->data, rsp->datalen, 0); + if (ret != rsp->datalen) { + WARN("failed to send command response data %d %s", ret, + strerror(errno)); + return -1; + } + } + return 0; +} + +/* + * lxc_cmd: Connect to the specified running container, send it a command + * request and collect the response + * + * @name : name of container to connect to + * @cmd : command with initialized reqest to send + * @stopped : output indicator if the container was not running + * @lxcpath : the lxcpath in which the container is running + * + * Returns the size of the response message on success, < 0 on failure + * + * Note that there is a special case for LXC_CMD_CONSOLE. For this command + * the fd cannot be closed because it is used as a placeholder to indicate + * that a particular tty slot is in use. The fd is also used as a signal to + * the container that when the caller dies or closes the fd, the container + * will notice the fd on its side of the socket in its mainloop select and + * then free the slot with lxc_cmd_fd_cleanup(). The socket fd will be + * returned in the cmd response structure. 
+ */ +static int lxc_cmd(const char *name, struct lxc_cmd_rr *cmd, int *stopped, + const char *lxcpath) { int sock, ret = -1; char path[sizeof(((struct sockaddr_un *)0)->sun_path)] = { 0 }; char *offset = &path[1]; int len; + int stay_connected = cmd->req.cmd == LXC_CMD_CONSOLE; len = sizeof(path)-1; if (fill_sock_name(offset, len, name, lxcpath)) return -1; sock = lxc_af_unix_connect(path); - if (sock < 0 && errno == ECONNREFUSED) { - *stopped = 1; - return -1; - } - if (sock < 0) { - SYSERROR("failed to connect to '@%s'", offset); + if (errno == ECONNREFUSED) + *stopped = 1; + else + SYSERROR("command %s failed to connect to '@%s'", + lxc_cmd_str(cmd->req.cmd), offset); return -1; } - ret = lxc_af_unix_send_credential(sock, &command->request, - sizeof(command->request)); - if (ret < 0) { - SYSERROR("failed to send request to '@%s'", offset); + ret = lxc_af_unix_send_credential(sock, &cmd->req, sizeof(cmd->req)); + if (ret != sizeof(cmd->req)) { + SYSERROR("command %s failed to send req to '@%s' %d", + lxc_cmd_str(cmd->req.cmd), offset, ret); + if (ret >=0) + ret = -1; goto out; } - if (ret != sizeof(command->request)) { - SYSERROR("message partially sent to '@%s'", offset); - goto out; + if (cmd->req.datalen > 0) { + ret = send(sock, cmd->req.data, cmd->req.datalen, 0); + if (ret != cmd->req.datalen) { + SYSERROR("command %s failed to send request data to '@%s' %d", + lxc_cmd_str(cmd->req.cmd), offset, ret); + if (ret >=0) + ret = -1; + goto out; + } } - ret = receive_answer(sock, &command->answer); + ret = lxc_cmd_rsp_recv(sock, cmd); out: - if (!stay_connected || ret < 0) + if (!stay_connected || ret <= 0) close(sock); + if (stay_connected && ret > 0) + cmd->rsp.ret = sock; return ret; } -extern int lxc_command(const char *name, - struct lxc_command *command, int *stopped, - const char *lxcpath) -{ - return __lxc_command(name, command, stopped, 0, lxcpath); -} +/* Implentations of the commands and their callbacks */ -extern int lxc_command_connected(const char 
*name, - struct lxc_command *command, int *stopped, - const char *lxcpath) +/* + * lxc_cmd_get_init_pid: Get pid of the container's init process + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * + * Returns the pid on success, < 0 on failure + */ +pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath) { - return __lxc_command(name, command, stopped, 1, lxcpath); -} - - -pid_t get_init_pid(const char *name, const char *lxcpath) -{ - struct lxc_command command = { - .request = { .type = LXC_COMMAND_PID }, + int ret, stopped = 0; + struct lxc_cmd_rr cmd = { + .req = { .cmd = LXC_CMD_GET_INIT_PID }, }; - int ret, stopped = 0; + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); + if (ret < 0) + return ret; - ret = lxc_command(name, &command, &stopped, lxcpath); - if (ret < 0 && stopped) - return -1; - - if (ret < 0) { - ERROR("failed to send command"); - return -1; - } - - if (command.answer.ret) { - ERROR("failed to retrieve the init pid: %s", - strerror(-command.answer.ret)); - return -1; - } - - return command.answer.pid; + return PTR_TO_INT(cmd.rsp.data); } -int lxc_get_clone_flags(const char *name, const char *lxcpath) +static int lxc_cmd_get_init_pid_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) { - struct lxc_command command = { - .request = { .type = LXC_COMMAND_CLONE_FLAGS }, + struct lxc_cmd_rsp rsp = { .data = INT_TO_PTR(handler->pid) }; + + return lxc_cmd_rsp_send(fd, &rsp); +} + +/* + * lxc_cmd_get_clone_flags: Get clone flags container was spawned with + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * + * Returns the clone flags on success, < 0 on failure + */ +int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath) +{ + int ret, stopped = 0; + struct lxc_cmd_rr cmd = { + .req = { .cmd = LXC_CMD_GET_CLONE_FLAGS }, }; - int ret, stopped = 0; + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); 
+ if (ret < 0) + return ret; - ret = lxc_command(name, &command, &stopped, lxcpath); + return PTR_TO_INT(cmd.rsp.data); +} + +static int lxc_cmd_get_clone_flags_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) +{ + struct lxc_cmd_rsp rsp = { .data = INT_TO_PTR(handler->clone_flags) }; + + return lxc_cmd_rsp_send(fd, &rsp); +} + +extern char *cgroup_get_subsys_path(struct lxc_handler *handler, const char *subsys); +/* + * lxc_cmd_get_cgroup_path: Calculate a container's cgroup path for a + * particular subsystem. This is the cgroup path relative to the root + * of the cgroup filesystem. + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * @subsystem : the subsystem being asked about + * + * Returns the path on success, NULL on failure. The caller must free() the + * returned path. + */ +char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, + const char *subsystem) +{ + int ret, stopped = 0; + struct lxc_cmd_rr cmd = { + .req = { + .cmd = LXC_CMD_GET_CGROUP, + .datalen = strlen(subsystem)+1, + .data = subsystem, + }, + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); + if (ret < 0) + return NULL; + + if (!ret) { + WARN("'%s' has stopped before sending its state", name); + return NULL; + } + + if (cmd.rsp.ret < 0 || cmd.rsp.datalen < 0) { + ERROR("command %s failed for '%s': %s", + lxc_cmd_str(cmd.req.cmd), name, + strerror(-cmd.rsp.ret)); + return NULL; + } + + return cmd.rsp.data; +} + +static int lxc_cmd_get_cgroup_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) +{ + struct lxc_cmd_rsp rsp; + char *path; + + if (req->datalen < 1) + return -1; + + path = cgroup_get_subsys_path(handler, req->data); + if (!path) + return -1; + rsp.datalen = strlen(path) + 1, + rsp.data = path; + rsp.ret = 0; + + return lxc_cmd_rsp_send(fd, &rsp); +} + +/* + * lxc_cmd_get_config_item: Get config item the running container + * + * @name : name of container to 
connect to + * @item : the configuration item to retrieve (ex: lxc.network.0.veth.pair) + * @lxcpath : the lxcpath in which the container is running + * + * Returns the item on success, NULL on failure. The caller must free() the + * returned item. + */ +char *lxc_cmd_get_config_item(const char *name, const char *item, + const char *lxcpath) +{ + int ret, stopped = 0; + struct lxc_cmd_rr cmd = { + .req = { .cmd = LXC_CMD_GET_CONFIG_ITEM, + .data = item, + .datalen = strlen(item)+1, + }, + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); + if (ret < 0) + return NULL; + + if (cmd.rsp.ret == 0) + return cmd.rsp.data; + return NULL; +} + +static int lxc_cmd_get_config_item_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) +{ + int cilen; + struct lxc_cmd_rsp rsp; + char *cidata; + + memset(&rsp, 0, sizeof(rsp)); + cilen = lxc_get_config_item(handler->conf, req->data, NULL, 0); + if (cilen <= 0) + goto err1; + + cidata = alloca(cilen + 1); + if (lxc_get_config_item(handler->conf, req->data, cidata, cilen + 1) != cilen) + goto err1; + cidata[cilen] = '\0'; + rsp.data = cidata; + rsp.datalen = cilen + 1; + rsp.ret = 0; + goto out; + +err1: + rsp.ret = -1; +out: + return lxc_cmd_rsp_send(fd, &rsp); +} + +/* + * lxc_cmd_get_state: Get current state of the container + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * + * Returns the state on success, < 0 on failure + */ +lxc_state_t lxc_cmd_get_state(const char *name, const char *lxcpath) +{ + int ret, stopped = 0; + struct lxc_cmd_rr cmd = { + .req = { .cmd = LXC_CMD_GET_STATE } + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); if (ret < 0 && stopped) + return STOPPED; + + if (ret < 0) return -1; - if (ret < 0) { - ERROR("failed to send command"); + if (!ret) { + WARN("'%s' has stopped before sending its state", name); return -1; } - return command.answer.ret; + DEBUG("'%s' is in '%s' state", name, + 
lxc_state2str(PTR_TO_INT(cmd.rsp.data))); + return PTR_TO_INT(cmd.rsp.data); } -extern void lxc_console_remove_fd(int, struct lxc_tty_info *); -extern int lxc_console_callback(int, struct lxc_request *, struct lxc_handler *); -extern int lxc_stop_callback(int, struct lxc_request *, struct lxc_handler *); -extern int lxc_state_callback(int, struct lxc_request *, struct lxc_handler *); -extern int lxc_pid_callback(int, struct lxc_request *, struct lxc_handler *); -extern int lxc_clone_flags_callback(int, struct lxc_request *, struct lxc_handler *); -extern int lxc_cgroup_callback(int, struct lxc_request *, struct lxc_handler *); +static int lxc_cmd_get_state_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) +{ + struct lxc_cmd_rsp rsp = { .data = INT_TO_PTR(handler->state) }; -static int trigger_command(int fd, struct lxc_request *request, + return lxc_cmd_rsp_send(fd, &rsp); +} + +/* + * lxc_cmd_stop: Stop the container previously started with lxc_start. All + * the processes running inside this container will be killed. 
+ * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * + * Returns 0 on success, < 0 on failure + */ +int lxc_cmd_stop(const char *name, const char *lxcpath) +{ + int ret, stopped = 0; + struct lxc_cmd_rr cmd = { + .req = { .cmd = LXC_CMD_STOP }, + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); + if (ret < 0) { + if (stopped) { + INFO("'%s' is already stopped", name); + return 0; + } + return -1; + } + + /* we do not expect any answer, because we wait for the connection to be + * closed + */ + if (ret > 0) { + ERROR("failed to stop '%s': %s", name, strerror(-cmd.rsp.ret)); + return -1; + } + + INFO("'%s' has stopped", name); + return 0; +} + +static int lxc_cmd_stop_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) +{ + struct lxc_cmd_rsp rsp; + int ret; + int stopsignal = SIGKILL; + + if (handler->conf->stopsignal) + stopsignal = handler->conf->stopsignal; + memset(&rsp, 0, sizeof(rsp)); + rsp.ret = kill(handler->pid, stopsignal); + if (!rsp.ret) { + char *path = cgroup_get_subsys_path(handler, "freezer"); + if (!path) { + ERROR("container %s:%s is not in a freezer cgroup", + handler->lxcpath, handler->name); + return 0; + } + ret = lxc_unfreeze_bypath(path); + if (!ret) + return 0; + + ERROR("failed to unfreeze container"); + rsp.ret = ret; + } + + return lxc_cmd_rsp_send(fd, &rsp); +} + +/* + * lxc_cmd_console_winch: To process as if a SIGWINCH were received + * + * @name : name of container to connect to + * @lxcpath : the lxcpath in which the container is running + * + * Returns 0 on success, < 0 on failure + */ +int lxc_cmd_console_winch(const char *name, const char *lxcpath) +{ + int ret, stopped = 0; + struct lxc_cmd_rr cmd = { + .req = { .cmd = LXC_CMD_CONSOLE_WINCH }, + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); + if (ret < 0) + return ret; + + return 0; +} + +static int lxc_cmd_console_winch_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler 
*handler) +{ + struct lxc_cmd_rsp rsp = { .data = 0 }; + + lxc_console_sigwinch(SIGWINCH); + return lxc_cmd_rsp_send(fd, &rsp); +} + +/* + * lxc_cmd_console: Open an fd to a tty in the container + * + * @name : name of container to connect to + * @ttynum : in: the tty to open or -1 for next available + * : out: the tty allocated + * @fd : out: file descriptor for master side of pty + * @lxcpath : the lxcpath in which the container is running + * + * Returns fd holding tty allocated on success, < 0 on failure + */ +int lxc_cmd_console(const char *name, int *ttynum, int *fd, const char *lxcpath) +{ + int ret, stopped = 0; + struct lxc_cmd_console_rsp_data *rspdata; + struct lxc_cmd_rr cmd = { + .req = { .cmd = LXC_CMD_CONSOLE, .data = INT_TO_PTR(*ttynum) }, + }; + + ret = lxc_cmd(name, &cmd, &stopped, lxcpath); + if (ret < 0) + return ret; + + if (cmd.rsp.ret < 0) { + ERROR("console access denied: %s", strerror(-cmd.rsp.ret)); + ret = -1; + goto out; + } + + if (ret == 0) { + ERROR("console %d invalid,busy or all consoles busy", *ttynum); + ret = -1; + goto out; + } + + rspdata = cmd.rsp.data; + if (rspdata->masterfd < 0) { + ERROR("unable to allocate fd for tty %d", rspdata->ttynum); + goto out; + } + + ret = cmd.rsp.ret; /* sock fd */ + *fd = rspdata->masterfd; + *ttynum = rspdata->ttynum; + INFO("tty %d allocated fd %d sock %d", rspdata->ttynum, *fd, ret); +out: + free(cmd.rsp.data); + return ret; +} + +static int lxc_cmd_console_callback(int fd, struct lxc_cmd_req *req, + struct lxc_handler *handler) +{ + int ttynum = PTR_TO_INT(req->data); + int masterfd; + struct lxc_cmd_rsp rsp; + + masterfd = lxc_console_allocate(handler->conf, fd, &ttynum); + if (masterfd < 0) + goto out_close; + + memset(&rsp, 0, sizeof(rsp)); + rsp.data = INT_TO_PTR(ttynum); + if (lxc_af_unix_send_fd(fd, masterfd, &rsp, sizeof(rsp)) < 0) { + ERROR("failed to send tty to client"); + lxc_console_free(handler->conf, fd); + goto out_close; + } + + return 0; + +out_close: + /* special indicator 
to lxc_cmd_handler() to close the fd and do + * related cleanup + */ + return 1; +} + + + +static int lxc_cmd_process(int fd, struct lxc_cmd_req *req, struct lxc_handler *handler) { - typedef int (*callback)(int, struct lxc_request *, struct lxc_handler *); + typedef int (*callback)(int, struct lxc_cmd_req *, struct lxc_handler *); - callback cb[LXC_COMMAND_MAX] = { - [LXC_COMMAND_TTY] = lxc_console_callback, - [LXC_COMMAND_STOP] = lxc_stop_callback, - [LXC_COMMAND_STATE] = lxc_state_callback, - [LXC_COMMAND_PID] = lxc_pid_callback, - [LXC_COMMAND_CLONE_FLAGS] = lxc_clone_flags_callback, - [LXC_COMMAND_CGROUP] = lxc_cgroup_callback, + callback cb[LXC_CMD_MAX] = { + [LXC_CMD_CONSOLE] = lxc_cmd_console_callback, + [LXC_CMD_CONSOLE_WINCH] = lxc_cmd_console_winch_callback, + [LXC_CMD_STOP] = lxc_cmd_stop_callback, + [LXC_CMD_GET_STATE] = lxc_cmd_get_state_callback, + [LXC_CMD_GET_INIT_PID] = lxc_cmd_get_init_pid_callback, + [LXC_CMD_GET_CLONE_FLAGS] = lxc_cmd_get_clone_flags_callback, + [LXC_CMD_GET_CGROUP] = lxc_cmd_get_cgroup_callback, + [LXC_CMD_GET_CONFIG_ITEM] = lxc_cmd_get_config_item_callback, }; - if (request->type < 0 || request->type >= LXC_COMMAND_MAX) + if (req->cmd < 0 || req->cmd >= LXC_CMD_MAX) { + ERROR("bad cmd %d recieved", req->cmd); return -1; - - return cb[request->type](fd, request, handler); + } + return cb[req->cmd](fd, req, handler); } -static void command_fd_cleanup(int fd, struct lxc_handler *handler, +static void lxc_cmd_fd_cleanup(int fd, struct lxc_handler *handler, struct lxc_epoll_descr *descr) { - lxc_console_remove_fd(fd, &handler->conf->tty_info); + lxc_console_free(handler->conf, fd); lxc_mainloop_del_handler(descr, fd); close(fd); } -static int command_handler(int fd, void *data, struct lxc_epoll_descr *descr) +static int lxc_cmd_handler(int fd, void *data, struct lxc_epoll_descr *descr) { int ret; - struct lxc_request request; + struct lxc_cmd_req req; struct lxc_handler *handler = data; - ret = lxc_af_unix_rcv_credential(fd, 
&request, sizeof(request)); + ret = lxc_af_unix_rcv_credential(fd, &req, sizeof(req)); if (ret == -EACCES) { /* we don't care for the peer, just send and close */ - struct lxc_answer answer = { .ret = ret }; - send(fd, &answer, sizeof(answer), 0); + struct lxc_cmd_rsp rsp = { .ret = ret }; + + lxc_cmd_rsp_send(fd, &rsp); goto out_close; } @@ -269,12 +744,32 @@ static int command_handler(int fd, void *data, struct lxc_epoll_descr *descr) goto out_close; } - if (ret != sizeof(request)) { + if (ret != sizeof(req)) { WARN("partial request, ignored"); + ret = -1; goto out_close; } - ret = trigger_command(fd, &request, handler); + if (req.datalen > LXC_CMD_DATA_MAX) { + ERROR("cmd data length %d too large", req.datalen); + ret = -1; + goto out_close; + } + + if (req.datalen > 0) { + void *reqdata; + + reqdata = alloca(req.datalen); + ret = recv(fd, reqdata, req.datalen, 0); + if (ret != req.datalen) { + WARN("partial request, ignored"); + ret = -1; + goto out_close; + } + req.data = reqdata; + } + + ret = lxc_cmd_process(fd, &req, handler); if (ret) { /* this is not an error, but only a request to close fd */ ret = 0; @@ -284,12 +779,11 @@ static int command_handler(int fd, void *data, struct lxc_epoll_descr *descr) out: return ret; out_close: - command_fd_cleanup(fd, handler, descr); + lxc_cmd_fd_cleanup(fd, handler, descr); goto out; } -static int incoming_command_handler(int fd, void *data, - struct lxc_epoll_descr *descr) +static int lxc_cmd_accept(int fd, void *data, struct lxc_epoll_descr *descr) { int opt = 1, ret = -1, connection; @@ -310,7 +804,7 @@ static int incoming_command_handler(int fd, void *data, goto out_close; } - ret = lxc_mainloop_add_handler(descr, connection, command_handler, data); + ret = lxc_mainloop_add_handler(descr, connection, lxc_cmd_handler, data); if (ret) { ERROR("failed to add handler"); goto out_close; @@ -324,8 +818,8 @@ out_close: goto out; } -extern int lxc_command_init(const char *name, struct lxc_handler *handler, - const char 
*lxcpath) +int lxc_cmd_init(const char *name, struct lxc_handler *handler, + const char *lxcpath) { int fd; char path[sizeof(((struct sockaddr_un *)0)->sun_path)] = { 0 }; @@ -357,14 +851,13 @@ extern int lxc_command_init(const char *name, struct lxc_handler *handler, return 0; } -extern int lxc_command_mainloop_add(const char *name, - struct lxc_epoll_descr *descr, - struct lxc_handler *handler) +int lxc_cmd_mainloop_add(const char *name, + struct lxc_epoll_descr *descr, + struct lxc_handler *handler) { int ret, fd = handler->conf->maincmd_fd; - ret = lxc_mainloop_add_handler(descr, fd, incoming_command_handler, - handler); + ret = lxc_mainloop_add_handler(descr, fd, lxc_cmd_accept, handler); if (ret) { ERROR("failed to add handler for command socket"); close(fd); diff --git a/src/lxc/commands.h b/src/lxc/commands.h index 3b725fbbf..2c0258ca5 100644 --- a/src/lxc/commands.h +++ b/src/lxc/commands.h @@ -18,54 +18,75 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ + #ifndef __commands_h #define __commands_h -enum { - LXC_COMMAND_TTY, - LXC_COMMAND_STOP, - LXC_COMMAND_STATE, - LXC_COMMAND_PID, - LXC_COMMAND_CLONE_FLAGS, - LXC_COMMAND_CGROUP, - LXC_COMMAND_MAX, +#include "state.h" + +#define LXC_CMD_DATA_MAX (MAXPATHLEN*2) + +/* https://developer.gnome.org/glib/2.28/glib-Type-Conversion-Macros.html */ +#define INT_TO_PTR(n) ((void *) (long) (n)) +#define PTR_TO_INT(p) ((int) (long) (p)) + +typedef enum { + LXC_CMD_CONSOLE, + LXC_CMD_CONSOLE_WINCH, + LXC_CMD_STOP, + LXC_CMD_GET_STATE, + LXC_CMD_GET_INIT_PID, + LXC_CMD_GET_CLONE_FLAGS, + LXC_CMD_GET_CGROUP, + LXC_CMD_GET_CONFIG_ITEM, + LXC_CMD_MAX, +} lxc_cmd_t; + +struct lxc_cmd_req { + lxc_cmd_t cmd; + int datalen; + const void *data; }; -struct lxc_request { - int 
type; - int data; -}; - -struct lxc_answer { - int fd; +struct lxc_cmd_rsp { int ret; /* 0 on success, -errno on failure */ - pid_t pid; - int pathlen; - const char *path; + int datalen; + void *data; }; -struct lxc_command { - struct lxc_request request; - struct lxc_answer answer; +struct lxc_cmd_rr { + struct lxc_cmd_req req; + struct lxc_cmd_rsp rsp; }; -extern pid_t get_init_pid(const char *name, const char *lxcpath); -extern int lxc_get_clone_flags(const char *name, const char *lxcpath); +struct lxc_cmd_console_rsp_data { + int masterfd; + int ttynum; +}; -extern int lxc_command(const char *name, struct lxc_command *command, - int *stopped, const char *lxcpath); - -extern int lxc_command_connected(const char *name, struct lxc_command *command, - int *stopped, const char *lxcpath); +extern int lxc_cmd_console_winch(const char *name, const char *lxcpath); +extern int lxc_cmd_console(const char *name, int *ttynum, int *fd, + const char *lxcpath); +/* + * Get the 'real' cgroup path (as seen in /proc/self/cgroup) for a container + * for a particular subsystem + */ +extern char *lxc_cmd_get_cgroup_path(const char *name, const char *lxcpath, + const char *subsystem); +extern int lxc_cmd_get_clone_flags(const char *name, const char *lxcpath); +extern char *lxc_cmd_get_config_item(const char *name, const char *item, const char *lxcpath); +extern pid_t lxc_cmd_get_init_pid(const char *name, const char *lxcpath); +extern lxc_state_t lxc_cmd_get_state(const char *name, const char *lxcpath); +extern int lxc_cmd_stop(const char *name, const char *lxcpath); struct lxc_epoll_descr; struct lxc_handler; -extern int lxc_command_init(const char *name, struct lxc_handler *handler, +extern int lxc_cmd_init(const char *name, struct lxc_handler *handler, const char *lxcpath); -extern int lxc_command_mainloop_add(const char *name, struct lxc_epoll_descr *descr, +extern int lxc_cmd_mainloop_add(const char *name, struct lxc_epoll_descr *descr, struct lxc_handler *handler); -#endif 
+#endif /* __commands_h */ diff --git a/src/lxc/conf.c b/src/lxc/conf.c index 6b3f31813..2d95e5dad 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -31,6 +31,13 @@ #include #include #include +#include + +#if HAVE_IFADDRS_H +#include +#else +#include <../include/ifaddrs.h> +#endif #if HAVE_PTY_H #include @@ -64,6 +71,7 @@ #include "log.h" #include "lxc.h" /* for lxc_cgroup_set() */ #include "caps.h" /* for lxc_caps_last_cap() */ +#include "bdev.h" #if HAVE_APPARMOR #include @@ -92,30 +100,6 @@ lxc_log_define(lxc_conf, lxc); #define MAXMTULEN 16 #define MAXLINELEN 128 -#ifndef MS_DIRSYNC -#define MS_DIRSYNC 128 -#endif - -#ifndef MS_REC -#define MS_REC 16384 -#endif - -#ifndef MNT_DETACH -#define MNT_DETACH 2 -#endif - -#ifndef MS_SLAVE -#define MS_SLAVE (1<<19) -#endif - -#ifndef MS_RELATIME -#define MS_RELATIME (1 << 21) -#endif - -#ifndef MS_STRICTATIME -#define MS_STRICTATIME (1 << 24) -#endif - #if HAVE_SYS_CAPABILITY_H #ifndef CAP_SETFCAP #define CAP_SETFCAP 31 @@ -172,7 +156,7 @@ return -1; #endif char *lxchook_names[NUM_LXC_HOOKS] = { - "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop" }; + "pre-start", "pre-mount", "mount", "autodev", "start", "post-stop", "clone" }; typedef int (*instanciate_cb)(struct lxc_handler *, struct lxc_netdev *); @@ -295,10 +279,75 @@ static struct caps_opt caps_opt[] = { static struct caps_opt caps_opt[] = {}; #endif +static char padchar[] = +"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +static char *mkifname(char *template) +{ + char *name = NULL; + int i = 0; + FILE *urandom; + unsigned int seed; + struct ifaddrs *ifaddr, *ifa; + int ifexists = 0; + + /* Get all the 
network interfaces */ + getifaddrs(&ifaddr); + + /* Initialize the random number generator */ + urandom = fopen ("/dev/urandom", "r"); + if (urandom != NULL) { + if (fread (&seed, sizeof(seed), 1, urandom) <= 0) + seed = time(0); + fclose(urandom); + } + else + seed = time(0); + +#ifndef HAVE_RAND_R + srand(seed); +#endif + + /* Generate random names until we find one that doesn't exist */ + while(1) { + ifexists = 0; + name = strdup(template); + + if (name == NULL) + return NULL; + + for (i = 0; i < strlen(name); i++) { + if (name[i] == 'X') { +#ifdef HAVE_RAND_R + name[i] = padchar[rand_r(&seed) % (strlen(padchar) - 1)]; +#else + name[i] = padchar[rand() % (strlen(padchar) - 1)]; +#endif + } + } + + for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) { + if (strcmp(ifa->ifa_name, name) == 0) { + ifexists = 1; + break; + } + } + + if (ifexists == 0) + break; + + free(name); + } + + freeifaddrs(ifaddr); + return name; +} + static int run_buffer(char *buffer) { FILE *f; char *output; + int ret; f = popen(buffer, "r"); if (!f) { @@ -309,6 +358,7 @@ static int run_buffer(char *buffer) output = malloc(LXC_LOG_BUFFER_SIZE); if (!output) { ERROR("failed to allocate memory for script output"); + pclose(f); return -1; } @@ -317,14 +367,72 @@ static int run_buffer(char *buffer) free(output); - if (pclose(f) == -1) { + ret = pclose(f); + if (ret == -1) { SYSERROR("Script exited on error"); return -1; + } else if (WIFEXITED(ret) && WEXITSTATUS(ret) != 0) { + ERROR("Script exited with status %d", WEXITSTATUS(ret)); + return -1; + } else if (WIFSIGNALED(ret)) { + ERROR("Script terminated by signal %d (%s)", WTERMSIG(ret), + strsignal(WTERMSIG(ret))); + return -1; } return 0; } +static int run_script_argv(const char *name, const char *section, + const char *script, const char *hook, const char *lxcpath, + char **argsin) +{ + int ret, i; + char *buffer; + size_t size = 0; + + INFO("Executing script '%s' for container '%s', config section '%s'", + script, name, section); + + for 
(i=0; argsin && argsin[i]; i++) + size += strlen(argsin[i]) + 1; + + size += strlen(hook) + 1; + + size += strlen(script); + size += strlen(name); + size += strlen(section); + size += 3; + + if (size > INT_MAX) + return -1; + + buffer = alloca(size); + if (!buffer) { + ERROR("failed to allocate memory"); + return -1; + } + + ret = snprintf(buffer, size, "%s %s %s %s", script, name, section, hook); + if (ret < 0 || ret >= size) { + ERROR("Script name too long"); + return -1; + } + + for (i=0; argsin && argsin[i]; i++) { + int len = size-ret; + int rc; + rc = snprintf(buffer + ret, len, " %s", argsin[i]); + if (rc < 0 || rc >= len) { + ERROR("Script args too long"); + return -1; + } + ret += rc; + } + + return run_buffer(buffer); +} + static int run_script(const char *name, const char *section, const char *script, ...) { @@ -358,7 +466,6 @@ static int run_script(const char *name, const char *section, ret = snprintf(buffer, size, "%s %s %s", script, name, section); if (ret < 0 || ret >= size) { ERROR("Script name too long"); - free(buffer); return -1; } @@ -368,7 +475,6 @@ static int run_script(const char *name, const char *section, int rc; rc = snprintf(buffer + ret, len, " %s", p); if (rc < 0 || rc >= len) { - free(buffer); ERROR("Script args too long"); return -1; } @@ -537,6 +643,7 @@ static int mount_rootfs_file(const char *rootfs, const char *target) if (errno != ENXIO) { WARN("unexpected error for ioctl on '%s': %m", direntp->d_name); + close(fd); continue; } @@ -581,8 +688,8 @@ int pin_rootfs(const char *rootfs) return -2; if (!realpath(rootfs, absrootfs)) { - SYSERROR("failed to get real path for '%s'", rootfs); - return -1; + INFO("failed to get real path for '%s', not pinning", rootfs); + return -2; } if (access(absrootfs, F_OK)) { @@ -700,7 +807,8 @@ static int setup_tty(const struct lxc_rootfs *rootfs, SYSERROR("error creating %s\n", lxcpath); return -1; } - close(ret); + if (ret >= 0) + close(ret); ret = unlink(path); if (ret && errno != ENOENT) { 
SYSERROR("error unlinking %s\n", path); @@ -749,7 +857,7 @@ static int setup_tty(const struct lxc_rootfs *rootfs, static int setup_rootfs_pivot_root_cb(char *buffer, void *data) { struct lxc_list *mountlist, *listentry, *iterator; - char *pivotdir, *mountpoint, *mountentry; + char *pivotdir, *mountpoint, *mountentry, *saveptr = NULL; int found; void **cbparm; @@ -760,12 +868,12 @@ static int setup_rootfs_pivot_root_cb(char *buffer, void *data) pivotdir = cbparm[1]; /* parse entry, first field is mountname, ignore */ - mountpoint = strtok(mountentry, " "); + mountpoint = strtok_r(mountentry, " ", &saveptr); if (!mountpoint) return -1; /* second field is mountpoint */ - mountpoint = strtok(NULL, " "); + mountpoint = strtok_r(NULL, " ", &saveptr); if (!mountpoint) return -1; @@ -794,6 +902,7 @@ static int setup_rootfs_pivot_root_cb(char *buffer, void *data) listentry->elem = strdup(mountpoint); if (!listentry->elem) { SYSERROR("strdup failed"); + free(listentry); return -1; } lxc_list_add_tail(mountlist, listentry); @@ -1055,8 +1164,10 @@ int detect_shared_rootfs(void) if (strcmp(p+1, "/") == 0) { // this is '/'. is it shared? 
p = index(p2+1, ' '); - if (strstr(p, "shared:")) + if (p && strstr(p, "shared:")) { + fclose(f); return 1; + } } } fclose(f); @@ -1150,6 +1261,15 @@ static int setup_rootfs(struct lxc_conf *conf) } } + // First try mounting rootfs using a bdev + struct bdev *bdev = bdev_init(rootfs->path, rootfs->mount, NULL); + if (bdev && bdev->ops->mount(bdev) == 0) { + bdev_put(bdev); + DEBUG("mounted '%s' on '%s'", rootfs->path, rootfs->mount); + return 0; + } + if (bdev) + bdev_put(bdev); if (mount_rootfs(rootfs->path, rootfs->mount)) { ERROR("failed to mount rootfs"); return -1; @@ -1248,8 +1368,8 @@ static int setup_dev_console(const struct lxc_rootfs *rootfs, return 0; } - if (console->peer == -1) { - INFO("no console output required"); + if (console->master < 0) { + INFO("no console"); return 0; } @@ -1311,10 +1431,11 @@ static int setup_ttydir_console(const struct lxc_rootfs *rootfs, SYSERROR("error %d creating %s\n", errno, lxcpath); return -1; } - close(ret); + if (ret >= 0) + close(ret); - if (console->peer == -1) { - INFO("no console output required"); + if (console->master < 0) { + INFO("no console"); return 0; } @@ -1380,34 +1501,6 @@ static int setup_kmsg(const struct lxc_rootfs *rootfs, return 0; } -int setup_cgroup(const char *cgpath, struct lxc_list *cgroups) -{ - struct lxc_list *iterator; - struct lxc_cgroup *cg; - int ret = -1; - - if (lxc_list_empty(cgroups)) - return 0; - - lxc_list_for_each(iterator, cgroups) { - - cg = iterator->elem; - - if (lxc_cgroup_set_bypath(cgpath, cg->subsystem, cg->value)) { - ERROR("Error setting %s to %s for %s\n", cg->subsystem, - cg->value, cgpath); - goto out; - } - - DEBUG("cgroup '%s' set to '%s'", cg->subsystem, cg->value); - } - - ret = 0; - INFO("cgroup has been setup"); -out: - return ret; -} - static void parse_mntopt(char *opt, unsigned long *flags, char **data) { struct mount_opt *mo; @@ -1745,7 +1838,76 @@ static int setup_caps(struct lxc_list *caps) } - DEBUG("capabilities has been setup"); + DEBUG("capabilities 
have been setup"); + + return 0; +} + +static int dropcaps_except(struct lxc_list *caps) +{ + struct lxc_list *iterator; + char *keep_entry; + char *ptr; + int i, capid; + int numcaps = lxc_caps_last_cap() + 1; + INFO("found %d capabilities\n", numcaps); + + if (numcaps <= 0 || numcaps > 200) + return -1; + + // caplist[i] is 1 if we keep capability i + int *caplist = alloca(numcaps * sizeof(int)); + memset(caplist, 0, numcaps * sizeof(int)); + + lxc_list_for_each(iterator, caps) { + + keep_entry = iterator->elem; + + capid = -1; + + for (i = 0; i < sizeof(caps_opt)/sizeof(caps_opt[0]); i++) { + + if (strcmp(keep_entry, caps_opt[i].name)) + continue; + + capid = caps_opt[i].value; + break; + } + + if (capid < 0) { + /* try to see if it's numeric, so the user may specify + * capabilities that the running kernel knows about but + * we don't */ + capid = strtol(keep_entry, &ptr, 10); + if (!ptr || *ptr != '\0' || + capid == LONG_MIN || capid == LONG_MAX) + /* not a valid number */ + capid = -1; + else if (capid > lxc_caps_last_cap()) + /* we have a number but it's not a valid + * capability */ + capid = -1; + } + + if (capid < 0) { + ERROR("unknown capability %s", keep_entry); + return -1; + } + + DEBUG("drop capability '%s' (%d)", keep_entry, capid); + + caplist[capid] = 1; + } + for (i=0; iloglevel = LXC_LOG_PRIORITY_NOTSET; new->personality = -1; new->console.log_path = NULL; new->console.log_fd = -1; new->console.path = NULL; new->console.peer = -1; + new->console.peerpty.busy = -1; + new->console.peerpty.master = -1; + new->console.peerpty.slave = -1; new->console.master = -1; new->console.slave = -1; new->console.name[0] = '\0'; new->maincmd_fd = -1; - new->rootfs.mount = default_rootfs_mount; + new->rootfs.mount = strdup(default_rootfs_mount); + if (!new->rootfs.mount) { + ERROR("lxc_conf_init : %m"); + free(new); + return NULL; + } new->kmsg = 1; lxc_list_init(&new->cgroup); lxc_list_init(&new->network); lxc_list_init(&new->mount_list); 
lxc_list_init(&new->caps); + lxc_list_init(&new->keepcaps); lxc_list_init(&new->id_map); for (i=0; ihooks[i]); @@ -2105,13 +2277,13 @@ static int instanciate_veth(struct lxc_handler *handler, struct lxc_netdev *netd ERROR("veth1 name too long"); return -1; } - veth1 = mktemp(veth1buf); + veth1 = mkifname(veth1buf); /* store away for deconf */ memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ); } snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX"); - veth2 = mktemp(veth2buf); + veth2 = mkifname(veth2buf); if (!strlen(veth1) || !strlen(veth2)) { ERROR("failed to allocate a temporary name"); @@ -2217,7 +2389,7 @@ static int instanciate_macvlan(struct lxc_handler *handler, struct lxc_netdev *n if (err >= sizeof(peerbuf)) return -1; - peer = mktemp(peerbuf); + peer = mkifname(peerbuf); if (!strlen(peer)) { ERROR("failed to make a temporary name"); return -1; @@ -2715,7 +2887,7 @@ int uid_shift_ttys(int pid, struct lxc_conf *conf) return 0; } -int lxc_setup(const char *name, struct lxc_conf *lxc_conf) +int lxc_setup(const char *name, struct lxc_conf *lxc_conf, const char *lxcpath) { #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */ int mounted; @@ -2731,7 +2903,7 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) return -1; } - if (run_lxc_hooks(name, "pre-mount", lxc_conf)) { + if (run_lxc_hooks(name, "pre-mount", lxc_conf, lxcpath, NULL)) { ERROR("failed to run pre-mount hooks for container '%s'.", name); return -1; } @@ -2758,13 +2930,13 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) return -1; } - if (run_lxc_hooks(name, "mount", lxc_conf)) { + if (run_lxc_hooks(name, "mount", lxc_conf, lxcpath, NULL)) { ERROR("failed to run mount hooks for container '%s'.", name); return -1; } if (lxc_conf->autodev) { - if (run_lxc_hooks(name, "autodev", lxc_conf)) { + if (run_lxc_hooks(name, "autodev", lxc_conf, lxcpath, NULL)) { ERROR("failed to run autodev hooks for container '%s'.", name); return -1; } @@ -2774,7 +2946,7 @@ int lxc_setup(const 
char *name, struct lxc_conf *lxc_conf) } } - if (setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) { + if (!lxc_conf->is_execute && setup_console(&lxc_conf->rootfs, &lxc_conf->console, lxc_conf->ttydir)) { ERROR("failed to setup the console for '%s'", name); return -1; } @@ -2784,7 +2956,7 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) ERROR("failed to setup kmsg for '%s'", name); } - if (setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) { + if (!lxc_conf->is_execute && setup_tty(&lxc_conf->rootfs, &lxc_conf->tty_info, lxc_conf->ttydir)) { ERROR("failed to setup the ttys for '%s'", name); return -1; } @@ -2792,9 +2964,13 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) #if HAVE_APPARMOR /* || HAVE_SMACK || HAVE_SELINUX */ INFO("rootfs path is .%s., mount is .%s.", lxc_conf->rootfs.path, lxc_conf->rootfs.mount); - if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) - mounted = 0; - else + if (lxc_conf->rootfs.path == NULL || strlen(lxc_conf->rootfs.path) == 0) { + if (mount("proc", "/proc", "proc", 0, NULL)) { + SYSERROR("Failed mounting /proc, proceeding"); + mounted = 0; + } else + mounted = 1; + } else mounted = lsm_mount_proc_if_needed(lxc_conf->rootfs.path, lxc_conf->rootfs.mount); if (mounted == -1) { SYSERROR("failed to mount /proc in the container."); @@ -2820,7 +2996,16 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) } if (lxc_list_empty(&lxc_conf->id_map)) { - if (setup_caps(&lxc_conf->caps)) { + if (!lxc_list_empty(&lxc_conf->keepcaps)) { + if (!lxc_list_empty(&lxc_conf->caps)) { + ERROR("Simultaneously requested dropping and keeping caps"); + return -1; + } + if (dropcaps_except(&lxc_conf->keepcaps)) { + ERROR("failed to keep requested caps\n"); + return -1; + } + } else if (setup_caps(&lxc_conf->caps)) { ERROR("failed to drop capabilities"); return -1; } @@ -2831,7 +3016,8 @@ int lxc_setup(const char *name, struct lxc_conf *lxc_conf) return 0; } -int 
run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf) +int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, + const char *lxcpath, char *argv[]) { int which = -1; struct lxc_list *it; @@ -2848,12 +3034,14 @@ int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf) which = LXCHOOK_START; else if (strcmp(hook, "post-stop") == 0) which = LXCHOOK_POSTSTOP; + else if (strcmp(hook, "clone") == 0) + which = LXCHOOK_CLONE; else return -1; lxc_list_for_each(it, &conf->hooks[which]) { int ret; char *hookname = it->elem; - ret = run_script(name, "lxc", hookname, hook, NULL); + ret = run_script_argv(name, "lxc", hookname, hook, lxcpath, argv); if (ret) return ret; } @@ -3004,6 +3192,30 @@ int lxc_clear_config_caps(struct lxc_conf *c) return 0; } +int lxc_clear_idmaps(struct lxc_conf *c) +{ + struct lxc_list *it, *next; + + lxc_list_for_each_safe(it, &c->id_map, next) { + lxc_list_del(it); + free(it->elem); + free(it); + } + return 0; +} + +int lxc_clear_config_keepcaps(struct lxc_conf *c) +{ + struct lxc_list *it,*next; + + lxc_list_for_each_safe(it, &c->keepcaps, next) { + lxc_list_del(it); + free(it->elem); + free(it); + } + return 0; +} + int lxc_clear_cgroups(struct lxc_conf *c, const char *key) { struct lxc_list *it,*next; @@ -3084,7 +3296,7 @@ void lxc_conf_free(struct lxc_conf *conf) return; if (conf->console.path) free(conf->console.path); - if (conf->rootfs.mount != default_rootfs_mount) + if (conf->rootfs.mount) free(conf->rootfs.mount); if (conf->rootfs.path) free(conf->rootfs.path); @@ -3094,6 +3306,8 @@ void lxc_conf_free(struct lxc_conf *conf) free(conf->ttydir); if (conf->fstab) free(conf->fstab); + if (conf->rcfile) + free(conf->rcfile); lxc_clear_config_network(conf); #if HAVE_APPARMOR if (conf->aa_profile) @@ -3101,9 +3315,11 @@ void lxc_conf_free(struct lxc_conf *conf) #endif lxc_seccomp_free(conf); lxc_clear_config_caps(conf); + lxc_clear_config_keepcaps(conf); lxc_clear_cgroups(conf, "lxc.cgroup"); 
lxc_clear_hooks(conf, "lxc.hook"); lxc_clear_mount_entries(conf); lxc_clear_saved_nics(conf); + lxc_clear_idmaps(conf); free(conf); } diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 465b1ece4..5febf126d 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _conf_h #define _conf_h @@ -188,6 +188,8 @@ struct lxc_tty_info { struct lxc_pty_info *pty_info; }; +struct lxc_tty_state; + /* * Defines the structure to store the console information * @peer : the file descriptor put/get console traffic @@ -197,11 +199,14 @@ struct lxc_console { int slave; int master; int peer; + struct lxc_pty_info peerpty; + struct lxc_epoll_descr *descr; char *path; char *log_path; int log_fd; char name[MAXPATHLEN]; struct termios *tios; + struct lxc_tty_state *tty_state; }; /* @@ -227,7 +232,8 @@ struct lxc_rootfs { * @network : network configuration * @utsname : container utsname * @fstab : path to a fstab file format - * @caps : list of the capabilities + * @caps : list of the capabilities to drop + * @keepcaps : list of the capabilities to keep * @tty_info : tty data * @console : console data * @ttydir : directory (under /dev) in which to create console and ttys @@ -237,7 +243,7 @@ struct lxc_rootfs { */ enum lxchooks { LXCHOOK_PRESTART, LXCHOOK_PREMOUNT, LXCHOOK_MOUNT, LXCHOOK_AUTODEV, - LXCHOOK_START, LXCHOOK_POSTSTOP, NUM_LXC_HOOKS}; + LXCHOOK_START, LXCHOOK_POSTSTOP, LXCHOOK_CLONE, NUM_LXC_HOOKS}; extern char *lxchook_names[NUM_LXC_HOOKS]; struct saved_nic { @@ -246,6 +252,7 @@ struct saved_nic { }; struct lxc_conf { + int is_execute; char *fstab; int tty; int pts; @@ -260,6 +267,7 @@ struct lxc_conf { int num_savednics; struct lxc_list mount_list; struct lxc_list 
caps; + struct lxc_list keepcaps; struct lxc_tty_info tty_info; struct lxc_console console; struct lxc_rootfs rootfs; @@ -282,11 +290,19 @@ struct lxc_conf { int stopsignal; // signal used to stop container int kmsg; // if 1, create /dev/kmsg symlink char *rcfile; // Copy of the top level rcfile we read + + // Logfile and logleve can be set in a container config file. + // Those function as defaults. The defaults can be overriden + // by command line. However we don't want the command line + // specified values to be saved on c->save_config(). So we + // store the config file specified values here. + char *logfile; // the logfile as specifed in config + int loglevel; // loglevel as specifed in config (if any) }; -int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf); +int run_lxc_hooks(const char *name, char *hook, struct lxc_conf *conf, + const char *lxcpath, char *argv[]); -extern int setup_cgroup(const char *cgpath, struct lxc_list *cgroups); extern int detect_shared_rootfs(void); /* @@ -309,19 +325,19 @@ extern void lxc_delete_tty(struct lxc_tty_info *tty_info); extern int lxc_clear_config_network(struct lxc_conf *c); extern int lxc_clear_nic(struct lxc_conf *c, const char *key); extern int lxc_clear_config_caps(struct lxc_conf *c); +extern int lxc_clear_config_keepcaps(struct lxc_conf *c); extern int lxc_clear_cgroups(struct lxc_conf *c, const char *key); extern int lxc_clear_mount_entries(struct lxc_conf *c); extern int lxc_clear_hooks(struct lxc_conf *c, const char *key); -extern int setup_cgroup(const char *name, struct lxc_list *cgroups); - extern int uid_shift_ttys(int pid, struct lxc_conf *conf); /* * Configure the container from inside */ -extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf); +extern int lxc_setup(const char *name, struct lxc_conf *lxc_conf, + const char *lxcpath); extern void lxc_rename_phys_nics_on_shutdown(struct lxc_conf *conf); #endif diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 
676878e62..7904db4a4 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -85,6 +85,7 @@ static int config_network_script(const char *, const char *, struct lxc_conf *); static int config_network_ipv6(const char *, const char *, struct lxc_conf *); static int config_network_ipv6_gateway(const char *, const char *, struct lxc_conf *); static int config_cap_drop(const char *, const char *, struct lxc_conf *); +static int config_cap_keep(const char *, const char *, struct lxc_conf *); static int config_console(const char *, const char *, struct lxc_conf *); static int config_seccomp(const char *, const char *, struct lxc_conf *); static int config_includefile(const char *, const char *, struct lxc_conf *); @@ -117,6 +118,7 @@ static struct lxc_config_t config[] = { { "lxc.hook.autodev", config_hook }, { "lxc.hook.start", config_hook }, { "lxc.hook.post-stop", config_hook }, + { "lxc.hook.clone", config_hook }, { "lxc.network.type", config_network_type }, { "lxc.network.flags", config_network_flags }, { "lxc.network.link", config_network_link }, @@ -135,6 +137,7 @@ static struct lxc_config_t config[] = { /* config_network_nic must come after all other 'lxc.network.*' entries */ { "lxc.network.", config_network_nic }, { "lxc.cap.drop", config_cap_drop }, + { "lxc.cap.keep", config_cap_keep }, { "lxc.console", config_console }, { "lxc.seccomp", config_seccomp }, { "lxc.include", config_includefile }, @@ -272,6 +275,7 @@ static int config_network_type(const char *key, const char *value, list = malloc(sizeof(*list)); if (!list) { SYSERROR("failed to allocate memory"); + free(netdev); return -1; } @@ -610,6 +614,7 @@ static int 
config_network_ipv4(const char *key, const char *value, list = malloc(sizeof(*list)); if (!list) { SYSERROR("failed to allocate memory"); + free(inetdev); return -1; } @@ -619,6 +624,8 @@ static int config_network_ipv4(const char *key, const char *value, addr = strdup(value); if (!addr) { ERROR("no address specified"); + free(inetdev); + free(list); return -1; } @@ -636,12 +643,16 @@ static int config_network_ipv4(const char *key, const char *value, if (!inet_pton(AF_INET, addr, &inetdev->addr)) { SYSERROR("invalid ipv4 address: %s", value); + free(inetdev); free(addr); + free(list); return -1; } if (bcast && !inet_pton(AF_INET, bcast, &inetdev->bcast)) { SYSERROR("invalid ipv4 broadcast address: %s", value); + free(inetdev); + free(list); free(addr); return -1; } @@ -683,6 +694,7 @@ static int config_network_ipv4_gateway(const char *key, const char *value, if (!value) { ERROR("no ipv4 gateway address specified"); + free(gw); return -1; } @@ -692,6 +704,7 @@ static int config_network_ipv4_gateway(const char *key, const char *value, } else { if (!inet_pton(AF_INET, value, gw)) { SYSERROR("invalid ipv4 gateway address: %s", value); + free(gw); return -1; } @@ -725,6 +738,7 @@ static int config_network_ipv6(const char *key, const char *value, list = malloc(sizeof(*list)); if (!list) { SYSERROR("failed to allocate memory"); + free(inet6dev); return -1; } @@ -734,6 +748,8 @@ static int config_network_ipv6(const char *key, const char *value, valdup = strdup(value); if (!valdup) { ERROR("no address specified"); + free(list); + free(inet6dev); return -1; } @@ -745,8 +761,10 @@ static int config_network_ipv6(const char *key, const char *value, inet6dev->prefix = atoi(netmask); } - if (!inet_pton(AF_INET6, value, &inet6dev->addr)) { - SYSERROR("invalid ipv6 address: %s", value); + if (!inet_pton(AF_INET6, valdup, &inet6dev->addr)) { + SYSERROR("invalid ipv6 address: %s", valdup); + free(list); + free(inet6dev); free(valdup); return -1; } @@ -761,18 +779,11 @@ static int 
config_network_ipv6_gateway(const char *key, const char *value, struct lxc_conf *lxc_conf) { struct lxc_netdev *netdev; - struct in6_addr *gw; netdev = network_netdev(key, value, &lxc_conf->network); if (!netdev) return -1; - gw = malloc(sizeof(*gw)); - if (!gw) { - SYSERROR("failed to allocate ipv6 gateway address"); - return -1; - } - if (!value) { ERROR("no ipv6 gateway address specified"); return -1; @@ -782,8 +793,17 @@ static int config_network_ipv6_gateway(const char *key, const char *value, netdev->ipv6_gateway = NULL; netdev->ipv6_gateway_auto = true; } else { + struct in6_addr *gw; + + gw = malloc(sizeof(*gw)); + if (!gw) { + SYSERROR("failed to allocate ipv6 gateway address"); + return -1; + } + if (!inet_pton(AF_INET6, value, gw)) { SYSERROR("invalid ipv6 gateway address: %s", value); + free(gw); return -1; } @@ -877,6 +897,8 @@ static int config_hook(const char *key, const char *value, return add_hook(lxc_conf, LXCHOOK_START, copy); else if (strcmp(key, "lxc.hook.post-stop") == 0) return add_hook(lxc_conf, LXCHOOK_POSTSTOP, copy); + else if (strcmp(key, "lxc.hook.clone") == 0) + return add_hook(lxc_conf, LXCHOOK_CLONE, copy); SYSERROR("Unknown key: %s", key); free(copy); return -1; @@ -970,6 +992,11 @@ static int config_aa_profile(const char *key, const char *value, static int config_logfile(const char *key, const char *value, struct lxc_conf *lxc_conf) { + // store these values in the lxc_conf, and then try to set for + // actual current logging. + if (lxc_conf->logfile) + free(lxc_conf->logfile); + lxc_conf->logfile = strdup(value); return lxc_log_set_file(value); } @@ -989,6 +1016,9 @@ static int config_loglevel(const char *key, const char *value, newlevel = atoi(value); else newlevel = lxc_log_priority_to_int(value); + // store these values in the lxc_conf, and then try to set for + // actual current logging. 
+ lxc_conf->loglevel = newlevel; return lxc_log_set_level(newlevel); } @@ -1233,8 +1263,10 @@ static int config_mount(const char *key, const char *value, return -1; mntelem = strdup(value); - if (!mntelem) + if (!mntelem) { + free(mntlist); return -1; + } mntlist->elem = mntelem; lxc_list_add_tail(&lxc_conf->mount_list, mntlist); @@ -1242,6 +1274,52 @@ static int config_mount(const char *key, const char *value, return 0; } +static int config_cap_keep(const char *key, const char *value, + struct lxc_conf *lxc_conf) +{ + char *keepcaps, *keepptr, *sptr, *token; + struct lxc_list *keeplist; + int ret = -1; + + if (!strlen(value)) + return -1; + + keepcaps = strdup(value); + if (!keepcaps) { + SYSERROR("failed to dup '%s'", value); + return -1; + } + + /* in case several capability keep is specified in a single line + * split these caps in a single element for the list */ + for (keepptr = keepcaps;;keepptr = NULL) { + token = strtok_r(keepptr, " \t", &sptr); + if (!token) { + ret = 0; + break; + } + + keeplist = malloc(sizeof(*keeplist)); + if (!keeplist) { + SYSERROR("failed to allocate keepcap list"); + break; + } + + keeplist->elem = strdup(token); + if (!keeplist->elem) { + SYSERROR("failed to dup '%s'", token); + free(keeplist); + break; + } + + lxc_list_add_tail(&lxc_conf->keepcaps, keeplist); + } + + free(keepcaps); + + return ret; +} + static int config_cap_drop(const char *key, const char *value, struct lxc_conf *lxc_conf) { @@ -1344,6 +1422,7 @@ static int config_utsname(const char *key, const char *value, if (strlen(value) >= sizeof(utsname->nodename)) { ERROR("node name '%s' is too long", utsname->nodename); + free(utsname); return -1; } @@ -1607,6 +1686,22 @@ static int lxc_get_item_cap_drop(struct lxc_conf *c, char *retv, int inlen) return fulllen; } +static int lxc_get_item_cap_keep(struct lxc_conf *c, char *retv, int inlen) +{ + int len, fulllen = 0; + struct lxc_list *it; + + if (!retv) + inlen = 0; + else + memset(retv, 0, inlen); + + 
lxc_list_for_each(it, &c->keepcaps) { + strprint(retv, inlen, "%s\n", (char *)it->elem); + } + return fulllen; +} + static int lxc_get_mount_entries(struct lxc_conf *c, char *retv, int inlen) { int len, fulllen = 0; @@ -1682,8 +1777,12 @@ static int lxc_get_item_nic(struct lxc_conf *c, char *retv, int inlen, strprint(retv, inlen, "%s", mode); } } else if (strcmp(p1, "veth.pair") == 0) { - if (netdev->type == LXC_NET_VETH && netdev->priv.veth_attr.pair) - strprint(retv, inlen, "%s", netdev->priv.veth_attr.pair); + if (netdev->type == LXC_NET_VETH) { + strprint(retv, inlen, "%s", + netdev->priv.veth_attr.pair ? + netdev->priv.veth_attr.pair : + netdev->priv.veth_attr.veth1); + } } else if (strcmp(p1, "vlan") == 0) { if (netdev->type == LXC_NET_VLAN) { strprint(retv, inlen, "%d", netdev->priv.vlan_attr.vid); @@ -1783,6 +1882,8 @@ int lxc_get_config_item(struct lxc_conf *c, const char *key, char *retv, v = c->rootfs.pivot; else if (strcmp(key, "lxc.cap.drop") == 0) return lxc_get_item_cap_drop(c, retv, inlen); + else if (strcmp(key, "lxc.cap.keep") == 0) + return lxc_get_item_cap_keep(c, retv, inlen); else if (strncmp(key, "lxc.hook", 8) == 0) return lxc_get_item_hooks(c, retv, inlen, key); else if (strcmp(key, "lxc.network") == 0) @@ -1806,6 +1907,8 @@ int lxc_clear_config_item(struct lxc_conf *c, const char *key) return lxc_clear_nic(c, key + 12); else if (strcmp(key, "lxc.cap.drop") == 0) return lxc_clear_config_caps(c); + else if (strcmp(key, "lxc.cap.keep") == 0) + return lxc_clear_config_keepcaps(c); else if (strncmp(key, "lxc.cgroup", 10) == 0) return lxc_clear_cgroups(c, key); else if (strcmp(key, "lxc.mount.entries") == 0) @@ -1846,10 +1949,10 @@ void write_config(FILE *fout, struct lxc_conf *c) if (c->aa_profile) fprintf(fout, "lxc.aa_profile = %s\n", c->aa_profile); #endif - if (lxc_log_get_level() != LXC_LOG_PRIORITY_NOTSET) - fprintf(fout, "lxc.loglevel = %s\n", lxc_log_priority_to_string(lxc_log_get_level())); - if (lxc_log_get_file()) - fprintf(fout, 
"lxc.logfile = %s\n", lxc_log_get_file()); + if (c->loglevel != LXC_LOG_PRIORITY_NOTSET) + fprintf(fout, "lxc.loglevel = %s\n", lxc_log_priority_to_string(c->loglevel)); + if (c->logfile) + fprintf(fout, "lxc.logfile = %s\n", c->logfile); lxc_list_for_each(it, &c->cgroup) { struct lxc_cgroup *cg = it->elem; fprintf(fout, "lxc.cgroup.%s = %s\n", cg->subsystem, cg->value); @@ -1918,6 +2021,14 @@ void write_config(FILE *fout, struct lxc_conf *c) } lxc_list_for_each(it, &c->caps) fprintf(fout, "lxc.cap.drop = %s\n", (char *)it->elem); + lxc_list_for_each(it, &c->keepcaps) + fprintf(fout, "lxc.cap.keep = %s\n", (char *)it->elem); + lxc_list_for_each(it, &c->id_map) { + struct id_map *idmap = it->elem; + fprintf(fout, "lxc.id_map = %c %lu %lu %lu\n", + idmap->idtype == ID_TYPE_UID ? 'u' : 'g', idmap->nsid, + idmap->hostid, idmap->range); + } for (i=0; ihooks[i]) fprintf(fout, "lxc.hook.%s = %s\n", diff --git a/src/lxc/confile.h b/src/lxc/confile.h index 0cc771ccb..9d12071fc 100644 --- a/src/lxc/confile.h +++ b/src/lxc/confile.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include diff --git a/src/lxc/console.c b/src/lxc/console.c index 9ef62c102..e35a811aa 100644 --- a/src/lxc/console.c +++ b/src/lxc/console.c @@ -18,9 +18,11 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include +#include #include #include #include @@ -29,6 +31,7 @@ #include #include +#include "lxccontainer.h" #include "log.h" #include "conf.h" #include "config.h" @@ -37,6 +40,8 @@ 
#include "commands.h" #include "mainloop.h" #include "af_unix.h" +#include "lxclock.h" +#include "utils.h" #if HAVE_PTY_H #include @@ -46,240 +51,495 @@ lxc_log_define(lxc_console, lxc); -extern int lxc_console(const char *name, int ttynum, int *fd, const char *lxcpath) +static struct lxc_list lxc_ttys; + +typedef void (*sighandler_t)(int); +struct lxc_tty_state { - int ret, stopped = 0; - struct lxc_command command = { - .request = { .type = LXC_COMMAND_TTY, .data = ttynum }, - }; + struct lxc_list node; + int stdinfd; + int stdoutfd; + int masterfd; + int escape; + int saw_escape; + const char *winch_proxy; + const char *winch_proxy_lxcpath; + int sigfd; + sigset_t oldmask; +}; - ret = lxc_command_connected(name, &command, &stopped, lxcpath); - if (ret < 0 && stopped) { - ERROR("'%s' is stopped", name); +__attribute__((constructor)) +void lxc_console_init(void) +{ + lxc_list_init(&lxc_ttys); +} + +/* lxc_console_winsz: propagte winsz from one terminal to another + * + * @srcfd : terminal to get size from (typically a slave pty) + * @dstfd : terminal to set size on (typically a master pty) + */ +static void lxc_console_winsz(int srcfd, int dstfd) +{ + struct winsize wsz; + if (isatty(srcfd) && ioctl(srcfd, TIOCGWINSZ, &wsz) == 0) { + DEBUG("set winsz dstfd:%d cols:%d rows:%d", dstfd, + wsz.ws_col, wsz.ws_row); + ioctl(dstfd, TIOCSWINSZ, &wsz); + } +} + +static void lxc_console_winch(struct lxc_tty_state *ts) +{ + lxc_console_winsz(ts->stdinfd, ts->masterfd); + if (ts->winch_proxy) { + lxc_cmd_console_winch(ts->winch_proxy, + ts->winch_proxy_lxcpath); + } +} + +void lxc_console_sigwinch(int sig) +{ + if (process_lock() == 0) { + struct lxc_list *it; + struct lxc_tty_state *ts; + + lxc_list_for_each(it, &lxc_ttys) { + ts = it->elem; + lxc_console_winch(ts); + } + process_unlock(); + } +} + +static int lxc_console_cb_sigwinch_fd(int fd, void *cbdata, + struct lxc_epoll_descr *descr) +{ + struct signalfd_siginfo siginfo; + struct lxc_tty_state *ts = cbdata; + + if 
(read(fd, &siginfo, sizeof(siginfo)) < 0) { + ERROR("failed to read signal info"); return -1; } - if (ret < 0) { - ERROR("failed to send command"); - return -1; - } - - if (!ret) { - ERROR("console denied by '%s'", name); - return -1; - } - - if (command.answer.ret) { - ERROR("console access denied: %s", - strerror(-command.answer.ret)); - return -1; - } - - *fd = command.answer.fd; - if (*fd <0) { - ERROR("unable to allocate fd for tty %d", ttynum); - return -1; - } - - INFO("tty %d allocated", ttynum); + lxc_console_winch(ts); return 0; } -/*---------------------------------------------------------------------------- - * functions used by lxc-start mainloop - * to handle above command request. - *--------------------------------------------------------------------------*/ -extern void lxc_console_remove_fd(int fd, struct lxc_tty_info *tty_info) +/* + * lxc_console_sigwinch_init: install SIGWINCH handler + * + * @srcfd : src for winsz in SIGWINCH handler + * @dstfd : dst for winsz in SIGWINCH handler + * + * Returns lxc_tty_state structure on success or NULL on failure. The sigfd + * member of the returned lxc_tty_state can be select()/poll()ed/epoll()ed + * on (ie added to a mainloop) for SIGWINCH. + * + * Must be called with process_lock held to protect the lxc_ttys list, or + * from a non-threaded context. + * + * Note that SIGWINCH isn't installed as a classic asychronous handler, + * rather signalfd(2) is used so that we can handle the signal when we're + * ready for it. This avoids deadlocks since a signal handler + * (ie lxc_console_sigwinch()) would need to take the thread mutex to + * prevent lxc_ttys list corruption, but using the fd we can provide the + * tty_state needed to the callback (lxc_console_cb_sigwinch_fd()). 
+ */ +static struct lxc_tty_state *lxc_console_sigwinch_init(int srcfd, int dstfd) { - int i; + sigset_t mask; + struct lxc_tty_state *ts; - for (i = 0; i < tty_info->nbtty; i++) { + ts = malloc(sizeof(*ts)); + if (!ts) + return NULL; - if (tty_info->pty_info[i].busy != fd) - continue; + memset(ts, 0, sizeof(*ts)); + ts->stdinfd = srcfd; + ts->masterfd = dstfd; + ts->sigfd = -1; - tty_info->pty_info[i].busy = 0; + /* add tty to list to be scanned at SIGWINCH time */ + lxc_list_add_elem(&ts->node, ts); + lxc_list_add_tail(&lxc_ttys, &ts->node); + + sigemptyset(&mask); + sigaddset(&mask, SIGWINCH); + if (sigprocmask(SIG_BLOCK, &mask, &ts->oldmask)) { + SYSERROR("failed to block SIGWINCH"); + goto err1; } - return; + ts->sigfd = signalfd(-1, &mask, 0); + if (ts->sigfd < 0) { + SYSERROR("failed to get signalfd"); + goto err2; + } + + DEBUG("%d got SIGWINCH fd %d", getpid(), ts->sigfd); + goto out; + +err2: + sigprocmask(SIG_SETMASK, &ts->oldmask, NULL); +err1: + lxc_list_del(&ts->node); + free(ts); + ts = NULL; +out: + return ts; } -extern int lxc_console_callback(int fd, struct lxc_request *request, - struct lxc_handler *handler) +/* + * lxc_console_sigwinch_fini: uninstall SIGWINCH handler + * + * @ts : the lxc_tty_state returned by lxc_console_sigwinch_init + * + * Restore the saved signal handler that was in effect at the time + * lxc_console_sigwinch_init() was called. + * + * Must be called with process_lock held to protect the lxc_ttys list, or + * from a non-threaded context. 
+ */ +static void lxc_console_sigwinch_fini(struct lxc_tty_state *ts) { - int ttynum = request->data; - struct lxc_tty_info *tty_info = &handler->conf->tty_info; + if (ts->sigfd >= 0) + close(ts->sigfd); + lxc_list_del(&ts->node); + sigprocmask(SIG_SETMASK, &ts->oldmask, NULL); + free(ts); +} - if (ttynum > 0) { - if (ttynum > tty_info->nbtty) - goto out_close; +static int lxc_console_cb_con(int fd, void *data, + struct lxc_epoll_descr *descr) +{ + struct lxc_console *console = (struct lxc_console *)data; + char buf[1024]; + int r,w; - if (tty_info->pty_info[ttynum - 1].busy) - goto out_close; - - goto out_send; + w = r = read(fd, buf, sizeof(buf)); + if (r < 0) { + SYSERROR("failed to read"); + return 1; } - /* fixup index tty1 => [0] */ + if (!r) { + INFO("console client on fd %d has exited", fd); + lxc_mainloop_del_handler(descr, fd); + close(fd); + return 0; + } + + if (fd == console->peer) + w = write(console->master, buf, r); + + if (fd == console->master) { + if (console->log_fd >= 0) + w = write(console->log_fd, buf, r); + + if (console->peer >= 0) + w = write(console->peer, buf, r); + } + + if (w != r) + WARN("console short write r:%d w:%d", r, w); + return 0; +} + +static void lxc_console_mainloop_add_peer(struct lxc_console *console) +{ + if (console->peer >= 0) { + if (lxc_mainloop_add_handler(console->descr, console->peer, + lxc_console_cb_con, console)) + WARN("console peer not added to mainloop"); + } + + if (console->tty_state) { + if (lxc_mainloop_add_handler(console->descr, + console->tty_state->sigfd, + lxc_console_cb_sigwinch_fd, + console->tty_state)) { + WARN("failed to add to mainloop SIGWINCH handler for '%d'", + console->tty_state->sigfd); + } + } +} + +int lxc_console_mainloop_add(struct lxc_epoll_descr *descr, + struct lxc_handler *handler) +{ + struct lxc_conf *conf = handler->conf; + struct lxc_console *console = &conf->console; + + if (conf->is_execute) { + INFO("no console for lxc-execute."); + return 0; + } + + if 
(!conf->rootfs.path) { + INFO("no rootfs, no console."); + return 0; + } + + if (console->master < 0) { + INFO("no console"); + return 0; + } + + if (lxc_mainloop_add_handler(descr, console->master, + lxc_console_cb_con, console)) { + ERROR("failed to add to mainloop console handler for '%d'", + console->master); + return -1; + } + + /* we cache the descr so that we can add an fd to it when someone + * does attach to it in lxc_console_allocate() + */ + console->descr = descr; + lxc_console_mainloop_add_peer(console); + + return 0; +} + +static int setup_tios(int fd, struct termios *oldtios) +{ + struct termios newtios; + + if (!isatty(fd)) { + ERROR("'%d' is not a tty", fd); + return -1; + } + + /* Get current termios */ + if (tcgetattr(fd, oldtios)) { + SYSERROR("failed to get current terminal settings"); + return -1; + } + + newtios = *oldtios; + + /* Remove the echo characters and signal reception, the echo + * will be done with master proxying */ + newtios.c_iflag &= ~IGNBRK; + newtios.c_iflag &= BRKINT; + newtios.c_lflag &= ~(ECHO|ICANON|ISIG); + newtios.c_cc[VMIN] = 1; + newtios.c_cc[VTIME] = 0; + + /* Set new attributes */ + if (tcsetattr(fd, TCSAFLUSH, &newtios)) { + ERROR("failed to set new terminal settings"); + return -1; + } + + return 0; +} + +static void lxc_console_peer_proxy_free(struct lxc_console *console) +{ + if (console->tty_state) { + lxc_console_sigwinch_fini(console->tty_state); + console->tty_state = NULL; + } + close(console->peerpty.master); + close(console->peerpty.slave); + console->peerpty.master = -1; + console->peerpty.slave = -1; + console->peerpty.busy = -1; + console->peerpty.name[0] = '\0'; + console->peer = -1; +} + +static int lxc_console_peer_proxy_alloc(struct lxc_console *console, int sockfd) +{ + struct termios oldtermio; + struct lxc_tty_state *ts; + + if (console->master < 0) { + ERROR("console not set up"); + return -1; + } + if (console->peerpty.busy != -1 || console->peer != -1) { + NOTICE("console already in use"); + 
return -1; + } + if (console->tty_state) { + ERROR("console already has tty_state"); + return -1; + } + + /* this is the proxy pty that will be given to the client, and that + * the real pty master will send to / recv from + */ + if (openpty(&console->peerpty.master, &console->peerpty.slave, + console->peerpty.name, NULL, NULL)) { + SYSERROR("failed to create proxy pty"); + return -1; + } + + if (setup_tios(console->peerpty.slave, &oldtermio) < 0) + goto err1; + + ts = lxc_console_sigwinch_init(console->peerpty.master, console->master); + if (!ts) + goto err1; + + console->tty_state = ts; + console->peer = console->peerpty.slave; + console->peerpty.busy = sockfd; + lxc_console_mainloop_add_peer(console); + + DEBUG("%d %s peermaster:%d sockfd:%d", getpid(), __FUNCTION__, console->peerpty.master, sockfd); + return 0; + +err1: + lxc_console_peer_proxy_free(console); + return -1; +} + +/* lxc_console_allocate: allocate the console or a tty + * + * @conf : the configuration of the container to allocate from + * @sockfd : the socket fd whose remote side when closed, will be an + * indication that the console or tty is no longer in use + * @ttyreq : the tty requested to be opened, -1 for any, 0 for the console + */ +int lxc_console_allocate(struct lxc_conf *conf, int sockfd, int *ttyreq) +{ + int masterfd = -1, ttynum; + struct lxc_tty_info *tty_info = &conf->tty_info; + struct lxc_console *console = &conf->console; + + process_lock(); + if (*ttyreq == 0) { + if (lxc_console_peer_proxy_alloc(console, sockfd) < 0) + goto out; + masterfd = console->peerpty.master; + goto out; + } + + if (*ttyreq > 0) { + if (*ttyreq > tty_info->nbtty) + goto out; + + if (tty_info->pty_info[*ttyreq - 1].busy) + goto out; + + /* the requested tty is available */ + ttynum = *ttyreq; + goto out_tty; + } + + /* search for next available tty, fixup index tty1 => [0] */ for (ttynum = 1; ttynum <= tty_info->nbtty && tty_info->pty_info[ttynum - 1].busy; ttynum++); /* we didn't find any available 
slot for tty */ if (ttynum > tty_info->nbtty) - goto out_close; + goto out; -out_send: - if (lxc_af_unix_send_fd(fd, tty_info->pty_info[ttynum - 1].master, - &ttynum, sizeof(ttynum)) < 0) { - ERROR("failed to send tty to client"); - goto out_close; - } + *ttyreq = ttynum; - tty_info->pty_info[ttynum - 1].busy = fd; - - return 0; - -out_close: - /* the close fd and related cleanup will be done by caller */ - return 1; +out_tty: + tty_info->pty_info[ttynum - 1].busy = sockfd; + masterfd = tty_info->pty_info[ttynum - 1].master; +out: + process_unlock(); + return masterfd; } -static int get_default_console(char **console) +/* lxc_console_free: mark the console or a tty as unallocated, free any + * resources allocated by lxc_console_allocate(). + * + * @conf : the configuration of the container whose tty was closed + * @fd : the socket fd whose remote side was closed, which indicated + * the console or tty is no longer in use. this is used to match + * which console/tty is being freed. + */ +void lxc_console_free(struct lxc_conf *conf, int fd) { - int fd; + int i; + struct lxc_tty_info *tty_info = &conf->tty_info; + struct lxc_console *console = &conf->console; - if (!access("/dev/tty", F_OK)) { + process_lock(); + for (i = 0; i < tty_info->nbtty; i++) { + if (tty_info->pty_info[i].busy == fd) + tty_info->pty_info[i].busy = 0; + } + + if (console->peerpty.busy == fd) { + lxc_mainloop_del_handler(console->descr, console->peerpty.slave); + lxc_console_peer_proxy_free(console); + } + process_unlock(); +} + +static void lxc_console_peer_default(struct lxc_console *console) +{ + struct lxc_tty_state *ts; + const char *path = console->path; + + /* if no console was given, try current controlling terminal, there + * won't be one if we were started as a daemon (-d) + */ + if (!path && !access("/dev/tty", F_OK)) { + int fd; fd = open("/dev/tty", O_RDWR); if (fd >= 0) { close(fd); - *console = strdup("/dev/tty"); - goto out; + path = "/dev/tty"; } } - if (!access("/dev/null", 
F_OK)) { - *console = strdup("/dev/null"); + if (!path) goto out; - } - ERROR("No suitable default console"); -out: - return *console ? 0 : -1; -} + DEBUG("opening %s for console peer", path); + console->peer = lxc_unpriv(open(path, O_CLOEXEC | O_RDWR | O_CREAT | + O_APPEND, 0600)); + if (console->peer < 0) + goto out; -int lxc_create_console(struct lxc_conf *conf) -{ - struct termios tios; - struct lxc_console *console = &conf->console; - int fd; - - if (!conf->rootfs.path) - return 0; - - if (!console->path && get_default_console(&console->path)) { - ERROR("failed to get default console"); - return -1; - } - - if (!strcmp(console->path, "none")) - return 0; - - if (openpty(&console->master, &console->slave, - console->name, NULL, NULL)) { - SYSERROR("failed to allocate a pty"); - return -1; - } - - if (fcntl(console->master, F_SETFD, FD_CLOEXEC)) { - SYSERROR("failed to set console master to close-on-exec"); - goto err; - } - - if (fcntl(console->slave, F_SETFD, FD_CLOEXEC)) { - SYSERROR("failed to set console slave to close-on-exec"); - goto err; - } - - if (console->log_path) { - fd = lxc_unpriv(open(console->log_path, O_CLOEXEC | O_RDWR | O_CREAT | O_APPEND, 0600)); - if (fd < 0) { - SYSERROR("failed to open '%s'", console->log_path); - goto err; - } - DEBUG("using '%s' as console log", console->log_path); - console->log_fd = fd; - } - - fd = lxc_unpriv(open(console->path, O_CLOEXEC | O_RDWR | O_CREAT | - O_APPEND, 0600)); - if (fd < 0) { - SYSERROR("failed to open '%s'", console->path); - goto err_close_console_log; - } - - DEBUG("using '%s' as console", console->path); - - console->peer = fd; + DEBUG("using '%s' as console", path); if (!isatty(console->peer)) - return 0; + return; - console->tios = malloc(sizeof(tios)); + ts = lxc_console_sigwinch_init(console->peer, console->master); + if (!ts) + WARN("Unable to install SIGWINCH"); + console->tty_state = ts; + + lxc_console_winsz(console->peer, console->master); + + console->tios = 
malloc(sizeof(*console->tios)); if (!console->tios) { SYSERROR("failed to allocate memory"); - goto err_close_console; + goto err1; } - /* Get termios */ - if (tcgetattr(console->peer, console->tios)) { - SYSERROR("failed to get current terminal settings"); - goto err_free; - } + if (setup_tios(console->peer, console->tios) < 0) + goto err2; - tios = *console->tios; + return; - /* Remove the echo characters and signal reception, the echo - * will be done below with master proxying */ - tios.c_iflag &= ~IGNBRK; - tios.c_iflag &= BRKINT; - tios.c_lflag &= ~(ECHO|ICANON|ISIG); - tios.c_cc[VMIN] = 1; - tios.c_cc[VTIME] = 0; - - /* Set new attributes */ - if (tcsetattr(console->peer, TCSAFLUSH, &tios)) { - ERROR("failed to set new terminal settings"); - goto err_free; - } - - return 0; - -err_free: +err2: free(console->tios); - -err_close_console: + console->tios = NULL; +err1: close(console->peer); console->peer = -1; - -err_close_console_log: - if (console->log_fd >= 0) { - close(console->log_fd); - console->log_fd = -1; - } - -err: - close(console->master); - console->master = -1; - - close(console->slave); - console->slave = -1; - return -1; +out: + DEBUG("no console peer"); } -void lxc_delete_console(struct lxc_console *console) +void lxc_console_delete(struct lxc_console *console) { - if (console->tios && + if (console->tios && console->peer >= 0 && tcsetattr(console->peer, TCSAFLUSH, console->tios)) WARN("failed to set old terminal settings"); free(console->tios); @@ -300,73 +560,213 @@ void lxc_delete_console(struct lxc_console *console) console->slave = -1; } -static int console_handler(int fd, void *data, struct lxc_epoll_descr *descr) +int lxc_console_create(struct lxc_conf *conf) { - struct lxc_console *console = (struct lxc_console *)data; + struct lxc_console *console = &conf->console; + + if (conf->is_execute) { + INFO("no console for lxc-execute."); + return 0; + } + + if (!conf->rootfs.path) + return 0; + + if (console->path && !strcmp(console->path, 
"none")) + return 0; + + if (openpty(&console->master, &console->slave, + console->name, NULL, NULL)) { + SYSERROR("failed to allocate a pty"); + return -1; + } + + if (fcntl(console->master, F_SETFD, FD_CLOEXEC)) { + SYSERROR("failed to set console master to close-on-exec"); + goto err; + } + + if (fcntl(console->slave, F_SETFD, FD_CLOEXEC)) { + SYSERROR("failed to set console slave to close-on-exec"); + goto err; + } + + lxc_console_peer_default(console); + + if (console->log_path) { + console->log_fd = lxc_unpriv(open(console->log_path, + O_CLOEXEC | O_RDWR | + O_CREAT | O_APPEND, 0600)); + if (console->log_fd < 0) { + SYSERROR("failed to open '%s'", console->log_path); + goto err; + } + DEBUG("using '%s' as console log", console->log_path); + } + + return 0; + +err: + lxc_console_delete(console); + return -1; +} + + + +static int lxc_console_cb_tty_stdin(int fd, void *cbdata, + struct lxc_epoll_descr *descr) +{ + struct lxc_tty_state *ts = cbdata; + char c; + + assert(fd == ts->stdinfd); + if (read(ts->stdinfd, &c, 1) < 0) { + SYSERROR("failed to read"); + return 1; + } + + /* we want to exit the console with Ctrl+a q */ + if (c == ts->escape && !ts->saw_escape) { + ts->saw_escape = 1; + return 0; + } + + if (c == 'q' && ts->saw_escape) + return 1; + + ts->saw_escape = 0; + if (write(ts->masterfd, &c, 1) < 0) { + SYSERROR("failed to write"); + return 1; + } + + return 0; +} + +static int lxc_console_cb_tty_master(int fd, void *cbdata, + struct lxc_epoll_descr *descr) +{ + struct lxc_tty_state *ts = cbdata; char buf[1024]; int r,w; + assert(fd == ts->masterfd); r = read(fd, buf, sizeof(buf)); if (r < 0) { SYSERROR("failed to read"); return 1; } - if (!r) { - INFO("console client has exited"); - lxc_mainloop_del_handler(descr, fd); - close(fd); - return 0; + w = write(ts->stdoutfd, buf, r); + if (w < 0 || w != r) { + SYSERROR("failed to write"); + return 1; } - /* no output for the console, do nothing */ - if (console->peer == -1) - return 0; - - if 
(console->peer == fd) - w = write(console->master, buf, r); - else { - w = write(console->peer, buf, r); - if (console->log_fd > 0) - w = write(console->log_fd, buf, r); - } - if (w != r) - WARN("console short write"); return 0; } -int lxc_console_mainloop_add(struct lxc_epoll_descr *descr, - struct lxc_handler *handler) +int lxc_console_getfd(struct lxc_container *c, int *ttynum, int *masterfd) { - struct lxc_conf *conf = handler->conf; - struct lxc_console *console = &conf->console; + return lxc_cmd_console(c->name, ttynum, masterfd, c->config_path); +} - if (!conf->rootfs.path) { - INFO("no rootfs, no console."); - return 0; - } +int lxc_console(struct lxc_container *c, int ttynum, + int stdinfd, int stdoutfd, int stderrfd, + int escape) +{ + int ret, ttyfd, masterfd; + struct lxc_epoll_descr descr; + struct termios oldtios; + struct lxc_tty_state *ts; - if (!console->path) { - INFO("no console specified"); - return 0; - } - - if (console->peer == -1) { - INFO("no console will be used"); - return 0; - } - - if (lxc_mainloop_add_handler(descr, console->master, - console_handler, console)) { - ERROR("failed to add to mainloop console handler for '%d'", - console->master); + if (!isatty(stdinfd)) { + ERROR("stdin is not a tty"); return -1; } - if (console->peer != -1 && - lxc_mainloop_add_handler(descr, console->peer, - console_handler, console)) - WARN("console input disabled"); + ret = setup_tios(stdinfd, &oldtios); + if (ret) { + ERROR("failed to setup tios"); + return -1; + } - return 0; + process_lock(); + ttyfd = lxc_cmd_console(c->name, &ttynum, &masterfd, c->config_path); + if (ttyfd < 0) { + ret = ttyfd; + goto err1; + } + + fprintf(stderr, "\n" + "Connected to tty %1$d\n" + "Type to exit the console, " + " to enter Ctrl+%2$c itself\n", + ttynum, 'a' + escape - 1); + + ret = setsid(); + if (ret) + INFO("already group leader"); + + ts = lxc_console_sigwinch_init(stdinfd, masterfd); + if (!ts) { + ret = -1; + goto err2; + } + ts->escape = escape; + 
ts->winch_proxy = c->name; + ts->winch_proxy_lxcpath = c->config_path; + + lxc_console_winsz(stdinfd, masterfd); + lxc_cmd_console_winch(ts->winch_proxy, ts->winch_proxy_lxcpath); + + ret = lxc_mainloop_open(&descr); + if (ret) { + ERROR("failed to create mainloop"); + goto err3; + } + + ret = lxc_mainloop_add_handler(&descr, ts->sigfd, + lxc_console_cb_sigwinch_fd, ts); + if (ret) { + ERROR("failed to add handler for SIGWINCH fd"); + goto err4; + } + + ret = lxc_mainloop_add_handler(&descr, ts->stdinfd, + lxc_console_cb_tty_stdin, ts); + if (ret) { + ERROR("failed to add handler for stdinfd"); + goto err4; + } + + ret = lxc_mainloop_add_handler(&descr, ts->masterfd, + lxc_console_cb_tty_master, ts); + if (ret) { + ERROR("failed to add handler for masterfd"); + goto err4; + } + + process_unlock(); + ret = lxc_mainloop(&descr, -1); + process_lock(); + if (ret) { + ERROR("mainloop returned an error"); + goto err4; + } + + ret = 0; + +err4: + lxc_mainloop_close(&descr); +err3: + lxc_console_sigwinch_fini(ts); +err2: + close(masterfd); + close(ttyfd); +err1: + tcsetattr(stdinfd, TCSAFLUSH, &oldtios); + process_unlock(); + + return ret; } diff --git a/src/lxc/console.h b/src/lxc/console.h index c9bf937c7..d45260cbe 100644 --- a/src/lxc/console.h +++ b/src/lxc/console.h @@ -18,9 +18,21 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -extern int lxc_create_console(struct lxc_conf *); -extern void lxc_delete_console(struct lxc_console *); -extern int lxc_console_mainloop_add(struct lxc_epoll_descr *, struct lxc_handler *); +struct lxc_epoll_descr; +struct lxc_container; + +extern int lxc_console_allocate(struct lxc_conf *conf, int sockfd, int *ttynum); +extern int lxc_console_create(struct lxc_conf *); +extern void 
lxc_console_delete(struct lxc_console *); +extern void lxc_console_free(struct lxc_conf *conf, int fd); + +extern int lxc_console_mainloop_add(struct lxc_epoll_descr *, struct lxc_handler *); +extern void lxc_console_sigwinch(int sig); +extern int lxc_console(struct lxc_container *c, int ttynum, + int stdinfd, int stdoutfd, int stderrfd, + int escape); +extern int lxc_console_getfd(struct lxc_container *c, int *ttynum, + int *masterfd); diff --git a/src/lxc/error.c b/src/lxc/error.c index 1f2dde3a3..15e6ab0bd 100644 --- a/src/lxc/error.c +++ b/src/lxc/error.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include diff --git a/src/lxc/error.h b/src/lxc/error.h index 59d2fe03d..61033d30c 100644 --- a/src/lxc/error.h +++ b/src/lxc/error.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __lxc_error_h #define __lxc_error_h diff --git a/src/lxc/execute.c b/src/lxc/execute.c index c1f6526d8..b4f3ed945 100644 --- a/src/lxc/execute.c +++ b/src/lxc/execute.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include @@ -27,6 +27,7 @@ #include #include +#include "conf.h" #include "log.h" #include "start.h" @@ -54,7 +55,7 @@ static char *choose_init(void) ret = snprintf(retv, 
PATH_MAX, LXCINITDIR "/lxc/lxc-init"); if (ret < 0 || ret >= PATH_MAX) { ERROR("pathname too long"); - return NULL; + goto out1; } ret = stat(retv, &mystat); @@ -64,7 +65,7 @@ static char *choose_init(void) ret = snprintf(retv, PATH_MAX, "/usr/lib/lxc/lxc-init"); if (ret < 0 || ret >= PATH_MAX) { ERROR("pathname too long"); - return NULL; + goto out1; } ret = stat(retv, &mystat); if (ret == 0) @@ -72,11 +73,13 @@ static char *choose_init(void) ret = snprintf(retv, PATH_MAX, "/sbin/lxc-init"); if (ret < 0 || ret >= PATH_MAX) { ERROR("pathname too long"); - return NULL; + goto out1; } ret = stat(retv, &mystat); if (ret == 0) return retv; +out1: + free(retv); return NULL; } @@ -85,23 +88,44 @@ static int execute_start(struct lxc_handler *handler, void* data) int j, i = 0; struct execute_args *my_args = data; char **argv; - int argc = 0; + int argc = 0, argc_add; char *initpath; while (my_args->argv[argc++]); - argv = malloc((argc + my_args->quiet ? 5 : 4) * sizeof(*argv)); + argc_add = 4; + if (my_args->quiet) + argc_add++; + if (!handler->conf->rootfs.path) { + argc_add += 4; + if (lxc_log_has_valid_level()) + argc_add += 2; + } + + argv = malloc((argc + argc_add) * sizeof(*argv)); if (!argv) - return 1; + goto out1; initpath = choose_init(); if (!initpath) { ERROR("Failed to find an lxc-init"); - return 1; + goto out2; } argv[i++] = initpath; if (my_args->quiet) argv[i++] = "--quiet"; + if (!handler->conf->rootfs.path) { + argv[i++] = "--name"; + argv[i++] = (char *)handler->name; + argv[i++] = "--lxcpath"; + argv[i++] = (char *)handler->lxcpath; + + if (lxc_log_has_valid_level()) { + argv[i++] = "--logpriority"; + argv[i++] = (char *) + lxc_log_priority_to_string(lxc_log_get_level()); + } + } argv[i++] = "--"; for (j = 0; j < argc; j++) argv[i++] = my_args->argv[j]; @@ -111,6 +135,10 @@ static int execute_start(struct lxc_handler *handler, void* data) execvp(argv[0], argv); SYSERROR("failed to exec %s", argv[0]); + free(initpath); +out2: + free(argv); +out1: return 
1; } @@ -137,5 +165,6 @@ int lxc_execute(const char *name, char *const argv[], int quiet, if (lxc_check_inherited(conf, -1)) return -1; + conf->is_execute = 1; return __lxc_start(name, conf, &execute_start_ops, &args, lxcpath); } diff --git a/src/lxc/freezer.c b/src/lxc/freezer.c index 111bc3540..7c8c61e49 100644 --- a/src/lxc/freezer.c +++ b/src/lxc/freezer.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -120,14 +120,16 @@ out: static int freeze_unfreeze(const char *name, int freeze, const char *lxcpath) { - char *nsgroup; + char *cgabspath; int ret; - - ret = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath); - if (ret) + + cgabspath = lxc_cgroup_path_get("freezer", name, lxcpath); + if (!cgabspath) return -1; - return do_unfreeze(nsgroup, freeze, name, lxcpath); + ret = do_unfreeze(cgabspath, freeze, name, lxcpath); + free(cgabspath); + return ret; } int lxc_freeze(const char *name, const char *lxcpath) @@ -141,14 +143,19 @@ int lxc_unfreeze(const char *name, const char *lxcpath) return freeze_unfreeze(name, 0, lxcpath); } -int lxc_unfreeze_bypath(const char *cgpath) +int lxc_unfreeze_bypath(const char *cgrelpath) { - char *nsgroup; - int ret; - - ret = cgroup_path_get(&nsgroup, "freezer", cgpath); - if (ret) - return -1; + char cgabspath[MAXPATHLEN]; + int len, ret; - return do_unfreeze(nsgroup, 0, NULL, NULL); + if (!get_subsys_mount(cgabspath, "freezer")) + return -1; + len = strlen(cgabspath); + ret = snprintf(cgabspath+len, MAXPATHLEN-len, "/%s", cgrelpath); + if (ret < 0 || ret >= MAXPATHLEN-len) { + ERROR("freezer path name too long"); + return -1; + } + + return do_unfreeze(cgabspath, 0, NULL, NULL); } diff --git a/src/lxc/genl.c b/src/lxc/genl.c 
index c9a3d72dc..64177648c 100644 --- a/src/lxc/genl.c +++ b/src/lxc/genl.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -60,45 +60,46 @@ static int genetlink_resolve_family(const char *family) ret = netlink_open(&handler, NETLINK_GENERIC); if (ret) - return ret; + goto out; ret = nla_put_string((struct nlmsg *)&request->nlmsghdr, CTRL_ATTR_FAMILY_NAME, family); if (ret) - goto out; + goto out_close; ret = netlink_transaction(&handler, (struct nlmsg *)&request->nlmsghdr, (struct nlmsg *)&reply->nlmsghdr); if (ret < 0) - goto out; + goto out_close; genlmsghdr = NLMSG_DATA(&reply->nlmsghdr); len = reply->nlmsghdr.nlmsg_len; ret = -ENOMSG; if (reply->nlmsghdr.nlmsg_type != GENL_ID_CTRL) - goto out; + goto out_close; if (genlmsghdr->cmd != CTRL_CMD_NEWFAMILY) - goto out; + goto out_close; ret = -EMSGSIZE; len -= NLMSG_LENGTH(GENL_HDRLEN); if (len < 0) - goto out; + goto out_close; attr = (struct nlattr *)GENLMSG_DATA(reply); attr = (struct nlattr *)((char *)attr + NLA_ALIGN(attr->nla_len)); ret = -ENOMSG; if (attr->nla_type != CTRL_ATTR_FAMILY_ID) - goto out; + goto out_close; ret = *(__u16 *) NLA_DATA(attr); +out_close: + netlink_close(&handler); out: genlmsg_free(request); genlmsg_free(reply); - netlink_close(&handler); return ret; } diff --git a/src/lxc/genl.h b/src/lxc/genl.h index 1da1de0cc..20b673b50 100644 --- a/src/lxc/genl.h +++ b/src/lxc/genl.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __genl_h 
#define __genl_h diff --git a/src/lxc/legacy/lxc-ls.in b/src/lxc/legacy/lxc-ls.in index c0147c73d..a7b3b19e0 100644 --- a/src/lxc/legacy/lxc-ls.in +++ b/src/lxc/legacy/lxc-ls.in @@ -15,7 +15,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA . @DATADIR@/lxc/lxc.functions diff --git a/src/lxc/list.c b/src/lxc/list.c index 4876259ba..14596c897 100644 --- a/src/lxc/list.c +++ b/src/lxc/list.c @@ -18,6 +18,6 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include diff --git a/src/lxc/list.h b/src/lxc/list.h index 24dffa2d1..8714fb3bb 100644 --- a/src/lxc/list.h +++ b/src/lxc/list.h @@ -1,3 +1,26 @@ +/* + * lxc: linux Container library + * + * (C) Copyright IBM Corp. 2007, 2008 + * + * Authors: + * Daniel Lezcano + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #ifndef _list_h #define _list_h diff --git a/src/lxc/log.c b/src/lxc/log.c index 68a181c3a..9bbaa69fc 100644 --- a/src/lxc/log.c +++ b/src/lxc/log.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -35,15 +35,19 @@ #include "log.h" #include "caps.h" +#include "utils.h" #define LXC_LOG_PREFIX_SIZE 32 #define LXC_LOG_BUFFER_SIZE 512 int lxc_log_fd = -1; static char log_prefix[LXC_LOG_PREFIX_SIZE] = "lxc"; +static char *log_fname = NULL; +/* command line values for logfile or logpriority should always override + * values from the configuration file or defaults + */ +static int lxc_logfile_specified = 0; static int lxc_loglevel_specified = 0; -// if logfile was specifed on command line, it won't be overridden by lxc.logfile -static int lxc_log_specified = 0; lxc_log_define(lxc_log, lxc); @@ -119,12 +123,6 @@ struct lxc_log_category lxc_log_category_lxc = { }; /*---------------------------------------------------------------------------*/ -extern void lxc_log_setprefix(const char *prefix) -{ - strncpy(log_prefix, prefix, sizeof(log_prefix)); - log_prefix[sizeof(log_prefix) - 1] = 0; -} - static int build_dir(const char *name) { char *n = strdup(name); // because we'll be modifying it @@ -180,29 +178,50 @@ static int log_open(const char *name) return newfd; } -static char *build_log_path(const char *name) +/* + * Build the path to the log file + * @name : the name of the container + * @lxcpath : the lxcpath to use as a basename or NULL to use LOGPATH + * 
Returns malloced path on success, or NULL on failure + */ +static char *build_log_path(const char *name, const char *lxcpath) { char *p; - int len, ret; + int len, ret, use_dir; + +#if USE_CONFIGPATH_LOGS + use_dir = 1; +#else + use_dir = 0; +#endif /* - * '$logpath' + '/' + '$name' + '.log' + '\0' - * or + * If USE_CONFIGPATH_LOGS is true or lxcpath is given, the resulting + * path will be: * '$logpath' + '/' + '$name' + '/' + '$name' + '.log' + '\0' - * sizeof(LOGPATH) includes its \0 + * + * If USE_CONFIGPATH_LOGS is false the resulting path will be: + * '$logpath' + '/' + '$name' + '.log' + '\0' */ - len = sizeof(LOGPATH) + strlen(name) + 6; -#if USE_CONFIGPATH_LOGS - len += strlen(name) + 1; /* add "/$container_name/" */ -#endif + len = strlen(name) + 6; /* 6 == '/' + '.log' + '\0' */ + if (lxcpath) + use_dir = 1; + else + lxcpath = LOGPATH; + + if (use_dir) + len += strlen(lxcpath) + 1 + strlen(name) + 1; /* add "/$container_name/" */ + else + len += strlen(lxcpath) + 1; p = malloc(len); if (!p) return p; -#if USE_CONFIGPATH_LOGS - ret = snprintf(p, len, "%s/%s/%s.log", LOGPATH, name, name); -#else - ret = snprintf(p, len, "%s/%s.log", LOGPATH, name); -#endif + + if (use_dir) + ret = snprintf(p, len, "%s/%s/%s.log", lxcpath, name, name); + else + ret = snprintf(p, len, "%s/%s.log", lxcpath, name); + if (ret < 0 || ret >= len) { free(p); return NULL; @@ -210,19 +229,67 @@ static char *build_log_path(const char *name, const char *lxcpath) return p; } -int do_lxc_log_set_file(const char *fname, int from_default); +/* + * This can be called: + * 1. when a program calls lxc_log_init with no logfile parameter (in which + * case the default is used). In this case lxc.logfile can override this. + * 2. when a program calls lxc_log_init with a logfile parameter. In this + * case we don't want lxc.logfile to override this. + * 3. When a lxc.logfile entry is found in config file.
+ */ +static int __lxc_log_set_file(const char *fname, int create_dirs) +{ + if (lxc_log_fd != -1) { + // we are overriding the default. + close(lxc_log_fd); + free(log_fname); + } + +#if USE_CONFIGPATH_LOGS + // we don't build_dir for the default if the default is + // i.e. /var/lib/lxc/$container/$container.log + if (create_dirs) +#endif + if (build_dir(fname)) { + ERROR("failed to create dir for log file \"%s\" : %s", fname, + strerror(errno)); + return -1; + } + + lxc_log_fd = log_open(fname); + if (lxc_log_fd == -1) + return -1; + + log_fname = strdup(fname); + return 0; +} + +static int _lxc_log_set_file(const char *name, const char *lxcpath, int create_dirs) +{ + char *logfile; + int ret; + + logfile = build_log_path(name, lxcpath); + if (!logfile) { + ERROR("could not build log path"); + return -1; + } + ret = __lxc_log_set_file(logfile, create_dirs); + free(logfile); + return ret; +} -/*---------------------------------------------------------------------------*/ extern int lxc_log_init(const char *name, const char *file, - const char *priority, const char *prefix, int quiet) + const char *priority, const char *prefix, int quiet, + const char *lxcpath) { int lxc_priority = LXC_LOG_PRIORITY_ERROR; int ret; - char *tmpfile = NULL; - int want_lxc_log_specified = 0; - if (lxc_log_fd != -1) + if (lxc_log_fd != -1) { + WARN("lxc_log_init called with log already initialized"); return 0; + } if (priority) { lxc_loglevel_specified = 1; @@ -241,39 +308,41 @@ extern int lxc_log_init(const char *name, const char *file, lxc_log_category_lxc.appender->next = &log_appender_stderr; if (prefix) - lxc_log_setprefix(prefix); + lxc_log_set_prefix(prefix); - if (file && strcmp(file, "none") == 0) { - want_lxc_log_specified = 1; - return 0; - } - - if (!file) { - tmpfile = build_log_path(name); - if (!tmpfile) { - ERROR("could not build log path"); - return -1; - } + if (file) { + lxc_logfile_specified = 1; + if (strcmp(file, "none") == 0) + return 0; + ret = 
__lxc_log_set_file(file, 1); } else { ret = -1; + + if (!lxcpath) + lxcpath = LOGPATH; + + /* try LOGPATH if lxcpath is the default */ + if (strcmp(lxcpath, default_lxc_path()) == 0) + ret = _lxc_log_set_file(name, NULL, 0); + + /* try in lxcpath */ + if (ret < 0) + ret = _lxc_log_set_file(name, lxcpath, 1); + + /* try LOGPATH in case it's writable by the caller */ + if (ret < 0) + ret = _lxc_log_set_file(name, NULL, 0); } - ret = do_lxc_log_set_file(tmpfile ? tmpfile : file, !want_lxc_log_specified); - - if (want_lxc_log_specified) - lxc_log_specified = 1; /* - * If !want_lxc_log_specified, that is, if the user did not request - * this logpath, then ignore failures and continue logging to console + * If !file, that is, if the user did not request this logpath, then + * ignore failures and continue logging to console */ - if (!want_lxc_log_specified && ret != 0) { + if (!file && ret != 0) { INFO("Ignoring failure to open default logfile."); ret = 0; } - if (tmpfile) - free(tmpfile); - return ret; } @@ -294,51 +363,6 @@ extern int lxc_log_set_level(int level) return 0; } -char *log_fname; // default to NULL, set in lxc_log_set_file. -/* - * This can be called: - * 1. when a program calls lxc_log_init with no logfile parameter (in which - * case the default is used). In this case lxc.logfile can override this. - * 2. when a program calls lxc_log_init with a logfile parameter. In this - * case we don't want lxc.logfile to override this. - * 3. When a lxc.logfile entry is found in config file. - */ -int do_lxc_log_set_file(const char *fname, int from_default) -{ - if (lxc_log_specified) { - INFO("lxc.logfile overridden by command line"); - return 0; - } - if (lxc_log_fd != -1) { - // we are overriding the default. - close(lxc_log_fd); - free(log_fname); - } - -#if USE_CONFIGPATH_LOGS - // we don't build_dir for the default if the default is - // i.e.
/var/lib/lxc/$container/$container.log - if (!from_default) -#endif - if (build_dir(fname)) { - ERROR("failed to create dir for log file \"%s\" : %s", fname, - strerror(errno)); - return -1; - } - - lxc_log_fd = log_open(fname); - if (lxc_log_fd == -1) - return -1; - - log_fname = strdup(fname); - return 0; -} - -extern int lxc_log_set_file(const char *fname) -{ - return do_lxc_log_set_file(fname, 0); -} - extern int lxc_log_get_level(void) { if (!lxc_loglevel_specified) @@ -346,7 +370,38 @@ extern int lxc_log_get_level(void) return lxc_log_category_lxc.priority; } +extern bool lxc_log_has_valid_level(void) +{ + int log_level = lxc_log_get_level(); + if (log_level < 0 || log_level >= LXC_LOG_PRIORITY_NOTSET) + return false; + return true; +} + +/* + * This is called when we read a lxc.logfile entry in a lxc.conf file. This + * happens after processing command line arguments, which override the .conf + * settings. So only set the file if previously unset. + */ +extern int lxc_log_set_file(const char *fname) +{ + if (lxc_logfile_specified) + return 0; + return __lxc_log_set_file(fname, 0); +} + extern const char *lxc_log_get_file(void) { return log_fname; } + +extern void lxc_log_set_prefix(const char *prefix) +{ + strncpy(log_prefix, prefix, sizeof(log_prefix)); + log_prefix[sizeof(log_prefix) - 1] = 0; +} + +extern const char *lxc_log_get_prefix(void) +{ + return log_prefix; +} diff --git a/src/lxc/log.h b/src/lxc/log.h index 4252fa17f..d3c40fb17 100644 --- a/src/lxc/log.h +++ b/src/lxc/log.h @@ -19,7 +19,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _log_h #define _log_h @@ -28,6 +28,7 @@ #include #include #include +#include #ifndef O_CLOEXEC #define O_CLOEXEC 02000000 @@ -172,7 +173,7 @@ 
__lxc_log(const struct lxc_log_category* category, } /* - * Helper macro to define log fonctions. + * Helper macro to define log functions. */ #define lxc_log_priority_define(acategory, PRIORITY) \ \ @@ -288,11 +289,14 @@ extern struct lxc_log_category lxc_log_category_lxc; extern int lxc_log_fd; extern int lxc_log_init(const char *name, const char *file, - const char *priority, const char *prefix, int quiet); + const char *priority, const char *prefix, int quiet, + const char *lxcpath); -extern void lxc_log_setprefix(const char *a_prefix); -extern int lxc_log_set_level(int level); extern int lxc_log_set_file(const char *fname); -extern int lxc_log_get_level(void); +extern int lxc_log_set_level(int level); +extern void lxc_log_set_prefix(const char *prefix); extern const char *lxc_log_get_file(void); +extern int lxc_log_get_level(void); +extern bool lxc_log_has_valid_level(void); +extern const char *lxc_log_get_prefix(void); #endif diff --git a/src/lxc/lxc-checkconfig.in b/src/lxc/lxc-checkconfig.in index 472755f6f..7927aad3a 100644 --- a/src/lxc/lxc-checkconfig.in +++ b/src/lxc/lxc-checkconfig.in @@ -10,7 +10,7 @@ SETCOLOR_WARNING="printf \\033[1;33m" SETCOLOR_NORMAL="printf \\033[0;39m" is_set() { - $GREP -q "$1=[y|m]" $CONFIG + $GREP "$1=[y|m]" $CONFIG > /dev/null return $? } diff --git a/src/lxc/lxc-clone.in b/src/lxc/lxc-clone.in deleted file mode 100755 index 4c8acb491..000000000 --- a/src/lxc/lxc-clone.in +++ /dev/null @@ -1,324 +0,0 @@ -#!/bin/sh - -# -# lxc: linux Container library - -# Authors: -# Serge Hallyn -# Daniel Lezcano - -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. 
- -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -set -e - -usage() { - echo "usage: $(basename $0) -o ORIG_NAME -n NEW_NAME [-s] [-h] [-L FS_SIZE]" >&2 - echo " [-v VG_NAME] [-p LV_PREFIX] [-t FS_TYPE]" >&2 -} - -help() { - usage - echo >&2 - echo "Clone an existing container on the system." >&2 - echo >&2 - echo "Options:" >&2 - echo " -o ORIG_NAME specify the name of the original container" >&2 - echo " -n NEW_NAME specify the name of the new container" >&2 - echo " -s make the new rootfs a snapshot of the original" >&2 - echo " -L FS_SIZE specify the new filesystem size (default: same as original)" >&2 - echo " -v VG_NAME specify the new LVM volume group name (default: lxc)" >&2 - echo " -p LV_PREFIX add a prefix to new LVM logical volume names" >&2 - echo " -t FS_TYPE specify the new filesystem type (default: ext3;" >&2 - echo " only works for non-snapshot LVM)" >&2 -} - -usage_err() { - [ -n "$1" ] && echo "$1" >&2 - usage - exit 1 -} - -optarg_check() { - [ -n "$2" ] || usage_err "option $1 requires an argument" -} - -. 
@DATADIR@/lxc/lxc.functions -snapshot=no -lxc_size=_unset -lxc_vg=lxc -lxc_lv_prefix="" -fstype=ext3 - -while [ $# -gt 0 ]; do - opt="$1" - shift - case "$opt" in - -h|--help) - help - exit 1 - ;; - -s|--snapshot) - snapshot=yes - snapshot_opt="-s" - ;; - -o|--orig) - optarg_check $opt $1 - lxc_orig=$1 - shift - ;; - -L|--fssize) - optarg_check $opt $1 - lxc_size=$1 - shift - ;; - -t|--fstype) - optarg_check $opt $1 - fstype=$1 - shift - ;; - -v|--vgname) - optarg_check $opt $1 - lxc_vg=$1 - shift - ;; - -n|--name) - optarg_check $opt $1 - lxc_new=$1 - shift - ;; - -p|--lvprefix) - optarg_check $opt $1 - lxc_lv_prefix=$1 - shift - ;; - --) - break - ;; - -?) - usage_err "Unknown option: '$opt'" - ;; - -*) - # split opts -abc into -a -b -c - set -- $(echo "${opt#-}" | sed 's/\(.\)/ -\1/g') "$@" - ;; - *) - usage_err - ;; - esac -done - -if [ -z "$lxc_path" ]; then - echo "$(basename $0): no configuration path defined" >&2 - exit 1 -fi - -if [ ! -r $lxc_path ]; then - echo "$(basename $0): configuration path '$lxc_path' not found" >&2 - exit 1 -fi - -if [ -z "$lxc_orig" ]; then - echo "$(basename $0): no original container name specified" >&2 - usage - exit 1 -fi - -if [ -z "$lxc_new" ]; then - echo "$(basename $0): no new container name specified" >&2 - usage - exit 1 -fi - -if [ "$(id -u)" != "0" ]; then - echo "$(basename $0): must be run as root" >&2 - exit 1 -fi - -if [ ! 
-d "$lxc_path/$lxc_orig" ]; then - echo "$(basename $0): '$lxc_orig' does not exist" >&2 - exit 1 -fi - -if [ -d "$lxc_path/$lxc_new" ]; then - echo "$(basename $0): '$lxc_new' already exists" >&2 - exit 1 -fi - -mounted=0 -frozen=0 -oldroot=`grep lxc.rootfs $lxc_path/$lxc_orig/config | awk -F= '{ print $2 '}` - -cleanup() { - if [ -b $oldroot ]; then - if [ $mounted -eq 1 ]; then - umount $rootfs || true - fi - lvremove -f $rootdev || true - fi - ${bindir}/lxc-destroy -n $lxc_new || true - if [ $frozen -eq 1 ]; then - lxc-unfreeze -n $lxc_orig - fi - echo "$(basename $0): aborted" >&2 - exit 1 -} -trap cleanup HUP INT TERM - -mkdir -p $lxc_path/$lxc_new -hostname=$lxc_new - -echo "Tweaking configuration" -cp $lxc_path/$lxc_orig/config $lxc_path/$lxc_new/config -sed -i '/lxc.utsname/d' $lxc_path/$lxc_new/config -echo "lxc.utsname = $hostname" >> $lxc_path/$lxc_new/config - -grep "lxc.mount[ \t]" $lxc_path/$lxc_new/config >/dev/null 2>&1 && { sed -i '/lxc.mount[ \t]/d' $lxc_path/$lxc_new/config; echo "lxc.mount = $lxc_path/$lxc_new/fstab" >> $lxc_path/$lxc_new/config; } - -if [ -e $lxc_path/$lxc_orig/fstab ];then - cp $lxc_path/$lxc_orig/fstab $lxc_path/$lxc_new/fstab - sed -i "s@$lxc_path/$lxc_orig@$lxc_path/$lxc_new@" $lxc_path/$lxc_new/fstab -fi - -echo "Copying rootfs..." 
-oldroot=`grep lxc.rootfs $lxc_path/$lxc_orig/config | awk -F'[= \t]+' '{ print $2 }'` -rootfs=`echo $oldroot |sed "s/$lxc_orig/$lxc_new/"` - -container_running=True -lxc-info -n $lxc_orig --state-is RUNNING || container_running=False - -sed -i '/lxc.rootfs/d' $lxc_path/$lxc_new/config -if [ -b $oldroot ]; then - which vgscan >/dev/null 2>&1 || { echo "$(basename $0): lvm is not installed" >&2; false; } - lvdisplay $oldroot > /dev/null 2>&1 || { echo "$(basename $0): non-lvm blockdev cloning is not supported" >&2; false; } - lvm=TRUE - # ok, create a snapshot of the lvm device - if [ $container_running = "True" ]; then - lxc-freeze -n $lxc_orig - frozen=1 - fi - if [ $lxc_size = "_unset" ]; then - lxc_size=`lvdisplay $oldroot | grep Size | awk '{ print $3 $4 }'` - fi - newlv="${lxc_lv_prefix}${lxc_new}_snapshot" - lvcreate -s -L $lxc_size -n $newlv $oldroot - which xfs_admin > /dev/null 2>&1 && { - # change filesystem UUID if it is an xfs filesystem - xfs_admin -u /dev/$lxc_vg/$newlv && xfs_admin -U generate /dev/$lxc_vg/$newlv - } - - if [ $container_running = "True" ]; then - lxc-unfreeze -n $lxc_orig - frozen=0 - fi - if [ $snapshot = "no" ]; then - #mount snapshot - mkdir -p ${rootfs}_snapshot - mount /dev/$lxc_vg/${lxc_lv_prefix}${lxc_new}_snapshot ${rootfs}_snapshot || { echo "$(basename $0): failed to mount new rootfs_snapshot" >&2; false; } - #create a new lv - lvcreate -L $lxc_size $lxc_vg -n ${lxc_lv_prefix}$lxc_new - echo "lxc.rootfs = /dev/$lxc_vg/${lxc_lv_prefix}$lxc_new" >> $lxc_path/$lxc_new/config - # and mount it so we can tweak it - mkdir -p $rootfs - mkfs -t $fstype /dev/$lxc_vg/${lxc_lv_prefix}$lxc_new - mount /dev/$lxc_vg/${lxc_lv_prefix}$lxc_new $rootfs || { echo "$(basename $0): failed to mount new rootfs" >&2; false; } - mounted=1 - rsync -Hax ${rootfs}_snapshot/ ${rootfs}/ || { echo "$(basename $0): copying data to new lv failed" >&2; false; } - umount ${rootfs}_snapshot - rmdir ${rootfs}_snapshot - lvremove -f 
$lxc_vg/${lxc_lv_prefix}${lxc_new}_snapshot - else - lvrename $lxc_vg/${lxc_lv_prefix}${lxc_new}_snapshot $lxc_vg/${lxc_lv_prefix}$lxc_new - echo "lxc.rootfs = /dev/$lxc_vg/${lxc_lv_prefix}$lxc_new" >> $lxc_path/$lxc_new/config - # and mount it so we can tweak it - mkdir -p $rootfs - mount /dev/$lxc_vg/${lxc_lv_prefix}$lxc_new $rootfs || { echo "$(basename $0): failed to mount new rootfs" >&2; false; } - mounted=1 - fi - -elif which btrfs >/dev/null 2>&1 && btrfs subvolume list $oldroot >/dev/null 2>&1; then - # if oldroot is a btrfs subvolume, assume they want a snapshot - btrfs subvolume snapshot "$oldroot" "$rootfs" 2>&1 || { echo "$(basename $0): btrfs snapshot failed" >&2; false; } - echo "lxc.rootfs = $rootfs" >> "$lxc_path/$lxc_new/config" -elif [ -d $lxc_path/$lxc_orig/delta0 ]; then # this is a quasi-ephemeral container - if [ $container_running = "True" ]; then - echo "$(basename $0): container $lxc_orig is running." >&2 - cleanup - fi - rsync -Hax $lxc_path/$lxc_orig/delta0 $lxc_path/$lxc_new/ - touch $lxc_path/$lxc_new/configured - cp -f $lxc_path/$lxc_orig/pre-mount $lxc_path/$lxc_new/ - sed -i "s@$lxc_path/$lxc_orig@$lxc_path/$lxc_new@g" $lxc_path/$lxc_new/config - sed -i "s@$lxc_path/$lxc_orig@$lxc_path/$lxc_new@g" $lxc_path/$lxc_new/pre-mount - sed -i "s@LXC_NAME=\"$lxc_orig@LXC_NAME=\"$lxc_new@" $lxc_path/$lxc_new/pre-mount - # lxc-start-ephemeral will have updated /etc/hostname and such under the - # delta0, so just mounting the delta should suffice. 
- mkdir -p $rootfs - mount --bind $lxc_path/$lxc_new/delta0 $rootfs - mounted=1 - echo "lxc.rootfs = $rootfs" >> "$lxc_path/$lxc_new/config" -else - if [ $snapshot = "yes" ]; then - echo "$(basename $0): cannot snapshot a directory" >&2 - cleanup - fi - if [ $container_running = "True" ]; then - lxc-freeze -n $lxc_orig - frozen=1 - fi - mkdir -p $rootfs/ - rsync -Hax $oldroot/ $rootfs/ - echo "lxc.rootfs = $rootfs" >> $lxc_path/$lxc_new/config - if [ $container_running = "True" ]; then - lxc-unfreeze -n $lxc_orig - frozen=0 - fi -fi - -echo "Updating rootfs..." - -# so you can 'ssh $hostname.' or 'ssh $hostname.local' -if [ -f $rootfs/etc/dhcp/dhclient.conf ] && ! grep -q "^send host-name.*hostname" $rootfs/etc/dhcp/dhclient.conf; then - sed -i "s/send host-name.*$/send host-name \"$hostname\";/" $rootfs/etc/dhcp/dhclient.conf -fi - -c=$lxc_path/$lxc_new/config -# change hwaddrs -mv ${c} ${c}.old -( -while read line; do - if echo $line | grep -q -w '^lxc.network.hwaddr'; then - echo "lxc.network.hwaddr= 00:16:3e:$(openssl rand -hex 3| sed 's/\(..\)/\1:/g; s/.$//')" - else - echo "$line" - fi -done -) < ${c}.old > ${c} -rm -f ${c}.old - -# set the hostname -cat < $rootfs/etc/hostname -$hostname -EOF -# set minimal hosts -cat < $rootfs/etc/hosts -127.0.0.1 localhost $hostname -EOF - -# if this was a block device, then umount it now -if [ $mounted -eq 1 ]; then - umount $rootfs -fi - -echo "'$lxc_new' created" diff --git a/src/lxc/lxc-create.in b/src/lxc/lxc-create.in deleted file mode 100644 index ebbdd7b42..000000000 --- a/src/lxc/lxc-create.in +++ /dev/null @@ -1,357 +0,0 @@ -#!/bin/sh - -# -# lxc: linux Container library - -# Authors: -# Daniel Lezcano - -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. 
- -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -. @DATADIR@/lxc/lxc.functions - -usage() { - echo "usage: $(basename $0) -n NAME [-f CONFIG_FILE] [-t TEMPLATE] [FS_OPTIONS] --" >&2 - echo " [-P lxcpath] [TEMPLATE_OPTIONS]" >&2 - echo >&2 - echo "where FS_OPTIONS is one of:" >&2 - echo " -B none" >&2 - echo " -B dir [--dir rootfs_dir]" >&2 - echo " -B lvm [--lvname LV_NAME] [--vgname VG_NAME] [--fstype FS_TYPE]" >&2 - echo " [--fssize FS_SIZE]" >&2 - echo " -B btrfs" >&2 -} - -help() { - usage - echo >&2 - echo "Create a new container on the system." >&2 - echo >&2 - echo "Options:" >&2 - echo " -n NAME specify the name of the container" >&2 - echo " -f CONFIG_FILE use an existing configuration file" >&2 - echo " -t TEMPLATE use an accessible template script" >&2 - echo " -B BACKING_STORE alter the container backing store (default: none)" >&2 - echo " --lxcpath path specify an alternate container patch (default: $lxc_path)" >&2 - echo " --lvname LV_NAME specify the LVM logical volume name" >&2 - echo " (default: container name)" >&2 - echo " --dir ROOTFS_DIR specify path for custom rootfs directory location" >&2 - echo " --vgname VG_NAME specify the LVM volume group name (default: lxc)" >&2 - echo " --fstype FS_TYPE specify the filesystem type (default: ext4)" >&2 - echo " --fssize FS_SIZE specify the filesystem size (default: 500M)" >&2 - echo >&2 - if [ -z "$lxc_template" ]; then - echo "To see template-specific options, specify a template. 
For example:" >&2 - echo " $(basename $0) -t ubuntu -h" >&2 - exit 0 - fi - if [ -x ${templatedir}/lxc-$lxc_template ]; then - echo >&2 - echo "Template-specific options (TEMPLATE_OPTIONS):" >&2 - ${templatedir}/lxc-$lxc_template -h - fi -} - -usage_err() { - [ -n "$1" ] && echo "$1" >&2 - usage - exit 1 -} - -optarg_check() { - if [ -z "$2" ]; then - usage_err "option '$1' requires an argument" - fi -} - -backingstore=_unset -fstype=ext4 -fssize=500M -vgname=lxc -custom_rootfs="" - -while [ $# -gt 0 ]; do - opt="$1" - shift - case "$opt" in - -h|--help) - help - exit 1 - ;; - -n|--name) - optarg_check $opt "$1" - lxc_name=$1 - shift - ;; - -f|--config) - optarg_check $opt "$1" - lxc_config=$1 - shift - ;; - -P|--lxcpath) - optarg_check $opt "$1" - lxc_path=$1 - shift - ;; - -t|--template) - optarg_check $opt "$1" - lxc_template=$1 - shift - ;; - -B|--backingstore) - optarg_check $opt "$1" - backingstore=$1 - shift - ;; - --dir) - optarg_check $opt "$1" - custom_rootfs=$1 - shift - ;; - --lvname) - optarg_check $opt "$1" - lvname=$1 - shift - ;; - --vgname) - optarg_check $opt "$1" - vgname=$1 - shift - ;; - --fstype) - optarg_check $opt "$1" - fstype=$1 - shift - ;; - --fssize) - optarg_check $opt "$1" - fssize=$1 - shift - ;; - --) - break;; - -?) - usage_err "unknown option '$opt'" - ;; - -*) - # split opts -abc into -a -b -c - set -- $(echo "${opt#-}" | sed 's/\(.\)/ -\1/g') "$@" - ;; - *) - usage - exit 1 - ;; - esac -done - -# If -h or --help was passed into the container, we'll want to cleanup -# afterward -wantedhelp=0 -for var in "$@"; do - if [ "$var" = "-h" ] || [ "$var" = "--help" ]; then - help - exit 1 - fi -done - - -if [ -z "$lxc_path" ]; then - echo "$(basename $0): no configuration path defined" >&2 - exit 1 -fi - -if [ ! 
-r $lxc_path ]; then - echo "$(basename $0): configuration path '$lxc_path' not found" >&2 - exit 1 -fi - -if [ -z "$lxc_name" ]; then - echo "$(basename $0): no container name specified" >&2 - usage - exit 1 -fi - -if [ -z "$lvname" ]; then - lvname="$lxc_name" -fi - -if [ "$(id -u)" != "0" ]; then - echo "$(basename $0): must be run as root" >&2 - exit 1 -fi - -if [ -n "$custom_rootfs" ] && [ "$backingstore" != "dir" ]; then - echo "--dir is only valid with -B dir" -fi - -case "$backingstore" in - dir|lvm|none|btrfs|_unset) :;; - *) - echo "$(basename $0): '$backingstore' is not known (try 'none', 'dir', 'lvm', 'btrfs')" >&2 - usage - exit 1 - ;; -esac - -if [ -d "$lxc_path/$lxc_name" ]; then - echo "$(basename $0): '$lxc_name' already exists" >&2 - exit 1 -fi - -rootfs="$lxc_path/$lxc_name/rootfs" - -if [ "$backingstore" = "_unset" ] || [ "$backingstore" = "btrfs" ]; then -# if no backing store was given, then see if btrfs would work - if which btrfs >/dev/null 2>&1 && \ - btrfs filesystem df "$lxc_path/" >/dev/null 2>&1; then - backingstore="btrfs" - else - if [ "$backingstore" = "btrfs" ]; then - echo "$(basename $0): missing 'btrfs' command or $lxc_path is not btrfs" >&2 - exit 1; - fi - backingstore="none" - fi -fi - -if [ "$backingstore" = "lvm" ]; then - which vgscan > /dev/null 2>&1 - if [ $? -ne 0 ]; then - echo "$(basename $0): vgscan not found (is lvm2 installed?)" >&2 - exit 1 - fi - - grep -q "\<$fstype\>" /proc/filesystems - if [ $? -ne 0 ]; then - echo "$(basename $0): $fstype is not listed in /proc/filesystems" >&2 - exit 1 - fi - - vgscan | grep -q "Found volume group \"$vgname\"" - if [ $? -ne 0 ]; then - echo "$(basename $0): could not find volume group \"$vgname\"" >&2 - exit 1 - fi - - rootdev=/dev/$vgname/$lvname - lvdisplay $rootdev > /dev/null 2>&1 - if [ $? 
-eq 0 ]; then - echo "$(basename $0): backing store already exists: $rootdev" >&2 - echo "please delete it (using \"lvremove $rootdev\") and try again" >&2 - exit 1 - fi - -elif [ "$backingstore" = "btrfs" ]; then - mkdir "$lxc_path/$lxc_name" - if ! out=$(btrfs subvolume create "$rootfs" 2>&1); then - echo "$(basename $0): failed to create subvolume in $rootfs: $out" >&2 - exit 1; - fi -fi - -cleanup() { - if [ "$backingstore" = "lvm" ]; then - umount $rootfs - lvremove -f $rootdev - elif [ "$backingstore" = "btrfs" ]; then - btrfs subvolume delete "$rootfs" - fi - - ${bindir}/lxc-destroy -n $lxc_name - echo "$(basename $0): aborted" >&2 - exit 1 -} - -trap cleanup HUP INT TERM - -mkdir -p $lxc_path/$lxc_name - -if [ -z "$lxc_config" ]; then - lxc_config="@SYSCONFDIR@/lxc/default.conf" - echo - echo "$(basename $0): No config file specified, using the default config $lxc_config" -fi - -if [ ! -r "$lxc_config" ]; then - echo "$(basename $0): '$lxc_config' configuration file not found" >&2 - exit 1 -fi - -if [ ! -z "$lxc_template" ]; then - # Allow for a path to be provided as the template name - if [ -x "$lxc_template" -a $(echo "$lxc_template" | cut -c 1) = '/' ]; then - template_path=$lxc_template - else - template_path=${templatedir}/lxc-$lxc_template - fi - - if ! 
[ -x "$template_path" ]; then - echo "$(basename $0): unknown template '$lxc_template'" >&2 - cleanup - fi - - sum=$(sha1sum $template_path | cut -d ' ' -f1) - echo "# Template used to create this container: $lxc_template" >> $lxc_path/$lxc_name/config - if [ -n "$*" ]; then - echo "# Parameters passed to the template: $*" >> $lxc_path/$lxc_name/config - fi - echo "# Template script checksum (SHA-1): $sum" >> $lxc_path/$lxc_name/config - echo "" >> $lxc_path/$lxc_name/config -fi - -cat $lxc_config >> $lxc_path/$lxc_name/config -echo "" >> $lxc_path/$lxc_name/config - -if [ -n "$custom_rootfs" ]; then - if grep -q "lxc.rootfs" $lxc_path/$lxc_name/config ; then - echo "configuration file already specifies a lxc.rootfs" - exit 1 - fi - if [ -d "$custom_rootfs" ]; then - echo "specified rootfs ($custom_rootfs) already exists. Bailing." - exit 1 - fi - echo "lxc.rootfs = $custom_rootfs" >> $lxc_path/$lxc_name/config -fi - -# Create the fs as needed -if [ "$backingstore" = "lvm" ]; then - [ -d "$rootfs" ] || mkdir $rootfs - lvcreate -L $fssize -n $lvname $vgname || exit 1 - udevadm settle - mkfs -t $fstype $rootdev || exit 1 - mount -t $fstype $rootdev $rootfs -fi - -if [ ! -z "$lxc_template" ]; then - $template_path --path=$lxc_path/$lxc_name --name=$lxc_name $* - if [ $? -ne 0 ]; then - echo "$(basename $0): failed to execute template '$lxc_template'" >&2 - cleanup - fi - - echo "'$lxc_template' template installed" -fi - -if [ "$backingstore" = "lvm" ]; then - echo "Unmounting LVM" - umount $rootfs - - # TODO: make the templates set this right from the start? 
- sed -i '/lxc.rootfs/d' $lxc_path/$lxc_name/config - echo "lxc.rootfs = $rootdev" >> $lxc_path/$lxc_name/config -fi - -echo "'$lxc_name' created" diff --git a/src/lxc/lxc-destroy.in b/src/lxc/lxc-destroy.in index 1c68f9d81..bca5264aa 100644 --- a/src/lxc/lxc-destroy.in +++ b/src/lxc/lxc-destroy.in @@ -18,7 +18,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # This script allows to set or remove the capabilities on the lxc tools. @@ -40,19 +40,55 @@ help() { echo " -P lxcpath container is in specified lxcpath" >&2 } +. @DATADIR@/lxc/lxc.functions + usage_err() { [ -n "$1" ] && echo "$1" >&2 usage exit 1 } +verify_zfs() { + local path=$1 + which zfs > /dev/null 2>&1 || { echo no; return; } + if zfs list -H $path >/dev/null 2>&1; then + echo zfs + else + echo no + fi +} + +busy_zfs() { + local path=$1 + local dev + dev=`zfs list -H $path 2>/dev/null | awk '{ print $1 }'` + if zfs list -t snapshot | grep -q "$dev"; then + echo busy + else + echo zfs + fi +} + +verify_lvm() { + local path=$1 + if [ -b $path -o -h $path ]; then + lvdisplay $path > /dev/null 2>&1 && { echo lvm; return; } + fi + echo no +} + +busy_lvm() { + local path=$1 + lvdisplay $path | grep -q "LV snapshot status.*source of" && { echo busy; return; } + echo lvm +} + optarg_check() { if [ -z "$2" ]; then usage_err "option '$1' requires an argument" fi } -. @DATADIR@/lxc/lxc.functions force=0 while [ $# -gt 0 ]; do @@ -121,16 +157,29 @@ if ! lxc-info -n $lxc_name -P $lxc_path --state-is "STOPPED"; then fi # Deduce the type of rootfs -# If LVM partition, destroy it. For btrfs, we delete the subvolue. If anything +# If LVM partition, destroy it. For btrfs, we delete the subvolume. If anything # else, ignore it. We'll support deletion of others later. 
-rootdev=`grep lxc.rootfs $lxc_path/$lxc_name/config 2>/dev/null | sed -e 's/^[^/]*/\//'` +rootdev=`grep '^\s*lxc\.rootfs' $lxc_path/$lxc_name/config 2>/dev/null | sed -e 's/^[^/]*//'` if [ -n "$rootdev" ]; then - if [ -b "$rootdev" -o -h "$rootdev" ]; then - lvdisplay $rootdev > /dev/null 2>&1 - if [ $? -eq 0 ]; then + if [ `verify_lvm $rootdev` = "lvm" ]; then + if [ `busy_lvm $rootdev` = "busy" ]; then + echo "$rootdev has lvm snapshots - not deleting" + exit 1 + else echo "removing backing store: $rootdev" lvremove -f $rootdev fi + elif [ `verify_zfs $rootdev` = "zfs" ]; then + if [ `busy_zfs $rootdev` = "busy" ]; then + echo "$rootdev has zfs snapshots - not deleting" + exit 1 + else + zfs destroy $(zfs list | grep $rootdev | awk '{ print $1 }') + if [ $? -ne 0 ]; then + echo "zfs destroy failed - please wait a bit and try again" + exit 1 + fi + fi elif [ -h "$rootdev" -o -d "$rootdev" ]; then if which btrfs >/dev/null 2>&1 && btrfs subvolume list "$rootdev" >/dev/null 2>&1; then diff --git a/src/lxc/lxc-device b/src/lxc/lxc-device index b194b056f..9d24248ec 100644 --- a/src/lxc/lxc-device +++ b/src/lxc/lxc-device @@ -22,7 +22,7 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # NOTE: To remove once the API is stabilized diff --git a/src/lxc/lxc-ls b/src/lxc/lxc-ls index 4308edea7..cc0a1e758 100644 --- a/src/lxc/lxc-ls +++ b/src/lxc/lxc-ls @@ -22,7 +22,7 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # NOTE: To remove once the API is stabilized @@ -89,12 +89,13 @@ def 
getTerminalSize(): def getSubContainers(container, lxcpath): - attach = ['lxc-attach', '-R', '-s', 'NETWORK|PID', '-n', container, + attach = ['lxc-attach', '-P', lxcpath, '-R', '-s', 'NETWORK|PID', + '-n', container, '--', sys.argv[0], "--nesting"] with open(os.devnull, "w") as fd: newenv = dict(os.environ) - newenv['NESTED'] = "/proc/1/root/%s" % lxcpath + newenv['NESTED'] = "/proc/1/root/%s" % lxc.default_config_path sp = subprocess.Popen(attach, stderr=fd, stdout=subprocess.PIPE, env=newenv, universal_newlines=True) sp.wait() @@ -201,10 +202,10 @@ for container_name in lxc.list_containers(config_path=lxcpath): entry['pid'] = str(container.init_pid) # Get the IPs - for protocol in ('ipv4', 'ipv6'): + for family, protocol in {'inet': 'ipv4', 'inet6': 'ipv6'}.items(): if protocol in args.fancy_format or args.nesting: entry[protocol] = "-" - ips = container.get_ips(protocol=protocol, timeout=1) + ips = container.get_ips(family=family) if ips: entry[protocol] = ", ".join(ips) diff --git a/src/lxc/lxc-netstat.in b/src/lxc/lxc-netstat.in index 2fa2d23ae..2add53003 100644 --- a/src/lxc/lxc-netstat.in +++ b/src/lxc/lxc-netstat.in @@ -15,10 +15,12 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. @DATADIR@/lxc/lxc.functions usage() { - echo "usage: $(basename $0) -n|--name -- [netstat_options]" >&2 + echo "usage: $(basename $0) -n|--name [-P|--lxcpath ] -- [netstat_options]" >&2 } help() { @@ -26,8 +28,9 @@ help() { echo >&2 echo "Execute 'netstat' for the specified container." 
>&2 echo >&2 - echo " --name NAME specify the container name" >&2 - echo " NETSTAT_OPTIONS netstat command options (see \`netstat --help')" >&2 + echo " --name NAME specify the container name" >&2 + echo " --lxcpath LXC_PATH use an alternate container path" >&2 + echo " NETSTAT_OPTIONS netstat command options (see \`netstat --help')" >&2 } get_parent_cgroup() @@ -71,8 +74,20 @@ while true; do case $1 in -h|--help) help; exit 1;; - -n|--name) + -n) + # If we already have a value for $name, treat -n as being an + # argument for netstat + if [ -n "$name" ] + then + break + else + name="$2"; shift 2; + fi + ;; + --name) name=$2; shift 2;; + -P|--lxcpath) + lxc_path="$2"; shift 2;; --exec) exec="exec"; shift;; --) @@ -92,11 +107,17 @@ if [ -z "$name" ]; then exit 1 fi -if [ -z "$exec" ]; then - exec @BINDIR@/lxc-unshare -s MOUNT -- $0 -n $name --exec "$@" +if [ -z "$lxc_path" ]; then + echo "$(basename $0): no configuration path defined" >&2 + usage + exit 1 fi -if lxc-info -n $name --state-is 'STOPPED'; then +if [ -z "$exec" ]; then + exec @BINDIR@/lxc-unshare -s MOUNT -- $0 -n $name -P "$lxc_path" --exec -- "$@" +fi + +if lxc-info -n $name -P "$lxc_path" --state-is 'STOPPED'; then echo "$(basename $0): container '$name' is not running" >&2 exit 1 fi diff --git a/src/lxc/lxc-ps.in b/src/lxc/lxc-ps.in index 55a05ce48..75d5610bf 100644 --- a/src/lxc/lxc-ps.in +++ b/src/lxc/lxc-ps.in @@ -15,11 +15,13 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +. 
@DATADIR@/lxc/lxc.functions usage() { - echo "usage: $(basename $0) [--lxc | --host | --name NAME] [--] [PS_OPTIONS...]" >&2 + echo "usage: $(basename $0) [-P PATH] [--lxc | --host | --name NAME] [[--] [PS_OPTIONS...]" >&2 } help() { @@ -31,14 +33,16 @@ help() { echo " --host show processes not related to any container, i.e. to the host" >&2 echo " --name NAME show processes in the specified container" >&2 echo " (multiple containers can be separated by commas)" >&2 + echo " -P PATH show container in lxcpath PATH" >&2 echo " PS_OPTIONS ps command options (see \`ps --help')" >&2 } get_parent_cgroup() { - local hierarchies hierarchy fields subsystems init_cgroup mountpoint + local hierarchies hierarchy fields init_cgroup mountpoint parent_cgroup="" + subsystems="" # Obtain a list of hierarchies that contain one or more subsystems hierarchies=$(tail -n +2 /proc/cgroups | cut -f 2) @@ -62,11 +66,7 @@ get_parent_cgroup() if [ -z "$mountpoint" ]; then continue; fi # Return the absolute path to the containers' parent cgroup - # (do not append '/lxc' if the hierarchy contains the 'ns' subsystem) - case ",$subsystems," in - *,ns,*) parent_cgroup="${mountpoint}${init_cgroup%/}";; - *) parent_cgroup="${mountpoint}${init_cgroup%/}/lxc";; - esac + parent_cgroup="${mountpoint}${init_cgroup%/}"; break done } @@ -83,6 +83,8 @@ while true; do list_container_processes=1; shift;; --host) list_container_processes=-1; shift;; + -P|--lxcpath) + lxc_path=$2; shift 2;; --) shift; break;; *) @@ -101,7 +103,10 @@ if [ ! 
-d "$parent_cgroup" ]; then fi if [ -z "$containers" ]; then - containers="$(find $parent_cgroup -mindepth 1 -maxdepth 1 -type d 2>/dev/null | sed 's:.*/::')" + case ",$subsystems," in + *,ns,*) containers="$(find $parent_cgroup -mindepth 1 -maxdepth 1 -type d 2>/dev/null | sed 's:.*/::')";; + *) containers="$(find $parent_cgroup/lxc -mindepth 1 -maxdepth 1 -type d 2>/dev/null | sed 's:.*/::')";; + esac fi container_field_width=9 @@ -111,8 +116,12 @@ for container in ${containers}; do container_field_width=${#container} fi - if [ -f "$parent_cgroup/$container/tasks" ]; then - tasks_files="$tasks_files $parent_cgroup/$container/tasks" + if ! lxc-info -P $lxc_path -t STOPPED -n $container; then + initpid=`lxc-info -P $lxc_path -p -n $container | awk -F: '{ print $2 }' | awk '{ print $1 }'` + cgroup=`head -n 1 /proc/$initpid/cgroup | awk -F: '{ print $3}'` + if [ -f "$parent_cgroup/$cgroup/tasks" ]; then + tasks_files="$tasks_files $parent_cgroup$cgroup/tasks" + fi fi done diff --git a/src/lxc/lxc-shutdown.in b/src/lxc/lxc-shutdown.in deleted file mode 100644 index ee07f7595..000000000 --- a/src/lxc/lxc-shutdown.in +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/sh - -# (C) Copyright Canonical 2011,2012 - -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. - -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. 
- -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -set -e - -usage() { - echo "usage: lxc-shutdown -n name [-w] [-r]" - echo " Cleanly shut down a container." - echo " -w: wait for shutdown to complete." - echo " -r: reboot (ignore -w)." - echo " -t timeout: wait at most timeout seconds (implies -w), then kill" - echo " the container." -} - -alarm() { - trap 'exit 0' TERM - pid=$1 - timeout=$2 - sleep $timeout - kill $pid -} - -dolxcstop() -{ - echo "Calling lxc-stop on $lxc_name" - lxc-stop -n $lxc_name - exit 0 -} - -usage_err() { - [ -n "$1" ] && echo "$1" >&2 - usage - exit 1 -} - -optarg_check() { - [ -n "$2" ] || usage_err "option '$1' requires an argument" -} - -timeout="-1" - -reboot=0 -dowait=0 - -while [ $# -gt 0 ]; do - opt="$1" - shift - case "$opt" in - -h|--help) - usage - exit 0 - ;; - -n|--name) - optarg_check $opt "$1" - lxc_name=$1 - shift - ;; - -w|--wait) - dowait=1 - ;; - -r|--reboot) - reboot=1 - ;; - -t|--timeout) - optarg_check $opt "$1" - timeout=$1 - dowait=1 - shift - ;; - --) - break;; - -?) 
- usage_err "unknown option '$opt'" - ;; - -*) - # split opts -abc into -a -b -c - set -- $(echo "${opt#-}" | sed 's/\(.\)/ -\1/g') "$@" - ;; - *) - usage_err "unknown option '$opt'" - exit 1 - ;; - esac -done - -if [ -z "$lxc_name" ]; then - echo "no container name specified" - usage - exit 1 -fi - -if [ "$(id -u)" != "0" ]; then - echo "This command has to be run as root" - exit 1 -fi - -which lxc-info > /dev/null 2>&1 || { echo "lxc-info not found."; exit 1; } -which lxc-wait > /dev/null 2>&1 || { echo "lxc-wait not found."; exit 1; } - -pid=`lxc-info -n $lxc_name -p 2>/dev/null | awk '{ print $2 }'` -if [ "$pid" = "-1" ]; then - echo "$lxc_name is not running" - exit 1 -fi - -if [ $reboot -eq 1 ]; then - kill -s INT $pid - exit 0 -else - kill -s PWR $pid -fi - -if [ $dowait -eq 0 ]; then - exit 0 -fi - -if [ $timeout != "-1" ]; then - trap dolxcstop EXIT - alarm $$ $timeout 2>/dev/null & - alarmpid=$! -fi - -while ! lxc-info -n $lxc_name --state-is STOPPED; do - sleep 1 -done - -if [ $timeout != "-1" ]; then - trap - EXIT - # include subprocesses; otherwise, we may have to wait until sleep completes - # if called from a non-interactive context - kill $alarmpid $(ps --no-headers --ppid $alarmpid -o pid) 2>/dev/null || : -fi - -echo "Container $lxc_name has shut down" - -exit 0 diff --git a/src/lxc/lxc-start-ephemeral.in b/src/lxc/lxc-start-ephemeral.in index d1bb6be45..af8da80bc 100644 --- a/src/lxc/lxc-start-ephemeral.in +++ b/src/lxc/lxc-start-ephemeral.in @@ -22,7 +22,7 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # NOTE: To remove once the API is stabilized @@ -85,13 +85,17 @@ parser.add_argument("--key", "-S", type=str, parser.add_argument("--daemon", "-d", action="store_true", help=_("run in the 
background")) +parser.add_argument("--storage-type", "-s", type=str, default=None, + choices=("tmpfs", "dir"), + help=("type of storage use by the container")) + parser.add_argument("--union-type", "-U", type=str, default="overlayfs", choices=("overlayfs", "aufs"), help=_("type of union (overlayfs or aufs), " "defaults to overlayfs.")) parser.add_argument("--keep-data", "-k", action="store_true", - help=_("Use a persistent backend instead of tmpfs.")) + help=_("don't wipe everything clean at the end")) parser.add_argument("command", metavar='CMD', type=str, nargs="*", help=_("Run specific command in container " @@ -104,6 +108,16 @@ args = parser.parse_args() if args.command and args.daemon: parser.error(_("You can't use -d and a command at the same time.")) +## Check that -k isn't used with -s tmpfs +if not args.storage_type: + if args.keep_data: + args.storage_type = "dir" + else: + args.storage_type = "tmpfs" + +if args.keep_data and args.storage_type == "tmpfs": + parser.error(_("You can't use -k with the tmpfs storage type.")) + ## The user needs to be uid 0 if not os.geteuid() == 0: parser.error(_("You must be root to run this script. 
Try running: sudo %s" @@ -134,7 +148,9 @@ dest = lxc.Container(os.path.basename(dest_path), args.lxcpath) dest.load_config(orig.config_file_name) dest.set_config_item("lxc.utsname", dest.name) dest.set_config_item("lxc.rootfs", os.path.join(dest_path, "rootfs")) -dest.set_config_item("lxc.network.hwaddr", randomMAC()) +for nic in dest.network: + if hasattr(nic, 'hwaddr'): + nic.hwaddr = randomMAC() overlay_dirs = [(orig.get_config_item("lxc.rootfs"), "%s/rootfs/" % dest_path)] @@ -188,7 +204,7 @@ LXC_NAME="%s" target = "%s/delta%s" % (dest_path, count) fd.write("mkdir -p %s %s\n" % (target, entry[1])) - if not args.keep_data: + if args.storage_type == "tmpfs": fd.write("mount -n -t tmpfs none %s\n" % (target)) if args.union_type == "overlayfs": @@ -252,7 +268,7 @@ if not dest.start() or not dest.wait("RUNNING", timeout=5): # Deal with the case where we just attach to the container's console if not args.command and not args.daemon: - dest.console(tty=1) + dest.console() dest.shutdown(timeout=5) sys.exit(0) @@ -287,7 +303,7 @@ if args.user: cmd += ["-l", args.user] if args.key: - cmd += ["-k", args.key] + cmd += ["-i", args.key] for ip in ips: ssh_cmd = cmd + [ip] + args.command diff --git a/src/lxc/lxc-top b/src/lxc/lxc-top index 31aaecf9d..35d7e02ac 100755 --- a/src/lxc/lxc-top +++ b/src/lxc/lxc-top @@ -22,6 +22,7 @@ -- local lxc = require("lxc") +local core = require("lxc.core") local getopt = require("alt_getopt") local lfs = require("lfs") @@ -95,15 +96,6 @@ function strsisize(size, width) return string.format("%3d.00 ", size) end -function usleep(n) - if (n ~= 0) then - ret = os.execute("usleep " .. 
tonumber(n)) - if (ret ~= 0) then - os.exit(0) - end - end -end - function tty_lines() local rows = 25 local f = assert(io.popen("stty -a | head -n 1")) @@ -138,7 +130,6 @@ end function container_list_update() local now_running - lxc.stats_clear(stats_total) now_running = lxc.containers_running(true) -- check for newly started containers @@ -238,5 +229,5 @@ do end stats_print(string.format("TOTAL (%-2d)", #containers), stats_total) io.flush() - usleep(optarg["d"] * 1000000) + core.usleep(optarg["d"] * 1000000) end diff --git a/src/lxc/lxc.functions.in b/src/lxc/lxc.functions.in index b41f9b899..4c52f8cc9 100644 --- a/src/lxc/lxc.functions.in +++ b/src/lxc/lxc.functions.in @@ -16,7 +16,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # This file contains helpers for the various lxc shell scripts @@ -25,13 +25,6 @@ bindir=@BINDIR@ templatedir=@LXCTEMPLATEDIR@ lxcinitdir=@LXCINITDIR@ -get_default_lxcpath() { - LXC_PATH=$(grep -v "^#" "$globalconf" 2>/dev/null | grep "[ \t]*lxcpath[ \t]*=") - if [ -n "$LXC_PATH" ]; then - echo $LXC_PATH | awk -F= '{ print $2 }' - else - echo @LXCPATH@ - fi -} - -lxc_path=`get_default_lxcpath` +lxc_path=`lxc-config lxcpath` +lxc_vg=`lxc-config lvm_vg` +lxc_zfsroot=`lxc-config zfsroot` diff --git a/src/lxc/lxc.h b/src/lxc/lxc.h index db921f0ff..3477e839d 100644 --- a/src/lxc/lxc.h +++ b/src/lxc/lxc.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __lxc_h #define __lxc_h @@ -28,6 +28,8 @@ extern "C" { #endif #include 
+#include +#include #include struct lxc_msg; @@ -50,14 +52,6 @@ struct lxc_arguments; extern int lxc_start(const char *name, char *const argv[], struct lxc_conf *conf, const char *lxcpath); -/* - * Stop the container previously started with lxc_start, all - * the processes running inside this container will be killed. - * @name : the name of the container - * Returns 0 on success, < 0 otherwise - */ -extern int lxc_stop(const char *name, const char *lxcpath); - /* * Start the specified command inside an application container * @name : the name of the container @@ -77,16 +71,36 @@ extern int lxc_execute(const char *name, char *const argv[], int quiet, extern int lxc_monitor_open(const char *lxcpath); /* - * Read the state of the container if this one has changed - * The function will block until there is an event available - * @fd : the file descriptor provided by lxc_monitor_open - * @state : the variable which will be filled with the state + * Blocking read for the next container state change + * @fd : the file descriptor provided by lxc_monitor_open + * @msg : the variable which will be filled with the state * Returns 0 if the monitored container has exited, > 0 if - * data was readen, < 0 otherwise + * data was read, < 0 otherwise */ extern int lxc_monitor_read(int fd, struct lxc_msg *msg); + +/* + * Blocking read for the next container state change with timeout + * @fd : the file descriptor provided by lxc_monitor_open + * @msg : the variable which will be filled with the state + * @timeout : the timeout in seconds to wait for a state change + * Returns 0 if the monitored container has exited, > 0 if + * data was read, < 0 otherwise + */ extern int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout); +/* + * Blocking read from multiple monitors for the next container state + * change with timeout + * @rfds : an fd_set of file descriptors provided by lxc_monitor_open + * @nfds : the maximum fd number in rfds + 1 + * @msg : the variable which will 
be filled with the state + * @timeout : the timeout in seconds to wait for a state change + * Returns 0 if the monitored container has exited, > 0 if + * data was read, < 0 otherwise + */ +extern int lxc_monitor_read_fdset(fd_set *rfds, int nfds, struct lxc_msg *msg, int timeout); + /* * Close the fd associated with the monitoring * @fd : the file descriptor provided by lxc_monitor_open @@ -94,15 +108,6 @@ extern int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout); */ extern int lxc_monitor_close(int fd); -/* - * Show the console of the container. - * @name : the name of container - * @tty : the tty number - * @fd : a pointer to a tty file descriptor - * Returns 0 on sucess, < 0 otherwise - */ -extern int lxc_console(const char *name, int ttynum, int *fd, const char *lxcpath); - /* * Freeze all the tasks running inside the container * @name : the container name @@ -132,15 +137,17 @@ extern int lxc_unfreeze_bypath(const char *cgpath); */ extern lxc_state_t lxc_state(const char *name, const char *lxcpath); +struct lxc_handler; /* * Set a specified value for a specified subsystem. The specified * subsystem must be fully specified, eg. "cpu.shares" - * @cgpath : the cgroup path of the container + * @d : the cgroup descriptor for the container * @filename : the cgroup attribute filename * @value : the value to be set * Returns 0 on success, < 0 otherwise */ -extern int lxc_cgroup_set_bypath(const char *cgpath, const char *filename, const char *value); +extern int lxc_cgroup_set_value(struct lxc_handler *hander, const char *filename, + const char *value); /* * Set a specified value for a specified subsystem. 
The specified @@ -220,6 +227,11 @@ extern int lxc_container_put(struct lxc_container *c); */ extern int lxc_get_wait_states(const char **states); +/* + * Add a dependency to a container + */ +extern int add_rdepend(struct lxc_conf *lxc_conf, char *rdepend); + #ifdef __cplusplus } #endif diff --git a/src/lxc/lxc_attach.c b/src/lxc/lxc_attach.c index 77039fbea..4ca00a97f 100644 --- a/src/lxc/lxc_attach.c +++ b/src/lxc/lxc_attach.c @@ -18,35 +18,22 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include #include +#include +#include #include "attach.h" -#include "commands.h" #include "arguments.h" -#include "caps.h" -#include "cgroup.h" #include "config.h" #include "confile.h" -#include "start.h" -#include "sync.h" -#include "log.h" #include "namespace.h" -#include "apparmor.h" - -#if HAVE_SYS_PERSONALITY_H -#include -#endif +#include "caps.h" +#include "log.h" +#include "utils.h" lxc_log_define(lxc_attach_ui, lxc); @@ -58,6 +45,8 @@ static const struct option my_longopts[] = { /* TODO: decide upon short option names */ {"clear-env", no_argument, 0, 500}, {"keep-env", no_argument, 0, 501}, + {"keep-var", required_argument, 0, 502}, + {"set-var", required_argument, 0, 'v'}, LXC_COMMON_OPTIONS }; @@ -66,6 +55,32 @@ static signed long new_personality = -1; static int namespace_flags = -1; static int remount_sys_proc = 0; static lxc_attach_env_policy_t env_policy = LXC_ATTACH_KEEP_ENV; +static char **extra_env = NULL; +static ssize_t extra_env_size = 0; +static char **extra_keep = NULL; +static ssize_t extra_keep_size = 0; + +static int add_to_simple_array(char ***array, ssize_t *capacity, char *value) +{ + ssize_t count = 0; + + if (*array) + 
for (; (*array)[count]; count++); + + /* we have to reallocate */ + if (count >= *capacity - 1) { + ssize_t new_capacity = ((count + 1) / 32 + 1) * 32; + char **new_array = realloc((void*)*array, sizeof(char *) * new_capacity); + if (!new_array) + return -1; + memset(&new_array[count], 0, sizeof(char*)*(new_capacity - count)); + *array = new_array; + *capacity = new_capacity; + } + + (*array)[count] = value; + return 0; +} static int my_parser(struct lxc_arguments* args, int c, char* arg) { @@ -95,6 +110,20 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg) case 501: /* keep-env */ env_policy = LXC_ATTACH_KEEP_ENV; break; + case 502: /* keep-var */ + ret = add_to_simple_array(&extra_keep, &extra_keep_size, arg); + if (ret < 0) { + lxc_error(args, "memory allocation error"); + return -1; + } + break; + case 'v': + ret = add_to_simple_array(&extra_env, &extra_env_size, arg); + if (ret < 0) { + lxc_error(args, "memory allocation error"); + return -1; + } + break; } return 0; @@ -103,9 +132,9 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg) static struct lxc_arguments my_args = { .progname = "lxc-attach", .help = "\ ---name=NAME\n\ +--name=NAME [-- COMMAND]\n\ \n\ -Execute the specified command - enter the container NAME\n\ +Execute the specified COMMAND - enter the container NAME\n\ \n\ Options :\n\ -n, --name=NAME NAME for name of the container\n\ @@ -127,14 +156,18 @@ Options :\n\ mount namespace when using -s in order to properly\n\ reflect the correct namespace context. See the\n\ lxc-attach(1) manual page for details.\n\ - --clear-env\n\ - Clear all environment variables before attaching.\n\ + --clear-env Clear all environment variables before attaching.\n\ The attached shell/program will start with only\n\ container=lxc set.\n\ - --keep-env\n\ - Keep all current enivornment variables. This\n\ + --keep-env Keep all current enivornment variables. 
This\n\ is the current default behaviour, but is likely to\n\ - change in the future.\n", + change in the future.\n\ + -v, --set-var Set an additional variable that is seen by the\n\ + attached program in the container. May be specified\n\ + multiple times.\n\ + --keep-var Keep an additional environment variable. Only\n\ + applicable if --clear-env is specified. May be used\n\ + multiple times.\n", .options = my_longopts, .parser = my_parser, .checker = NULL, @@ -143,14 +176,9 @@ Options :\n\ int main(int argc, char *argv[]) { int ret; - pid_t pid, init_pid; - struct passwd *passwd; - struct lxc_proc_context_info *init_ctx; - struct lxc_handler *handler; - uid_t uid; - char *curdir; - int cgroup_ipc_sockets[2]; - char *user_shell; + pid_t pid; + lxc_attach_options_t attach_options = LXC_ATTACH_OPTIONS_DEFAULT; + lxc_attach_command_t command; ret = lxc_caps_init(); if (ret) @@ -161,353 +189,37 @@ int main(int argc, char *argv[]) return ret; ret = lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet); + my_args.progname, my_args.quiet, my_args.lxcpath[0]); if (ret) return ret; - init_pid = get_init_pid(my_args.name, my_args.lxcpath); - if (init_pid < 0) { - ERROR("failed to get the init pid"); + if (remount_sys_proc) + attach_options.attach_flags |= LXC_ATTACH_REMOUNT_PROC_SYS; + if (elevated_privileges) + attach_options.attach_flags &= ~(LXC_ATTACH_MOVE_TO_CGROUP | LXC_ATTACH_DROP_CAPABILITIES | LXC_ATTACH_APPARMOR); + attach_options.namespaces = namespace_flags; + attach_options.personality = new_personality; + attach_options.env_policy = env_policy; + attach_options.extra_env_vars = extra_env; + attach_options.extra_keep_env = extra_keep; + + if (my_args.argc) { + command.program = my_args.argv[0]; + command.argv = (char**)my_args.argv; + ret = lxc_attach(my_args.name, my_args.lxcpath[0], lxc_attach_run_command, &command, &attach_options, &pid); + } else { + ret = lxc_attach(my_args.name, my_args.lxcpath[0], 
lxc_attach_run_shell, NULL, &attach_options, &pid); + } + + if (ret < 0) return -1; - } - init_ctx = lxc_proc_get_context_info(init_pid); - if (!init_ctx) { - ERROR("failed to get context of the init process, pid = %d", init_pid); + ret = lxc_wait_for_pid_status(pid); + if (ret < 0) return -1; - } - curdir = getcwd(NULL, 0); + if (WIFEXITED(ret)) + return WEXITSTATUS(ret); - /* determine which namespaces the container was created with - * by asking lxc-start - */ - if (namespace_flags == -1) { - namespace_flags = lxc_get_clone_flags(my_args.name, my_args.lxcpath); - /* call failed */ - if (namespace_flags == -1) { - ERROR("failed to automatically determine the " - "namespaces which the container unshared"); - return -1; - } - } - - /* For the cgroup attaching logic to work in conjunction with pid and user namespaces, - * we need to have the following hierarchy: - * - * lxc-attach [process executed externally] - * | socketpair(cgroup_ipc_sockets) - * | fork() -> child - * | | setns() - * | | fork() -> grandchild - * | | | initialize - * | | | signal parent - * | |<------------------|----+ - * | | signal parent | - * |<----------------------|-----+ | - * | add to cgroups | | - * | signal child -------->| | - * | | signal child ---->| - * | waitpid() | waitpid() | exec() - * | |<------------------| exit() - * |<----------------------| exit() - * | exit() - * - * The rationale is the following: The first parent is needed because after - * setns() (mount + user namespace) we can't access the cgroup filesystem - * to add the pid to the corresponding cgroup. Therefore, we need to do that - * in a process executed on the host, so that's why we need to fork and wait - * for it to have done some initialization (cgroups may restrict certain - * operations so we have to do that in the end) and use IPC for signaling. - * - * Then in the child process we do the setns(). 
However, a process is never - * really attached to a pid namespace (never changes its pid, doesn't appear - * in the pid namespace /proc), only child processes of that process are - * truely inside the new pid namespace. That's why we need to fork() again - * after setns() before performing final initializations, then signal our - * parent, which signals the primary process, which does cgroup adding, - * which then signals to the grandchild that it can exec(). - */ - ret = socketpair(PF_LOCAL, SOCK_STREAM, 0, cgroup_ipc_sockets); - if (ret < 0) { - SYSERROR("could not set up required IPC mechanism for attaching"); - return -1; - } - - pid = fork(); - if (pid < 0) { - SYSERROR("failed to create first subprocess"); - return -1; - } - - if (pid) { - int status; - pid_t grandchild; - - close(cgroup_ipc_sockets[1]); - - gparent_reread: - ret = read(cgroup_ipc_sockets[0], &grandchild, sizeof(grandchild)); - if (ret <= 0) { - if (ret < 0 && (errno == EAGAIN || errno == EINTR)) - goto gparent_reread; - ERROR("failed to get pid of attached process to add to cgroup"); - return -1; - } - - if (!elevated_privileges) { - ret = lxc_cgroup_attach(grandchild, my_args.name, my_args.lxcpath); - if (ret < 0) { - ERROR("failed to attach process to cgroup"); - return -1; - } - } - - status = 0; - ret = write(cgroup_ipc_sockets[0], &status, sizeof(status)); - if (ret <= 0) { - ERROR("failed to signal child that cgroup logic has finished"); - return -1; - } - - close(cgroup_ipc_sockets[0]); - - gparent_again: - ret = waitpid(pid, &status, 0); - if (ret < 0) { - if (errno == EINTR) - goto gparent_again; - SYSERROR("failed to wait for process '%d'", pid); - return -1; - } - - if (WIFEXITED(status)) - return WEXITSTATUS(status); - - return -1; - } - - /* at this point we are in the 'parent' process so we need to close the - * socket reserved for the 'grandparent' process - */ - close(cgroup_ipc_sockets[0]); - - /* we need to attach before we fork since certain namespaces - * (such as pid 
namespaces) only really affect children of the - * current process and not the process itself - */ - ret = lxc_attach_to_ns(init_pid, namespace_flags); - if (ret < 0) { - ERROR("failed to enter the namespace"); - return -1; - } - - if (curdir && chdir(curdir)) - WARN("could not change directory to '%s'", curdir); - - free(curdir); - - /* hack: we need sync.h infrastructure - and that needs a handler - * FIXME: perhaps we should also just use a very simple socketpair() - * here? - like with the grandparent <-> parent communication? - */ - handler = calloc(1, sizeof(*handler)); - - if (lxc_sync_init(handler)) { - ERROR("failed to initialize synchronization socket"); - return -1; - } - - pid = fork(); - - if (pid < 0) { - SYSERROR("failed to fork"); - return -1; - } - - if (pid) { - int status; - - lxc_sync_fini_child(handler); - - /* wait until the child has done configuring itself before - * we put it in a cgroup that potentially limits these - * possibilities */ - if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE)) - return -1; - - /* ask grandparent to add child to cgroups, the grandparent will - * itself check whether that's actually necessary - */ - ret = write(cgroup_ipc_sockets[1], &pid, sizeof(pid)); - if (ret != sizeof(pid)) { - ERROR("error using IPC to notify main process of pid to add to the cgroups of the container"); - return -1; - } - - parent_reread: - /* we need some mechanism to check whether the grandparent could - * add us to the cgroups or not - so we await a dummy integer - * on the same socket (that's why we don't use a pipe - we need - * two-way communication). So if the parent fails and exits, that - * will close the socket, which will cause a read of 0 bytes for - * us, so we just terminate. If we read at least a byte, we don't - * care about the contents... 
- */ - ret = read(cgroup_ipc_sockets[1], &status, sizeof(status)); - if (ret <= 0) { - if (ret < 0 && (errno == EAGAIN || errno == EINTR)) - goto parent_reread; - /* only print someting if we can't assume the parent already - * gave an error message, that will reduce confusion for the - * user - */ - if (ret != 0) - ERROR("failed to get notification that the child process was added to the container's cgroups"); - return -1; - } - - /* we don't need that IPC interface anymore */ - close(cgroup_ipc_sockets[1]); - - /* tell the child we are done initializing */ - if (lxc_sync_wake_child(handler, LXC_SYNC_POST_CONFIGURE)) - return -1; - - lxc_sync_fini(handler); - - again: - if (waitpid(pid, &status, 0) < 0) { - if (errno == EINTR) - goto again; - SYSERROR("failed to wait '%d'", pid); - return -1; - } - - if (WIFEXITED(status)) - return WEXITSTATUS(status); - - return -1; - } - - if (!pid) { - lxc_sync_fini_parent(handler); - close(cgroup_ipc_sockets[1]); - - if ((namespace_flags & CLONE_NEWNS)) { - if (attach_apparmor(init_ctx->aa_profile) < 0) { - ERROR("failed switching apparmor profiles"); - return -1; - } - } - - /* A description of the purpose of this functionality is - * provided in the lxc-attach(1) manual page. We have to - * remount here and not in the parent process, otherwise - * /proc may not properly reflect the new pid namespace. 
- */ - if (!(namespace_flags & CLONE_NEWNS) && remount_sys_proc) { - ret = lxc_attach_remount_sys_proc(); - if (ret < 0) { - return -1; - } - } - - #if HAVE_SYS_PERSONALITY_H - if (new_personality < 0) - new_personality = init_ctx->personality; - - if (personality(new_personality) == -1) { - ERROR("could not ensure correct architecture: %s", - strerror(errno)); - return -1; - } - #endif - - if (!elevated_privileges && lxc_attach_drop_privs(init_ctx)) { - ERROR("could not drop privileges"); - return -1; - } - - if (lxc_attach_set_environment(env_policy, NULL, NULL)) { - ERROR("could not set environment"); - return -1; - } - - /* tell parent we are done setting up the container and wait - * until we have been put in the container's cgroup, if - * applicable */ - if (lxc_sync_barrier_parent(handler, LXC_SYNC_CONFIGURE)) - return -1; - - lxc_sync_fini(handler); - - if (namespace_flags & CLONE_NEWUSER) { - uid_t init_uid = 0; - gid_t init_gid = 0; - - /* ignore errors, we will fall back to root in that case - * (/proc was not mounted etc.) 
- */ - lxc_attach_get_init_uidgid(&init_uid, &init_gid); - - /* try to set the uid/gid combination */ - if (setgid(init_gid)) { - SYSERROR("switching to container gid"); - return -1; - } - if (setuid(init_uid)) { - SYSERROR("switching to container uid"); - return -1; - } - } - - if (my_args.argc) { - execvp(my_args.argv[0], my_args.argv); - SYSERROR("failed to exec '%s'", my_args.argv[0]); - return -1; - } - - uid = getuid(); - - passwd = getpwuid(uid); - - /* this probably happens because of incompatible nss - * implementations in host and container (remember, this - * code is still using the host's glibc but our mount - * namespace is in the container) - * we may try to get the information by spawning a - * [getent passwd uid] process and parsing the result - */ - if (!passwd) - user_shell = lxc_attach_getpwshell(uid); - else - user_shell = passwd->pw_shell; - - if (user_shell) { - char *const args[] = { - user_shell, - NULL, - }; - - (void) execvp(args[0], args); - } - - /* executed if either no passwd entry or execvp fails, - * we will fall back on /bin/sh as a default shell - */ - { - char *const args[] = { - "/bin/sh", - NULL, - }; - - execvp(args[0], args); - SYSERROR("failed to exec '%s'", args[0]); - return -1; - } - - } - - return 0; + return -1; } diff --git a/src/lxc/lxc_cgroup.c b/src/lxc/lxc_cgroup.c index f7c88a89c..2c0508c7e 100644 --- a/src/lxc/lxc_cgroup.c +++ b/src/lxc/lxc_cgroup.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include @@ -29,6 +29,7 @@ #include #include +#include #include "arguments.h" lxc_log_define(lxc_cgroup_ui, lxc_cgroup); @@ -64,39 +65,47 @@ Options :\n\ int main(int argc, char *argv[]) { char *state_object = NULL, *value = NULL; + struct lxc_container *c; 
if (lxc_arguments_parse(&my_args, argc, argv)) return -1; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) + my_args.progname, my_args.quiet, my_args.lxcpath[0])) return -1; state_object = my_args.argv[0]; - if ((argc) > 1) - value = my_args.argv[1]; + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) + return -1; + if (!c->is_running(c)) { + ERROR("'%s:%s' is not running", my_args.lxcpath[0], my_args.name); + lxc_container_put(c); + return -1; + } - if (value) { - if (lxc_cgroup_set(my_args.name, state_object, value, my_args.lxcpath)) { + if ((my_args.argc) > 1) { + value = my_args.argv[1]; + if (!c->set_cgroup_item(c, state_object, value)) { ERROR("failed to assign '%s' value to '%s' for '%s'", value, state_object, my_args.name); + lxc_container_put(c); return -1; } } else { - const unsigned long len = 4096; - int ret; + int len = 4096; char buffer[len]; - - ret = lxc_cgroup_get(my_args.name, state_object, buffer, len, my_args.lxcpath); + int ret = c->get_cgroup_item(c, state_object, buffer, len); if (ret < 0) { - ERROR("failed to retrieve value of '%s' for '%s'", - state_object, my_args.name); + ERROR("failed to retrieve value of '%s' for '%s:%s'", + state_object, my_args.lxcpath[0], my_args.name); + lxc_container_put(c); return -1; } - printf("%*s", ret, buffer); } + lxc_container_put(c); return 0; } diff --git a/src/lxc/lxc_checkpoint.c b/src/lxc/lxc_checkpoint.c index 947d9d941..ecf19b139 100644 --- a/src/lxc/lxc_checkpoint.c +++ b/src/lxc/lxc_checkpoint.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -116,7 +116,7 @@ int main(int argc, char *argv[]) return ret; ret = 
lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet); + my_args.progname, my_args.quiet, my_args.lxcpath[0]); if (ret) return ret; diff --git a/src/lxc/lxc_clone.c b/src/lxc/lxc_clone.c new file mode 100644 index 000000000..8b5c6e957 --- /dev/null +++ b/src/lxc/lxc_clone.c @@ -0,0 +1,179 @@ +/* + * + * Copyright © 2013 Serge Hallyn . + * Copyright © 2013 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "log.h" +#include "config.h" +#include "lxc.h" +#include "conf.h" +#include "state.h" +#include "lxccontainer.h" + +lxc_log_define(lxc_clone, lxc); + +static unsigned long get_fssize(char *s) +{ + unsigned long ret; + char *end; + + ret = strtoul(s, &end, 0); + if (end == s) + return 0; + while (isblank(*end)) + end++; + if (!(*end)) + return ret; + if (*end == 'g' || *end == 'G') + ret *= 1000000000; + else if (*end == 'm' || *end == 'M') + ret *= 1000000; + else if (*end == 'k' || *end == 'K') + ret *= 1000; + return ret; +} + +void usage(const char *me) +{ + printf("Usage: %s [-s] [-B backingstore] [-L size] [-K] [-M] [-H]\n", me); + printf(" [-p lxcpath] [-P newlxcpath] orig new\n"); + printf("\n"); + printf(" -s: snapshot rather than copy\n"); + printf(" -B: use specified new backingstore. Default is the same as\n"); + printf(" the original. Options include btrfs, lvm, overlayfs, \n"); + printf(" dir and loop\n"); + printf(" -L: for blockdev-backed backingstore, use specified size\n"); + printf(" -K: Keep name - do not change the container name\n"); + printf(" -M: Keep macaddr - do not choose a random new mac address\n"); + printf(" -H: copy Hooks - copy mount hooks into container directory\n"); + printf(" and substitute container names and lxcpaths\n"); + printf(" -p: use container orig from custom lxcpath\n"); + printf(" -P: create container new in custom lxcpath\n"); + exit(1); +} + +static struct option options[] = { + { "snapshot", no_argument, 0, 's'}, + { "backingstore", required_argument, 0, 'B'}, + { "size", required_argument, 0, 'L'}, + { "orig", required_argument, 0, 'o'}, + { "new", required_argument, 0, 'n'}, + { "vgname", required_argument, 0, 'v'}, + { "keepname", no_argument, 0, 'K'}, + { "keepmac", no_argument, 0, 'M'}, + { "copyhooks", no_argument, 0, 'H'}, // should this be default? 
+ { "lxcpath", required_argument, 0, 'p'}, + { "newpath", required_argument, 0, 'P'}, + { "fstype", required_argument, 0, 't'}, + { "help", no_argument, 0, 'h'}, + { 0, 0, 0, 0 }, +}; + +int main(int argc, char *argv[]) +{ + struct lxc_container *c1 = NULL, *c2 = NULL; + int snapshot = 0, keepname = 0, keepmac = 0, copyhooks = 0; + int flags = 0, option_index; + long newsize = 0; + char *bdevtype = NULL, *lxcpath = NULL, *newpath = NULL, *fstype = NULL; + char *orig = NULL, *new = NULL, *vgname = NULL; + char **args = NULL; + int c; /* int, not char: getopt_long() returns int, and -1 never compares equal where plain char is unsigned */ + + if (argc < 3) + usage(argv[0]); + + while (1) { + c = getopt_long(argc, argv, "sB:L:o:n:v:KMHp:P:t:h", options, &option_index); + if (c == -1) + break; + switch (c) { + case 's': snapshot = 1; break; + case 'B': bdevtype = optarg; break; + case 'L': newsize = get_fssize(optarg); break; + case 'o': orig = optarg; break; + case 'n': new = optarg; break; + case 'v': vgname = optarg; break; + case 'K': keepname = 1; break; + case 'M': keepmac = 1; break; + case 'H': copyhooks = 1; break; + case 'p': lxcpath = optarg; break; + case 'P': newpath = optarg; break; + case 't': fstype = optarg; break; + case 'h': usage(argv[0]); + default: break; + } + } + if (optind < argc && !orig) + orig = argv[optind++]; + if (optind < argc && !new) + new = argv[optind++]; + if (optind < argc) + /* arguments for the clone hook */ + args = &argv[optind]; + if (!new || !orig) { + printf("Error: you must provide orig and new names\n"); + usage(argv[0]); + } + + if (snapshot) flags |= LXC_CLONE_SNAPSHOT; + if (keepname) flags |= LXC_CLONE_KEEPNAME; + if (keepmac) flags |= LXC_CLONE_KEEPMACADDR; + if (copyhooks) flags |= LXC_CLONE_COPYHOOKS; + + // vgname and fstype could be supported by sending them through the + // bdevdata. However, they currently are not yet. I'm not convinced + // they are worthwhile.
+ if (vgname) { + printf("Error: vgname not supported\n"); + usage(argv[0]); + } + if (fstype) { + printf("Error: fstype not supported\n"); + usage(argv[0]); + } + + c1 = lxc_container_new(orig, lxcpath); + if (!c1) + exit(1); + if (!c1->is_defined(c1)) { + fprintf(stderr, "Error: container %s is not defined\n", orig); + lxc_container_put(c1); + exit(1); + } + c2 = c1->clone(c1, new, newpath, flags, bdevtype, NULL, newsize, args); + if (c2 == NULL) { + lxc_container_put(c1); + fprintf(stderr, "clone failed\n"); + exit(1); + } + printf("Created container %s as %s of %s\n", new, + snapshot ? "snapshot" : "copy", orig); + lxc_container_put(c1); + lxc_container_put(c2); + return(0); +} diff --git a/src/lxc/lxc_config.c b/src/lxc/lxc_config.c new file mode 100644 index 000000000..b3f2ed897 --- /dev/null +++ b/src/lxc/lxc_config.c @@ -0,0 +1,70 @@ +/* lxc_config + * + * Copyright © 2012 Serge Hallyn . + * Copyright © 2012 Canonical Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include "config.h" +#include "lxccontainer.h" + +struct lxc_config_items { + char *name; + const char *(*fn)(void); +}; + +struct lxc_config_items items[] = +{ + { .name = "lxcpath", .fn = &lxc_get_default_config_path, }, + { .name = "lvm_vg", .fn = &lxc_get_default_lvm_vg, }, + { .name = "zfsroot", .fn = &lxc_get_default_zfs_root, }, + { .name = NULL, }, +}; + +void usage(char *me) +{ + printf("Usage: %s -l: list all available configuration items\n", me); + printf(" %s item: print configuration item\n", me); + exit(1); +} + +void list_config_items(void) +{ + struct lxc_config_items *i; + + for (i = &items[0]; i->name; i++) + printf("%s\n", i->name); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct lxc_config_items *i; + + if (argc < 2) + usage(argv[0]); + if (strcmp(argv[1], "-l") == 0) + list_config_items(); + for (i = &items[0]; i->name; i++) { + if (strcmp(argv[1], i->name) == 0) { + printf("%s\n", i->fn()); + exit(0); + } + } + printf("Unknown configuration item: %s\n", argv[1]); + exit(-1); +} diff --git a/src/lxc/lxc_console.c b/src/lxc/lxc_console.c index 3dd2155c6..28ad77244 100644 --- a/src/lxc/lxc_console.c +++ b/src/lxc/lxc_console.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE @@ -38,11 +38,13 @@ #include #include +#include "../lxc/lxccontainer.h" #include "error.h" #include "lxc.h" #include "log.h" #include "mainloop.h" #include "arguments.h" +#include "commands.h" lxc_log_define(lxc_console_ui, lxc_console); @@ 
-86,176 +88,34 @@ Options :\n\ .escape = 1, }; -static int master = -1; - -static void winsz(void) -{ - struct winsize wsz; - if (ioctl(0, TIOCGWINSZ, &wsz) == 0) - ioctl(master, TIOCSWINSZ, &wsz); -} - -static void sigwinch(int sig) -{ - winsz(); -} - -static int setup_tios(int fd, struct termios *newtios, struct termios *oldtios) -{ - if (!isatty(fd)) { - ERROR("'%d' is not a tty", fd); - return -1; - } - - /* Get current termios */ - if (tcgetattr(0, oldtios)) { - SYSERROR("failed to get current terminal settings"); - return -1; - } - - *newtios = *oldtios; - - /* Remove the echo characters and signal reception, the echo - * will be done below with master proxying */ - newtios->c_iflag &= ~IGNBRK; - newtios->c_iflag &= BRKINT; - newtios->c_lflag &= ~(ECHO|ICANON|ISIG); - newtios->c_cc[VMIN] = 1; - newtios->c_cc[VTIME] = 0; - - /* Set new attributes */ - if (tcsetattr(0, TCSAFLUSH, newtios)) { - ERROR("failed to set new terminal settings"); - return -1; - } - - return 0; -} - -static int stdin_handler(int fd, void *data, struct lxc_epoll_descr *descr) -{ - static int wait4q = 0; - int *peer = (int *)data; - char c; - - if (read(0, &c, 1) < 0) { - SYSERROR("failed to read"); - return 1; - } - - /* we want to exit the console with Ctrl+a q */ - if (c == my_args.escape && !wait4q) { - wait4q = !wait4q; - return 0; - } - - if (c == 'q' && wait4q) - return 1; - - wait4q = 0; - if (write(*peer, &c, 1) < 0) { - SYSERROR("failed to write"); - return 1; - } - - return 0; -} - -static int master_handler(int fd, void *data, struct lxc_epoll_descr *descr) -{ - char buf[1024]; - int *peer = (int *)data; - int r; - - r = read(fd, buf, sizeof(buf)); - if (r < 0) { - SYSERROR("failed to read"); - return 1; - } - r = write(*peer, buf, r); - - return 0; -} - int main(int argc, char *argv[]) { - int err, std_in = 1; - struct lxc_epoll_descr descr; - struct termios newtios, oldtios; + int ret; + struct lxc_container *c; - err = lxc_arguments_parse(&my_args, argc, argv); - if (err) - 
return -1; + ret = lxc_arguments_parse(&my_args, argc, argv); + if (ret) + return EXIT_FAILURE; - err = lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet); - if (err) - return -1; + ret = lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, + my_args.progname, my_args.quiet, my_args.lxcpath[0]); + if (ret) + return EXIT_FAILURE; - err = setup_tios(0, &newtios, &oldtios); - if (err) { - ERROR("failed to setup tios"); - return -1; + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) { + fprintf(stderr, "System error loading container\n"); + exit(EXIT_FAILURE); } - err = lxc_console(my_args.name, my_args.ttynum, &master, my_args.lxcpath); - if (err) - goto out; - - fprintf(stderr, "\n\ -Type to exit the console, \ - to enter Ctrl+%1$c itself\n", - 'a' + my_args.escape - 1); - - err = setsid(); - if (err) - INFO("already group leader"); - - if (signal(SIGWINCH, sigwinch) == SIG_ERR) { - SYSERROR("failed to set SIGWINCH handler"); - err = -1; - goto out; + if (!c->is_running(c)) { + fprintf(stderr, "%s is not running\n", my_args.name); + exit(EXIT_FAILURE); } - winsz(); - - err = lxc_mainloop_open(&descr); - if (err) { - ERROR("failed to create mainloop"); - goto out; + ret = c->console(c, my_args.ttynum, 0, 1, 2, my_args.escape); + if (ret < 0) { + exit(EXIT_FAILURE); } - - err = lxc_mainloop_add_handler(&descr, 0, stdin_handler, &master); - if (err) { - ERROR("failed to add handler for the stdin"); - goto out_mainloop_open; - } - - err = lxc_mainloop_add_handler(&descr, master, master_handler, &std_in); - if (err) { - ERROR("failed to add handler for the master"); - goto out_mainloop_open; - } - - err = lxc_mainloop(&descr); - if (err) { - ERROR("mainloop returned an error"); - goto out_mainloop_open; - } - - err = 0; - -out_mainloop_open: - lxc_mainloop_close(&descr); - -out: - /* Restore previous terminal parameter */ - tcsetattr(0, TCSAFLUSH, &oldtios); - - /* Return to line it is */ - 
printf("\n"); - - close(master); - - return err; + return EXIT_SUCCESS; } diff --git a/src/lxc/lxc_create.c b/src/lxc/lxc_create.c new file mode 100644 index 000000000..ab5886bd9 --- /dev/null +++ b/src/lxc/lxc_create.c @@ -0,0 +1,246 @@ +/* + * + * Copyright © 2013 Serge Hallyn . + * Copyright © 2013 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "../lxc/lxccontainer.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "arguments.h" +#include "utils.h" + +lxc_log_define(lxc_create, lxc); + +/* we pass fssize in bytes */ +static unsigned long get_fssize(char *s) +{ + unsigned long ret; + char *end; + + ret = strtoul(s, &end, 0); + if (end == s) + return 0; + while (isblank(*end)) + end++; + if (!(*end)) + return ret; + if (*end == 'g' || *end == 'G') + ret *= 1000000000; + else if (*end == 'm' || *end == 'M') + ret *= 1000000; + else if (*end == 'k' || *end == 'K') + ret *= 1000; + return ret; +} + +static int my_parser(struct lxc_arguments* args, int c, char* arg) +{ + switch (c) { + case 'B': args->bdevtype = arg; break; + case 'f': args->configfile = arg; break; + case 't': args->template = arg; break; + case '0': args->lvname = arg; break; + case '1': args->vgname = arg; break; + case '2': args->fstype = arg; break; + case '3': args->fssize = get_fssize(arg); break; + case 
'4': args->zfsroot = arg; break; + case '5': args->dir = arg; break; + } + return 0; +} + +static const struct option my_longopts[] = { + {"bdev", required_argument, 0, 'B'}, + {"config", required_argument, 0, 'f'}, + {"template", required_argument, 0, 't'}, + {"lvname", required_argument, 0, '0'}, + {"vgname", required_argument, 0, '1'}, + {"fstype", required_argument, 0, '2'}, + {"fssize", required_argument, 0, '3'}, + {"zfsroot", required_argument, 0, '4'}, + {"dir", required_argument, 0, '5'}, + LXC_COMMON_OPTIONS +}; + +static void create_helpfn(const struct lxc_arguments *args) { + char *argv[3], *path; + size_t len; + int ret; + pid_t pid; + + if (!args->template || (pid = fork()) < 0) + return; + if (pid) { + wait_for_pid(pid); /* parent: let the child print the template's help, then return instead of exec'ing it a second time */ + return; + } + len = strlen(LXCTEMPLATEDIR) + strlen(args->template) + strlen("/lxc-") + 1; + path = alloca(len); + ret = snprintf(path, len, "%s/lxc-%s", LXCTEMPLATEDIR, args->template); + if (ret < 0 || ret >= len) + return; + + argv[0] = path; + argv[1] = "-h"; + argv[2] = NULL; + execv(path, argv); + ERROR("Error executing %s -h", path); + exit(1); +} + +static struct lxc_arguments my_args = { + .progname = "lxc-create", + .helpfn = create_helpfn, + .help = "\ +--name=NAME [-w] [-r] [-t timeout] [-P lxcpath]\n\ +\n\ +lxc-create creates a container\n\ +\n\ +Options :\n\ + -n, --name=NAME NAME for name of the container\n\ + -f, --config=file initial configuration file\n\ + -t, --template=t template to use to setup container\n\ + -B, --bdev=BDEV backing store type to use\n\ + --lxcpath=PATH place container under PATH\n\ + --lvname=LVNAME Use LVM lv name LVNAME\n\ + (Default: container name)\n\ + --vgname=VG Use LVM vg called VG\n\ + (Default: lxc))\n\ + --fstype=TYPE Create fstype TYPE\n\ + (Default: ext3))\n\ + --fssize=SIZE Create filesystem of size SIZE\n\ + (Default: 1G))\n\ + --dir=DIR Place rootfs directory under DIR\n\ + --zfsroot=PATH Create zfs under given zfsroot\n\ + (Default: tank/lxc))\n", + .options = my_longopts, +
.parser = my_parser, + .checker = NULL, +}; + +bool validate_bdev_args(struct lxc_arguments *a) +{ + if (a->fstype || a->fssize) { + if (strcmp(a->bdevtype, "lvm") != 0 && + strcmp(a->bdevtype, "loop") != 0) { + fprintf(stderr, "filesystem type and size are only valid with block devices\n"); + return false; + } + } + if (strcmp(a->bdevtype, "lvm") != 0) { + if (a->lvname || a->vgname) { + fprintf(stderr, "--lvname and --vgname are only valid with -B lvm\n"); + return false; + } + } + if (strcmp(a->bdevtype, "zfs") != 0) { + if (a->zfsroot) { + fprintf(stderr, "zfsroot is only valid with -B zfs\n"); + return false; + } + } + return true; +} + +int main(int argc, char *argv[]) +{ + struct lxc_container *c; + struct bdev_specs spec; + int flags = 0; + + if (lxc_arguments_parse(&my_args, argc, argv)) + exit(1); + + if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, + my_args.progname, my_args.quiet, my_args.lxcpath[0])) + exit(1); + + memset(&spec, 0, sizeof(spec)); + if (!my_args.bdevtype) + my_args.bdevtype = "_unset"; + if (!validate_bdev_args(&my_args)) + exit(1); + + if (geteuid()) { + if (access(my_args.lxcpath[0], O_RDWR) < 0) { + fprintf(stderr, "You lack access to %s\n", my_args.lxcpath[0]); + exit(1); + } + if (strcmp(my_args.bdevtype, "dir") && strcmp(my_args.bdevtype, "_unset")) { + fprintf(stderr, "Unprivileged users can only create directory backed containers\n"); + exit(1); + } + } + + + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) { + fprintf(stderr, "System error loading container\n"); + exit(1); + } + if (c->is_defined(c)) { + fprintf(stderr, "Container already exists\n"); + exit(1); + } + if (my_args.configfile) + c->load_config(c, my_args.configfile); + else + c->load_config(c, LXC_DEFAULT_CONFIG); + + if (strcmp(my_args.bdevtype, "zfs") == 0) { + if (my_args.zfsroot) + spec.u.zfs.zfsroot = my_args.zfsroot; + } else if (strcmp(my_args.bdevtype, "lvm") == 0) { + if (my_args.lvname) + spec.u.lvm.lv = 
my_args.lvname; + if (my_args.vgname) + spec.u.lvm.vg = my_args.vgname; + if (my_args.fstype) + spec.u.lvm.fstype = my_args.fstype; + if (my_args.fssize) + spec.u.lvm.fssize = my_args.fssize; + } else if (strcmp(my_args.bdevtype, "loop") == 0) { + if (my_args.fstype) + spec.u.lvm.fstype = my_args.fstype; + if (my_args.fssize) + spec.u.lvm.fssize = my_args.fssize; + } else if (my_args.dir) { + ERROR("--dir is not yet supported"); + exit(1); + } + + if (strcmp(my_args.bdevtype, "_unset") == 0) + my_args.bdevtype = NULL; + if (my_args.quiet) + flags = LXC_CREATE_QUIET; + if (!c->create(c, my_args.template, my_args.bdevtype, &spec, flags, &argv[optind])) { + ERROR("Error creating container %s", c->name); + lxc_container_put(c); + exit(1); + } + INFO("container %s created", c->name); + exit(0); +} diff --git a/src/lxc/lxc_destroy.c b/src/lxc/lxc_destroy.c new file mode 100644 index 000000000..0724eb808 --- /dev/null +++ b/src/lxc/lxc_destroy.c @@ -0,0 +1,103 @@ +/* + * + * Copyright © 2013 Serge Hallyn . + * Copyright © 2013 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include "../lxc/lxccontainer.h" + +#include +#include +#include +#include + +#include +#include + +#include "arguments.h" +#include "utils.h" + +lxc_log_define(lxc_destroy, lxc); + +static int my_parser(struct lxc_arguments* args, int c, char* arg) +{ + switch (c) { + case 'f': args->force = 1; break; + } + return 0; +} + +static const struct option my_longopts[] = { + {"force", no_argument, 0, 'f'}, + LXC_COMMON_OPTIONS +}; + +static struct lxc_arguments my_args = { + .progname = "lxc-destroy", + .help = "\ +--name=NAME [-f] [-P lxcpath]\n\ +\n\ +lxc-destroy destroys a container with the identifier NAME\n\ +\n\ +Options :\n\ + -n, --name=NAME NAME for name of the container\n\ + -f, --force wait for the container to shut down\n", + .options = my_longopts, + .parser = my_parser, + .checker = NULL, +}; + +int main(int argc, char *argv[]) +{ + struct lxc_container *c; + + /* this is a short term test. We'll probably want to check for + * write access to lxcpath instead */ + if (geteuid()) { + fprintf(stderr, "%s must be run as root\n", argv[0]); + exit(1); + } + + if (lxc_arguments_parse(&my_args, argc, argv)) + exit(1); + + if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, + my_args.progname, my_args.quiet, my_args.lxcpath[0])) + exit(1); + + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) { + fprintf(stderr, "System error loading container\n"); + exit(1); + } + + if (!c->is_defined(c)) { + fprintf(stderr, "Container is not defined\n"); + lxc_container_put(c); + exit(1); + } + + if (c->is_running(c)) { + if (!my_args.force) { + fprintf(stderr, "%s is running\n", my_args.name); + lxc_container_put(c); + exit(1); + } + c->stop(c); + } + + exit(c->destroy(c) ? 
0 : 1); +} diff --git a/src/lxc/lxc_execute.c b/src/lxc/lxc_execute.c index 41e29aa9b..6a54bf63a 100644 --- a/src/lxc/lxc_execute.c +++ b/src/lxc/lxc_execute.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -101,7 +101,7 @@ int main(int argc, char *argv[]) return -1; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) + my_args.progname, my_args.quiet, my_args.lxcpath[0])) return -1; /* rcfile is specified in the cli option */ @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) else { int rc; - rc = asprintf(&rcfile, "%s/%s/config", my_args.lxcpath, my_args.name); + rc = asprintf(&rcfile, "%s/%s/config", my_args.lxcpath[0], my_args.name); if (rc == -1) { SYSERROR("failed to allocate memory"); return -1; @@ -137,5 +137,5 @@ int main(int argc, char *argv[]) if (lxc_config_define_load(&defines, conf)) return -1; - return lxc_execute(my_args.name, my_args.argv, my_args.quiet, conf, my_args.lxcpath); + return lxc_execute(my_args.name, my_args.argv, my_args.quiet, conf, my_args.lxcpath[0]); } diff --git a/src/lxc/lxc_freeze.c b/src/lxc/lxc_freeze.c index b52ab5bf1..39483a637 100644 --- a/src/lxc/lxc_freeze.c +++ b/src/lxc/lxc_freeze.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -28,9 +28,12 @@ #include #include +#include #include "arguments.h" +lxc_log_define(lxc_freeze_ui, lxc_cgroup); + static const struct option my_longopts[] = 
{ LXC_COMMON_OPTIONS }; @@ -51,13 +54,28 @@ Options :\n\ int main(int argc, char *argv[]) { + struct lxc_container *c; + if (lxc_arguments_parse(&my_args, argc, argv)) - return -1; + exit(1); if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) - return -1; + my_args.progname, my_args.quiet, my_args.lxcpath[0])) + exit(1); - return lxc_freeze(my_args.name, my_args.lxcpath); + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) { + ERROR("No such container: %s:%s", my_args.lxcpath[0], my_args.name); + exit(1); + } + + if (!c->freeze(c)) { + ERROR("Failed to freeze %s:%s", my_args.lxcpath[0], my_args.name); + lxc_container_put(c); + exit(1); + } + + lxc_container_put(c); + + return 0; } - diff --git a/src/lxc/lxc_info.c b/src/lxc/lxc_info.c index 3fcead583..e61ae63b1 100644 --- a/src/lxc/lxc_info.c +++ b/src/lxc/lxc_info.c @@ -18,10 +18,11 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include +#include #include #include #include @@ -35,10 +36,17 @@ static bool state; static bool pid; static char *test_state = NULL; +static char **key = NULL; +static int keys = 0; static int my_parser(struct lxc_arguments* args, int c, char* arg) { switch (c) { + case 'c': + key = realloc(key, (keys+1) * sizeof(key[0])); /* room for keys+1 pointers; without the parentheses this was keys + sizeof(...) bytes */ + key[keys] = arg; + keys++; + break; case 's': state = true; break; case 'p': pid = true; break; case 't': test_state = arg; break; @@ -47,6 +55,7 @@ static int my_parser(struct lxc_arguments* args, int c, char* arg) } static const struct option my_longopts[] = { + {"config", required_argument, 0, 'c'}, {"state", no_argument, 0, 's'}, {"pid", no_argument, 0, 'p'}, {"state-is", required_argument, 0, 't'}, @@ -62,8 +71,9 @@ lxc-info display
some information about a container with the identifier NAME\n\ \n\ Options :\n\ -n, --name=NAME NAME for name of the container\n\ - -s, --state shows the state of the container\n\ + -c, --config=KEY show configuration variable KEY from running container\n\ -p, --pid shows the process id of the init container\n\ + -s, --state shows the state of the container\n\ -t, --state-is=STATE test if current state is STATE\n\ returns success if it matches, false otherwise\n", .options = my_longopts, @@ -73,21 +83,21 @@ Options :\n\ int main(int argc, char *argv[]) { - int ret; + int ret,i; ret = lxc_arguments_parse(&my_args, argc, argv); if (ret) return 1; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) + my_args.progname, my_args.quiet, my_args.lxcpath[0])) return 1; - if (!state && !pid) + if (!state && !pid && keys <= 0) state = pid = true; if (state || test_state) { - ret = lxc_getstate(my_args.name, my_args.lxcpath); + ret = lxc_getstate(my_args.name, my_args.lxcpath[0]); if (ret < 0) return 1; if (test_state) @@ -96,8 +106,25 @@ int main(int argc, char *argv[]) printf("state:%10s\n", lxc_state2str(ret)); } - if (pid) - printf("pid:%10d\n", get_init_pid(my_args.name, my_args.lxcpath)); + if (pid) { + pid_t initpid; + + initpid = lxc_cmd_get_init_pid(my_args.name, my_args.lxcpath[0]); + if (initpid >= 0) + printf("pid:%10d\n", initpid); + } + + for(i = 0; i < keys; i++) { + char *val; + + val = lxc_cmd_get_config_item(my_args.name, key[i], my_args.lxcpath[0]); + if (val) { + printf("%s = %s\n", key[i], val); + free(val); + } else { + fprintf(stderr, "%s unset or invalid\n", key[i]); + } + } return 0; } diff --git a/src/lxc/lxc_init.c b/src/lxc/lxc_init.c index 5693da58f..a806038f7 100644 --- a/src/lxc/lxc_init.c +++ b/src/lxc/lxc_init.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 
Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include @@ -43,52 +43,81 @@ lxc_log_define(lxc_init, lxc); static int quiet; static struct option options[] = { - { "quiet", no_argument, &quiet, 1 }, + { "name", required_argument, NULL, 'n' }, + { "logpriority", required_argument, NULL, 'l' }, + { "quiet", no_argument, NULL, 'q' }, + { "lxcpath", required_argument, NULL, 'P' }, { 0, 0, 0, 0 }, }; -static int was_interrupted = 0; +static int was_interrupted = 0; + +static void interrupt_handler(int sig) +{ + if (!was_interrupted) + was_interrupted = sig; +} + +static void usage(void) { + fprintf(stderr, "Usage: lxc-init [OPTION]...\n\n" + "Common options :\n" + " -n, --name=NAME NAME for name of the container\n" + " -l, --logpriority=LEVEL Set log priority to LEVEL\n" + " -q, --quiet Don't produce any output\n" + " -P, --lxcpath=PATH Use specified container path\n" + " -?, --help Give this help list\n" + "\n" + "Mandatory or optional arguments to long options are also mandatory or optional\n" + "for any corresponding short options.\n" + "\n" + "NOTE: lxc-init is intended for use by lxc internally\n" + " and does not need to be run by hand\n\n"); +} int main(int argc, char *argv[]) { - - void interrupt_handler(int sig) - { - if (!was_interrupted) - was_interrupted = sig; - } - pid_t pid; - int nbargs = 0; - int err = -1; + int err; char **aargv; sigset_t mask, omask; - int i, shutdown = 0; + int i, have_status = 0, shutdown = 0; + int opt; + char *lxcpath = NULL, *name = NULL, *logpriority = NULL; - while (1) { - int ret = getopt_long_only(argc, argv, "", options, NULL); - if (ret == -1) { + while ((opt = getopt_long(argc, argv, "n:l:qP:", options, NULL)) != -1) { + switch(opt) { + case 'n': + name = optarg; break; + case 'l': + logpriority = optarg; + break; + case 'q': + quiet = 1; + break; + case 'P': + lxcpath = optarg; + break; + default: /* '?' 
*/ + usage(); + exit(EXIT_FAILURE); } - if (ret == '?') - exit(err); - - nbargs++; } if (lxc_caps_init()) - exit(err); + exit(EXIT_FAILURE); - if (lxc_log_init(NULL, "none", 0, basename(argv[0]), quiet)) - exit(err); + err = lxc_log_init(name, name ? NULL : "none", logpriority, + basename(argv[0]), quiet, lxcpath); + if (err < 0) + exit(EXIT_FAILURE); if (!argv[optind]) { ERROR("missing command to launch"); - exit(err); + exit(EXIT_FAILURE); } aargv = &argv[optind]; - argc -= nbargs; /* * mask all the signals so we are safe to install a @@ -126,15 +155,15 @@ int main(int argc, char *argv[]) } if (lxc_setup_fs()) - exit(err); + exit(EXIT_FAILURE); if (lxc_caps_reset()) - exit(err); + exit(EXIT_FAILURE); pid = fork(); if (pid < 0) - exit(err); + exit(EXIT_FAILURE); if (!pid) { @@ -159,10 +188,9 @@ int main(int argc, char *argv[]) close(fileno(stdin)); close(fileno(stdout)); - err = 0; + err = EXIT_SUCCESS; for (;;) { int status; - int orphan = 0; pid_t waited_pid; switch (was_interrupted) { @@ -209,10 +237,10 @@ int main(int argc, char *argv[]) * (not wrapped pid) and continue to wait for * the end of the orphan group. 
*/ - if ((waited_pid != pid) || (orphan ==1)) - continue; - orphan = 1; - err = lxc_error_set_and_log(waited_pid, status); + if (waited_pid == pid && !have_status) { + err = lxc_error_set_and_log(waited_pid, status); + have_status = 1; + } } out: return err; diff --git a/src/lxc/lxc_kill.c b/src/lxc/lxc_kill.c index ba00aa8c1..3ed6e4e81 100644 --- a/src/lxc/lxc_kill.c +++ b/src/lxc/lxc_kill.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include @@ -62,7 +62,7 @@ int main(int argc, char *argv[], char *envp[]) return ret; ret = lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet); + my_args.progname, my_args.quiet, my_args.lxcpath[0]); if (ret) return ret; @@ -76,7 +76,7 @@ int main(int argc, char *argv[], char *envp[]) } else sig=SIGKILL; - pid = get_init_pid(my_args.name, my_args.lxcpath); + pid = lxc_cmd_get_init_pid(my_args.name, my_args.lxcpath[0]); if (pid < 0) { ERROR("failed to get the init pid"); return -1; diff --git a/src/lxc/lxc_monitor.c b/src/lxc/lxc_monitor.c index 053037390..b52fa2e1a 100644 --- a/src/lxc/lxc_monitor.c +++ b/src/lxc/lxc_monitor.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -52,6 +52,7 @@ Options :\n\ .options = my_longopts, .parser = NULL, .checker = NULL, + .lxcpath_additional = -1, }; int main(int argc, char *argv[]) @@ -59,14 +60,14 @@ int main(int argc, char *argv[]) char *regexp; struct lxc_msg msg; regex_t preg; - int fd; - 
int len, rc; + fd_set rfds, rfds_save; + int len, rc, i, nfds = -1; if (lxc_arguments_parse(&my_args, argc, argv)) return -1; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) + my_args.progname, my_args.quiet, my_args.lxcpath[0])) return -1; len = strlen(my_args.name) + 3; @@ -87,16 +88,36 @@ int main(int argc, char *argv[]) return -1; } - fd = lxc_monitor_open(my_args.lxcpath); - if (fd < 0) - return -1; + if (my_args.lxcpath_cnt > FD_SETSIZE) { + ERROR("too many paths requested, only the first %d will be monitored", FD_SETSIZE); + my_args.lxcpath_cnt = FD_SETSIZE; + } + + FD_ZERO(&rfds); + for (i = 0; i < my_args.lxcpath_cnt; i++) { + int fd; + + lxc_monitord_spawn(my_args.lxcpath[i]); + + fd = lxc_monitor_open(my_args.lxcpath[i]); + if (fd < 0) + return -1; + FD_SET(fd, &rfds); + if (fd > nfds) + nfds = fd; + } + memcpy(&rfds_save, &rfds, sizeof(rfds_save)); + nfds++; setlinebuf(stdout); for (;;) { - if (lxc_monitor_read(fd, &msg) < 0) + memcpy(&rfds, &rfds_save, sizeof(rfds)); + + if (lxc_monitor_read_fdset(&rfds, nfds, &msg, -1) < 0) return -1; + msg.name[sizeof(msg.name)-1] = '\0'; if (regexec(&preg, msg.name, 0, NULL, 0)) continue; @@ -115,4 +136,3 @@ int main(int argc, char *argv[]) return 0; } - diff --git a/src/lxc/lxc_monitord.c b/src/lxc/lxc_monitord.c new file mode 100644 index 000000000..c5e156e10 --- /dev/null +++ b/src/lxc/lxc_monitord.c @@ -0,0 +1,409 @@ +/* + * lxc: linux Container library + * + * Copyright © 2012 Oracle. + * + * Authors: + * Dwight Engen + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define CLIENTFDS_CHUNK 64 + +lxc_log_define(lxc_monitord, lxc); + +static struct lxc_monitor mon; + +static void lxc_monitord_cleanup(void); + +/* + * Defines the structure to store the monitor information + * @lxcpath : the path being monitored + * @fifofd : the file descriptor for publishers (containers) to write state + * @listenfd : the file descriptor for subscribers (lxc-monitors) to connect + * @clientfds : accepted client file descriptors + * @clientfds_size : number of file descriptors clientfds can hold + * @clientfds_cnt : the count of valid fds in clientfds + * @descr : the lxc_mainloop state + */ +struct lxc_monitor { + const char *lxcpath; + int fifofd; + int listenfd; + int *clientfds; + int clientfds_size; + int clientfds_cnt; + struct lxc_epoll_descr descr; +}; + +static int lxc_monitord_fifo_create(struct lxc_monitor *mon) +{ + char fifo_path[PATH_MAX]; + int ret; + + ret = snprintf(fifo_path, sizeof(fifo_path), "%s/monitor-fifo", mon->lxcpath); + if (ret < 0 || ret >= sizeof(fifo_path)) { + ERROR("lxcpath too long to monitor fifo"); + return -1; + } + + ret = mknod(fifo_path, S_IFIFO|S_IRUSR|S_IWUSR, 0); + if (ret < 0) { + INFO("monitor fifo %s exists, already running?", fifo_path); + return -1; + } + + mon->fifofd = open(fifo_path, O_RDWR); + if 
(mon->fifofd < 0) { + unlink(fifo_path); + ERROR("failed to open monitor fifo"); + return -1; + } + return 0; +} + +static int lxc_monitord_fifo_delete(struct lxc_monitor *mon) +{ + char fifo_path[PATH_MAX]; + int ret; + + ret = snprintf(fifo_path, sizeof(fifo_path), "%s/monitor-fifo", mon->lxcpath); + if (ret < 0 || ret >= sizeof(fifo_path)) { + ERROR("lxcpath too long to monitor fifo"); + return -1; + } + unlink(fifo_path); + return 0; +} + +static void lxc_monitord_sockfd_remove(struct lxc_monitor *mon, int fd) { + int i; + + if (lxc_mainloop_del_handler(&mon->descr, fd)) + CRIT("fd:%d not found in mainloop", fd); + close(fd); + + for (i = 0; i < mon->clientfds_cnt; i++) { + if (mon->clientfds[i] == fd) + break; + } + if (i >= mon->clientfds_cnt) { + CRIT("fd:%d not found in clients array", fd); + lxc_monitord_cleanup(); + exit(EXIT_FAILURE); + } + + memmove(&mon->clientfds[i], &mon->clientfds[i+1], + (mon->clientfds_cnt - i - 1) * sizeof(mon->clientfds[0])); + mon->clientfds_cnt--; +} + +static int lxc_monitord_sock_handler(int fd, void *data, + struct lxc_epoll_descr *descr) +{ + struct lxc_monitor *mon = data; + + lxc_monitord_sockfd_remove(mon, fd); + return 0; +} + +static int lxc_monitord_sock_accept(int fd, void *data, + struct lxc_epoll_descr *descr) +{ + int ret,clientfd; + struct lxc_monitor *mon = data; + struct ucred cred; + socklen_t credsz = sizeof(cred); + + ret = -1; + clientfd = accept(fd, NULL, 0); + if (clientfd < 0) { + SYSERROR("failed to accept connection"); + goto out; + } + + if (fcntl(clientfd, F_SETFD, FD_CLOEXEC)) { + SYSERROR("failed to set close-on-exec on incoming connection"); + goto err1; + } + + if (getsockopt(clientfd, SOL_SOCKET, SO_PEERCRED, &cred, &credsz)) + { + ERROR("failed to get credentials on socket"); + goto err1; + } + if (cred.uid && cred.uid != geteuid()) { + WARN("monitor denied for uid:%d", cred.uid); + ret = -EACCES; + goto err1; + } + + if (mon->clientfds_cnt + 1 > mon->clientfds_size) { + int *clientfds; + 
DEBUG("realloc space for %d clientfds", + mon->clientfds_size + CLIENTFDS_CHUNK); + clientfds = realloc(mon->clientfds, + (mon->clientfds_size + CLIENTFDS_CHUNK) * + sizeof(mon->clientfds[0])); + if (clientfds == NULL) { + ERROR("failed to realloc memory for clientfds"); + goto err1; + } + mon->clientfds = clientfds; + mon->clientfds_size += CLIENTFDS_CHUNK; + } + + ret = lxc_mainloop_add_handler(&mon->descr, clientfd, + lxc_monitord_sock_handler, mon); + if (ret) { + ERROR("failed to add socket handler"); + goto err1; + } + + mon->clientfds[mon->clientfds_cnt++] = clientfd; + INFO("accepted client fd:%d clients:%d", clientfd, mon->clientfds_cnt); + goto out; + +err1: + close(clientfd); +out: + return ret; +} + +static int lxc_monitord_sock_create(struct lxc_monitor *mon) +{ + struct sockaddr_un addr; + int fd; + + if (lxc_monitor_sock_name(mon->lxcpath, &addr) < 0) + return -1; + + fd = lxc_af_unix_open(addr.sun_path, SOCK_STREAM, O_TRUNC); + if (fd < 0) { + ERROR("failed to open unix socket : %s", strerror(errno)); + return -1; + } + + mon->listenfd = fd; + return 0; +} + +static int lxc_monitord_sock_delete(struct lxc_monitor *mon) +{ + struct sockaddr_un addr; + + if (lxc_monitor_sock_name(mon->lxcpath, &addr) < 0) + return -1; + if (addr.sun_path[0]) + unlink(addr.sun_path); + return 0; +} + +static int lxc_monitord_create(struct lxc_monitor *mon) +{ + int ret; + + ret = lxc_monitord_fifo_create(mon); + if (ret < 0) + return ret; + + ret = lxc_monitord_sock_create(mon); + return ret; +} + +static void lxc_monitord_delete(struct lxc_monitor *mon) +{ + int i; + + lxc_mainloop_del_handler(&mon->descr, mon->listenfd); + close(mon->listenfd); + lxc_monitord_sock_delete(mon); + + lxc_mainloop_del_handler(&mon->descr, mon->fifofd); + close(mon->fifofd); + lxc_monitord_fifo_delete(mon); + + for (i = 0; i < mon->clientfds_cnt; i++) { + lxc_mainloop_del_handler(&mon->descr, mon->clientfds[i]); + close(mon->clientfds[i]); + } + mon->clientfds_cnt = 0; +} + +static int 
lxc_monitord_fifo_handler(int fd, void *data, + struct lxc_epoll_descr *descr) +{ + int ret,i; + struct lxc_msg msglxc; + struct lxc_monitor *mon = data; + + ret = read(fd, &msglxc, sizeof(msglxc)); + if (ret != sizeof(msglxc)) { + SYSERROR("read fifo failed : %s", strerror(errno)); + return 1; + } + + for (i = 0; i < mon->clientfds_cnt; i++) { + DEBUG("writing client fd:%d", mon->clientfds[i]); + ret = write(mon->clientfds[i], &msglxc, sizeof(msglxc)); + if (ret < 0) { + ERROR("write failed to client sock:%d %d %s", + mon->clientfds[i], errno, strerror(errno)); + } + } + + return 0; +} + +static int lxc_monitord_mainloop_add(struct lxc_monitor *mon) +{ + int ret; + + ret = lxc_mainloop_add_handler(&mon->descr, mon->fifofd, + lxc_monitord_fifo_handler, mon); + if (ret < 0) { + ERROR("failed to add to mainloop monitor handler for fifo"); + return -1; + } + + ret = lxc_mainloop_add_handler(&mon->descr, mon->listenfd, + lxc_monitord_sock_accept, mon); + if (ret < 0) { + ERROR("failed to add to mainloop monitor handler for listen socket"); + return -1; + } + + return 0; +} + +static void lxc_monitord_cleanup(void) +{ + lxc_monitord_delete(&mon); +} + +static void lxc_monitord_sig_handler(int sig) +{ + INFO("caught signal %d", sig); + lxc_monitord_cleanup(); + exit(EXIT_SUCCESS); +} + +int main(int argc, char *argv[]) +{ + int ret,pipefd; + char *lxcpath = argv[1]; + char logpath[PATH_MAX]; + sigset_t mask; + + if (argc != 3) { + fprintf(stderr, + "Usage: lxc-monitord lxcpath sync-pipe-fd\n\n" + "NOTE: lxc-monitord is intended for use by lxc internally\n" + " and does not need to be run by hand\n\n"); + exit(EXIT_FAILURE); + } + + ret = snprintf(logpath, sizeof(logpath), "%s/lxc-monitord.log", + lxcpath); + if (ret < 0 || ret >= sizeof(logpath)) + return EXIT_FAILURE; + + ret = lxc_log_init(NULL, logpath, "NOTICE", "lxc-monitord", 0, lxcpath); + if (ret) + return ret; + + pipefd = atoi(argv[2]); + + if (sigfillset(&mask) || + sigdelset(&mask, SIGILL) || + 
sigdelset(&mask, SIGSEGV) || + sigdelset(&mask, SIGBUS) || + sigdelset(&mask, SIGTERM) || + sigprocmask(SIG_BLOCK, &mask, NULL)) { + SYSERROR("failed to set signal mask"); + return -1; + } + + signal(SIGILL, lxc_monitord_sig_handler); + signal(SIGSEGV, lxc_monitord_sig_handler); + signal(SIGBUS, lxc_monitord_sig_handler); + signal(SIGTERM, lxc_monitord_sig_handler); + + ret = EXIT_FAILURE; + memset(&mon, 0, sizeof(mon)); + mon.lxcpath = lxcpath; + if (lxc_mainloop_open(&mon.descr)) { + ERROR("failed to create mainloop"); + goto out; + } + + if (lxc_monitord_create(&mon)) { + goto out; + } + + /* sync with parent, we're ignoring the return from write + * because regardless if it works or not, the following + * close will sync us with the parent process. the + * if-empty-statement construct is to quiet the + * warn-unused-result warning. + */ + if (write(pipefd, "S", 1)) ; + close(pipefd); + + if (lxc_monitord_mainloop_add(&mon)) { + ERROR("failed to add mainloop handlers"); + goto out; + } + + NOTICE("monitoring lxcpath %s", mon.lxcpath); + for(;;) { + ret = lxc_mainloop(&mon.descr, 1000 * 30); + if (mon.clientfds_cnt <= 0) + { + NOTICE("no clients for 30 seconds, exiting"); + break; + } + } + + lxc_mainloop_close(&mon.descr); + lxc_monitord_cleanup(); + ret = EXIT_SUCCESS; + NOTICE("monitor exiting"); +out: + return ret; +} diff --git a/src/lxc/lxc_restart.c b/src/lxc/lxc_restart.c index 118d4b879..85afb1e67 100644 --- a/src/lxc/lxc_restart.c +++ b/src/lxc/lxc_restart.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -124,7 +124,7 @@ int main(int argc, char *argv[]) return -1; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, 
my_args.quiet)) + my_args.progname, my_args.quiet, my_args.lxcpath[0])) return -1; /* rcfile is specified in the cli option */ @@ -133,7 +133,7 @@ int main(int argc, char *argv[]) else { int rc; - rc = asprintf(&rcfile, "%s/%s/config", my_args.lxcpath, my_args.name); + rc = asprintf(&rcfile, "%s/%s/config", my_args.lxcpath[0], my_args.name); if (rc == -1) { SYSERROR("failed to allocate memory"); return -1; @@ -172,7 +172,7 @@ int main(int argc, char *argv[]) } } - ret = lxc_restart(my_args.name, sfd, conf, my_args.flags, my_args.lxcpath); + ret = lxc_restart(my_args.name, sfd, conf, my_args.flags, my_args.lxcpath[0]); if (my_args.statefile) close(sfd); diff --git a/src/lxc/lxc_start.c b/src/lxc/lxc_start.c index 957fdb0e7..dfc514e1d 100644 --- a/src/lxc/lxc_start.c +++ b/src/lxc/lxc_start.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -43,6 +43,7 @@ #include "log.h" #include "caps.h" #include "lxc.h" +#include "lxccontainer.h" #include "conf.h" #include "cgroup.h" #include "utils.h" @@ -62,11 +63,12 @@ static int ensure_path(char **confpath, const char *path) if (path) { if (access(path, W_OK)) { fd = creat(path, 0600); - if (fd < 0) { + if (fd < 0 && errno != EEXIST) { SYSERROR("failed to create '%s'", path); goto err; } - close(fd); + if (fd >= 0) + close(fd); } fullpath = realpath(path, NULL); @@ -150,6 +152,8 @@ int main(int argc, char *argv[]) '\0', }; FILE *pid_fp = NULL; + struct lxc_container *c; + char *anonpath; lxc_list_init(&defines); @@ -165,16 +169,35 @@ int main(int argc, char *argv[]) args = my_args.argv; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) + my_args.progname, my_args.quiet, 
my_args.lxcpath[0])) return err; + anonpath = alloca(strlen(LXCPATH) + 6); + sprintf(anonpath, "%s_anon", LXCPATH); + /* + * rcfile possibilities: + * 1. rcfile from random path specified in cli option + * 2. rcfile not specified, use $lxcpath/$lxcname/config + * 3. rcfile not specified and does not exist. + */ /* rcfile is specified in the cli option */ - if (my_args.rcfile) + if (my_args.rcfile) { rcfile = (char *)my_args.rcfile; - else { + c = lxc_container_new(my_args.name, anonpath); + if (!c) { + ERROR("Failed to create lxc_container"); + return err; + } + if (!c->load_config(c, rcfile)) { + ERROR("Failed to load rcfile"); + lxc_container_put(c); + return err; + } + } else { int rc; + const char *lxcpath = my_args.lxcpath[0]; - rc = asprintf(&rcfile, "%s/%s/config", my_args.lxcpath, my_args.name); + rc = asprintf(&rcfile, "%s/%s/config", lxcpath, my_args.name); if (rc == -1) { SYSERROR("failed to allocate memory"); return err; @@ -185,36 +208,39 @@ int main(int argc, char *argv[]) if (access(rcfile, F_OK)) { free(rcfile); rcfile = NULL; + lxcpath = anonpath; + } + c = lxc_container_new(my_args.name, lxcpath); + if (!c) { + ERROR("Failed to create lxc_container"); + return err; } } - conf = lxc_conf_init(); - if (!conf) { - ERROR("failed to initialize configuration"); - return err; - } - - if (rcfile && lxc_config_read(rcfile, conf)) { - ERROR("failed to read configuration file"); - return err; - } + /* + * We should use set_config_item() over &defines, which would handle + * unset c->lxc_conf for us and let us not use lxc_config_define_load() + */ + if (!c->lxc_conf) + c->lxc_conf = lxc_conf_init(); + conf = c->lxc_conf; if (lxc_config_define_load(&defines, conf)) - return err; + goto out; if (!rcfile && !strcmp("/sbin/init", args[0])) { - ERROR("no configuration file for '/sbin/init' (may crash the host)"); - return err; + ERROR("Executing '/sbin/init' with no configuration file may crash the host"); + goto out; } if (ensure_path(&conf->console.path, 
my_args.console) < 0) { ERROR("failed to ensure console path '%s'", my_args.console); - return err; + goto out; } if (ensure_path(&conf->console.log_path, my_args.console_log) < 0) { ERROR("failed to ensure console log '%s'", my_args.console_log); - return err; + goto out; } if (my_args.pidfile != NULL) { @@ -222,29 +248,18 @@ int main(int argc, char *argv[]) if (pid_fp == NULL) { SYSERROR("failed to create pidfile '%s' for '%s'", my_args.pidfile, my_args.name); - return err; + goto out; } } if (my_args.daemonize) { - /* do an early check for needed privs, since otherwise the - * user won't see the error */ - - if (!lxc_caps_check()) { - ERROR("Not running with sufficient privilege"); - return err; - } - - if (daemon(0, 0)) { - SYSERROR("failed to daemonize '%s'", my_args.name); - return err; - } + c->want_daemonize(c); } if (pid_fp != NULL) { if (fprintf(pid_fp, "%d\n", getpid()) < 0) { SYSERROR("failed to write '%s'", my_args.pidfile); - return err; + goto out; } fclose(pid_fp); } @@ -252,22 +267,13 @@ int main(int argc, char *argv[]) if (my_args.close_all_fds) conf->close_all_fds = 1; - err = lxc_start(my_args.name, args, conf, my_args.lxcpath); - - /* - * exec ourself, that requires to have all opened fd - * with the close-on-exec flag set - */ - if (conf->reboot) { - INFO("rebooting container"); - execvp(argv[0], argv); - SYSERROR("failed to exec"); - err = -1; - } + err = c->start(c, 0, args) ? 
0 : -1; if (my_args.pidfile) unlink(my_args.pidfile); +out: + lxc_container_put(c); return err; } diff --git a/src/lxc/lxc_stop.c b/src/lxc/lxc_stop.c index b4d9f23dd..77de7e5d1 100644 --- a/src/lxc/lxc_stop.c +++ b/src/lxc/lxc_stop.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -28,10 +28,29 @@ #include #include +#include #include "arguments.h" +#include "commands.h" #include "utils.h" +static int my_parser(struct lxc_arguments* args, int c, char* arg) +{ + switch (c) { + case 'r': args->reboot = 1; break; + case 'W': args->nowait = 1; break; + case 't': args->timeout = atoi(arg); break; + case 'k': args->hardstop = 1; break; + case 's': args->shutdown = 1; break; + } + return 0; +} + static const struct option my_longopts[] = { + {"reboot", no_argument, 0, 'r'}, + {"nowait", no_argument, 0, 'W'}, + {"timeout", required_argument, 0, 't'}, + {"kill", no_argument, 0, 'k'}, + {"shutdown", no_argument, 0, 's'}, LXC_COMMON_OPTIONS }; @@ -43,20 +62,114 @@ static struct lxc_arguments my_args = { lxc-stop stops a container with the identifier NAME\n\ \n\ Options :\n\ - -n, --name=NAME NAME for name of the container\n", + -n, --name=NAME NAME for name of the container\n\ + -r, --reboot reboot the container\n\ + -W, --nowait don't wait for shutdown or reboot to complete\n\ + -t, --timeout=T wait T seconds before hard-stopping\n\ + -k, --kill kill container rather than request clean shutdown\n\ + -s, --shutdown Only request clean shutdown, don't later force kill\n", .options = my_longopts, - .parser = NULL, + .parser = my_parser, .checker = NULL, + .timeout = 60, }; +/* returns -1 on failure, 0 on success */ +int do_reboot_and_check(struct lxc_arguments *a, struct lxc_container *c) +{ + 
int ret; + pid_t pid; + pid_t newpid; + int timeout = a->timeout; + + pid = c->init_pid(c); + if (pid == -1) + return -1; + if (!c->reboot(c)) + return -1; + if (a->nowait) + return 0; + if (timeout <= 0) + goto out; + + for (;;) { + /* can we use c-> wait for this, assuming it will + * re-enter RUNNING? For now just sleep */ + int elapsed_time, curtime = 0; + struct timeval tv; + + newpid = c->init_pid(c); + if (newpid != -1 && newpid != pid) + return 0; + + ret = gettimeofday(&tv, NULL); + if (ret) + break; + curtime = tv.tv_sec; + + sleep(1); + ret = gettimeofday(&tv, NULL); + if (ret) + break; + elapsed_time = tv.tv_sec - curtime; + if (timeout - elapsed_time <= 0) + break; + timeout -= elapsed_time; + } + +out: + newpid = c->init_pid(c); + if (newpid == -1 || newpid == pid) { + printf("Reboot did not complete before timeout\n"); + return -1; + } + return 0; +} + int main(int argc, char *argv[]) { + struct lxc_container *c; + bool s; + int ret = 1; + if (lxc_arguments_parse(&my_args, argc, argv)) - return -1; + return 1; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) - return -1; + my_args.progname, my_args.quiet, my_args.lxcpath[0])) + return 1; - return lxc_stop(my_args.name, my_args.lxcpath); + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) { + fprintf(stderr, "Error opening container\n"); + goto out; + } + + if (!c->is_running(c)) { + fprintf(stderr, "%s is not running\n", c->name); + ret = 2; + goto out; + } + + if (my_args.hardstop) { + ret = c->stop(c) ? 0 : 1; + goto out; + } + if (my_args.reboot) { + ret = do_reboot_and_check(&my_args, c); + goto out; + } + + s = c->shutdown(c, my_args.timeout); + if (!s) { + if (!my_args.shutdown) + ret = c->wait(c, "STOPPED", -1) ? 
0 : 1; + else + ret = 1; // fail + } else + ret = 0; + +out: + lxc_container_put(c); + return ret; } diff --git a/src/lxc/lxc_unfreeze.c b/src/lxc/lxc_unfreeze.c index 0bb5dc502..0130224a2 100644 --- a/src/lxc/lxc_unfreeze.c +++ b/src/lxc/lxc_unfreeze.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -27,9 +27,12 @@ #include #include +#include #include "arguments.h" +lxc_log_define(lxc_unfreeze_ui, lxc_cgroup); + static const struct option my_longopts[] = { LXC_COMMON_OPTIONS }; @@ -50,13 +53,28 @@ Options :\n\ int main(int argc, char *argv[]) { + struct lxc_container *c; + if (lxc_arguments_parse(&my_args, argc, argv)) - return -1; + exit(1); if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) - return -1; + my_args.progname, my_args.quiet, my_args.lxcpath[0])) + exit(1); - return lxc_unfreeze(my_args.name, my_args.lxcpath); + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) { + ERROR("No such container: %s:%s", my_args.lxcpath[0], my_args.name); + exit(1); + } + + if (!c->unfreeze(c)) { + ERROR("Failed to unfreeze %s:%s", my_args.lxcpath[0], my_args.name); + lxc_container_put(c); + exit(1); + } + + lxc_container_put(c); + + return 0; } - diff --git a/src/lxc/lxc_unshare.c b/src/lxc/lxc_unshare.c index 5ee66fda6..8ffdd212a 100644 --- a/src/lxc/lxc_unshare.c +++ b/src/lxc/lxc_unshare.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define 
_GNU_SOURCE #include diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c new file mode 100644 index 000000000..6c3a09e98 --- /dev/null +++ b/src/lxc/lxc_user_nic.c @@ -0,0 +1,782 @@ +/* + * + * Copyright © 2013 Serge Hallyn . + * Copyright © 2013 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "config.h" + +#ifndef HAVE_GETLINE +#ifdef HAVE_FGETLN +#include <../include/getline.h> +#endif +#endif + +#if ISTEST +#define CONF_FILE "/tmp/lxc-usernet" +#define DB_FILE "/tmp/nics" +#else +#define CONF_FILE LXC_USERNIC_CONF +#define DB_FILE LXC_USERNIC_DB +#endif + + +#include "nl.h" + +#ifndef IFLA_LINKMODE +# define IFLA_LINKMODE 17 +#endif + +#ifndef IFLA_LINKINFO +# define IFLA_LINKINFO 18 +#endif + +#ifndef IFLA_NET_NS_PID +# define IFLA_NET_NS_PID 19 +#endif + +#ifndef IFLA_INFO_KIND +# define IFLA_INFO_KIND 1 +#endif + +#ifndef IFLA_VLAN_ID +# define IFLA_VLAN_ID 1 +#endif + +#ifndef IFLA_INFO_DATA +# define IFLA_INFO_DATA 2 +#endif + +#ifndef VETH_INFO_PEER +# define VETH_INFO_PEER 1 +#endif + +#ifndef IFLA_MACVLAN_MODE +# define IFLA_MACVLAN_MODE 1 +#endif + +void usage(char *me, 
bool fail) +{ + fprintf(stderr, "Usage: %s pid type bridge\n", me); + exit(fail ? 1 : 0); +} + +int open_and_lock(char *path) +{ + int fd; + struct flock lk; + + fd = open(path, O_RDWR|O_CREAT, S_IWUSR | S_IRUSR); + if (fd < 0) { + perror("open"); + return(fd); + } + + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; + if (fcntl(fd, F_SETLKW, &lk) < 0) { + perror("fcntl lock"); + exit(1); + } + + return fd; +} + + +char *get_username(char **buf) +{ + struct passwd *pwd = getpwuid(getuid()); + + if (pwd == NULL) { + perror("getpwuid"); + return NULL; + } + + return pwd->pw_name; +} + +/* The configuration file consists of lines of the form: + * + * user type bridge nic-name count + * + * We simply count the number of lines in the file, making sure that + * every listed nic is still present. Any nics which have disappeared + * is removed when we count, in case the container died a harsh death + * without being able to clean up after itself. + */ +int get_alloted(char *me, char *intype, char *link) +{ + FILE *fin = fopen(CONF_FILE, "r"); + char *line = NULL; + char user[100], type[100], br[100]; + size_t len = 0; + int n = -1, ret; + + if (!fin) + return -1; + + while ((getline(&line, &len, fin)) != -1) { + ret = sscanf(line, "%99[^ \t] %99[^ \t] %99[^ \t] %d", user, type, br, &n); + + if (ret != 4) + continue; + if (strcmp(user, me) != 0) + continue; + if (strcmp(type, intype) != 0) + continue; + if (strcmp(link, br) != 0) + continue; + free(line); + return n; + } + fclose(fin); + if (line) + free(line); + return -1; +} + +char *get_eol(char *s) +{ + while (*s && *s != '\n') + s++; + return s; +} + +char *get_eow(char *s) +{ + while (*s && !isblank(*s) && *s != '\n') + s++; + return s; +} + +char *find_line(char *p, char *e, char *u, char *t, char *l) +{ + char *p1, *p2, *ret; + + while (p < e && (p1 = get_eol(p)) < e) { + ret = p; + if (*p == '#') + goto next; + while (isblank(*p)) p++; + p2 = get_eow(p); + if (!p2 || p2-p != 
strlen(u) || strncmp(p, u, strlen(u)) != 0) + goto next; + p = p2+1; + while (isblank(*p)) p++; + p2 = get_eow(p); + if (!p2 || p2-p != strlen(t) || strncmp(p, t, strlen(t)) != 0) + goto next; + p = p2+1; + while (isblank(*p)) p++; + p2 = get_eow(p); + if (!p2 || p2-p != strlen(l) || strncmp(p, l, strlen(l)) != 0) + goto next; + return ret; +next: + p = p1 + 1; + } + + return NULL; +} + +bool nic_exists(char *nic) +{ + char path[200]; + int ret; + struct stat sb; + +#if ISTEST + ret = snprintf(path, 200, "/tmp/lxcnettest/%s", nic); +#else + ret = snprintf(path, 200, "/sys/class/net/%s", nic); +#endif + if (ret < 0 || ret >= 200) + exit(1); + ret = stat(path, &sb); + if (ret != 0) + return false; + return true; +} + +#if ! ISTEST +struct link_req { + struct nlmsg nlmsg; + struct ifinfomsg ifinfomsg; +}; + +int lxc_veth_create(const char *name1, const char *name2) +{ + struct nl_handler nlh; + struct nlmsg *nlmsg = NULL, *answer = NULL; + struct link_req *link_req; + struct rtattr *nest1, *nest2, *nest3; + int len, err; + + err = netlink_open(&nlh, NETLINK_ROUTE); + if (err) + return err; + + err = -EINVAL; + len = strlen(name1); + if (len == 1 || len >= IFNAMSIZ) + goto out; + + len = strlen(name2); + if (len == 1 || len >= IFNAMSIZ) + goto out; + + err = -ENOMEM; + nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE); + if (!nlmsg) + goto out; + + answer = nlmsg_alloc(NLMSG_GOOD_SIZE); + if (!answer) + goto out; + + link_req = (struct link_req *)nlmsg; + link_req->ifinfomsg.ifi_family = AF_UNSPEC; + nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + nlmsg->nlmsghdr.nlmsg_flags = + NLM_F_REQUEST|NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK; + nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK; + + err = -EINVAL; + nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO); + if (!nest1) + goto out; + + if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth")) + goto out; + + nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA); + if (!nest2) + goto out; + + nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER); + 
if (!nest3) + goto out; + + nlmsg->nlmsghdr.nlmsg_len += sizeof(struct ifinfomsg); + + if (nla_put_string(nlmsg, IFLA_IFNAME, name2)) + goto out; + + nla_end_nested(nlmsg, nest3); + + nla_end_nested(nlmsg, nest2); + + nla_end_nested(nlmsg, nest1); + + if (nla_put_string(nlmsg, IFLA_IFNAME, name1)) + goto out; + + err = netlink_transaction(&nlh, nlmsg, answer); +out: + netlink_close(&nlh); + nlmsg_free(answer); + nlmsg_free(nlmsg); + return err; +} + +int lxc_netdev_move(char *ifname, pid_t pid) +{ + struct nl_handler nlh; + struct nlmsg *nlmsg = NULL; + struct link_req *link_req; + int err, index; + + index = if_nametoindex(ifname); + if (!ifname) + return -EINVAL; + + err = netlink_open(&nlh, NETLINK_ROUTE); + if (err) + return err; + + err = -ENOMEM; + nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE); + if (!nlmsg) + goto out; + + link_req = (struct link_req *)nlmsg; + link_req->ifinfomsg.ifi_family = AF_UNSPEC; + link_req->ifinfomsg.ifi_index = index; + nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + nlmsg->nlmsghdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK; + + if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid)) + goto out; + + err = netlink_transaction(&nlh, nlmsg, nlmsg); +out: + netlink_close(&nlh); + nlmsg_free(nlmsg); + return err; +} + +static int setup_private_host_hw_addr(char *veth1) +{ + struct ifreq ifr; + int err; + int sockfd; + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) + return -errno; + + snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1); + err = ioctl(sockfd, SIOCGIFHWADDR, &ifr); + if (err < 0) { + close(sockfd); + return -errno; + } + + ifr.ifr_hwaddr.sa_data[0] = 0xfe; + err = ioctl(sockfd, SIOCSIFHWADDR, &ifr); + close(sockfd); + if (err < 0) + return -errno; + + return 0; +} + +static int netdev_set_flag(const char *name, int flag) +{ + struct nl_handler nlh; + struct nlmsg *nlmsg = NULL, *answer = NULL; + struct link_req *link_req; + int index, len, err; + + err = 
netlink_open(&nlh, NETLINK_ROUTE); + if (err) + return err; + + err = -EINVAL; + len = strlen(name); + if (len == 1 || len >= IFNAMSIZ) + goto out; + + err = -ENOMEM; + nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE); + if (!nlmsg) + goto out; + + answer = nlmsg_alloc(NLMSG_GOOD_SIZE); + if (!answer) + goto out; + + err = -EINVAL; + index = if_nametoindex(name); + if (!index) + goto out; + + link_req = (struct link_req *)nlmsg; + link_req->ifinfomsg.ifi_family = AF_UNSPEC; + link_req->ifinfomsg.ifi_index = index; + link_req->ifinfomsg.ifi_change |= IFF_UP; + link_req->ifinfomsg.ifi_flags |= flag; + nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + nlmsg->nlmsghdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; + nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK; + + err = netlink_transaction(&nlh, nlmsg, answer); +out: + netlink_close(&nlh); + nlmsg_free(nlmsg); + nlmsg_free(answer); + return err; +} + +static int instanciate_veth(char *n1, char **n2) +{ + int err; + + err = snprintf(*n2, IFNAMSIZ, "%sp", n1); + if (err < 0 || err >= IFNAMSIZ) { + fprintf(stderr, "nic name too long\n"); + exit(1); + } + + err = lxc_veth_create(n1, *n2); + if (err) { + fprintf(stderr, "failed to create %s-%s : %s\n", n1, *n2, + strerror(-err)); + exit(1); + } + + /* changing the high byte of the mac address to 0xfe, the bridge interface + * will always keep the host's mac address and not take the mac address + * of a container */ + err = setup_private_host_hw_addr(n1); + if (err) { + fprintf(stderr, "failed to change mac address of host interface '%s' : %s", + n1, strerror(-err)); + } + + return netdev_set_flag(n1, IFF_UP); +} + +int lxc_bridge_attach(const char *bridge, const char *ifname) +{ + int fd, index, err; + struct ifreq ifr; + + if (strlen(ifname) >= IFNAMSIZ) + return -EINVAL; + + index = if_nametoindex(ifname); + if (!index) + return -EINVAL; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) + return -errno; + + strncpy(ifr.ifr_name, bridge, IFNAMSIZ); + ifr.ifr_ifindex = 
index; + err = ioctl(fd, SIOCBRADDIF, &ifr); + close(fd); + if (err) + err = -errno; + + return err; +} + +int lxc_netdev_delete_by_index(int ifindex) +{ + struct nl_handler nlh; + struct nlmsg *nlmsg = NULL, *answer = NULL; + struct link_req *link_req; + int err; + + err = netlink_open(&nlh, NETLINK_ROUTE); + if (err) + return err; + + err = -ENOMEM; + nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE); + if (!nlmsg) + goto out; + + answer = nlmsg_alloc(NLMSG_GOOD_SIZE); + if (!answer) + goto out; + + link_req = (struct link_req *)nlmsg; + link_req->ifinfomsg.ifi_family = AF_UNSPEC; + link_req->ifinfomsg.ifi_index = ifindex; + nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + nlmsg->nlmsghdr.nlmsg_flags = NLM_F_ACK|NLM_F_REQUEST; + nlmsg->nlmsghdr.nlmsg_type = RTM_DELLINK; + + err = netlink_transaction(&nlh, nlmsg, answer); +out: + netlink_close(&nlh); + nlmsg_free(answer); + nlmsg_free(nlmsg); + return err; +} + +int lxc_netdev_delete_by_name(const char *name) +{ + int index; + + index = if_nametoindex(name); + if (!index) + return -EINVAL; + + return lxc_netdev_delete_by_index(index); +} +#else +int lxc_netdev_delete_by_name(const char *name) +{ + char path[200]; + sprintf(path, "/tmp/lxcnettest/%s", name); + return unlink(path); +} + +#endif + +bool create_nic(char *nic, char *br, char *pidstr) +{ +#if ISTEST + char path[200]; + sprintf(path, "/tmp/lxcnettest/%s", nic); + int fd = open(path, O_RDWR|O_CREAT, S_IWUSR | S_IRUSR); + if (fd < 0) + return false; + close(fd); + return true; +#else + // not yet implemented + char *veth1buf, *veth2buf; + veth1buf = alloca(IFNAMSIZ); + veth2buf = alloca(IFNAMSIZ); + int ret; + int pid = atoi(pidstr); + + ret = snprintf(veth1buf, IFNAMSIZ, "%s", nic); + if (ret < 0 || ret >= IFNAMSIZ) { + fprintf(stderr, "nic name too long\n"); + exit(1); + } + + /* create the nics */ + if (instanciate_veth(veth1buf, &veth2buf) < 0) { + fprintf(stderr, "Error creating veth tunnel\n"); + return false; + } + + /* attach veth1 to 
bridge */ + if (lxc_bridge_attach(br, veth1buf) < 0) { + fprintf(stderr, "Error attaching %s to %s\n", veth1buf, br); + goto out_del; + } + + /* pass veth2 to target netns */ + ret = lxc_netdev_move(veth2buf, pid); + if (ret < 0) { + fprintf(stderr, "Error moving %s to netns %d\n", veth2buf, pid); + goto out_del; + } + return true; + +out_del: + lxc_netdev_delete_by_name(veth1buf); + exit(1); +#endif +} + +void get_new_nicname(char **dest, char *br, char *pid) +{ + int i = 0; + // TODO - speed this up. For large installations we won't + // want n stats for every nth container startup. + while (1) { + sprintf(*dest, "lxcuser-%d", i); + if (!nic_exists(*dest) && create_nic(*dest, br, pid)) + return; + i++; + } +} + +bool get_nic_from_line(char *p, char **nic) +{ + char user[100], type[100], br[100]; + int ret; + + ret = sscanf(p, "%99[^ \t\n] %99[^ \t\n] %99[^ \t\n] %99[^ \t\n]", user, type, br, *nic); + if (ret != 4) + return false; + return true; +} + +bool cull_entries(int fd, char *me, char *t, char *br) +{ + struct stat sb; + char *buf, *p, *e, *nic; + off_t len; + + nic = alloca(100); + + fstat(fd, &sb); + len = sb.st_size; + if (len == 0) + return true; + buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) { + fprintf(stderr, "Failed to create mapping: error %d\n", errno); + return false; + } + + p = buf; + e = buf + len; + while ((p = find_line(p, e, me, t, br)) != NULL) { + if (!get_nic_from_line(p, &nic)) + continue; + if (nic && !nic_exists(nic)) { + // copy from eol(p)+1..e to p + char *src = get_eol(p) + 1, *dest = p; + int diff = src - p; + while (src < e) + *(dest++) = *(src)++; + e -= diff; + } else + p = get_eol(p) + 1; + if (p >= e) + break; + } + munmap(buf, sb.st_size); + if (ftruncate(fd, e-buf)) + fprintf(stderr, "Failed to set new file size\n"); + return true; +} + +int count_entries(char *buf, off_t len, char *me, char *t, char *br) +{ + char *e = &buf[len]; + int count = 0; + while ((buf = find_line(buf, 
e, me, t, br)) != NULL) { + count++; + buf = get_eol(buf)+1; + if (buf >= e) + break; + } + + return count; +} + +/* + * The dbfile has lines of the format: + * user type bridge nicname + */ +bool get_nic_if_avail(int fd, char *me, char *pid, char *intype, char *br, int allowed, char **nicname) +{ + off_t len, slen; + struct stat sb; + char *buf = NULL, *newline; + int ret, count = 0; + + cull_entries(fd, me, intype, br); + + fstat(fd, &sb); + len = sb.st_size; + if (len != 0) { + buf = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) { + fprintf(stderr, "Failed to create mapping\n"); + return false; + } + + count = count_entries(buf, len, me, intype, br); + if (count >= allowed) + return false; + } + + + get_new_nicname(nicname, br, pid); + /* me ' ' intype ' ' br ' ' *nicname + '\n' + '\0' */ + slen = strlen(me) + strlen(intype) + strlen(br) + strlen(*nicname) + 5; + newline = alloca(slen); + ret = snprintf(newline, slen, "%s %s %s %s\n", me, intype, br, *nicname); + if (ret < 0 || ret >= slen) { + if (lxc_netdev_delete_by_name(*nicname) != 0) + fprintf(stderr, "Error unlinking %s!\n", *nicname); + return false; + } + if (len) + munmap(buf, len); + if (ftruncate(fd, len + slen)) + fprintf(stderr, "Failed to set new file size\n"); + buf = mmap(NULL, len + slen, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (buf == MAP_FAILED) { + fprintf(stderr, "Failed to create mapping after extending: error %d\n", errno); + if (lxc_netdev_delete_by_name(*nicname) != 0) + fprintf(stderr, "Error unlinking %s!\n", *nicname); + return false; + } + strcpy(buf+len, newline); + munmap(buf, len+slen); + return true; +} + +bool create_db_dir(char *fnam) +{ + char *p = alloca(strlen(fnam)+1); + + strcpy(p, fnam); + fnam = p; + p = p + 1; +again: + while (*p && *p != '/') p++; + if (!*p) + return true; + *p = '\0'; + if (mkdir(fnam, 0755) && errno != EEXIST) { + fprintf(stderr, "failed to create %s\n", fnam); + *p = '/'; + return false; + } + *(p++) = 
'/'; + goto again; +} + +int main(int argc, char *argv[]) +{ + int n, fd; + bool gotone = false; + char *me, *buf = alloca(400); + char *nicname = alloca(40); + + if ((me = get_username(&buf)) == NULL) { + fprintf(stderr, "Failed to get username\n"); + exit(1); + } + + if (argc != 4) + usage(argv[0], true); + + if (!create_db_dir(DB_FILE)) { + fprintf(stderr, "Failed to create directory for db file\n"); + exit(1); + } + + if ((fd = open_and_lock(DB_FILE)) < 0) { + fprintf(stderr, "Failed to lock %s\n", DB_FILE); + exit(1); + } + + n = get_alloted(me, argv[2], argv[3]); + if (n > 0) + gotone = get_nic_if_avail(fd, me, argv[1], argv[2], argv[3], n, &nicname); + close(fd); + if (!gotone) { + fprintf(stderr, "Quota reached\n"); + exit(1); + } + + // Now create the link + + exit(0); +} diff --git a/src/lxc/lxc_usernsexec.c b/src/lxc/lxc_usernsexec.c new file mode 100644 index 000000000..9416f2747 --- /dev/null +++ b/src/lxc/lxc_usernsexec.c @@ -0,0 +1,417 @@ +/* + * (C) Copyright IBM Corp. 2008 + * (C) Copyright Canonical, Inc 2010-2013 + * + * Authors: + * Serge Hallyn + * (Once upon a time, this was based on nsexec from the IBM + * container tools) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "namespace.h" + +int unshare(int flags); + +static void usage(const char *name) +{ + printf("usage: %s [-h] [-c] [-mnuUip] [-P ]" + "[command [arg ..]]\n", name); + printf("\n"); + printf(" -h this message\n"); + printf("\n"); + printf(" -m uid maps to use\n"); + printf("\n"); + printf(" uid-maps: [u|g|b]:ns_id:host_id:range\n"); + printf(" [u|g|b]: map user id, group id, or both\n"); + printf(" ns_id: the base id in the new namespace\n"); + printf(" host_id: the base id in the parent namespace\n"); + printf(" range: how many ids to map\n"); + printf(" Note: This program uses newuidmap(2) and newgidmap(2).\n"); + printf(" As such, /etc/subuid and /etc/subgid must grant the\n"); + printf(" calling user permission to use the mapped ranges\n"); + exit(1); +} + +static void opentty(const char * tty) { + int i, fd, flags; + + fd = open(tty, O_RDWR | O_NONBLOCK); + if (fd == -1) { + printf("FATAL: can't reopen tty: %s", strerror(errno)); + sleep(1); + exit(1); + } + + flags = fcntl(fd, F_GETFL); + flags &= ~O_NONBLOCK; + fcntl(fd, F_SETFL, flags); + + for (i = 0; i < fd; i++) + close(i); + for (i = 0; i < 3; i++) + if (fd != i) + dup2(fd, i); + if (fd >= 3) + close(fd); +} +// Code copy end + +static int do_child(void *vargv) +{ + char **argv = (char **)vargv; + + // Assume we want to become root + if (setgid(0) < 0) { + perror("setgid"); + return -1; + } + if (setuid(0) < 0) { + perror("setuid"); + return -1; + } + if (setgroups(0, NULL) < 0) { + perror("setgroups"); + return -1; + } + if (unshare(CLONE_NEWNS) < 0) { + perror("unshare CLONE_NEWNS"); + return -1; + } + 
execvp(argv[0], argv); + perror("execvpe"); + return -1; +} + +struct id_map { + char which; // b or u or g + long host_id, ns_id, range; + struct id_map *next; +}; + +struct id_map default_map = { + .which = 'b', + .host_id = 100000, + .ns_id = 0, + .range = 10000, +}; +static struct id_map *active_map = &default_map; + +/* + * given a string like "b:0:100000:10", map both uids and gids + * 0-10 to 100000 to 100010 + */ +static int parse_map(char *map) +{ + struct id_map *newmap; + int ret; + + if (!map) + return -1; + newmap = malloc(sizeof(*newmap)); + if (!newmap) + return -1; + ret = sscanf(map, "%c:%ld:%ld:%ld", &newmap->which, &newmap->ns_id, &newmap->host_id, &newmap->range); + if (ret != 4) + goto out_free_map; + if (newmap->which != 'b' && newmap->which != 'u' && newmap->which != 'g') + goto out_free_map; + if (active_map != &default_map) + newmap->next = active_map; + else + newmap->next = NULL; + active_map = newmap; + return 0; + +out_free_map: + free(newmap); + return -1; +} + +/* + * go through /etc/subuids and /etc/subgids to find this user's + * allowed map. We only use the first one (bc otherwise we're + * not sure which ns ids he wants to use). 
+ */ +static int read_default_map(char *fnam, char which, char *username) +{ + FILE *fin; + char *line = NULL; + size_t sz = 0; + struct id_map *newmap; + char *p1, *p2; + + fin = fopen(fnam, "r"); + if (!fin) + return -1; + while (getline(&line, &sz, fin) != -1) { + if (sz <= strlen(username) || + strncmp(line, username, strlen(username)) != 0 || + line[strlen(username)] != ':') + continue; + p1 = index(line, ':'); + if (!p1) + continue; + p2 = index(p1+1, ':'); + if (!p2) + continue; + newmap = malloc(sizeof(*newmap)); + if (!newmap) + return -1; + newmap->host_id = atol(p1+1); + newmap->range = atol(p2+1); + newmap->ns_id = 0; + newmap->which = which; + if (active_map != &default_map) + newmap->next = active_map; + else + newmap->next = NULL; + break; + } + + if (line) + free(line); + fclose(fin); + return 0; +} + +#define subuidfile "/etc/subuid" +#define subgidfile "/etc/subgid" +static int find_default_map(void) +{ + struct passwd *p = getpwuid(getuid()); + if (!p) + return -1; + if (read_default_map(subuidfile, 'u', p->pw_name) < 0) + return -1; + if (read_default_map(subgidfile, 'g', p->pw_name) < 0) + return -1; + return 0; +} + +static int run_cmd(char **argv) +{ + int status; + pid_t pid = fork(); + + if (pid < 0) + return pid; + if (pid == 0) { + execvp(argv[0], argv); + perror("exec failed"); + exit(1); + } + if (waitpid(pid, &status, __WALL) < 0) { + perror("waitpid"); + return -1; + } + + return WEXITSTATUS(status); +} + +static int map_child_uids(int pid, struct id_map *map) +{ + char **uidargs = NULL, **gidargs = NULL; + int i, nuargs = 2, ngargs = 2; + struct id_map *m; + + uidargs = malloc(3 * sizeof(*uidargs)); + gidargs = malloc(3 * sizeof(*gidargs)); + if (uidargs == NULL || gidargs == NULL) + return -1; + uidargs[0] = malloc(10); + gidargs[0] = malloc(10); + uidargs[1] = malloc(21); + gidargs[1] = malloc(21); + uidargs[2] = NULL; + gidargs[2] = NULL; + if (!uidargs[0] || !uidargs[1] || !gidargs[0] || !gidargs[1]) + return -1; + 
sprintf(uidargs[0], "newuidmap"); + sprintf(gidargs[0], "newgidmap"); + sprintf(uidargs[1], "%d", pid); + sprintf(gidargs[1], "%d", pid); + for (m=map; m; m = m->next) { + if (m->which == 'b' || m->which == 'u') { + nuargs += 3; + uidargs = realloc(uidargs, (nuargs+1) * sizeof(*uidargs)); + if (!uidargs) + return -1; + uidargs[nuargs - 3] = malloc(21); + uidargs[nuargs - 2] = malloc(21); + uidargs[nuargs - 1] = malloc(21); + if (!uidargs[nuargs-3] || !uidargs[nuargs-2] || !uidargs[nuargs-1]) + return -1; + sprintf(uidargs[nuargs - 3], "%ld", m->ns_id); + sprintf(uidargs[nuargs - 2], "%ld", m->host_id); + sprintf(uidargs[nuargs - 1], "%ld", m->range); + uidargs[nuargs] = NULL; + } + if (m->which == 'b' || m->which == 'g') { + ngargs += 3; + gidargs = realloc(gidargs, (ngargs+1) * sizeof(*gidargs)); + if (!gidargs) + return -1; + gidargs[ngargs - 3] = malloc(21); + gidargs[ngargs - 2] = malloc(21); + gidargs[ngargs - 1] = malloc(21); + if (!gidargs[ngargs-3] || !gidargs[ngargs-2] || !gidargs[ngargs-1]) + return -1; + sprintf(gidargs[ngargs - 3], "%ld", m->ns_id); + sprintf(gidargs[ngargs - 2], "%ld", m->host_id); + sprintf(gidargs[ngargs - 1], "%ld", m->range); + gidargs[ngargs] = NULL; + } + } + + // exec newuidmap + if (nuargs > 2 && run_cmd(uidargs) != 0) { + fprintf(stderr, "Error mapping uids\n"); + return -2; + } + // exec newgidmap + if (ngargs > 2 && run_cmd(gidargs) != 0) { + fprintf(stderr, "Error mapping gids\n"); + return -2; + } + + for (i=0; i #include @@ -30,7 +30,7 @@ #include #include -#include +#include #include "arguments.h" lxc_log_define(lxc_wait_ui, lxc_monitor); @@ -80,12 +80,22 @@ Options :\n\ int main(int argc, char *argv[]) { + struct lxc_container *c; + if (lxc_arguments_parse(&my_args, argc, argv)) return -1; if (lxc_log_init(my_args.name, my_args.log_file, my_args.log_priority, - my_args.progname, my_args.quiet)) + my_args.progname, my_args.quiet, my_args.lxcpath[0])) return -1; - return lxc_wait(strdup(my_args.name), my_args.states, 
my_args.timeout, my_args.lxcpath); + c = lxc_container_new(my_args.name, my_args.lxcpath[0]); + if (!c) + return -1; + + if (!c->wait(c, my_args.states, my_args.timeout)) { + lxc_container_put(c); + return -1; + } + return 0; } diff --git a/src/lxc/lxccontainer.c b/src/lxc/lxccontainer.c index a4376b405..3c657cab0 100644 --- a/src/lxc/lxccontainer.c +++ b/src/lxc/lxccontainer.c @@ -3,41 +3,181 @@ * Copyright © 2012 Serge Hallyn . * Copyright © 2012 Canonical Ltd. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "lxc.h" -#include "state.h" -#include "lxccontainer.h" -#include "conf.h" -#include "config.h" -#include "confile.h" -#include "cgroup.h" -#include "commands.h" -#include "version.h" -#include "log.h" +#define _GNU_SOURCE +#include +#include #include #include #include #include +#include +#include +#include "config.h" +#include "lxc.h" +#include "state.h" +#include "lxccontainer.h" +#include "conf.h" +#include "confile.h" +#include "console.h" +#include "cgroup.h" +#include "commands.h" +#include "version.h" +#include "log.h" +#include "bdev.h" +#include "utils.h" +#include "attach.h" #include +#include +#include +#include +#include + +#if HAVE_IFADDRS_H +#include +#else +#include <../include/ifaddrs.h> +#endif + +#ifndef HAVE_GETLINE +#ifdef HAVE_FGETLN +#include <../include/getline.h> +#endif +#endif lxc_log_define(lxc_container, lxc); +static bool file_exists(char *f) +{ + struct stat statbuf; + + return stat(f, &statbuf) == 0; +} + +/* + * A few functions to help detect when a container creation failed. + * If a container creation was killed partway through, then trying + * to actually start that container could harm the host. We detect + * this by creating a 'partial' file under the container directory, + * and keeping an advisory lock. When container creation completes, + * we remove that file. When we load or try to start a container, if + * we find that file, without a flock, we remove the container. 
+ */ +int ongoing_create(struct lxc_container *c) +{ + int len = strlen(c->config_path) + strlen(c->name) + 10; + char *path = alloca(len); + int fd, ret; + struct flock lk; + + ret = snprintf(path, len, "%s/%s/partial", c->config_path, c->name); + if (ret < 0 || ret >= len) { + ERROR("Error writing partial pathname"); + return -1; + } + + if (!file_exists(path)) + return 0; + if (process_lock()) + return -1; + if ((fd = open(path, O_RDWR)) < 0) { + // give benefit of the doubt + SYSERROR("Error opening partial file"); + process_unlock(); + return 0; + } + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; + lk.l_pid = -1; + if (fcntl(fd, F_GETLK, &lk) == 0 && lk.l_pid != -1) { + // create is still ongoing + close(fd); + process_unlock(); + return 1; + } + // create completed but partial is still there. + close(fd); + process_unlock(); + return 2; +} + +int create_partial(struct lxc_container *c) +{ + // $lxcpath + '/' + $name + '/partial' + \0 + int len = strlen(c->config_path) + strlen(c->name) + 10; + char *path = alloca(len); + int fd, ret; + struct flock lk; + + ret = snprintf(path, len, "%s/%s/partial", c->config_path, c->name); + if (ret < 0 || ret >= len) { + ERROR("Error writing partial pathname"); + return -1; + } + if (process_lock()) + return -1; + if ((fd=open(path, O_RDWR | O_CREAT | O_EXCL, 0755)) < 0) { + SYSERROR("Erorr creating partial file"); + process_unlock(); + return -1; + } + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; + if (fcntl(fd, F_SETLKW, &lk) < 0) { + SYSERROR("Error locking partial file %s", path); + close(fd); + process_unlock(); + return -1; + } + process_unlock(); + + return fd; +} + +void remove_partial(struct lxc_container *c, int fd) +{ + // $lxcpath + '/' + $name + '/partial' + \0 + int len = strlen(c->config_path) + strlen(c->name) + 10; + char *path = alloca(len); + int ret; + + close(fd); + ret = snprintf(path, len, "%s/%s/partial", c->config_path, c->name); 
+ if (ret < 0 || ret >= len) { + ERROR("Error writing partial pathname"); + return; + } + if (process_lock()) + return; + if (unlink(path) < 0) + SYSERROR("Error unlink partial file %s", path); + process_unlock(); +} + /* LOCKING - * c->privlock protects the struct lxc_container from multiple threads. - * c->slock protects the on-disk container data + * 1. container_mem_lock(c) protects the struct lxc_container from multiple threads. + * 2. container_disk_lock(c) protects the on-disk container data - in particular the + * container configuration file. + * The container_disk_lock also takes the container_mem_lock. + * 3. thread_mutex protects process data (ex: fd table) from multiple threads. * NOTHING mutexes two independent programs with their own struct * lxc_container for the same c->name, between API calls. For instance, * c->config_read(); c->start(); Between those calls, data on disk @@ -48,7 +188,7 @@ lxc_log_define(lxc_container, lxc); * due to hung callers. So I prefer to keep the locks only within our own * functions, not across functions. * - * If you're going to fork while holding a lxccontainer, increment + * If you're going to clone while holding a lxccontainer, increment * c->numthreads (under privlock) before forking. When deleting, * decrement numthreads under privlock, then if it hits 0 you can delete. * Do not ever use a lxccontainer whose numthreads you did not bump. 
@@ -68,12 +208,11 @@ static void lxc_container_free(struct lxc_container *c) c->error_string = NULL; } if (c->slock) { - sem_close(c->slock); + lxc_putlock(c->slock); c->slock = NULL; } if (c->privlock) { - sem_destroy(c->privlock); - free(c->privlock); + lxc_putlock(c->privlock); c->privlock = NULL; } if (c->name) { @@ -91,12 +230,40 @@ static void lxc_container_free(struct lxc_container *c) free(c); } +/* + * Consider the following case: +freer | racing get()er +================================================================== +lxc_container_put() | lxc_container_get() +\ lxclock(c->privlock) | c->numthreads < 1? (no) +\ c->numthreads = 0 | \ lxclock(c->privlock) -> waits +\ lxcunlock() | \ +\ lxc_container_free() | \ lxclock() returns + | \ c->numthreads < 1 -> return 0 +\ \ (free stuff) | +\ \ sem_destroy(privlock) | + + * When the get()er checks numthreads the first time, one of the following + * is true: + * 1. freer has set numthreads = 0. get() returns 0 + * 2. freer is between lxclock and setting numthreads to 0. get()er will + * sem_wait on privlock, get lxclock after freer() drops it, then see + * numthreads is 0 and exit without touching lxclock again.. + * 3. freer has not yet locked privlock. If get()er runs first, then put()er + * will see --numthreads = 1 and not call lxc_container_free(). 
+*/ + int lxc_container_get(struct lxc_container *c) { if (!c) return 0; - if (lxclock(c->privlock, 0)) + // if someone else has already started freeing the container, don't + // try to take the lock, which may be invalid + if (c->numthreads < 1) + return 0; + + if (container_mem_lock(c)) return 0; if (c->numthreads < 1) { // bail without trying to unlock, bc the privlock is now probably @@ -104,7 +271,7 @@ int lxc_container_get(struct lxc_container *c) return 0; } c->numthreads++; - lxcunlock(c->privlock); + container_mem_unlock(c); return 1; } @@ -112,24 +279,17 @@ int lxc_container_put(struct lxc_container *c) { if (!c) return -1; - if (lxclock(c->privlock, 0)) + if (container_mem_lock(c)) return -1; if (--c->numthreads < 1) { - lxcunlock(c->privlock); + container_mem_unlock(c); lxc_container_free(c); return 1; } - lxcunlock(c->privlock); + container_mem_unlock(c); return 0; } -static bool file_exists(char *f) -{ - struct stat statbuf; - - return stat(f, &statbuf) == 0; -} - static bool lxcapi_is_defined(struct lxc_container *c) { struct stat statbuf; @@ -139,7 +299,7 @@ static bool lxcapi_is_defined(struct lxc_container *c) if (!c) return false; - if (lxclock(c->privlock, 0)) + if (container_mem_lock(c)) return false; if (!c->configfile) goto out; @@ -149,27 +309,21 @@ static bool lxcapi_is_defined(struct lxc_container *c) ret = true; out: - lxcunlock(c->privlock); + container_mem_unlock(c); return ret; } static const char *lxcapi_state(struct lxc_container *c) { - const char *ret; lxc_state_t s; if (!c) return NULL; - if (lxclock(c->slock, 0)) - return NULL; s = lxc_getstate(c->name, c->config_path); - ret = lxc_state2str(s); - lxcunlock(c->slock); - - return ret; + return lxc_state2str(s); } -static bool is_stopped_nolock(struct lxc_container *c) +static bool is_stopped(struct lxc_container *c) { lxc_state_t s; s = lxc_getstate(c->name, c->config_path); @@ -194,10 +348,7 @@ static bool lxcapi_freeze(struct lxc_container *c) if (!c) return false; - if 
(lxclock(c->slock, 0)) - return false; ret = lxc_freeze(c->name, c->config_path); - lxcunlock(c->slock); if (ret) return false; return true; @@ -209,26 +360,34 @@ static bool lxcapi_unfreeze(struct lxc_container *c) if (!c) return false; - if (lxclock(c->slock, 0)) - return false; ret = lxc_unfreeze(c->name, c->config_path); - lxcunlock(c->slock); if (ret) return false; return true; } -static pid_t lxcapi_init_pid(struct lxc_container *c) +static int lxcapi_console_getfd(struct lxc_container *c, int *ttynum, int *masterfd) { - pid_t ret; + int ttyfd; if (!c) return -1; - if (lxclock(c->slock, 0)) + ttyfd = lxc_console_getfd(c, ttynum, masterfd); + return ttyfd; +} + +static int lxcapi_console(struct lxc_container *c, int ttynum, int stdinfd, + int stdoutfd, int stderrfd, int escape) +{ + return lxc_console(c, ttynum, stdinfd, stdoutfd, stderrfd, escape); +} + +static pid_t lxcapi_init_pid(struct lxc_container *c) +{ + if (!c) return -1; - ret = get_init_pid(c->name, c->config_path); - lxcunlock(c->slock); - return ret; + + return lxc_cmd_get_init_pid(c->name, c->config_path); } static bool load_config_locked(struct lxc_container *c, const char *fname) @@ -242,7 +401,8 @@ static bool load_config_locked(struct lxc_container *c, const char *fname) static bool lxcapi_load_config(struct lxc_container *c, const char *alt_file) { - bool ret = false; + bool ret = false, need_disklock = false; + int lret; const char *fname; if (!c) return false; @@ -252,10 +412,27 @@ static bool lxcapi_load_config(struct lxc_container *c, const char *alt_file) fname = alt_file; if (!fname) return false; - if (lxclock(c->slock, 0)) + /* + * If we're reading something other than the container's config, + * we only need to lock the in-memory container. If loading the + * container's config file, take the disk lock. 
+ */ + if (strcmp(fname, c->configfile) == 0) + need_disklock = true; + + if (need_disklock) + lret = container_disk_lock(c); + else + lret = container_mem_lock(c); + if (lret) return false; + ret = load_config_locked(c, fname); - lxcunlock(c->slock); + + if (need_disklock) + container_disk_unlock(c); + else + container_mem_unlock(c); return ret; } @@ -263,7 +440,12 @@ static void lxcapi_want_daemonize(struct lxc_container *c) { if (!c) return; + if (container_mem_lock(c)) { + ERROR("Error getting mem lock"); + return; + } c->daemonize = 1; + container_mem_unlock(c); } static bool lxcapi_wait(struct lxc_container *c, const char *state, int timeout) @@ -314,15 +496,28 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv if (!c->lxc_conf) return false; + if ((ret = ongoing_create(c)) < 0) { + ERROR("Error checking for incomplete creation"); + return false; + } + if (ret == 2) { + ERROR("Error: %s creation was not completed", c->name); + c->destroy(c); + return false; + } else if (ret == 1) { + ERROR("Error: creation of %s is ongoing", c->name); + return false; + } + /* is this app meant to be run through lxcinit, as in lxc-execute? 
*/ if (useinit && !argv) return false; - if (lxclock(c->privlock, 0)) + if (container_mem_lock(c)) return false; conf = c->lxc_conf; daemonize = c->daemonize; - lxcunlock(c->privlock); + container_mem_unlock(c); if (useinit) { ret = lxc_execute(c->name, argv, 1, conf, c->config_path); @@ -341,13 +536,22 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv if (daemonize) { if (!lxc_container_get(c)) return false; + lxc_monitord_spawn(c->config_path); + + if (process_lock()) + return false; pid_t pid = fork(); if (pid < 0) { lxc_container_put(c); + process_unlock(); return false; } - if (pid != 0) - return wait_on_daemonized_start(c); + if (pid != 0) { + ret = wait_on_daemonized_start(c); + process_unlock(); + return ret; + } + process_unlock(); /* second fork to be reparented by init */ pid = fork(); if (pid < 0) { @@ -364,7 +568,7 @@ static bool lxcapi_start(struct lxc_container *c, int useinit, char * const argv close(0); close(1); close(2); - open("/dev/null", O_RDONLY); + open("/dev/zero", O_RDONLY); open("/dev/null", O_RDWR); open("/dev/null", O_RDWR); setsid(); @@ -394,49 +598,30 @@ reboot: static bool lxcapi_startl(struct lxc_container *c, int useinit, ...) 
{ va_list ap; - char **inargs = NULL, **temp; - int n_inargs = 0; + char **inargs = NULL; bool bret = false; /* container exists */ if (!c) return false; - /* build array of arguments if any */ va_start(ap, useinit); - while (1) { - char *arg; - arg = va_arg(ap, char *); - if (!arg) - break; - n_inargs++; - temp = realloc(inargs, n_inargs * sizeof(*inargs)); - if (!temp) - goto out; - inargs = temp; - inargs[n_inargs - 1] = strdup(arg); // not sure if it's safe not to copy - } + inargs = lxc_va_arg_list_to_argv(ap, 0, 1); va_end(ap); - /* add trailing NULL */ - if (n_inargs) { - n_inargs++; - temp = realloc(inargs, n_inargs * sizeof(*inargs)); - if (!temp) - goto out; - inargs = temp; - inargs[n_inargs - 1] = NULL; + if (!inargs) { + ERROR("Memory allocation error."); + goto out; } - bret = lxcapi_start(c, useinit, inargs); + /* pass NULL if no arguments were supplied */ + bret = lxcapi_start(c, useinit, *inargs ? inargs : NULL); out: if (inargs) { - int i; - for (i = 0; i < n_inargs; i++) { - if (inargs[i]) - free(inargs[i]); - } + char *arg; + for (arg = *inargs; arg; arg++) + free(arg); free(inargs); } @@ -450,22 +635,11 @@ static bool lxcapi_stop(struct lxc_container *c) if (!c) return false; - ret = lxc_stop(c->name, c->config_path); + ret = lxc_cmd_stop(c->name, c->config_path); return ret == 0; } -static bool valid_template(char *t) -{ - struct stat statbuf; - int statret; - - statret = stat(t, &statbuf); - if (statret == 0) - return true; - return false; -} - /* * create the standard expected container dir */ @@ -494,80 +668,152 @@ static bool create_container_dir(struct lxc_container *c) return ret == 0; } -/* - * backing stores not (yet) supported - * for ->create, argv contains the arguments to pass to the template, - * terminated by NULL. If no arguments, you can just pass NULL. 
- */ -static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[]) -{ - bool bret = false; - pid_t pid; - int ret, status; - char *tpath = NULL; - int len, nargs = 0; - char **newargv; +static const char *lxcapi_get_config_path(struct lxc_container *c); +static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v); - if (!c) - return false; +/* + * do_bdev_create: thin wrapper around bdev_create(). Like bdev_create(), + * it returns a mounted bdev on success, NULL on error. + */ +static struct bdev *do_bdev_create(struct lxc_container *c, const char *type, + struct bdev_specs *specs) +{ + char *dest; + const char *lxcpath = lxcapi_get_config_path(c); + size_t len; + struct bdev *bdev; + int ret; + + /* lxcpath/lxcname/rootfs */ + len = strlen(c->name) + strlen(lxcpath) + 9; + dest = alloca(len); + ret = snprintf(dest, len, "%s/%s/rootfs", lxcpath, c->name); + if (ret < 0 || ret >= len) + return NULL; + + bdev = bdev_create(dest, type, c->name, specs); + if (!bdev) { + ERROR("Failed to create backing store type %s\n", type); + return NULL; + } + + lxcapi_set_config_item(c, "lxc.rootfs", bdev->src); + return bdev; +} + +/* + * Given the '-t' template option to lxc-create, figure out what to + * do. If the template is a full executable path, use that. If it + * is something like 'sshd', then return $templatepath/lxc-sshd. If + * no template was passed in, return NULL (this is ok). + * On error return (char *) -1. 
+ */ +char *get_template_path(const char *t) +{ + int ret, len; + char *tpath; + + if (!t) + return NULL; + + if (t[0] == '/' && access(t, X_OK) == 0) { + tpath = strdup(t); + if (!tpath) + return (char *) -1; + return tpath; + } len = strlen(LXCTEMPLATEDIR) + strlen(t) + strlen("/lxc-") + 1; tpath = malloc(len); if (!tpath) - return false; + return (char *) -1; ret = snprintf(tpath, len, "%s/lxc-%s", LXCTEMPLATEDIR, t); - if (ret < 0 || ret >= len) - goto out; - if (!valid_template(tpath)) { - ERROR("bad template: %s\n", t); - goto out; + if (ret < 0 || ret >= len) { + free(tpath); + return (char *) -1; + } + if (access(tpath, X_OK) < 0) { + SYSERROR("bad template: %s\n", t); + free(tpath); + return (char *) -1; } - if (!c->save_config(c, NULL)) { - ERROR("failed to save starting configuration for %s\n", c->name); - goto out; - } + return tpath; +} - /* container is already created if we have a config and rootfs.path is accessible */ - if (lxcapi_is_defined(c) && c->lxc_conf && c->lxc_conf->rootfs.path && access(c->lxc_conf->rootfs.path, F_OK) == 0) - return false; +static char *lxcbasename(char *path) +{ + char *p = path + strlen(path) - 1; + while (*p != '/' && p > path) + p--; + return p; +} - /* we're going to fork. 
but since we'll wait for our child, we - don't need to lxc_container_get */ +static bool create_run_template(struct lxc_container *c, char *tpath, bool quiet, + char *const argv[]) +{ + pid_t pid; - if (lxclock(c->slock, 0)) { - ERROR("failed to grab global container lock for %s\n", c->name); - goto out; - } + if (!tpath) + return true; pid = fork(); if (pid < 0) { SYSERROR("failed to fork task for container creation template\n"); - goto out_unlock; + return false; } if (pid == 0) { // child - char *patharg, *namearg; + char *patharg, *namearg, *rootfsarg, *src; + struct bdev *bdev = NULL; int i; + int ret, len, nargs = 0; + char **newargv; - close(0); - close(1); - close(2); - open("/dev/null", O_RDONLY); - open("/dev/null", O_RDWR); - open("/dev/null", O_RDWR); + if (quiet) { + close(0); + close(1); + close(2); + open("/dev/zero", O_RDONLY); + open("/dev/null", O_RDWR); + open("/dev/null", O_RDWR); + } + if (unshare(CLONE_NEWNS) < 0) { + ERROR("error unsharing mounts"); + exit(1); + } + + src = c->lxc_conf->rootfs.path; + /* + * for an overlayfs create, what the user wants is the template to fill + * in what will become the readonly lower layer. 
So don't mount for + * the template + */ + if (strncmp(src, "overlayfs:", 10) == 0) { + src = overlayfs_getlower(src+10); + } + bdev = bdev_init(src, c->lxc_conf->rootfs.mount, NULL); + if (!bdev) { + ERROR("Error opening rootfs"); + exit(1); + } + + if (bdev->ops->mount(bdev) < 0) { + ERROR("Error mounting rootfs"); + exit(1); + } /* * create our new array, pre-pend the template name and * base args */ if (argv) - for (; argv[nargs]; nargs++) ; - nargs += 3; // template, path and name args + for (nargs = 0; argv[nargs]; nargs++) ; + nargs += 4; // template, path, rootfs and name args newargv = malloc(nargs * sizeof(*newargv)); if (!newargv) exit(1); - newargv[0] = t; + newargv[0] = lxcbasename(tpath); len = strlen(c->config_path) + strlen(c->name) + strlen("--path=") + 2; patharg = malloc(len); @@ -586,10 +832,19 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[]) exit(1); newargv[2] = namearg; + len = strlen("--rootfs=") + 1 + strlen(bdev->dest); + rootfsarg = malloc(len); + if (!rootfsarg) + exit(1); + ret = snprintf(rootfsarg, len, "--rootfs=%s", bdev->dest); + if (ret < 0 || ret >= len) + exit(1); + newargv[3] = rootfsarg; + /* add passed-in args */ if (argv) - for (i = 3; i < nargs; i++) - newargv[i] = argv[i-3]; + for (i = 4; i < nargs; i++) + newargv[i] = argv[i-4]; /* add trailing NULL */ nargs++; @@ -599,49 +854,257 @@ static bool lxcapi_create(struct lxc_container *c, char *t, char *const argv[]) newargv[nargs - 1] = NULL; /* execute */ - ret = execv(tpath, newargv); + execv(tpath, newargv); SYSERROR("failed to execute template %s", tpath); exit(1); } -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == -EINTR) - goto again; - SYSERROR("waitpid failed"); - goto out_unlock; + if (wait_for_pid(pid) != 0) { + ERROR("container creation template for %s failed\n", c->name); + return false; } - if (ret != pid) - goto again; - if (!WIFEXITED(status)) { // did not exit normally - // we could set an error code 
and string inside the - // container_struct here if we like - ERROR("container creation template exited abnormally\n"); + + return true; +} + +bool prepend_lxc_header(char *path, const char *t, char *const argv[]) +{ + long flen; + char *contents; + FILE *f; +#if HAVE_LIBGNUTLS + int i, ret; + unsigned char md_value[SHA_DIGEST_LENGTH]; + char *tpath; + bool have_tpath = false; +#endif + + if ((f = fopen(path, "r")) == NULL) { + SYSERROR("Opening old config"); + return false; + } + if (fseek(f, 0, SEEK_END) < 0) { + SYSERROR("Seeking to end of old config file"); + fclose(f); + return false; + } + if ((flen = ftell(f)) < 0) { + SYSERROR("telling size of old config"); + fclose(f); + return false; + } + if (fseek(f, 0, SEEK_SET) < 0) { + SYSERROR("rewinding old config"); + fclose(f); + return false; + } + if ((contents = malloc(flen + 1)) == NULL) { + SYSERROR("out of memory"); + fclose(f); + return false; + } + if (fread(contents, 1, flen, f) != flen) { + SYSERROR("Reading old config"); + free(contents); + fclose(f); + return false; + } + contents[flen] = '\0'; + if (fclose(f) < 0) { + SYSERROR("closing old config"); + free(contents); + return false; + } + +#if HAVE_LIBGNUTLS + tpath = get_template_path(t); + if (tpath == (char *) -1) { + ERROR("bad template: %s\n", t); + free(contents); + return false; + } + + if (tpath) { + have_tpath = true; + ret = sha1sum_file(tpath, md_value); + if (ret < 0) { + ERROR("Error getting sha1sum of %s", tpath); + free(contents); + return false; + } + free(tpath); + } +#endif + + if ((f = fopen(path, "w")) == NULL) { + SYSERROR("reopening config for writing"); + free(contents); + return false; + } + fprintf(f, "# Template used to create this container: %s\n", t); + if (argv) { + fprintf(f, "# Parameters passed to the template:"); + while (*argv) { + fprintf(f, " %s", *argv); + argv++; + } + fprintf(f, "\n"); + } +#if HAVE_LIBGNUTLS + if (have_tpath) { + fprintf(f, "# Template script checksum (SHA-1): "); + for (i=0; isave_config(c, 
NULL)) { + ERROR("failed to save starting configuration for %s\n", c->name); + goto out; + } + + /* container is already created if we have a config and rootfs.path is accessible */ + if (lxcapi_is_defined(c) && c->lxc_conf && c->lxc_conf->rootfs.path && access(c->lxc_conf->rootfs.path, F_OK) == 0) + goto out; + + /* Mark that this container is being created */ + if ((partial_fd = create_partial(c)) < 0) + goto out; + + /* no need to get disk lock bc we have the partial locked */ + + /* + * Create the backing store + * Note we can't do this in the same task as we use to execute the + * template because of the way zfs works. + * After you 'zfs create', zfs mounts the fs only in the initial + * namespace. + */ + pid = fork(); + if (pid < 0) { + SYSERROR("failed to fork task for container creation template\n"); goto out_unlock; } - if (WEXITSTATUS(status) != 0) { - ERROR("container creation template for %s exited with %d\n", - c->name, WEXITSTATUS(status)); - goto out_unlock; + if (pid == 0) { // child + struct bdev *bdev = NULL; + + if (!(bdev = do_bdev_create(c, bdevtype, specs))) { + ERROR("Error creating backing store type %s for %s", + bdevtype ? 
bdevtype : "(none)", c->name); + exit(1); + } + + /* save config file again to store the new rootfs location */ + if (!c->save_config(c, NULL)) { + ERROR("failed to save starting configuration for %s\n", c->name); + // parent task won't see bdev in config so we delete it + bdev->ops->umount(bdev); + bdev->ops->destroy(bdev); + exit(1); + } + exit(0); } + if (wait_for_pid(pid) != 0) + goto out_unlock; + + /* reload config to get the rootfs */ + if (c->lxc_conf) + lxc_conf_free(c->lxc_conf); + c->lxc_conf = NULL; + if (!load_config_locked(c, c->configfile)) + goto out_unlock; + + if (!create_run_template(c, tpath, !!(flags & LXC_CREATE_QUIET), argv)) + goto out_unlock; // now clear out the lxc_conf we have, reload from the created // container if (c->lxc_conf) lxc_conf_free(c->lxc_conf); c->lxc_conf = NULL; + + if (!prepend_lxc_header(c->configfile, tpath, argv)) { + ERROR("Error prepending header to configuration file"); + goto out_unlock; + } bret = load_config_locked(c, c->configfile); out_unlock: - lxcunlock(c->slock); + if (partial_fd >= 0) + remove_partial(c, partial_fd); out: if (tpath) free(tpath); + if (!bret && c) + lxcapi_destroy(c); return bret; } +static bool lxcapi_reboot(struct lxc_container *c) +{ + pid_t pid; + + if (!c) + return false; + if (!c->is_running(c)) + return false; + pid = c->init_pid(c); + if (pid <= 0) + return false; + if (kill(pid, SIGINT) < 0) + return false; + return true; + +} + static bool lxcapi_shutdown(struct lxc_container *c, int timeout) { bool retv; @@ -666,12 +1129,12 @@ static bool lxcapi_shutdown(struct lxc_container *c, int timeout) return retv; } -static bool lxcapi_createl(struct lxc_container *c, char *t, ...) +static bool lxcapi_createl(struct lxc_container *c, const char *t, + const char *bdevtype, struct bdev_specs *specs, int flags, ...) 
{ bool bret = false; - char **args = NULL, **temp; + char **args = NULL; va_list ap; - int nargs = 0; if (!c) return false; @@ -680,28 +1143,18 @@ static bool lxcapi_createl(struct lxc_container *c, char *t, ...) * since we're going to wait for create to finish, I don't think we * need to get a copy of the arguments. */ - va_start(ap, t); - while (1) { - char *arg; - arg = va_arg(ap, char *); - if (!arg) - break; - nargs++; - temp = realloc(args, (nargs+1) * sizeof(*args)); - if (!temp) - goto out; - args = temp; - args[nargs - 1] = arg; - } + va_start(ap, flags); + args = lxc_va_arg_list_to_argv(ap, 0, 0); va_end(ap); - if (args) - args[nargs] = NULL; + if (!args) { + ERROR("Memory allocation error."); + goto out; + } - bret = c->create(c, t, args); + bret = c->create(c, t, bdevtype, specs, flags, args); out: - if (args) - free(args); + free(args); return bret; } @@ -711,25 +1164,135 @@ static bool lxcapi_clear_config_item(struct lxc_container *c, const char *key) if (!c || !c->lxc_conf) return false; - if (lxclock(c->privlock, 0)) { + if (container_mem_lock(c)) return false; - } ret = lxc_clear_config_item(c->lxc_conf, key); - lxcunlock(c->privlock); + container_mem_unlock(c); return ret == 0; } +char** lxcapi_get_ips(struct lxc_container *c, char* interface, char* family, int scope) +{ + int count = 0; + struct ifaddrs *interfaceArray = NULL, *tempIfAddr = NULL; + char addressOutputBuffer[INET6_ADDRSTRLEN]; + void *tempAddrPtr = NULL; + char **addresses = NULL, **temp; + char *address = NULL; + char new_netns_path[MAXPATHLEN]; + int old_netns = -1, new_netns = -1, ret = 0; + + if (!c->is_running(c)) + goto out; + + /* Save reference to old netns */ + old_netns = open("/proc/self/ns/net", O_RDONLY); + if (old_netns < 0) { + SYSERROR("failed to open /proc/self/ns/net"); + goto out; + } + + /* Switch to new netns */ + ret = snprintf(new_netns_path, MAXPATHLEN, "/proc/%d/ns/net", c->init_pid(c)); + if (ret < 0 || ret >= MAXPATHLEN) + goto out; + + new_netns = 
open(new_netns_path, O_RDONLY); + if (new_netns < 0) { + SYSERROR("failed to open %s", new_netns_path); + goto out; + } + + if (setns(new_netns, CLONE_NEWNET)) { + SYSERROR("failed to setns"); + goto out; + } + + /* Grab the list of interfaces */ + if (getifaddrs(&interfaceArray)) { + SYSERROR("failed to get interfaces list"); + goto out; + } + + /* Iterate through the interfaces */ + for (tempIfAddr = interfaceArray; tempIfAddr != NULL; tempIfAddr = tempIfAddr->ifa_next) { + if(tempIfAddr->ifa_addr->sa_family == AF_INET) { + if (family && strcmp(family, "inet")) + continue; + tempAddrPtr = &((struct sockaddr_in *)tempIfAddr->ifa_addr)->sin_addr; + } + else { + if (family && strcmp(family, "inet6")) + continue; + + if (((struct sockaddr_in6 *)tempIfAddr->ifa_addr)->sin6_scope_id != scope) + continue; + + tempAddrPtr = &((struct sockaddr_in6 *)tempIfAddr->ifa_addr)->sin6_addr; + } + + if (interface && strcmp(interface, tempIfAddr->ifa_name)) + continue; + else if (!interface && strcmp("lo", tempIfAddr->ifa_name) == 0) + continue; + + address = (char *)inet_ntop(tempIfAddr->ifa_addr->sa_family, + tempAddrPtr, + addressOutputBuffer, + sizeof(addressOutputBuffer)); + if (!address) + continue; + + count += 1; + temp = realloc(addresses, count * sizeof(*addresses)); + if (!temp) { + count--; + goto out; + } + addresses = temp; + addresses[count - 1] = strdup(address); + } + +out: + if(interfaceArray) + freeifaddrs(interfaceArray); + + /* Switch back to original netns */ + if (old_netns >= 0 && setns(old_netns, CLONE_NEWNET)) + SYSERROR("failed to setns"); + if (new_netns >= 0) + close(new_netns); + if (old_netns >= 0) + close(old_netns); + + /* Append NULL to the array */ + if (count) { + count++; + temp = realloc(addresses, count * sizeof(*addresses)); + if (!temp) { + int i; + for (i = 0; i < count-1; i++) + free(addresses[i]); + free(addresses); + return NULL; + } + addresses = temp; + addresses[count - 1] = NULL; + } + + return addresses; +} + static int 
lxcapi_get_config_item(struct lxc_container *c, const char *key, char *retv, int inlen) { int ret; if (!c || !c->lxc_conf) return -1; - if (lxclock(c->privlock, 0)) { + if (container_mem_lock(c)) return -1; - } ret = lxc_get_config_item(c->lxc_conf, key, retv, inlen); - lxcunlock(c->privlock); + container_mem_unlock(c); return ret; } @@ -744,111 +1307,260 @@ static int lxcapi_get_keys(struct lxc_container *c, const char *key, char *retv, */ if (!c || !c->lxc_conf) return -1; - if (lxclock(c->privlock, 0)) + if (container_mem_lock(c)) return -1; int ret = -1; if (strncmp(key, "lxc.network.", 12) == 0) ret = lxc_list_nicconfigs(c->lxc_conf, key, retv, inlen); - lxcunlock(c->privlock); + container_mem_unlock(c); return ret; } - -/* default config file - should probably come through autoconf */ -#define LXC_DEFAULT_CONFIG "/etc/lxc/default.conf" static bool lxcapi_save_config(struct lxc_container *c, const char *alt_file) { + FILE *fout; + bool ret = false, need_disklock = false; + int lret; + if (!alt_file) alt_file = c->configfile; if (!alt_file) return false; // should we write to stdout if no file is specified? - if (!c->lxc_conf) + + // If we haven't yet loaded a config, load the stock config + if (!c->lxc_conf) { if (!c->load_config(c, LXC_DEFAULT_CONFIG)) { ERROR("Error loading default configuration file %s while saving %s\n", LXC_DEFAULT_CONFIG, c->name); return false; } + } if (!create_container_dir(c)) return false; - FILE *fout = fopen(alt_file, "w"); + /* + * If we're writing to the container's config file, take the + * disk lock. Otherwise just take the memlock to protect the + * struct lxc_container while we're traversing it. 
+ */ + if (strcmp(c->configfile, alt_file) == 0) + need_disklock = true; + + if (need_disklock) + lret = container_disk_lock(c); + else + lret = container_mem_lock(c); + + if (lret) + return false; + + fout = fopen(alt_file, "w"); if (!fout) - return false; - if (lxclock(c->privlock, 0)) { - fclose(fout); - return false; - } + goto out; write_config(fout, c->lxc_conf); fclose(fout); - lxcunlock(c->privlock); - return true; + ret = true; + +out: + if (need_disklock) + container_disk_unlock(c); + else + container_mem_unlock(c); + return ret; } +static bool mod_rdep(struct lxc_container *c, bool inc) +{ + char path[MAXPATHLEN]; + int ret, v = 0; + FILE *f; + bool bret = false; + + if (container_disk_lock(c)) + return false; + ret = snprintf(path, MAXPATHLEN, "%s/%s/lxc_snapshots", c->config_path, + c->name); + if (ret < 0 || ret > MAXPATHLEN) + goto out; + f = fopen(path, "r"); + if (f) { + ret = fscanf(f, "%d", &v); + fclose(f); + if (ret != 1) { + ERROR("Corrupted file %s", path); + goto out; + } + } + v += inc ? 
1 : -1; + f = fopen(path, "w"); + if (!f) + goto out; + if (fprintf(f, "%d\n", v) < 0) { + ERROR("Error writing new snapshots value"); + fclose(f); + goto out; + } + if (fclose(f) != 0) { + SYSERROR("Error writing to or closing snapshots file"); + goto out; + } + + bret = true; + +out: + container_disk_unlock(c); + return bret; +} + +static void strip_newline(char *p) +{ + size_t len = strlen(p); + if (len < 1) + return; + if (p[len-1] == '\n') + p[len-1] = '\0'; +} + +static void mod_all_rdeps(struct lxc_container *c, bool inc) +{ + struct lxc_container *p; + char *lxcpath = NULL, *lxcname = NULL, path[MAXPATHLEN]; + size_t pathlen = 0, namelen = 0; + FILE *f; + int ret; + + ret = snprintf(path, MAXPATHLEN, "%s/%s/lxc_rdepends", + c->config_path, c->name); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("Path name too long"); + return; + } + if ((f = fopen(path, "r")) == NULL) + return; + while (getline(&lxcpath, &pathlen, f) != -1) { + if (getline(&lxcname, &namelen, f) == -1) { + ERROR("badly formatted file %s\n", path); + goto out; + } + strip_newline(lxcpath); + strip_newline(lxcname); + if ((p = lxc_container_new(lxcname, lxcpath)) == NULL) { + ERROR("Unable to find dependent container %s:%s", + lxcpath, lxcname); + continue; + } + if (!mod_rdep(p, inc)) + ERROR("Failed to increase numsnapshots for %s:%s", + lxcpath, lxcname); + lxc_container_put(p); + } +out: + if (lxcpath) free(lxcpath); + if (lxcname) free(lxcname); + fclose(f); +} + +static bool has_snapshots(struct lxc_container *c) +{ + char path[MAXPATHLEN]; + int ret, v; + FILE *f; + bool bret = false; + + ret = snprintf(path, MAXPATHLEN, "%s/%s/lxc_snapshots", c->config_path, + c->name); + if (ret < 0 || ret > MAXPATHLEN) + goto out; + f = fopen(path, "r"); + if (!f) + goto out; + ret = fscanf(f, "%d", &v); + fclose(f); + if (ret != 1) + goto out; + bret = v != 0; + +out: + return bret; +} + +// do we want the api to support --force, or leave that to the caller? 
static bool lxcapi_destroy(struct lxc_container *c) { - pid_t pid; - int ret, status; + struct bdev *r = NULL; + bool ret = false; - if (!c) + if (!c || !lxcapi_is_defined(c)) return false; - /* container is already destroyed if we don't have a config and rootfs.path is not accessible */ - if (!lxcapi_is_defined(c) && (!c->lxc_conf || !c->lxc_conf->rootfs.path || access(c->lxc_conf->rootfs.path, F_OK) != 0)) + if (container_disk_lock(c)) return false; - pid = fork(); - if (pid < 0) - return false; - if (pid == 0) { // child - ret = execlp("lxc-destroy", "lxc-destroy", "-n", c->name, "-P", c->config_path, NULL); - perror("execl"); - exit(1); + if (!is_stopped(c)) { + // we should queue some sort of error - in c->error_string? + ERROR("container %s is not stopped", c->name); + goto out; } -again: - ret = waitpid(pid, &status, 0); - if (ret == -1) { - if (errno == -EINTR) - goto again; - perror("waitpid"); - return false; - } - if (ret != pid) - goto again; - if (!WIFEXITED(status)) { // did not exit normally - // we could set an error code and string inside the - // container_struct here if we like - return false; + if (c->lxc_conf && has_snapshots(c)) { + ERROR("container %s has dependent snapshots", c->name); + goto out; } - return WEXITSTATUS(status) == 0; + if (c->lxc_conf && c->lxc_conf->rootfs.path && c->lxc_conf->rootfs.mount) + r = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL); + if (r) { + if (r->ops->destroy(r) < 0) { + bdev_put(r); + ERROR("Error destroying rootfs for %s", c->name); + goto out; + } + bdev_put(r); + } + + mod_all_rdeps(c, false); + + const char *p1 = lxcapi_get_config_path(c); + char *path = alloca(strlen(p1) + strlen(c->name) + 2); + sprintf(path, "%s/%s", p1, c->name); + if (lxc_rmdir_onedev(path) < 0) { + ERROR("Error destroying container directory for %s", c->name); + goto out; + } + ret = true; + +out: + container_disk_unlock(c); + return ret; } -static bool lxcapi_set_config_item(struct lxc_container *c, const 
char *key, const char *v) +static bool set_config_item_locked(struct lxc_container *c, const char *key, const char *v) { - int ret; - bool b = false; struct lxc_config_t *config; - if (!c) - return false; - - if (lxclock(c->privlock, 0)) - return false; - if (!c->lxc_conf) c->lxc_conf = lxc_conf_init(); if (!c->lxc_conf) - goto err; + return false; config = lxc_getconfig(key); if (!config) - goto err; - ret = config->cb(key, v, c->lxc_conf); - if (!ret) - b = true; + return false; + return (0 == config->cb(key, v, c->lxc_conf)); +} -err: - lxcunlock(c->privlock); +static bool lxcapi_set_config_item(struct lxc_container *c, const char *key, const char *v) +{ + bool b = false; + + if (!c) + return false; + + if (container_mem_lock(c)) + return false; + + b = set_config_item_locked(c, key, v); + + container_mem_unlock(c); return b; } @@ -909,7 +1621,7 @@ static bool lxcapi_set_config_path(struct lxc_container *c, const char *path) if (!c) return b; - if (lxclock(c->privlock, 0)) + if (container_mem_lock(c)) return b; p = strdup(path); @@ -935,7 +1647,7 @@ static bool lxcapi_set_config_path(struct lxc_container *c, const char *path) err: if (oldpath) free(oldpath); - lxcunlock(c->privlock); + container_mem_unlock(c); return b; } @@ -943,42 +1655,38 @@ err: static bool lxcapi_set_cgroup_item(struct lxc_container *c, const char *subsys, const char *value) { int ret; - bool b = false; if (!c) return false; - if (lxclock(c->privlock, 0)) + if (is_stopped(c)) return false; - if (is_stopped_nolock(c)) - goto err; + if (container_disk_lock(c)) + return false; ret = lxc_cgroup_set(c->name, subsys, value, c->config_path); - if (!ret) - b = true; -err: - lxcunlock(c->privlock); - return b; + + container_disk_unlock(c); + return ret == 0; } static int lxcapi_get_cgroup_item(struct lxc_container *c, const char *subsys, char *retv, int inlen) { - int ret = -1; + int ret; if (!c || !c->lxc_conf) return -1; - if (lxclock(c->privlock, 0)) + if (is_stopped(c)) return -1; - if 
(is_stopped_nolock(c)) - goto out; + if (container_disk_lock(c)) + return -1; ret = lxc_cgroup_get(c->name, subsys, retv, inlen, c->config_path); -out: - lxcunlock(c->privlock); + container_disk_unlock(c); return ret; } @@ -987,11 +1695,527 @@ const char *lxc_get_default_config_path(void) return default_lxc_path(); } +const char *lxc_get_default_lvm_vg(void) +{ + return default_lvm_vg(); +} + +const char *lxc_get_default_zfs_root(void) +{ + return default_zfs_root(); +} + const char *lxc_get_version(void) { return lxc_version(); } +static int copy_file(char *old, char *new) +{ + int in, out; + ssize_t len, ret; + char buf[8096]; + struct stat sbuf; + + if (file_exists(new)) { + ERROR("copy destination %s exists", new); + return -1; + } + ret = stat(old, &sbuf); + if (ret < 0) { + INFO("Error stat'ing %s", old); + return -1; + } + + in = open(old, O_RDONLY); + if (in < 0) { + SYSERROR("Error opening original file %s", old); + return -1; + } + out = open(new, O_CREAT | O_EXCL | O_WRONLY, 0644); + if (out < 0) { + SYSERROR("Error opening new file %s", new); + close(in); + return -1; + } + + while (1) { + len = read(in, buf, 8096); + if (len < 0) { + SYSERROR("Error reading old file %s", old); + goto err; + } + if (len == 0) + break; + ret = write(out, buf, len); + if (ret < len) { // should we retry? 
+ SYSERROR("Error: write to new file %s was interrupted", new); + goto err; + } + } + close(in); + close(out); + + // we set mode, but not owner/group + ret = chmod(new, sbuf.st_mode); + if (ret) { + SYSERROR("Error setting mode on %s", new); + return -1; + } + + return 0; + +err: + close(in); + close(out); + return -1; +} + +static int copyhooks(struct lxc_container *oldc, struct lxc_container *c) +{ + int i; + int ret; + struct lxc_list *it; + + for (i=0; ilxc_conf->hooks[i]) { + char *hookname = it->elem; + char *fname = strrchr(hookname, '/'); + char tmppath[MAXPATHLEN]; + if (!fname) // relative path - we don't support, but maybe we should + return 0; + // copy the script, and change the entry in confile + ret = snprintf(tmppath, MAXPATHLEN, "%s/%s/%s", + c->config_path, c->name, fname+1); + if (ret < 0 || ret >= MAXPATHLEN) + return -1; + ret = copy_file(it->elem, tmppath); + if (ret < 0) + return -1; + free(it->elem); + it->elem = strdup(tmppath); + if (!it->elem) { + ERROR("out of memory copying hook path"); + return -1; + } + } + } + + c->save_config(c, NULL); + return 0; +} + +static void new_hwaddr(char *hwaddr) +{ + FILE *f = fopen("/dev/urandom", "r"); + if (f) { + unsigned int seed; + int ret = fread(&seed, sizeof(seed), 1, f); + if (ret != 1) + seed = time(NULL); + fclose(f); + srand(seed); + } else + srand(time(NULL)); + snprintf(hwaddr, 18, "00:16:3e:%02x:%02x:%02x", + rand() % 255, rand() % 255, rand() % 255); +} + +static void network_new_hwaddrs(struct lxc_container *c) +{ + struct lxc_list *it; + + lxc_list_for_each(it, &c->lxc_conf->network) { + struct lxc_netdev *n = it->elem; + if (n->hwaddr) + new_hwaddr(n->hwaddr); + } +} + +static int copy_fstab(struct lxc_container *oldc, struct lxc_container *c) +{ + char newpath[MAXPATHLEN]; + char *oldpath = oldc->lxc_conf->fstab; + int ret; + + if (!oldpath) + return 0; + + char *p = strrchr(oldpath, '/'); + if (!p) + return -1; + ret = snprintf(newpath, MAXPATHLEN, "%s/%s%s", + c->config_path, 
c->name, p); + if (ret < 0 || ret >= MAXPATHLEN) { + ERROR("error printing new path for %s", oldpath); + return -1; + } + if (file_exists(newpath)) { + ERROR("error: fstab file %s exists", newpath); + return -1; + } + + if (copy_file(oldpath, newpath) < 0) { + ERROR("error: copying %s to %s", oldpath, newpath); + return -1; + } + free(c->lxc_conf->fstab); + c->lxc_conf->fstab = strdup(newpath); + if (!c->lxc_conf->fstab) { + ERROR("error: allocating pathname"); + return -1; + } + + return 0; +} + +static void copy_rdepends(struct lxc_container *c, struct lxc_container *c0) +{ + char path0[MAXPATHLEN], path1[MAXPATHLEN]; + int ret; + + ret = snprintf(path0, MAXPATHLEN, "%s/%s/lxc_rdepends", c0->config_path, + c0->name); + if (ret < 0 || ret >= MAXPATHLEN) { + WARN("Error copying reverse dependencies"); + return; + } + ret = snprintf(path1, MAXPATHLEN, "%s/%s/lxc_rdepends", c->config_path, + c->name); + if (ret < 0 || ret >= MAXPATHLEN) { + WARN("Error copying reverse dependencies"); + return; + } + if (copy_file(path0, path1) < 0) { + INFO("Error copying reverse dependencies"); + return; + } +} + +static bool add_rdepends(struct lxc_container *c, struct lxc_container *c0) +{ + int ret; + char path[MAXPATHLEN]; + FILE *f; + bool bret; + + ret = snprintf(path, MAXPATHLEN, "%s/%s/lxc_rdepends", c->config_path, + c->name); + if (ret < 0 || ret >= MAXPATHLEN) + return false; + f = fopen(path, "a"); + if (!f) + return false; + bret = true; + // if anything goes wrong, just return an error + if (fprintf(f, "%s\n%s\n", c0->config_path, c0->name) < 0) + bret = false; + if (fclose(f) != 0) + bret = false; + return bret; +} + +static int copy_storage(struct lxc_container *c0, struct lxc_container *c, + const char *newtype, int flags, const char *bdevdata, unsigned long newsize) +{ + struct bdev *bdev; + int need_rdep; + + bdev = bdev_copy(c0->lxc_conf->rootfs.path, c0->name, c->name, + c0->config_path, c->config_path, newtype, !!(flags & LXC_CLONE_SNAPSHOT), + bdevdata, 
newsize, &need_rdep); + if (!bdev) { + ERROR("Error copying storage"); + return -1; + } + free(c->lxc_conf->rootfs.path); + c->lxc_conf->rootfs.path = strdup(bdev->src); + bdev_put(bdev); + if (!c->lxc_conf->rootfs.path) { + ERROR("Out of memory while setting storage path"); + return -1; + } + if (flags & LXC_CLONE_SNAPSHOT) + copy_rdepends(c, c0); + if (need_rdep) { + if (!add_rdepends(c, c0)) + WARN("Error adding reverse dependency from %s to %s", + c->name, c0->name); + } + + mod_all_rdeps(c, true); + + return 0; +} + +static int clone_update_rootfs(struct lxc_container *c0, + struct lxc_container *c, int flags, + char **hookargs) +{ + int ret = -1; + char path[MAXPATHLEN]; + struct bdev *bdev; + FILE *fout; + pid_t pid; + struct lxc_conf *conf = c->lxc_conf; + + /* update hostname in rootfs */ + /* we're going to mount, so run in a clean namespace to simplify cleanup */ + + pid = fork(); + if (pid < 0) + return -1; + if (pid > 0) + return wait_for_pid(pid); + + if (unshare(CLONE_NEWNS) < 0) { + ERROR("error unsharing mounts"); + exit(1); + } + bdev = bdev_init(c->lxc_conf->rootfs.path, c->lxc_conf->rootfs.mount, NULL); + if (!bdev) + exit(1); + if (bdev->ops->mount(bdev) < 0) + exit(1); + + if (!lxc_list_empty(&conf->hooks[LXCHOOK_CLONE])) { + /* Start of environment variable setup for hooks */ + if (setenv("LXC_SRC_NAME", c0->name, 1)) { + SYSERROR("failed to set environment variable for source container name"); + } + if (setenv("LXC_NAME", c->name, 1)) { + SYSERROR("failed to set environment variable for container name"); + } + if (setenv("LXC_CONFIG_FILE", conf->rcfile, 1)) { + SYSERROR("failed to set environment variable for config path"); + } + if (setenv("LXC_ROOTFS_MOUNT", conf->rootfs.mount, 1)) { + SYSERROR("failed to set environment variable for rootfs mount"); + } + if (setenv("LXC_ROOTFS_PATH", conf->rootfs.path, 1)) { + SYSERROR("failed to set environment variable for rootfs mount"); + } + + if (run_lxc_hooks(c->name, "clone", conf, 
c->get_config_path(c), hookargs)) { + ERROR("Error executing clone hook for %s", c->name); + exit(1); + } + } + + if (!(flags & LXC_CLONE_KEEPNAME)) { + ret = snprintf(path, MAXPATHLEN, "%s/etc/hostname", bdev->dest); + if (ret < 0 || ret >= MAXPATHLEN) + exit(1); + if (!file_exists(path)) + exit(0); + if (!(fout = fopen(path, "w"))) { + SYSERROR("unable to open %s: ignoring\n", path); + exit(0); + } + if (fprintf(fout, "%s", c->name) < 0) + exit(1); + if (fclose(fout) < 0) + exit(1); + } + exit(0); +} + +/* + * We want to support: +sudo lxc-clone -o o1 -n n1 -s -L|-fssize fssize -v|--vgname vgname \ + -p|--lvprefix lvprefix -t|--fstype fstype -B backingstore + +-s [ implies overlayfs] +-s -B overlayfs +-s -B aufs + +only rootfs gets converted (copied/snapshotted) on clone. +*/ + +static int create_file_dirname(char *path) +{ + char *p = strrchr(path, '/'); + int ret; + + if (!p) + return -1; + *p = '\0'; + ret = mkdir(path, 0755); + if (ret && errno != EEXIST) + SYSERROR("creating container path %s\n", path); + *p = '/'; + return ret; +} + +struct lxc_container *lxcapi_clone(struct lxc_container *c, const char *newname, + const char *lxcpath, int flags, + const char *bdevtype, const char *bdevdata, unsigned long newsize, + char **hookargs) +{ + struct lxc_container *c2 = NULL; + char newpath[MAXPATHLEN]; + int ret, storage_copied = 0; + const char *n, *l; + FILE *fout; + + if (!c || !c->is_defined(c)) + return NULL; + + if (container_mem_lock(c)) + return NULL; + + if (!is_stopped(c)) { + ERROR("error: Original container (%s) is running", c->name); + goto out; + } + + // Make sure the container doesn't yet exist. + n = newname ? newname : c->name; + l = lxcpath ? 
lxcpath : c->get_config_path(c); + ret = snprintf(newpath, MAXPATHLEN, "%s/%s/config", l, n); + if (ret < 0 || ret >= MAXPATHLEN) { + SYSERROR("clone: failed making config pathname"); + goto out; + } + if (file_exists(newpath)) { + ERROR("error: clone: %s exists", newpath); + goto out; + } + + ret = create_file_dirname(newpath); + if (ret < 0 && errno != EEXIST) { + ERROR("Error creating container dir for %s", newpath); + goto out; + } + + // copy the configuration, tweak it as needed, + fout = fopen(newpath, "w"); + if (!fout) { + SYSERROR("open %s", newpath); + goto out; + } + write_config(fout, c->lxc_conf); + fclose(fout); + + sprintf(newpath, "%s/%s/rootfs", l, n); + if (mkdir(newpath, 0755) < 0) { + SYSERROR("error creating %s", newpath); + goto out; + } + + c2 = lxc_container_new(n, l); + if (!c2) { + ERROR("clone: failed to create new container (%s %s)", n, l); + goto out; + } + + // update utsname + if (!set_config_item_locked(c2, "lxc.utsname", newname)) { + ERROR("Error setting new hostname"); + goto out; + } + + + // copy hooks if requested + if (flags & LXC_CLONE_COPYHOOKS) { + ret = copyhooks(c, c2); + if (ret < 0) { + ERROR("error copying hooks"); + goto out; + } + } + + if (copy_fstab(c, c2) < 0) { + ERROR("error copying fstab"); + goto out; + } + + // update macaddrs + if (!(flags & LXC_CLONE_KEEPMACADDR)) + network_new_hwaddrs(c2); + + // copy/snapshot rootfs's + ret = copy_storage(c, c2, bdevtype, flags, bdevdata, newsize); + if (ret < 0) + goto out; + + // We've now successfully created c2's storage, so clear it out if we + // fail after this + storage_copied = 1; + + if (!c2->save_config(c2, NULL)) + goto out; + + if (clone_update_rootfs(c, c2, flags, hookargs) < 0) + goto out; + + // TODO: update c's lxc.snapshot = count + container_mem_unlock(c); + return c2; + +out: + container_mem_unlock(c); + if (c2) { + if (!storage_copied) + c2->lxc_conf->rootfs.path = NULL; + c2->destroy(c2); + lxc_container_put(c2); + } + + return NULL; +} + +static 
int lxcapi_attach(struct lxc_container *c, lxc_attach_exec_t exec_function, void *exec_payload, lxc_attach_options_t *options, pid_t *attached_process) +{ + if (!c) + return -1; + + return lxc_attach(c->name, c->config_path, exec_function, exec_payload, options, attached_process); +} + +static int lxcapi_attach_run_wait(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char * const argv[]) +{ + lxc_attach_command_t command; + pid_t pid; + int r; + + if (!c) + return -1; + + command.program = (char*)program; + command.argv = (char**)argv; + r = lxc_attach(c->name, c->config_path, lxc_attach_run_command, &command, options, &pid); + if (r < 0) { + ERROR("ups"); + return r; + } + return lxc_wait_for_pid_status(pid); +} + +static int lxcapi_attach_run_waitl(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char *arg, ...) +{ + va_list ap; + const char **argv; + int ret; + + if (!c) + return -1; + + va_start(ap, arg); + argv = lxc_va_arg_list_to_argv_const(ap, 1); + va_end(ap); + + if (!argv) { + ERROR("Memory allocation error."); + return -1; + } + argv[0] = arg; + + ret = lxcapi_attach_run_wait(c, options, program, (const char * const *)argv); + free((void*)argv); + return ret; +} + struct lxc_container *lxc_container_new(const char *name, const char *configpath) { struct lxc_container *c; @@ -1021,14 +2245,12 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath strcpy(c->name, name); c->numthreads = 1; - c->slock = lxc_newlock(name); - if (!c->slock) { + if (!(c->slock = lxc_newlock(c->config_path, name))) { fprintf(stderr, "failed to create lock\n"); goto err; } - c->privlock = lxc_newlock(NULL); - if (!c->privlock) { + if (!(c->privlock = lxc_newlock(NULL, NULL))) { fprintf(stderr, "failed to alloc privlock\n"); goto err; } @@ -1041,12 +2263,21 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath if (file_exists(c->configfile)) 
lxcapi_load_config(c, NULL); + if (ongoing_create(c) == 2) { + ERROR("Error: %s creation was not completed", c->name); + lxcapi_destroy(c); + lxc_conf_free(c->lxc_conf); + c->lxc_conf = NULL; + } + // assign the member functions c->is_defined = lxcapi_is_defined; c->state = lxcapi_state; c->is_running = lxcapi_is_running; c->freeze = lxcapi_freeze; c->unfreeze = lxcapi_unfreeze; + c->console = lxcapi_console; + c->console_getfd = lxcapi_console_getfd; c->init_pid = lxcapi_init_pid; c->load_config = lxcapi_load_config; c->want_daemonize = lxcapi_want_daemonize; @@ -1062,15 +2293,21 @@ struct lxc_container *lxc_container_new(const char *name, const char *configpath c->create = lxcapi_create; c->createl = lxcapi_createl; c->shutdown = lxcapi_shutdown; + c->reboot = lxcapi_reboot; c->clear_config_item = lxcapi_clear_config_item; c->get_config_item = lxcapi_get_config_item; c->get_cgroup_item = lxcapi_get_cgroup_item; c->set_cgroup_item = lxcapi_set_cgroup_item; c->get_config_path = lxcapi_get_config_path; c->set_config_path = lxcapi_set_config_path; + c->clone = lxcapi_clone; + c->get_ips = lxcapi_get_ips; + c->attach = lxcapi_attach; + c->attach_run_wait = lxcapi_attach_run_wait; + c->attach_run_waitl = lxcapi_attach_run_waitl; /* we'll allow the caller to update these later */ - if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0)) { + if (lxc_log_init(NULL, "none", NULL, "lxc_container", 0, c->config_path)) { fprintf(stderr, "failed to open log\n"); goto err; } diff --git a/src/lxc/lxccontainer.h b/src/lxc/lxccontainer.h index de9854c90..ad6afa1a7 100644 --- a/src/lxc/lxccontainer.h +++ b/src/lxc/lxccontainer.h @@ -1,15 +1,49 @@ +/* liblxcapi + * + * Copyright © 2012 Serge Hallyn . + * Copyright © 2012 Canonical Ltd. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef __LXC_CONTAINER_H +#define __LXC_CONTAINER_H #include "lxclock.h" +#include "attach_options.h" #include #include #include +#define LXC_CLONE_KEEPNAME (1 << 0) +#define LXC_CLONE_COPYHOOKS (1 << 1) +#define LXC_CLONE_KEEPMACADDR (1 << 2) +#define LXC_CLONE_SNAPSHOT (1 << 3) +#define LXC_CLONE_MAXFLAGS (1 << 4) + +#define LXC_CREATE_QUIET (1 << 0) +#define LXC_CREATE_MAXFLAGS (1 << 1) + +struct bdev_specs; + struct lxc_container { // private fields char *name; char *configfile; - sem_t *slock; - sem_t *privlock; + struct lxc_lock *slock; + struct lxc_lock *privlock; int numthreads; /* protected by privlock. */ struct lxc_conf *lxc_conf; // maybe we'll just want the whole lxc_handler? 
@@ -40,8 +74,12 @@ struct lxc_container { bool (*set_config_item)(struct lxc_container *c, const char *key, const char *value); bool (*destroy)(struct lxc_container *c); bool (*save_config)(struct lxc_container *c, const char *alt_file); - bool (*create)(struct lxc_container *c, char *t, char *const argv[]); - bool (*createl)(struct lxc_container *c, char *t, ...); + bool (*create)(struct lxc_container *c, const char *t, const char *bdevtype, + struct bdev_specs *specs, int flags, char *const argv[]); + bool (*createl)(struct lxc_container *c, const char *t, const char *bdevtype, + struct bdev_specs *specs, int flags, ...); + /* send SIGINT to ask container to reboot */ + bool (*reboot)(struct lxc_container *c); /* send SIGPWR. if timeout is not 0 or -1, do a hard stop after timeout seconds */ bool (*shutdown)(struct lxc_container *c, int timeout); /* clear all network or capability items in the in-memory configuration */ @@ -50,6 +88,7 @@ struct lxc_container { * the length which was our would be printed. */ int (*get_config_item)(struct lxc_container *c, const char *key, char *retv, int inlen); int (*get_keys)(struct lxc_container *c, const char *key, char *retv, int inlen); + char** (*get_ips)(struct lxc_container *c, char* interface, char* family, int scope); /* * get_cgroup_item returns the number of bytes read, or an error (<0). * If retv NULL or inlen 0 is passed in, then the length of the cgroup @@ -72,14 +111,72 @@ struct lxc_container { const char *(*get_config_path)(struct lxc_container *c); bool (*set_config_path)(struct lxc_container *c, const char *path); -#if 0 - bool (*commit_cgroups)(struct lxc_container *c); - bool (*reread_cgroups)(struct lxc_container *c); - // question with clone: how do we handle non-standard config file in orig? 
- struct lxc_container (*clone)(struct container *c); - int (*ns_attach)(struct lxc_container *c, int ns_mask); - // we'll need some plumbing to support lxc-console -#endif + /* + * @c: the original container + * @newname: new name for the container. If NULL, the same name is used, and + * a new lxcpath MUST be specified. + * @lxcpath: lxcpath in which to create the new container. If NULL, then the + * original container's lxcpath will be used. (Shoudl we use the default + * instead?) + * @flags: additional flags to modify cloning behavior. + * LXC_CLONE_KEEPNAME: don't edit the rootfs to change the hostname. + * LXC_CLONE_COPYHOOKS: copy all hooks into the container dir + * LXC_CLONE_KEEPMACADDR: don't change the mac address on network interfaces. + * LXC_CLONE_SNAPSHOT: snapshot the original filesystem(s). If @devtype was not + * specified, then do so with the native bdevtype if possible, else use an + * overlayfs. + * @bdevtype: optionally force the cloned bdevtype to a specified plugin. By + * default the original is used (subject to snapshot requirements). + * @bdevdata: information about how to create the new storage (i.e. fstype and + * fsdata) + * @newsize: in case of a block device backing store, an optional size. If 0, + * then the original backing store's size will be used if possible. Note this + * only applies to the rootfs. For any other filesystems, the original size + * will be duplicated. 
+ * @hookargs: additional arguments to pass to the clone hook script + */ + struct lxc_container *(*clone)(struct lxc_container *c, const char *newname, + const char *lxcpath, int flags, const char *bdevtype, + const char *bdevdata, unsigned long newsize, char **hookargs); + + /* lxcapi_console_getfd: allocate a console tty from container @c + * + * @c : the running container + * @ttynum : in : tty number to attempt to allocate or -1 to + * allocate the first available tty + * out: the tty number that was allocated + * @masterfd : out: fd refering to the master side of pty + * + * Returns "ttyfd" on success, -1 on failure. The returned "ttyfd" is + * used to keep the tty allocated. The caller should close "ttyfd" to + * indicate that it is done with the allocated console so that it can + * be allocated by another caller. + */ + int (*console_getfd)(struct lxc_container *c, int *ttynum, int *masterfd); + + /* lxcapi_console: allocate and run a console tty from container @c + * + * @c : the running container + * @ttynum : tty number to attempt to allocate, -1 to + * allocate the first available tty, or 0 to allocate + * the console + * @stdinfd : fd to read input from + * @stdoutfd : fd to write output to + * @stderrfd : fd to write error output to + * @escape : the escape character (1 == 'a', 2 == 'b', ...) + * + * Returns 0 on success, -1 on failure. This function will not return + * until the console has been exited by the user. 
+ */ + int (*console)(struct lxc_container *c, int ttynum, + int stdinfd, int stdoutfd, int stderrfd, int escape); + + /* create subprocess and attach it to the container, run exec_function inside */ + int (*attach)(struct lxc_container *c, lxc_attach_exec_t exec_function, void *exec_payload, lxc_attach_options_t *options, pid_t *attached_process); + + /* run program in container, wait for it to exit */ + int (*attach_run_wait)(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char * const argv[]); + int (*attach_run_waitl)(struct lxc_container *c, lxc_attach_options_t *options, const char *program, const char *arg, ...); }; struct lxc_container *lxc_container_new(const char *name, const char *configpath); @@ -87,9 +184,12 @@ int lxc_container_get(struct lxc_container *c); int lxc_container_put(struct lxc_container *c); int lxc_get_wait_states(const char **states); const char *lxc_get_default_config_path(void); +const char *lxc_get_default_lvm_vg(void); +const char *lxc_get_default_zfs_root(void); const char *lxc_get_version(void); #if 0 char ** lxc_get_valid_keys(); char ** lxc_get_valid_values(char *key); #endif +#endif diff --git a/src/lxc/lxclock.c b/src/lxc/lxclock.c index bbc39efa4..79ebf84a7 100644 --- a/src/lxc/lxclock.c +++ b/src/lxc/lxclock.c @@ -3,39 +3,91 @@ * Copyright © 2012 Serge Hallyn . * Copyright © 2012 Canonical Ltd. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include #include "lxclock.h" #include #include +#include +#include +#include +#define _GNU_SOURCE +#include +#include +#include +#include #define OFLAG (O_CREAT | O_RDWR) #define SEMMODE 0660 #define SEMVALUE 1 #define SEMVALUE_LOCKED 0 -#define LXCLOCK_PREFIX "/lxcapi." 
+lxc_log_define(lxc_lock, lxc); -static char *lxclock_name(const char *container) +pthread_mutex_t thread_mutex = PTHREAD_MUTEX_INITIALIZER; + +static char *lxclock_name(const char *p, const char *n) { int ret; - int len = strlen(container) + strlen(LXCLOCK_PREFIX) + 1; - char *dest = malloc(len); - if (!dest) + int len; + char *dest; + const char *rundir; + struct stat sb; + + /* lockfile will be: + * "/run" + "/lock/lxc/$lxcpath/$lxcname + '\0' if root + * or + * $XDG_RUNTIME_DIR + "/lock/lxc/$lxcpath/$lxcname + '\0' if non-root + */ + + /* length of "/lock/lxc/" + $lxcpath + "/" + $lxcname + '\0' */ + len = strlen("/lock/lxc/") + strlen(n) + strlen(p) + 2; + rundir = getenv("XDG_RUNTIME_DIR"); + if (geteuid() == 0 || rundir == NULL) + rundir = "/run"; + + len += strlen(rundir); + + if ((dest = malloc(len)) == NULL) return NULL; - ret = snprintf(dest, len, "%s%s", LXCLOCK_PREFIX, container); + + ret = snprintf(dest, len, "%s/lock/lxc/%s", rundir, p); + if (ret < 0 || ret >= len) { + free(dest); + return NULL; + } + process_lock(); + ret = mkdir_p(dest, 0755); + process_unlock(); + if (ret < 0) { + free(dest); + return NULL; + } + + ret = stat(p, &sb); + if (ret == 0) { + // best effort. 
If this fails, ignore it + if (chown(dest, sb.st_uid, sb.st_gid) < 0) + ERROR("Failed ot set owner for lockdir %s\n", dest); + if (chmod(dest, sb.st_mode) < 0) + ERROR("Failed to set mode for lockdir %s\n", dest); + } + + ret = snprintf(dest, len, "%s/lock/lxc/%s/%s", rundir, p, n); if (ret < 0 || ret >= len) { free(dest); return NULL; @@ -43,64 +95,224 @@ static char *lxclock_name(const char *container) return dest; } -static void lxcfree_name(char *name) -{ - if (name) - free(name); -} - static sem_t *lxc_new_unnamed_sem(void) { - sem_t *s; - int ret; - - s = malloc(sizeof(*s)); - if (!s) - return NULL; - ret = sem_init(s, 0, 1); - if (ret) - return NULL; - return s; -} - -sem_t *lxc_newlock(const char *name) -{ - char *lname; - sem_t *lock; - - if (!name) - return lxc_new_unnamed_sem(); - - lname = lxclock_name(name); - if (!lname) - return NULL; - lock = sem_open(lname, OFLAG, SEMMODE, SEMVALUE); - lxcfree_name(lname); - if (lock == SEM_FAILED) - return NULL; - return lock; -} - -int lxclock(sem_t *sem, int timeout) -{ + sem_t *s; int ret; - if (!timeout) { - ret = sem_wait(sem); - } else { - struct timespec ts; - if (clock_gettime(CLOCK_REALTIME, &ts) == -1) - return -2; - ts.tv_sec += timeout; - ret = sem_timedwait(sem, &ts); + s = malloc(sizeof(*s)); + if (!s) + return NULL; + ret = sem_init(s, 0, 1); + if (ret) { + free(s); + return NULL; + } + return s; +} + +struct lxc_lock *lxc_newlock(const char *lxcpath, const char *name) +{ + struct lxc_lock *l; + + l = malloc(sizeof(*l)); + if (!l) + goto out; + + if (!name) { + l->type = LXC_LOCK_ANON_SEM; + l->u.sem = lxc_new_unnamed_sem(); + if (!l->u.sem) { + free(l); + l = NULL; + } + goto out; } + l->type = LXC_LOCK_FLOCK; + l->u.f.fname = lxclock_name(lxcpath, name); + if (!l->u.f.fname) { + free(l); + l = NULL; + goto out; + } + l->u.f.fd = -1; + +out: + return l; +} + +int lxclock(struct lxc_lock *l, int timeout) +{ + int ret = -1, saved_errno = errno; + struct flock lk; + + switch(l->type) { + case 
LXC_LOCK_ANON_SEM: + if (!timeout) { + ret = sem_wait(l->u.sem); + if (ret == -1) + saved_errno = errno; + } else { + struct timespec ts; + if (clock_gettime(CLOCK_REALTIME, &ts) == -1) { + ret = -2; + goto out; + } + ts.tv_sec += timeout; + ret = sem_timedwait(l->u.sem, &ts); + if (ret == -1) + saved_errno = errno; + } + break; + case LXC_LOCK_FLOCK: + ret = -2; + if (timeout) { + ERROR("Error: timeout not supported with flock"); + ret = -2; + goto out; + } + if (!l->u.f.fname) { + ERROR("Error: filename not set for flock"); + ret = -2; + goto out; + } + process_lock(); + if (l->u.f.fd == -1) { + l->u.f.fd = open(l->u.f.fname, O_RDWR|O_CREAT, + S_IWUSR | S_IRUSR); + if (l->u.f.fd == -1) { + process_unlock(); + ERROR("Error opening %s", l->u.f.fname); + goto out; + } + } + lk.l_type = F_WRLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; + ret = fcntl(l->u.f.fd, F_SETLKW, &lk); + process_unlock(); + if (ret == -1) + saved_errno = errno; + break; + } + +out: + errno = saved_errno; return ret; } -int lxcunlock(sem_t *sem) +int lxcunlock(struct lxc_lock *l) { - if (!sem) - return -2; - return sem_post(sem); + int ret = 0, saved_errno = errno; + struct flock lk; + + switch(l->type) { + case LXC_LOCK_ANON_SEM: + if (!l->u.sem) + ret = -2; + else { + ret = sem_post(l->u.sem); + saved_errno = errno; + } + break; + case LXC_LOCK_FLOCK: + process_lock(); + if (l->u.f.fd != -1) { + lk.l_type = F_UNLCK; + lk.l_whence = SEEK_SET; + lk.l_start = 0; + lk.l_len = 0; + ret = fcntl(l->u.f.fd, F_SETLK, &lk); + if (ret < 0) + saved_errno = errno; + close(l->u.f.fd); + l->u.f.fd = -1; + } else + ret = -2; + process_unlock(); + break; + } + + errno = saved_errno; + return ret; +} + +/* + * lxc_putlock() is only called when a container_new() fails, + * or during container_put(), which is already guaranteed to + * only be done by one task. + * So the only exclusion we need to provide here is for regular + * thread safety (i.e. file descriptor table changes). 
+ */ +void lxc_putlock(struct lxc_lock *l) +{ + if (!l) + return; + switch(l->type) { + case LXC_LOCK_ANON_SEM: + if (l->u.sem) { + sem_close(l->u.sem); + free(l->u.sem); + l->u.sem = NULL; + } + break; + case LXC_LOCK_FLOCK: + process_lock(); + if (l->u.f.fd != -1) { + close(l->u.f.fd); + l->u.f.fd = -1; + } + process_unlock(); + if (l->u.f.fname) { + free(l->u.f.fname); + l->u.f.fname = NULL; + } + break; + } + free(l); +} + +int process_lock(void) +{ + int ret; + ret = pthread_mutex_lock(&thread_mutex); + if (ret != 0) + ERROR("pthread_mutex_lock returned:%d %s", ret, strerror(ret)); + return ret; +} + +void process_unlock(void) +{ + pthread_mutex_unlock(&thread_mutex); +} + +int container_mem_lock(struct lxc_container *c) +{ + return lxclock(c->privlock, 0); +} + +void container_mem_unlock(struct lxc_container *c) +{ + lxcunlock(c->privlock); +} + +int container_disk_lock(struct lxc_container *c) +{ + int ret; + + if ((ret = lxclock(c->privlock, 0))) + return ret; + if ((ret = lxclock(c->slock, 0))) { + lxcunlock(c->privlock); + return ret; + } + return 0; +} + +void container_disk_unlock(struct lxc_container *c) +{ + lxcunlock(c->slock); + lxcunlock(c->privlock); } diff --git a/src/lxc/lxclock.h b/src/lxc/lxclock.h index 1226c22f0..fae7e4d97 100644 --- a/src/lxc/lxclock.h +++ b/src/lxc/lxclock.h @@ -1,61 +1,95 @@ +#ifndef __LXCLOCK_H +#define __LXCLOCK_H /* liblxcapi * * Copyright © 2012 Serge Hallyn . * Copyright © 2012 Canonical Ltd. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include /* For O_* constants */ #include /* For mode constants */ +#include #include #include #include +#define LXC_LOCK_ANON_SEM 1 +#define LXC_LOCK_FLOCK 2 +struct lxc_lock { + short type; + union { + sem_t *sem; // an anonymous semaphore + struct { + int fd; // fd on which a lock is held (if not -1) + char *fname; + } f; + } u; +}; + /* - * lxc_newlock: + * lxc_newlock: Create a new (unlocked) lock. + * * if name is not given, create an unnamed semaphore. We use these * to protect against racing threads. * Note that an unnamed sem was malloced by us and needs to be freed. * - * If name is given, it is prepended with '/lxcapi.', and used as the - * name for a system-wide (well, ipcns-wide) semaphore. We use that - * to protect the containers as represented on disk. - * A named sem should not be freed. - * - * XXX TODO - * We should probably introduce a lxclock_close() which detecs the type - * of lock and calls sem_close() or sem_destroy()+free() not as appropriate. - * For now, it is up to the caller to do so. 
- * * sem is initialized to value of 1 + * A sem_t * which can be passed to lxclock() and lxcunlock() + * will be placed in l->u.sem * - * return NULL on failure, else a sem_t * which can be passed to - * lxclock() and lxcunlock(). + * If lxcpath and name are given (both must be given if either is + * given) then a lockfile is created, $lxcpath/$lxcname/locks/$name. + * We use that to protect the containers as represented on disk. + * lxc_newlock() for the named lock only allocates the pathname in + * memory so we can quickly open+lock it at lxclock. + * l->u.f.fname will contain the malloc'ed name (which must be + * freed when the container is freed), and u.f.fd = -1. + * + * return lxclock on success, NULL on failure. */ -extern sem_t *lxc_newlock(const char *name); +extern struct lxc_lock *lxc_newlock(const char *lxcpath, const char *name); /* * lxclock: take an existing lock. If timeout is 0, wait * indefinately. Otherwise use given timeout. * return 0 if we got the lock, -2 on failure to set timeout, or -1 * otherwise in which case errno will be set by sem_wait()). + * + * Note that timeout is (currently?) only supported for privlock, not + * for slock. Since currently there is not a single use of the timeout + * (except in the test case) I may remove the support for it in sem as + * well. */ -extern int lxclock(sem_t *sem, int timeout); +extern int lxclock(struct lxc_lock *lock, int timeout); /* - * lxcunlock: unlock given sem. Return 0 on success. Otherwise returns - * -1 and sem_post will leave errno set. + * lxcunlock: unlock given sem. Return 0 on success, or -2 if we did not + * have the lock. Otherwise returns -1 with errno saved from flock + * or sem_post function. 
*/ -extern int lxcunlock(sem_t *lock); +extern int lxcunlock(struct lxc_lock *lock); + +extern void lxc_putlock(struct lxc_lock *l); + +extern int process_lock(void); +extern void process_unlock(void); +struct lxc_container; +extern int container_mem_lock(struct lxc_container *c); +extern void container_mem_unlock(struct lxc_container *c); +extern int container_disk_lock(struct lxc_container *c); +extern void container_disk_unlock(struct lxc_container *c); +#endif diff --git a/src/lxc/lxcseccomp.h b/src/lxc/lxcseccomp.h index 4f146dd2b..2f64c05dd 100644 --- a/src/lxc/lxcseccomp.h +++ b/src/lxc/lxcseccomp.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _lxc_seccomp_h diff --git a/src/lxc/lxcutmp.c b/src/lxc/lxcutmp.c index fd261b518..ee51f875c 100644 --- a/src/lxc/lxcutmp.c +++ b/src/lxc/lxcutmp.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "config.h" @@ -283,7 +283,7 @@ static int utmp_get_ntasks(struct lxc_handler *handler) { int ntasks; - ntasks = lxc_cgroup_nrtasks(handler->cgroup); + ntasks = lxc_cgroup_nrtasks(handler); if (ntasks < 0) { ERROR("failed to get the number of tasks"); diff --git a/src/lxc/lxcutmp.h b/src/lxc/lxcutmp.h index 88feea72f..81848d128 100644 --- a/src/lxc/lxcutmp.h +++ b/src/lxc/lxcutmp.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 
330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "config.h" diff --git a/src/lxc/mainloop.c b/src/lxc/mainloop.c index 975215db4..2ac49964b 100644 --- a/src/lxc/mainloop.c +++ b/src/lxc/mainloop.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -38,7 +38,7 @@ struct mainloop_handler { #define MAX_EVENTS 10 -int lxc_mainloop(struct lxc_epoll_descr *descr) +int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms) { int i, nfds; struct mainloop_handler *handler; @@ -46,7 +46,7 @@ int lxc_mainloop(struct lxc_epoll_descr *descr) for (;;) { - nfds = epoll_wait(descr->epfd, events, MAX_EVENTS, -1); + nfds = epoll_wait(descr->epfd, events, MAX_EVENTS, timeout_ms); if (nfds < 0) { if (errno == EINTR) continue; @@ -64,6 +64,9 @@ int lxc_mainloop(struct lxc_epoll_descr *descr) return 0; } + if (nfds == 0 && timeout_ms != 0) + return 0; + if (lxc_list_empty(&descr->handlers)) return 0; } diff --git a/src/lxc/mainloop.h b/src/lxc/mainloop.h index 6b16242aa..217a58f32 100644 --- a/src/lxc/mainloop.h +++ b/src/lxc/mainloop.h @@ -18,9 +18,12 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#ifndef _mainloop_h +#define _mainloop_h + #include "list.h" struct lxc_epoll_descr { @@ -31,7 +34,7 @@ struct lxc_epoll_descr { typedef int (*lxc_mainloop_callback_t)(int fd, void *data, struct lxc_epoll_descr *descr); -extern int lxc_mainloop(struct lxc_epoll_descr *descr); 
+extern int lxc_mainloop(struct lxc_epoll_descr *descr, int timeout_ms); extern int lxc_mainloop_add_handler(struct lxc_epoll_descr *descr, int fd, lxc_mainloop_callback_t callback, @@ -42,3 +45,5 @@ extern int lxc_mainloop_del_handler(struct lxc_epoll_descr *descr, int fd); extern int lxc_mainloop_open(struct lxc_epoll_descr *descr); extern int lxc_mainloop_close(struct lxc_epoll_descr *descr); + +#endif diff --git a/src/lxc/monitor.c b/src/lxc/monitor.c index afdaf67a2..412d38f4b 100644 --- a/src/lxc/monitor.c +++ b/src/lxc/monitor.c @@ -5,6 +5,7 @@ * * Authors: * Daniel Lezcano + * Dwight Engen * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,8 +19,9 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ + #include #include #include @@ -30,7 +32,7 @@ #include #include #include -#include +#include #include #include @@ -40,37 +42,37 @@ #include #include #include +#include lxc_log_define(lxc_monitor, lxc); -#ifndef UNIX_PATH_MAX -#define UNIX_PATH_MAX 108 -#endif - -static void lxc_monitor_send(struct lxc_msg *msg, const char *lxcpath) +/* routines used by monitor publishers (containers) */ +static void lxc_monitor_fifo_send(struct lxc_msg *msg, const char *lxcpath) { - int fd; - struct sockaddr_un addr = { .sun_family = AF_UNIX }; - char *offset = &addr.sun_path[1]; - size_t ret, len; + int fd,ret; + char fifo_path[PATH_MAX]; - /* - * addr.sun_path is only 108 bytes. - * should we take a hash of lxcpath? a subset of it? 
- */ - len = sizeof(addr.sun_path) - 1; - ret = snprintf(offset, len, "%s/lxc-monitor", lxcpath); - if (ret < 0 || ret >= len) { - ERROR("lxcpath too long to open monitor"); + BUILD_BUG_ON(sizeof(*msg) > PIPE_BUF); /* write not guaranteed atomic */ + ret = snprintf(fifo_path, sizeof(fifo_path), "%s/monitor-fifo", lxcpath); + if (ret < 0 || ret >= sizeof(fifo_path)) { + ERROR("lxcpath too long to open monitor fifo"); return; } - fd = socket(PF_UNIX, SOCK_DGRAM, 0); - if (fd < 0) + fd = open(fifo_path, O_WRONLY); + if (fd < 0) { + /* it is normal for this open to fail when there is no monitor + * running, so we don't log it + */ return; + } - sendto(fd, msg, sizeof(*msg), 0, - (const struct sockaddr *)&addr, sizeof(addr)); + ret = write(fd, msg, sizeof(*msg)); + if (ret != sizeof(*msg)) { + close(fd); + SYSERROR("failed to write monitor fifo %s", fifo_path); + return; + } close(fd); } @@ -82,73 +84,113 @@ void lxc_monitor_send_state(const char *name, lxc_state_t state, const char *lxc strncpy(msg.name, name, sizeof(msg.name)); msg.name[sizeof(msg.name) - 1] = 0; - lxc_monitor_send(&msg, lxcpath); + lxc_monitor_fifo_send(&msg, lxcpath); +} + + +/* routines used by monitor subscribers (lxc-monitor) */ +int lxc_monitor_close(int fd) +{ + return close(fd); +} + +int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr) { + size_t len; + int ret; + char *sockname = &addr->sun_path[0]; // 1 for abstract + + /* addr.sun_path is only 108 bytes. + * should we take a hash of lxcpath? a subset of it? ftok()? we need + * to make sure it is unique. 
+ */ + memset(addr, 0, sizeof(*addr)); + addr->sun_family = AF_UNIX; + len = sizeof(addr->sun_path) - 1; + ret = snprintf(sockname, len, "%s/monitor-sock", lxcpath); + if (ret < 0 || ret >= len) { + ERROR("lxcpath too long for unix socket"); + return -1; + } + return 0; } int lxc_monitor_open(const char *lxcpath) { - struct sockaddr_un addr = { .sun_family = AF_UNIX }; - char *offset = &addr.sun_path[1]; - int fd; - size_t ret, len; + struct sockaddr_un addr; + int fd,ret; + int retry,backoff_ms[] = {10, 50, 100}; - /* - * addr.sun_path is only 108 bytes. - * should we take a hash of lxcpath? a subset of it? - */ - len = sizeof(addr.sun_path) - 1; - ret = snprintf(offset, len, "%s/lxc-monitor", lxcpath); - if (ret < 0 || ret >= len) { - ERROR("lxcpath too long to open monitor"); + if (lxc_monitor_sock_name(lxcpath, &addr) < 0) return -1; - } - fd = socket(PF_UNIX, SOCK_DGRAM, 0); + fd = socket(PF_UNIX, SOCK_STREAM, 0); if (fd < 0) { ERROR("socket : %s", strerror(errno)); return -1; } - if (bind(fd, (struct sockaddr *)&addr, sizeof(addr))) { - ERROR("bind : %s", strerror(errno)); - close(fd); - return -1; + for (retry = 0; retry < sizeof(backoff_ms)/sizeof(backoff_ms[0]); retry++) { + ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret == 0 || errno != ECONNREFUSED) + break; + ERROR("connect : backing off %d", backoff_ms[retry]); + usleep(backoff_ms[retry] * 1000); } + if (ret < 0) { + ERROR("connect : %s", strerror(errno)); + goto err1; + } return fd; +err1: + close(fd); + return ret; } -/* timeout of 0 means return immediately; -1 means wait forever */ -int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout) +int lxc_monitor_read_fdset(fd_set *rfds, int nfds, struct lxc_msg *msg, + int timeout) { - struct sockaddr_un from; - socklen_t len = sizeof(from); - int ret; - fd_set rfds; - struct timeval tv; + struct timeval tval,*tv = NULL; + int ret,i; if (timeout != -1) { - FD_ZERO(&rfds); - FD_SET(fd, &rfds); - - tv.tv_sec = timeout; - 
tv.tv_usec = 0; - - ret = select(fd+1, &rfds, NULL, NULL, &tv); - if (ret == -1) - return -1; - else if (!ret) - return -2; // timed out + tv = &tval; + tv->tv_sec = timeout; + tv->tv_usec = 0; } - ret = recvfrom(fd, msg, sizeof(*msg), 0, - (struct sockaddr *)&from, &len); - if (ret < 0) { - SYSERROR("failed to receive state"); + ret = select(nfds, rfds, NULL, NULL, tv); + if (ret == -1) return -1; - } + else if (ret == 0) + return -2; // timed out - return ret; + /* only read from the first ready fd, the others will remain ready + * for when this routine is called again + */ + for (i = 0; i < nfds; i++) { + if (FD_ISSET(i, rfds)) { + ret = recv(i, msg, sizeof(*msg), 0); + if (ret <= 0) { + SYSERROR("client failed to recv (monitord died?) %s", + strerror(errno)); + return -1; + } + return ret; + } + } + SYSERROR("no ready fd found?"); + return -1; +} + +int lxc_monitor_read_timeout(int fd, struct lxc_msg *msg, int timeout) +{ + fd_set rfds; + + FD_ZERO(&rfds); + FD_SET(fd, &rfds); + + return lxc_monitor_read_fdset(&rfds, fd+1, msg, timeout); } int lxc_monitor_read(int fd, struct lxc_msg *msg) @@ -156,7 +198,74 @@ int lxc_monitor_read(int fd, struct lxc_msg *msg) return lxc_monitor_read_timeout(fd, msg, -1); } -int lxc_monitor_close(int fd) + + +/* used to spawn a monitord either on startup of a daemon container, or when + * lxc-monitor starts + */ +int lxc_monitord_spawn(const char *lxcpath) { - return close(fd); + pid_t pid1,pid2; + int pipefd[2]; + char pipefd_str[11]; + + char * const args[] = { + "lxc-monitord", + (char *)lxcpath, + pipefd_str, + NULL, + }; + + /* double fork to avoid zombies when monitord exits */ + pid1 = fork(); + if (pid1 < 0) { + SYSERROR("failed to fork"); + return -1; + } + + if (pid1) { + if (waitpid(pid1, NULL, 0) != pid1) + return -1; + return 0; + } + + if (pipe(pipefd) < 0) { + SYSERROR("failed to create pipe"); + exit(EXIT_FAILURE); + } + + pid2 = fork(); + if (pid2 < 0) { + SYSERROR("failed to fork"); + exit(EXIT_FAILURE); + } + 
if (pid2) { + char c; + /* wait for daemon to create socket */ + close(pipefd[1]); + /* sync with child, we're ignoring the return from read + * because regardless if it works or not, either way we've + * synced with the child process. the if-empty-statement + * construct is to quiet the warn-unused-result warning. + */ + if (read(pipefd[0], &c, 1)) ; + close(pipefd[0]); + exit(EXIT_SUCCESS); + } + + umask(0); + if (setsid() < 0) { + SYSERROR("failed to setsid"); + exit(EXIT_FAILURE); + } + close(0); + close(1); + close(2); + open("/dev/null", O_RDONLY); + open("/dev/null", O_RDWR); + open("/dev/null", O_RDWR); + close(pipefd[0]); + sprintf(pipefd_str, "%d", pipefd[1]); + execvp(args[0], args); + exit(EXIT_FAILURE); } diff --git a/src/lxc/monitor.h b/src/lxc/monitor.h index 8bef4c784..2a61091a5 100644 --- a/src/lxc/monitor.h +++ b/src/lxc/monitor.h @@ -18,12 +18,15 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __monitor_h #define __monitor_h #include +#include + +#include typedef enum { lxc_msg_state, @@ -32,11 +35,14 @@ typedef enum { struct lxc_msg { lxc_msg_type_t type; - char name[MAXPATHLEN]; + char name[NAME_MAX+1]; int value; }; -void lxc_monitor_send_state(const char *name, lxc_state_t state, +extern int lxc_monitor_open(const char *lxcpath); +extern int lxc_monitor_sock_name(const char *lxcpath, struct sockaddr_un *addr); +extern void lxc_monitor_send_state(const char *name, lxc_state_t state, const char *lxcpath); +extern int lxc_monitord_spawn(const char *lxcpath); #endif diff --git a/src/lxc/namespace.c b/src/lxc/namespace.c index 644705472..a152c6b12 100644 --- a/src/lxc/namespace.c +++ b/src/lxc/namespace.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public 
* License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include @@ -53,7 +53,7 @@ pid_t lxc_clone(int (*fn)(void *), void *arg, int flags) .arg = arg, }; - long stack_size = sysconf(_SC_PAGESIZE); + size_t stack_size = sysconf(_SC_PAGESIZE); void *stack = alloca(stack_size); pid_t ret; diff --git a/src/lxc/namespace.h b/src/lxc/namespace.h index c6a97ec93..64499cd3b 100644 --- a/src/lxc/namespace.h +++ b/src/lxc/namespace.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __namespace_h #define __namespace_h @@ -53,9 +53,6 @@ #if defined(__ia64__) int __clone2(int (*__fn) (void *__arg), void *__child_stack_base, size_t __child_stack_size, int __flags, void *__arg, ...); -#elif defined(IS_BIONIC) -int clone(int (*fn)(void *), void *child_stack, - int flags, void *arg); #else int clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ... diff --git a/src/lxc/network.c b/src/lxc/network.c index 3829757ec..f7c9ce4ce 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -783,8 +783,11 @@ static int ifa_get_local_ip(int family, struct ip_req *ip_info, void** res) { /* We might have found an IFA_ADDRESS before, * which we now overwrite with an IFA_LOCAL. 
*/ - if (!*res) + if (!*res) { *res = malloc(addrlen); + if (!*res) + return -1; + } memcpy(*res, RTA_DATA(rta), addrlen); @@ -836,7 +839,6 @@ static int ip_addr_get(int family, int ifindex, void **res) err = netlink_send(&nlh, nlmsg); if (err < 0) goto out; - err = 0; do { /* Restore the answer buffer length, it might have been diff --git a/src/lxc/network.h b/src/lxc/network.h index 08e0bf88b..0ca7a9a95 100644 --- a/src/lxc/network.h +++ b/src/lxc/network.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _network_h #define _network_h diff --git a/src/lxc/nl.c b/src/lxc/nl.c index 15ba58b93..7c0f1e53b 100644 --- a/src/lxc/nl.c +++ b/src/lxc/nl.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include diff --git a/src/lxc/nl.h b/src/lxc/nl.h index 13778aabc..621cbc6c5 100644 --- a/src/lxc/nl.h +++ b/src/lxc/nl.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __nl_h #define __nl_h diff --git a/src/lxc/parse.c b/src/lxc/parse.c index 4504ec292..26cbbdd00 100644 --- a/src/lxc/parse.c +++ b/src/lxc/parse.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free 
Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE #include @@ -30,15 +30,9 @@ #include "parse.h" #include "config.h" +#include "utils.h" #include -/* Define getline() if missing from the C library */ -#ifndef HAVE_GETLINE -#ifdef HAVE_FGETLN -#include <../include/getline.h> -#endif -#endif - /* Workaround for the broken signature of alphasort() in bionic. This was fixed upstream in 40e467ec668b59be25491bd44bf348a884d6a68d so the workaround can probably be dropped with the next version of the Android NDK. diff --git a/src/lxc/parse.h b/src/lxc/parse.h index 6f476e22c..e91bfd9cc 100644 --- a/src/lxc/parse.h +++ b/src/lxc/parse.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __parse_h #define __parse_h diff --git a/src/lxc/restart.c b/src/lxc/restart.c index a4bdf12ca..72320740f 100644 --- a/src/lxc/restart.c +++ b/src/lxc/restart.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "config.h" diff --git a/src/lxc/rtnl.c b/src/lxc/rtnl.c index 37ee8c79c..186c788a0 100644 --- a/src/lxc/rtnl.c +++ b/src/lxc/rtnl.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth 
Floor, Boston, MA 02110-1301 USA */ #include #include diff --git a/src/lxc/rtnl.h b/src/lxc/rtnl.h index c5b5f404c..d13f2b3dd 100644 --- a/src/lxc/rtnl.h +++ b/src/lxc/rtnl.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __genl_h #define __genl_h diff --git a/src/lxc/seccomp.c b/src/lxc/seccomp.c index 2f0b44708..1abd69711 100644 --- a/src/lxc/seccomp.c +++ b/src/lxc/seccomp.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE diff --git a/src/lxc/start.c b/src/lxc/start.c index aefccd650..e841bac5a 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "config.h" @@ -50,69 +50,6 @@ #include #endif -#ifdef HAVE_SYS_SIGNALFD_H -# include -#else -/* assume kernel headers are too old */ -#include -struct signalfd_siginfo -{ - uint32_t ssi_signo; - int32_t ssi_errno; - int32_t ssi_code; - uint32_t ssi_pid; - uint32_t ssi_uid; - int32_t ssi_fd; - uint32_t ssi_tid; - uint32_t ssi_band; - uint32_t ssi_overrun; - uint32_t ssi_trapno; - int32_t ssi_status; - int32_t ssi_int; - uint64_t ssi_ptr; - uint64_t ssi_utime; - uint64_t ssi_stime; - uint64_t ssi_addr; - uint8_t __pad[48]; -}; - -# ifndef 
__NR_signalfd4 -/* assume kernel headers are too old */ -# if __i386__ -# define __NR_signalfd4 327 -# elif __x86_64__ -# define __NR_signalfd4 289 -# elif __powerpc__ -# define __NR_signalfd4 313 -# elif __s390x__ -# define __NR_signalfd4 322 -# endif -#endif - -# ifndef __NR_signalfd -/* assume kernel headers are too old */ -# if __i386__ -# define __NR_signalfd 321 -# elif __x86_64__ -# define __NR_signalfd 282 -# elif __powerpc__ -# define __NR_signalfd 305 -# elif __s390x__ -# define __NR_signalfd 316 -# endif -#endif - -int signalfd(int fd, const sigset_t *mask, int flags) -{ - int retval; - - retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags); - if (errno == ENOSYS && flags == 0) - retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8); - return retval; -} -#endif - #if !HAVE_DECL_PR_CAPBSET_DROP #define PR_CAPBSET_DROP 24 #endif @@ -198,6 +135,7 @@ static int setup_signal_fd(sigset_t *oldmask) sigdelset(&mask, SIGILL) || sigdelset(&mask, SIGSEGV) || sigdelset(&mask, SIGBUS) || + sigdelset(&mask, SIGWINCH) || sigprocmask(SIG_BLOCK, &mask, oldmask)) { SYSERROR("failed to set signal mask"); return -1; @@ -224,8 +162,10 @@ static int signal_handler(int fd, void *data, struct lxc_epoll_descr *descr) { struct signalfd_siginfo siginfo; + siginfo_t info; int ret; pid_t *pid = data; + bool init_died = false; ret = read(fd, &siginfo, sizeof(siginfo)); if (ret < 0) { @@ -238,16 +178,23 @@ static int signal_handler(int fd, void *data, return -1; } + // check whether init is running + info.si_pid = 0; + ret = waitid(P_PID, *pid, &info, WEXITED | WNOWAIT | WNOHANG); + if (ret == 0 && info.si_pid == *pid) { + init_died = true; + } + if (siginfo.ssi_signo != SIGCHLD) { kill(*pid, siginfo.ssi_signo); INFO("forwarded signal %d to pid %d", siginfo.ssi_signo, *pid); - return 0; + return init_died ? 1 : 0; } if (siginfo.ssi_code == CLD_STOPPED || siginfo.ssi_code == CLD_CONTINUED) { INFO("container init process was stopped/continued"); - return 0; + return init_died ? 
1 : 0; } /* more robustness, protect ourself from a SIGCHLD sent @@ -255,97 +202,13 @@ static int signal_handler(int fd, void *data, */ if (siginfo.ssi_pid != *pid) { WARN("invalid pid for SIGCHLD"); - return 0; + return init_died ? 1 : 0; } DEBUG("container init process exited"); return 1; } -int lxc_pid_callback(int fd, struct lxc_request *request, - struct lxc_handler *handler) -{ - struct lxc_answer answer; - int ret; - - memset(&answer, 0, sizeof(answer)); - answer.pid = handler->pid; - answer.ret = 0; - - ret = send(fd, &answer, sizeof(answer), 0); - if (ret < 0) { - WARN("failed to send answer to the peer"); - return -1; - } - - if (ret != sizeof(answer)) { - ERROR("partial answer sent"); - return -1; - } - - return 0; -} - -int lxc_cgroup_callback(int fd, struct lxc_request *request, - struct lxc_handler *handler) -{ - struct lxc_answer answer; - int ret; - - memset(&answer, 0, sizeof(answer)); - answer.pathlen = strlen(handler->cgroup) + 1; - answer.path = handler->cgroup; - answer.ret = 0; - - ret = send(fd, &answer, sizeof(answer), 0); - if (ret < 0) { - WARN("failed to send answer to the peer"); - return -1; - } - - if (ret != sizeof(answer)) { - ERROR("partial answer sent"); - return -1; - } - - ret = send(fd, answer.path, answer.pathlen, 0); - if (ret < 0) { - WARN("failed to send answer to the peer"); - return -1; - } - - if (ret != answer.pathlen) { - ERROR("partial answer sent"); - return -1; - } - - return 0; -} - -int lxc_clone_flags_callback(int fd, struct lxc_request *request, - struct lxc_handler *handler) -{ - struct lxc_answer answer; - int ret; - - memset(&answer, 0, sizeof(answer)); - answer.pid = 0; - answer.ret = handler->clone_flags; - - ret = send(fd, &answer, sizeof(answer), 0); - if (ret < 0) { - WARN("failed to send answer to the peer"); - return -1; - } - - if (ret != sizeof(answer)) { - ERROR("partial answer sent"); - return -1; - } - - return 0; -} - int lxc_set_state(const char *name, struct lxc_handler *handler, lxc_state_t 
state) { handler->state = state; @@ -374,7 +237,7 @@ int lxc_poll(const char *name, struct lxc_handler *handler) goto out_mainloop_open; } - if (lxc_command_mainloop_add(name, &descr, handler)) { + if (lxc_cmd_mainloop_add(name, &descr, handler)) { ERROR("failed to add command handler to mainloop"); goto out_mainloop_open; } @@ -390,7 +253,7 @@ int lxc_poll(const char *name, struct lxc_handler *handler) #endif } - return lxc_mainloop(&descr); + return lxc_mainloop(&descr, -1); out_mainloop_open: lxc_mainloop_close(&descr); @@ -399,17 +262,10 @@ out_sigfd: return -1; } -extern int lxc_caps_check(void); - struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char *lxcpath) { struct lxc_handler *handler; - if (!lxc_caps_check()) { - ERROR("Not running with sufficient privilege"); - return NULL; - } - handler = malloc(sizeof(*handler)); if (!handler) return NULL; @@ -418,6 +274,7 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char handler->conf = conf; handler->lxcpath = lxcpath; + handler->pinfd = -1; apparmor_handler_init(handler); handler->name = strdup(name); @@ -426,7 +283,7 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char goto out_free; } - if (lxc_command_init(name, handler, lxcpath)) + if (lxc_cmd_init(name, handler, lxcpath)) goto out_free_name; if (lxc_read_seccomp_config(conf) != 0) { @@ -434,10 +291,10 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char goto out_close_maincmd_fd; } - /* Begin the set the state to STARTING*/ + /* Begin by setting the state to STARTING */ if (lxc_set_state(name, handler, STARTING)) { ERROR("failed to set state '%s'", lxc_state2str(STARTING)); - goto out_free_name; + goto out_close_maincmd_fd; } /* Start of environment variable setup for hooks */ @@ -461,7 +318,7 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char } /* End of environment variable setup for hooks */ - if 
(run_lxc_hooks(name, "pre-start", conf)) { + if (run_lxc_hooks(name, "pre-start", conf, handler->lxcpath, NULL)) { ERROR("failed to run pre-start hooks for container '%s'.", name); goto out_aborting; } @@ -471,25 +328,26 @@ struct lxc_handler *lxc_init(const char *name, struct lxc_conf *conf, const char goto out_aborting; } - if (lxc_create_console(conf)) { - ERROR("failed to create console"); - goto out_delete_tty; - } - /* the signal fd has to be created before forking otherwise * if the child process exits before we setup the signal fd, * the event will be lost and the command will be stuck */ handler->sigfd = setup_signal_fd(&handler->oldmask); if (handler->sigfd < 0) { ERROR("failed to set sigchild fd handler"); - goto out_delete_console; + goto out_delete_tty; + } + + /* do this after setting up signals since it might unblock SIGWINCH */ + if (lxc_console_create(conf)) { + ERROR("failed to create console"); + goto out_restore_sigmask; } INFO("'%s' is initialized", name); return handler; -out_delete_console: - lxc_delete_console(&conf->console); +out_restore_sigmask: + sigprocmask(SIG_SETMASK, &handler->oldmask, NULL); out_delete_tty: lxc_delete_tty(&conf->tty_info); out_aborting: @@ -513,21 +371,20 @@ static void lxc_fini(const char *name, struct lxc_handler *handler) lxc_set_state(name, handler, STOPPING); lxc_set_state(name, handler, STOPPED); - if (run_lxc_hooks(name, "post-stop", handler->conf)) + if (run_lxc_hooks(name, "post-stop", handler->conf, handler->lxcpath, NULL)) ERROR("failed to run post-stop hooks for container '%s'.", name); /* reset mask set by setup_signal_fd */ if (sigprocmask(SIG_SETMASK, &handler->oldmask, NULL)) WARN("failed to restore sigprocmask"); - lxc_delete_console(&handler->conf->console); + lxc_console_delete(&handler->conf->console); lxc_delete_tty(&handler->conf->tty_info); close(handler->conf->maincmd_fd); handler->conf->maincmd_fd = -1; free(handler->name); if (handler->cgroup) { - lxc_cgroup_destroy(handler->cgroup); - 
free(handler->cgroup); + lxc_cgroup_destroy_desc(handler->cgroup); handler->cgroup = NULL; } free(handler); @@ -535,9 +392,12 @@ static void lxc_fini(const char *name, struct lxc_handler *handler) void lxc_abort(const char *name, struct lxc_handler *handler) { + int ret, status; + lxc_set_state(name, handler, ABORTING); if (handler->pid > 0) kill(handler->pid, SIGKILL); + while ((ret = waitpid(-1, &status, 0)) > 0) ; } #include @@ -559,10 +419,10 @@ static int container_reboot_supported(void *arg) return 0; } -static int must_drop_cap_sys_boot(void) +static int must_drop_cap_sys_boot(struct lxc_conf *conf) { FILE *f = fopen("/proc/sys/kernel/ctrl-alt-del", "r"); - int ret, cmd, v; + int ret, cmd, v, flags; long stack_size = 4096; void *stack = alloca(stack_size); int status; @@ -581,11 +441,15 @@ static int must_drop_cap_sys_boot(void) } cmd = v ? LINUX_REBOOT_CMD_CAD_ON : LINUX_REBOOT_CMD_CAD_OFF; + flags = CLONE_NEWPID | SIGCHLD; + if (!lxc_list_empty(&conf->id_map)) + flags |= CLONE_NEWUSER; + #ifdef __ia64__ - pid = __clone2(container_reboot_supported, stack, stack_size, CLONE_NEWPID | SIGCHLD, &cmd); + pid = __clone2(container_reboot_supported, stack, stack_size, flags, &cmd); #else stack += stack_size; - pid = clone(container_reboot_supported, stack, CLONE_NEWPID | SIGCHLD, &cmd); + pid = clone(container_reboot_supported, stack, flags, &cmd); #endif if (pid < 0) { SYSERROR("failed to clone\n"); @@ -661,7 +525,7 @@ static int do_start(void *data) #endif /* Setup the container, ip, names, utsname, ... 
*/ - if (lxc_setup(handler->name, handler->conf)) { + if (lxc_setup(handler->name, handler->conf, handler->lxcpath)) { ERROR("failed to setup the container"); goto out_warn_father; } @@ -676,7 +540,7 @@ static int do_start(void *data) if (lxc_seccomp_load(handler->conf) != 0) goto out_warn_father; - if (run_lxc_hooks(handler->name, "start", handler->conf)) { + if (run_lxc_hooks(handler->name, "start", handler->conf, handler->lxcpath, NULL)) { ERROR("failed to run start hooks for container '%s'.", handler->name); goto out_warn_father; } @@ -739,7 +603,7 @@ int save_phys_nics(struct lxc_conf *conf) return 0; } - +extern bool is_in_subcgroup(int pid, const char *subsystem, struct cgroup_desc *d); int lxc_spawn(struct lxc_handler *handler) { int failed_before_rename = 0; @@ -805,10 +669,14 @@ int lxc_spawn(struct lxc_handler *handler) if (lxc_sync_wait_child(handler, LXC_SYNC_CONFIGURE)) failed_before_rename = 1; - /* TODO - pass lxc.cgroup.dir (or user's pam cgroup) in for first argument */ - if ((handler->cgroup = lxc_cgroup_path_create(NULL, name)) == NULL) + if ((handler->cgroup = lxc_cgroup_path_create(name)) == NULL) goto out_delete_net; - + + if (setup_cgroup(handler, &handler->conf->cgroup)) { + ERROR("failed to setup the cgroups for '%s'", name); + goto out_delete_net; + } + if (lxc_cgroup_enter(handler->cgroup, handler->pid) < 0) goto out_delete_net; @@ -839,12 +707,11 @@ int lxc_spawn(struct lxc_handler *handler) if (lxc_sync_barrier_child(handler, LXC_SYNC_POST_CONFIGURE)) goto out_delete_net; - if (setup_cgroup(handler->cgroup, &handler->conf->cgroup)) { - ERROR("failed to setup the cgroups for '%s'", name); + if (setup_cgroup_devices(handler, &handler->conf->cgroup)) { + ERROR("failed to setup the devices cgroup for '%s'", name); goto out_delete_net; } - /* Tell the child to complete its initialization and wait for * it to exec or return an error. (the child will never * return LXC_SYNC_POST_CGROUP+1. 
It will either close the @@ -874,9 +741,6 @@ int lxc_spawn(struct lxc_handler *handler) lxc_sync_fini(handler); - if (handler->pinfd >= 0) - close(handler->pinfd); - return 0; out_delete_net: @@ -885,6 +749,11 @@ out_delete_net: out_abort: lxc_abort(name, handler); lxc_sync_fini(handler); + if (handler->pinfd >= 0) { + close(handler->pinfd); + handler->pinfd = -1; + } + return -1; } @@ -903,7 +772,7 @@ int __lxc_start(const char *name, struct lxc_conf *conf, handler->ops = ops; handler->data = data; - if (must_drop_cap_sys_boot()) { + if (must_drop_cap_sys_boot(handler->conf)) { #if HAVE_SYS_CAPABILITY_H DEBUG("Dropping cap_sys_boot\n"); #else @@ -952,6 +821,11 @@ int __lxc_start(const char *name, struct lxc_conf *conf, lxc_rename_phys_nics_on_shutdown(handler->conf); + if (handler->pinfd >= 0) { + close(handler->pinfd); + handler->pinfd = -1; + } + err = lxc_error_set_and_log(handler->pid, status); out_fini: lxc_delete_network(handler); diff --git a/src/lxc/start.h b/src/lxc/start.h index ee011eaa5..3e5ad64c2 100644 --- a/src/lxc/start.h +++ b/src/lxc/start.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __lxc_state_h #define __lxc_state_h @@ -37,6 +37,8 @@ struct lxc_operations { int (*post_start)(struct lxc_handler *, void *); }; +struct cgroup_desc; + struct lxc_handler { pid_t pid; char *name; @@ -53,7 +55,7 @@ struct lxc_handler { #endif int pinfd; const char *lxcpath; - char *cgroup; + struct cgroup_desc *cgroup; }; extern struct lxc_handler *lxc_init(const char *name, struct lxc_conf *, const char *); diff --git a/src/lxc/state.c b/src/lxc/state.c index 3e7e94afb..54926343e 100644 --- a/src/lxc/state.c +++ b/src/lxc/state.c @@ -18,7 +18,7 @@ * * You should have received a copy of the 
GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include @@ -56,7 +56,8 @@ const char *lxc_state2str(lxc_state_t state) lxc_state_t lxc_str2state(const char *state) { - int i, len; + size_t len; + lxc_state_t i; len = sizeof(strstate)/sizeof(strstate[0]); for (i = 0; i < len; i++) if (!strcmp(strstate[i], state)) @@ -66,106 +67,52 @@ lxc_state_t lxc_str2state(const char *state) return -1; } -static int freezer_state(const char *name, const char *lxcpath) +static lxc_state_t freezer_state(const char *name, const char *lxcpath) { - char *nsgroup; + char *cgabspath = NULL; char freezer[MAXPATHLEN]; char status[MAXPATHLEN]; FILE *file; - int err; + int ret; - err = lxc_cgroup_path_get(&nsgroup, "freezer", name, lxcpath); - if (err) + cgabspath = lxc_cgroup_path_get("freezer", name, lxcpath); + if (!cgabspath) return -1; - err = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", nsgroup); - if (err < 0 || err >= MAXPATHLEN) - return -1; + ret = snprintf(freezer, MAXPATHLEN, "%s/freezer.state", cgabspath); + if (ret < 0 || ret >= MAXPATHLEN) + goto out; file = fopen(freezer, "r"); - if (!file) - return -1; + if (!file) { + ret = -1; + goto out; + } - err = fscanf(file, "%s", status); + ret = fscanf(file, "%s", status); fclose(file); - if (err == EOF) { + if (ret == EOF) { SYSERROR("failed to read %s", freezer); - return -1; + ret = -1; + goto out; } - return lxc_str2state(status); -} + ret = lxc_str2state(status); -static lxc_state_t __lxc_getstate(const char *name, const char *lxcpath) -{ - struct lxc_command command = { - .request = { .type = LXC_COMMAND_STATE }, - }; - - int ret, stopped = 0; - - ret = lxc_command(name, &command, &stopped, lxcpath); - if (ret < 0 && stopped) - return STOPPED; - - if (ret < 0) { - ERROR("failed to send command"); - return 
-1; - } - - if (!ret) { - WARN("'%s' has stopped before sending its state", name); - return -1; - } - - if (command.answer.ret < 0) { - ERROR("failed to get state for '%s': %s", - name, strerror(-command.answer.ret)); - return -1; - } - - DEBUG("'%s' is in '%s' state", name, lxc_state2str(command.answer.ret)); - - return command.answer.ret; +out: + free(cgabspath); + return ret; } lxc_state_t lxc_getstate(const char *name, const char *lxcpath) { - int state = freezer_state(name, lxcpath); + lxc_state_t state = freezer_state(name, lxcpath); if (state != FROZEN && state != FREEZING) - state = __lxc_getstate(name, lxcpath); + state = lxc_cmd_get_state(name, lxcpath); return state; } -/*---------------------------------------------------------------------------- - * functions used by lxc-start mainloop - * to handle above command request. - *--------------------------------------------------------------------------*/ -extern int lxc_state_callback(int fd, struct lxc_request *request, - struct lxc_handler *handler) -{ - struct lxc_answer answer; - int ret; - - memset(&answer, 0, sizeof(answer)); - answer.ret = handler->state; - - ret = send(fd, &answer, sizeof(answer), 0); - if (ret < 0) { - WARN("failed to send answer to the peer"); - goto out; - } - - if (ret != sizeof(answer)) { - ERROR("partial answer sent"); - goto out; - } - -out: - return ret; -} - static int fillwaitedstates(const char *strstates, int *states) { char *token, *saveptr = NULL; @@ -201,6 +148,9 @@ extern int lxc_wait(const char *lxcname, const char *states, int timeout, const if (fillwaitedstates(states, s)) return -1; + if (lxc_monitord_spawn(lxcpath)) + return -1; + fd = lxc_monitor_open(lxcpath); if (fd < 0) return -1; @@ -230,8 +180,11 @@ extern int lxc_wait(const char *lxcname, const char *states, int timeout, const goto out_close; curtime = tv.tv_sec; } - if (lxc_monitor_read_timeout(fd, &msg, timeout) < 0) - goto out_close; + if (lxc_monitor_read_timeout(fd, &msg, timeout) < 0) { + /* try 
again if select interrupted by signal */ + if (errno != EINTR) + goto out_close; + } if (timeout != -1) { retval = gettimeofday(&tv, NULL); diff --git a/src/lxc/state.h b/src/lxc/state.h index 1c75f1812..bad29430d 100644 --- a/src/lxc/state.h +++ b/src/lxc/state.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _state_h #define _state_h diff --git a/src/lxc/stop.c b/src/lxc/stop.c deleted file mode 100644 index 4fb4480f8..000000000 --- a/src/lxc/stop.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - * lxc: linux Container library - * - * (C) Copyright IBM Corp. 2007, 2008 - * - * Authors: - * Daniel Lezcano - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "lxc.h" -#include "commands.h" - -lxc_log_define(lxc_stop, lxc); - -int lxc_stop(const char *name, const char *lxcpath) -{ - struct lxc_command command = { - .request = { .type = LXC_COMMAND_STOP }, - }; - - int ret, stopped = 0; - - ret = lxc_command(name, &command,&stopped, lxcpath); - if (ret < 0 && stopped) { - INFO("'%s' is already stopped", name); - return 0; - } - - if (ret < 0) { - ERROR("failed to send command"); - return -1; - } - - /* we do not expect any answer, because we wait for the connection to be - * closed - */ - if (ret > 0) { - ERROR("failed to stop '%s': %s", - name, strerror(-command.answer.ret)); - return -1; - } - - INFO("'%s' has stopped", name); - - return 0; -} - -/*---------------------------------------------------------------------------- - * functions used by lxc-start mainloop - * to handle above command request. 
- *--------------------------------------------------------------------------*/ -extern int lxc_stop_callback(int fd, struct lxc_request *request, - struct lxc_handler *handler) -{ - struct lxc_answer answer; - int ret; - int stopsignal = SIGKILL; - - if (handler->conf->stopsignal) - stopsignal = handler->conf->stopsignal; - memset(&answer, 0, sizeof(answer)); - answer.ret = kill(handler->pid, stopsignal); - if (!answer.ret) { - ret = lxc_unfreeze_bypath(handler->cgroup); - if (!ret) - return 0; - - ERROR("failed to unfreeze container"); - answer.ret = ret; - } - - ret = send(fd, &answer, sizeof(answer), 0); - if (ret < 0) { - WARN("failed to send answer to the peer"); - goto out; - } - - if (ret != sizeof(answer)) { - ERROR("partial answer sent"); - goto out; - } - -out: - return -1; -} - diff --git a/src/lxc/sync.c b/src/lxc/sync.c index c4ce4faf1..4df2b5fad 100644 --- a/src/lxc/sync.c +++ b/src/lxc/sync.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include diff --git a/src/lxc/sync.h b/src/lxc/sync.h index 0bc75c6db..fd129af94 100644 --- a/src/lxc/sync.h +++ b/src/lxc/sync.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef __lxc_sync_h #define __lxc_sync_h diff --git a/src/lxc/utils.c b/src/lxc/utils.c index f4ba90520..9a7a41d9e 100644 --- a/src/lxc/utils.c +++ b/src/lxc/utils.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the 
Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #define _GNU_SOURCE @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -34,80 +35,87 @@ #include #include #include +#include +#include +#include "utils.h" #include "log.h" lxc_log_define(lxc_utils, lxc); -int lxc_copy_file(const char *srcfile, const char *dstfile) +static int _recursive_rmdir_onedev(char *dirname, dev_t pdev) { - void *srcaddr = NULL, *dstaddr; - struct stat stat; - int srcfd, dstfd, ret = -1; - char c = '\0'; + struct dirent dirent, *direntp; + DIR *dir; + int ret, failed=0; + char pathname[MAXPATHLEN]; - dstfd = open(dstfile, O_CREAT | O_EXCL | O_RDWR, 0600); - if (dstfd < 0) { - SYSERROR("failed to creat '%s'", dstfile); - goto out; + dir = opendir(dirname); + if (!dir) { + ERROR("%s: failed to open %s", __func__, dirname); + return 0; } - srcfd = open(srcfile, O_RDONLY); - if (srcfd < 0) { - SYSERROR("failed to open '%s'", srcfile); - goto err; + while (!readdir_r(dir, &dirent, &direntp)) { + struct stat mystat; + int rc; + + if (!direntp) + break; + + if (!strcmp(direntp->d_name, ".") || + !strcmp(direntp->d_name, "..")) + continue; + + rc = snprintf(pathname, MAXPATHLEN, "%s/%s", dirname, direntp->d_name); + if (rc < 0 || rc >= MAXPATHLEN) { + ERROR("pathname too long"); + failed=1; + continue; + } + ret = lstat(pathname, &mystat); + if (ret) { + ERROR("%s: failed to stat %s", __func__, pathname); + failed=1; + continue; + } + if (mystat.st_dev != pdev) + continue; + if (S_ISDIR(mystat.st_mode)) { + if (!_recursive_rmdir_onedev(pathname, pdev)) + failed=1; + } else { + if (unlink(pathname) < 0) { + ERROR("%s: failed to delete %s", __func__, pathname); + failed=1; + } + } } - if (fstat(srcfd, &stat)) { - SYSERROR("failed to stat '%s'", srcfile); - goto err; + if (rmdir(dirname) < 0) { + ERROR("%s: failed to delete %s", __func__, dirname); 
+ failed=1; } - if (!stat.st_size) { - INFO("copy '%s' which is an empty file", srcfile); - ret = 0; - goto out_close; + if (closedir(dir)) { + ERROR("%s: failed to close directory %s", __func__, dirname); + failed=1; } - if (lseek(dstfd, stat.st_size - 1, SEEK_SET) < 0) { - SYSERROR("failed to seek dest file '%s'", dstfile); - goto err; + return !failed; +} + +/* returns 1 on success, 0 if there were any failures */ +extern int lxc_rmdir_onedev(char *path) +{ + struct stat mystat; + + if (lstat(path, &mystat) < 0) { + ERROR("%s: failed to stat %s", __func__, path); + return 0; } - /* fixup length */ - if (write(dstfd, &c, 1) < 0) { - SYSERROR("failed to write to '%s'", dstfile); - goto err; - } - - srcaddr = mmap(NULL, stat.st_size, PROT_READ, MAP_SHARED, srcfd, 0L); - if (srcaddr == MAP_FAILED) { - SYSERROR("failed to mmap '%s'", srcfile); - goto err; - } - - dstaddr = mmap(NULL, stat.st_size, PROT_WRITE, MAP_SHARED, dstfd, 0L); - if (dstaddr == MAP_FAILED) { - SYSERROR("failed to mmap '%s'", dstfile); - goto err; - } - - ret = 0; - - memcpy(dstaddr, srcaddr, stat.st_size); - - munmap(dstaddr, stat.st_size); -out_mmap: - if (srcaddr) - munmap(srcaddr, stat.st_size); -out_close: - close(dstfd); - close(srcfd); -out: - return ret; -err: - unlink(dstfile); - goto out_mmap; + return _recursive_rmdir_onedev(path, mystat.st_dev); } static int mount_fs(const char *source, const char *target, const char *type) @@ -142,8 +150,9 @@ extern int lxc_setup_fs(void) return 0; } + /* continue even without posix message queue support */ if (mount_fs("mqueue", "/dev/mqueue", "mqueue")) - return -1; + INFO("failed to mount /dev/mqueue"); return 0; } @@ -166,32 +175,27 @@ extern int get_u16(unsigned short *val, const char *arg, int base) return 0; } -extern int mkdir_p(char *dir, mode_t mode) +extern int mkdir_p(const char *dir, mode_t mode) { - int ret; - char *d; + const char *tmp = dir; + const char *orig = dir; + char *makeme; - if (!strcmp(dir, "/")) - return 0; + do { + dir = 
tmp + strspn(tmp, "/"); + tmp = dir + strcspn(dir, "/"); + makeme = strndup(orig, dir - orig); + if (*makeme) { + if (mkdir(makeme, mode) && errno != EEXIST) { + SYSERROR("failed to create directory '%s'\n", makeme); + free(makeme); + return -1; + } + } + free(makeme); + } while(tmp != dir); - d = strdup(dir); - if (!d) - return -1; - - ret = mkdir_p(dirname(d), mode); - free(d); - if (ret) - return -1; - - if (!access(dir, F_OK)) - return 0; - - if (mkdir(dir, mode)) { - SYSERROR("failed to create directory '%s'\n", dir); - return -1; - } - - return 0; + return 0; } static char *copypath(char *p) @@ -213,7 +217,80 @@ static char *copypath(char *p) } char *default_lxcpath; +#define DEFAULT_VG "lxc" +char *default_lvmvg; +#define DEFAULT_ZFSROOT "lxc" +char *default_zfsroot; +const char *default_lvm_vg(void) +{ + char buf[1024], *p; + FILE *fin; + + if (default_lvmvg) + return default_lvmvg; + + fin = fopen(LXC_GLOBAL_CONF, "r"); + if (fin) { + while (fgets(buf, 1024, fin)) { + if (buf[0] == '#') + continue; + p = strstr(buf, "lvm_vg"); + if (!p) + continue; + p = strchr(p, '='); + if (!p) + continue; + p++; + while (*p && (*p == ' ' || *p == '\t')) p++; + if (!*p) + continue; + default_lvmvg = copypath(p); + goto out; + } + } + default_lvmvg = DEFAULT_VG; + +out: + if (fin) + fclose(fin); + return default_lvmvg; +} + +const char *default_zfs_root(void) +{ + char buf[1024], *p; + FILE *fin; + + if (default_zfsroot) + return default_zfsroot; + + fin = fopen(LXC_GLOBAL_CONF, "r"); + if (fin) { + while (fgets(buf, 1024, fin)) { + if (buf[0] == '#') + continue; + p = strstr(buf, "zfsroot"); + if (!p) + continue; + p = strchr(p, '='); + if (!p) + continue; + p++; + while (*p && (*p == ' ' || *p == '\t')) p++; + if (!*p) + continue; + default_zfsroot = copypath(p); + goto out; + } + } + default_zfsroot = DEFAULT_ZFSROOT; + +out: + if (fin) + fclose(fin); + return default_zfsroot; +} const char *default_lxc_path(void) { char buf[1024], *p; @@ -250,3 +327,171 @@ out: 
fclose(fin); return default_lxcpath; } + +int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + return -1; + } + if (ret != pid) + goto again; + if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) + return -1; + return 0; +} + +int lxc_wait_for_pid_status(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + return -1; + } + if (ret != pid) + goto again; + return status; +} + +ssize_t lxc_write_nointr(int fd, const void* buf, size_t count) +{ + ssize_t ret; +again: + ret = write(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + return ret; +} + +ssize_t lxc_read_nointr(int fd, void* buf, size_t count) +{ + ssize_t ret; +again: + ret = read(fd, buf, count); + if (ret < 0 && errno == EINTR) + goto again; + return ret; +} + +ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf) +{ + ssize_t ret; + ret = lxc_read_nointr(fd, buf, count); + if (ret <= 0) + return ret; + if ((size_t)ret != count) + return -1; + if (expected_buf && memcmp(buf, expected_buf, count) != 0) { + errno = EINVAL; + return -1; + } + return ret; +} + +#if HAVE_LIBGNUTLS +#include +#include +int sha1sum_file(char *fnam, unsigned char *digest) +{ + char *buf; + int ret; + FILE *f; + long flen; + + if (!fnam) + return -1; + if ((f = fopen(fnam, "r")) < 0) { + SYSERROR("Error opening template"); + return -1; + } + if (fseek(f, 0, SEEK_END) < 0) { + SYSERROR("Error seeking to end of template"); + fclose(f); + return -1; + } + if ((flen = ftell(f)) < 0) { + SYSERROR("Error telling size of template"); + fclose(f); + return -1; + } + if (fseek(f, 0, SEEK_SET) < 0) { + SYSERROR("Error seeking to start of template"); + fclose(f); + return -1; + } + if ((buf = malloc(flen+1)) == NULL) { + SYSERROR("Out of memory"); + fclose(f); + return -1; + } + if (fread(buf, 1, flen, f) != flen) { 
+ SYSERROR("Failure reading template"); + free(buf); + fclose(f); + return -1; + } + if (fclose(f) < 0) { + SYSERROR("Failre closing template"); + free(buf); + return -1; + } + buf[flen] = '\0'; + ret = gnutls_hash_fast(GNUTLS_DIG_SHA1, buf, flen, (void *)digest); + free(buf); + return ret; +} +#endif + +char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup) +{ + va_list ap2; + size_t count = 1 + skip; + char **result; + + /* first determine size of argument list, we don't want to reallocate + * constantly... + */ + va_copy(ap2, ap); + while (1) { + char* arg = va_arg(ap2, char*); + if (!arg) + break; + count++; + } + va_end(ap2); + + result = calloc(count, sizeof(char*)); + if (!result) + return NULL; + count = skip; + while (1) { + char* arg = va_arg(ap, char*); + if (!arg) + break; + arg = do_strdup ? strdup(arg) : arg; + if (!arg) + goto oom; + result[count++] = arg; + } + + /* calloc has already set last element to NULL*/ + return result; + +oom: + free(result); + return NULL; +} + +const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip) +{ + return (const char**)lxc_va_arg_list_to_argv(ap, skip, 0); +} diff --git a/src/lxc/utils.h b/src/lxc/utils.h index bf5b6cdc6..2c53da438 100644 --- a/src/lxc/utils.h +++ b/src/lxc/utils.h @@ -18,12 +18,20 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _utils_h #define _utils_h -extern int lxc_copy_file(const char *src, const char *dst); +#include +#include +#include +#include +#include +#include "config.h" + +/* returns 1 on success, 0 if there were any failures */ +extern int lxc_rmdir_onedev(char *path); extern int lxc_setup_fs(void); extern int get_u16(unsigned short *val, const char *arg, int base); extern int mkdir_p(const char 
*dir, mode_t mode); @@ -32,5 +40,156 @@ extern int mkdir_p(const char *dir, mode_t mode); * path. Caller must free this buffer. */ extern const char *default_lxc_path(void); +extern const char *default_zfs_root(void); +extern const char *default_lvm_vg(void); + +/* Define getline() if missing from the C library */ +#ifndef HAVE_GETLINE +#ifdef HAVE_FGETLN +#include <../include/getline.h> +#endif +#endif + +/* Define setns() if missing from the C library */ +#ifndef HAVE_SETNS +static inline int setns(int fd, int nstype) +{ +#ifdef __NR_setns + return syscall(__NR_setns, fd, nstype); +#else + errno = ENOSYS; + return -1; +#endif +} +#endif + +/* Define unshare() if missing from the C library */ +#ifndef HAVE_UNSHARE +static inline int unshare(int flags) +{ +#ifdef __NR_unshare + return syscall(__NR_unshare, flags); +#else + errno = ENOSYS; + return -1; +#endif +} +#else +int unshare(int); +#endif + +/* Define signalfd() if missing from the C library */ +#ifdef HAVE_SYS_SIGNALFD_H +# include +#else +/* assume kernel headers are too old */ +#include +struct signalfd_siginfo +{ + uint32_t ssi_signo; + int32_t ssi_errno; + int32_t ssi_code; + uint32_t ssi_pid; + uint32_t ssi_uid; + int32_t ssi_fd; + uint32_t ssi_tid; + uint32_t ssi_band; + uint32_t ssi_overrun; + uint32_t ssi_trapno; + int32_t ssi_status; + int32_t ssi_int; + uint64_t ssi_ptr; + uint64_t ssi_utime; + uint64_t ssi_stime; + uint64_t ssi_addr; + uint8_t __pad[48]; +}; + +# ifndef __NR_signalfd4 +/* assume kernel headers are too old */ +# if __i386__ +# define __NR_signalfd4 327 +# elif __x86_64__ +# define __NR_signalfd4 289 +# elif __powerpc__ +# define __NR_signalfd4 313 +# elif __s390x__ +# define __NR_signalfd4 322 +# elif __arm__ +# define __NR_signalfd4 355 +# endif +#endif + +# ifndef __NR_signalfd +/* assume kernel headers are too old */ +# if __i386__ +# define __NR_signalfd 321 +# elif __x86_64__ +# define __NR_signalfd 282 +# elif __powerpc__ +# define __NR_signalfd 305 +# elif __s390x__ +# 
define __NR_signalfd 316 +# elif __arm__ +# define __NR_signalfd 349 +# endif +#endif + +static inline int signalfd(int fd, const sigset_t *mask, int flags) +{ + int retval; + + retval = syscall (__NR_signalfd4, fd, mask, _NSIG / 8, flags); + if (errno == ENOSYS && flags == 0) + retval = syscall (__NR_signalfd, fd, mask, _NSIG / 8); + return retval; +} +#endif + + +/** + * BUILD_BUG_ON - break compile if a condition is true. + * @condition: the condition which the compiler should know is false. + * + * If you have some code which relies on certain constants being equal, or + * other compile-time-evaluated condition, you should use BUILD_BUG_ON to + * detect if someone changes it. + * + * The implementation uses gcc's reluctance to create a negative array, but + * gcc (as of 4.4) only emits that error for obvious cases (eg. not arguments + * to inline functions). So as a fallback we use the optimizer; if it can't + * prove the condition is false, it will cause a link error on the undefined + * "__build_bug_on_failed". This error message can be harder to track down + * though, hence the two different methods. 
+ */ +#ifndef __OPTIMIZE__ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int __build_bug_on_failed; +#define BUILD_BUG_ON(condition) \ + do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) __build_bug_on_failed = 1; \ + } while(0) +#endif + +/* + * wait on a child we forked + */ +extern int wait_for_pid(pid_t pid); +extern int lxc_wait_for_pid_status(pid_t pid); + +/* send and receive buffers completely */ +extern ssize_t lxc_write_nointr(int fd, const void* buf, size_t count); +extern ssize_t lxc_read_nointr(int fd, void* buf, size_t count); +extern ssize_t lxc_read_nointr_expect(int fd, void* buf, size_t count, const void* expected_buf); +#if HAVE_LIBGNUTLS +#define SHA_DIGEST_LENGTH 20 +extern int sha1sum_file(char *fnam, unsigned char *md_value); +#endif + +/* convert variadic argument lists to arrays (for execl type argument lists) */ +extern char** lxc_va_arg_list_to_argv(va_list ap, size_t skip, int do_strdup); +extern const char** lxc_va_arg_list_to_argv_const(va_list ap, size_t skip); #endif diff --git a/src/lxc/version.c b/src/lxc/version.c index b1f5276c1..c47d5520e 100644 --- a/src/lxc/version.c +++ b/src/lxc/version.c @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "../config.h" diff --git a/src/lxc/version.h b/src/lxc/version.h index be7d8f16a..a88ffad42 100644 --- a/src/lxc/version.h +++ b/src/lxc/version.h @@ -18,7 +18,7 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 
*/ #ifndef _version_h #define _version_h diff --git a/src/python-lxc/examples/api_test.py b/src/python-lxc/examples/api_test.py index 367bb7ab9..e078d2bcd 100644 --- a/src/python-lxc/examples/api_test.py +++ b/src/python-lxc/examples/api_test.py @@ -19,7 +19,7 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # import warnings @@ -28,6 +28,7 @@ warnings.filterwarnings("ignore", "The python-lxc API isn't yet stable") import lxc import uuid import sys +import time # Some constants LXC_TEMPLATE = "ubuntu" @@ -90,8 +91,15 @@ assert(container.state == "RUNNING") ## Checking IP address print("Getting the IP addresses") -ips = container.get_ips(timeout=10) -container.attach("NETWORK|UTSNAME", "/sbin/ifconfig", "eth0") + +count = 0 +ips = [] +while not ips or count == 10: + ips = container.get_ips() + time.sleep(1) + count += 1 +container.attach_wait(lxc.attach_run_command, ["ifconfig", "eth0"], + namespaces=(lxc.CLONE_NEWNET + lxc.CLONE_NEWUTS)) # A few basic checks of the current state assert(len(ips) > 0) diff --git a/src/python-lxc/examples/pyconsole-vte.py b/src/python-lxc/examples/pyconsole-vte.py new file mode 100755 index 000000000..53867b751 --- /dev/null +++ b/src/python-lxc/examples/pyconsole-vte.py @@ -0,0 +1,80 @@ +#!/usr/bin/python3 +# +# pyconsole-vte: Example program showing use of console functions +# in the lxc python binding +# +# (C) Copyright Oracle. 2013 +# +# Authors: +# Dwight Engen +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. 
+# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# + +import warnings +warnings.filterwarnings("ignore", "The python-lxc API isn't yet stable") + +import gtk +import vte +import lxc +import sys + + +def gtk_exit_cb(terminal): + gtk.main_quit() + + +def vte_con(ct, ttynum): + print("Doing console in a VTE widget...") + masterfd = ct.console_getfd(ttynum) + term = vte.Terminal() + term.set_cursor_blinks(True) + term.set_scrollback_lines(1000) + term.connect('eof', gtk_exit_cb) + + term.set_pty(masterfd) + term.feed_child('\n') + #term.feed_child('ps aux\n') + + vscrollbar = gtk.VScrollbar() + vscrollbar.set_adjustment(term.get_adjustment()) + + hbox = gtk.HBox() + hbox.pack_start(term) + hbox.pack_start(vscrollbar) + + window = gtk.Window() + window.add(hbox) + window.connect('delete-event', lambda window, event: gtk.main_quit()) + window.show_all() + gtk.main() + print("Console done") + +if __name__ == '__main__': + ttynum = -1 + if len(sys.argv) < 2: + sys.exit("Usage: %s container-name [ttynum]" % sys.argv[0]) + if len(sys.argv) > 2: + ttynum = int(sys.argv[2]) + + ct = lxc.Container(sys.argv[1]) + + print("Container:%s tty:%d" % (ct.name, ttynum)) + if not ct.defined: + sys.exit("Container %s not defined" % ct.name) + if not ct.running: + sys.exit("Container %s not running" % ct.name) + + vte_con(ct, ttynum) diff --git a/src/python-lxc/examples/pyconsole.py b/src/python-lxc/examples/pyconsole.py new file mode 100755 index 000000000..49437139c --- /dev/null +++ b/src/python-lxc/examples/pyconsole.py @@ -0,0 +1,54 @@ +#!/usr/bin/python3 +# +# 
pyconsole: Example program showing use of console functions +# in the lxc python binding +# +# (C) Copyright Oracle. 2013 +# +# Authors: +# Dwight Engen +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# + +import warnings +warnings.filterwarnings("ignore", "The python-lxc API isn't yet stable") + +import lxc +import sys +import time + +if __name__ == '__main__': + ttynum = -1 + escape = 1 + if len(sys.argv) < 2: + sys.exit("Usage: %s container-name [ttynum [escape]]" % sys.argv[0]) + if len(sys.argv) > 2: + ttynum = int(sys.argv[2]) + if len(sys.argv) > 3: + escape = ord(sys.argv[3]) - ord('a') + 1 + + ct = lxc.Container(sys.argv[1]) + + print("Container:%s tty:%d Ctrl-%c q to quit" % + (ct.name, ttynum, ord('a') + escape-1)) + time.sleep(1) + if not ct.defined: + sys.exit("Container %s not defined" % ct.name) + if not ct.running: + sys.exit("Container %s not running" % ct.name) + + ct.console(ttynum, 0, 1, 2, escape) + print("Console done") diff --git a/src/python-lxc/lxc.c b/src/python-lxc/lxc.c index 06d339691..bd053a7b2 100644 --- a/src/python-lxc/lxc.c +++ b/src/python-lxc/lxc.c @@ -1,7 +1,7 @@ /* * python-lxc: Python bindings for LXC * - * (C) Copyright Canonical Ltd. 2012 + * (C) Copyright Canonical Ltd. 
2012-2013 * * Authors: * Stéphane Graber @@ -18,12 +18,15 @@ * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include "structmember.h" #include +#include +#include +#include #include #include @@ -34,34 +37,85 @@ typedef struct { char** convert_tuple_to_char_pointer_array(PyObject *argv) { - int argc = PyTuple_Size(argv); - int i; + int argc; + int i, j; - char **result = (char**) malloc(sizeof(char*)*argc + 1); + /* not a list or tuple */ + if (!PyList_Check(argv) && !PyTuple_Check(argv)) { + PyErr_SetString(PyExc_TypeError, "Expected list or tuple."); + return NULL; + } + + argc = PySequence_Fast_GET_SIZE(argv); + + char **result = (char**) calloc(argc + 1, sizeof(char*)); + + if (result == NULL) { + PyErr_SetNone(PyExc_MemoryError); + return NULL; + } for (i = 0; i < argc; i++) { - PyObject *pyobj = PyTuple_GetItem(argv, i); + PyObject *pyobj = PySequence_Fast_GET_ITEM(argv, i); + assert(pyobj != NULL); char *str = NULL; - PyObject *pystr; + PyObject *pystr = NULL; + if (!PyUnicode_Check(pyobj)) { PyErr_SetString(PyExc_ValueError, "Expected a string"); - return NULL; + goto error; } pystr = PyUnicode_AsUTF8String(pyobj); + if (!pystr) { + /* Maybe it wasn't UTF-8 encoded. An exception is already set. */ + goto error; + } + str = PyBytes_AsString(pystr); - memcpy((char *) &result[i], (char *) &str, sizeof(str)); + if (!str) { + /* Maybe pystr wasn't a valid object. An exception is already set. + */ + Py_DECREF(pystr); + goto error; + } + + /* We must make a copy of str, because it points into internal memory + * which we do not own. Assume it's NULL terminated, otherwise we'd + * have to use PyUnicode_AsUTF8AndSize() and be explicit about copying + * the memory. 
+ */ + result[i] = strdup(str); + + /* Do not decref pyobj since we stole a reference by using + * PyTuple_GET_ITEM(). + */ + Py_DECREF(pystr); + if (result[i] == NULL) { + PyErr_SetNone(PyExc_MemoryError); + goto error; + } } result[argc] = NULL; - return result; + +error: + /* We can only iterate up to but not including i because malloc() does not + * initialize its memory. Thus if we got here, i points to the index + * after the last strdup'd entry in result. + */ + for (j = 0; j < i; j++) + free(result[j]); + free(result); + return NULL; } static void Container_dealloc(Container* self) { + lxc_container_put(self->container); Py_TYPE(self)->tp_free((PyObject*)self); } @@ -80,18 +134,27 @@ Container_init(Container *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"name", "config_path", NULL}; char *name = NULL; + PyObject *fs_config_path = NULL; char *config_path = NULL; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|s", kwlist, - &name, &config_path)) + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|O&", kwlist, + &name, + PyUnicode_FSConverter, &fs_config_path)) return -1; + if (fs_config_path != NULL) { + config_path = PyBytes_AS_STRING(fs_config_path); + assert(config_path != NULL); + } + self->container = lxc_container_new(name, config_path); if (!self->container) { - fprintf(stderr, "%d: error creating lxc_container %s\n", __LINE__, name); + Py_XDECREF(fs_config_path); + fprintf(stderr, "%d: error creating container %s\n", __LINE__, name); return -1; } + Py_XDECREF(fs_config_path); return 0; } @@ -109,13 +172,14 @@ LXC_get_version(PyObject *self, PyObject *args) // Container properties static PyObject * -Container_config_file_name(Container *self, PyObject *args, PyObject *kwds) +Container_config_file_name(Container *self, void *closure) { - return PyUnicode_FromString(self->container->config_file_name(self->container)); + return PyUnicode_FromString( + self->container->config_file_name(self->container)); } static PyObject * 
-Container_defined(Container *self, PyObject *args, PyObject *kwds) +Container_defined(Container *self, void *closure) { if (self->container->is_defined(self->container)) { Py_RETURN_TRUE; @@ -125,19 +189,19 @@ Container_defined(Container *self, PyObject *args, PyObject *kwds) } static PyObject * -Container_init_pid(Container *self, PyObject *args, PyObject *kwds) +Container_init_pid(Container *self, void *closure) { - return Py_BuildValue("i", self->container->init_pid(self->container)); + return PyLong_FromLong(self->container->init_pid(self->container)); } static PyObject * -Container_name(Container *self, PyObject *args, PyObject *kwds) +Container_name(Container *self, void *closure) { return PyUnicode_FromString(self->container->name); } static PyObject * -Container_running(Container *self, PyObject *args, PyObject *kwds) +Container_running(Container *self, void *closure) { if (self->container->is_running(self->container)) { Py_RETURN_TRUE; @@ -147,7 +211,7 @@ Container_running(Container *self, PyObject *args, PyObject *kwds) } static PyObject * -Container_state(Container *self, PyObject *args, PyObject *kwds) +Container_state(Container *self, void *closure) { return PyUnicode_FromString(self->container->state(self->container)); } @@ -159,9 +223,9 @@ Container_clear_config_item(Container *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"key", NULL}; char *key = NULL; - if (! PyArg_ParseTupleAndKeywords(args, kwds, "s|", kwlist, + if (! PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &key)) - Py_RETURN_FALSE; + return NULL; if (self->container->clear_config_item(self->container, key)) { Py_RETURN_TRUE; @@ -175,25 +239,43 @@ Container_create(Container *self, PyObject *args, PyObject *kwds) { char* template_name = NULL; char** create_args = {NULL}; - PyObject *vargs = NULL; + PyObject *retval = NULL, *vargs = NULL; + int i = 0; static char *kwlist[] = {"template", "args", NULL}; if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "s|O", kwlist, &template_name, &vargs)) - Py_RETURN_FALSE; + return NULL; - if (vargs && PyTuple_Check(vargs)) { - create_args = convert_tuple_to_char_pointer_array(vargs); - if (!create_args) { + if (vargs) { + if (PyTuple_Check(vargs)) { + create_args = convert_tuple_to_char_pointer_array(vargs); + if (!create_args) { + return NULL; + } + } + else { + PyErr_SetString(PyExc_ValueError, "args needs to be a tuple"); return NULL; } } - if (self->container->create(self->container, template_name, create_args)) { - Py_RETURN_TRUE; + if (self->container->create(self->container, template_name, NULL, NULL, 0, create_args)) + retval = Py_True; + else + retval = Py_False; + + if (vargs) { + /* We cannot have gotten here unless vargs was given and create_args + * was successfully allocated. + */ + for (i = 0; i < PyTuple_GET_SIZE(vargs); i++) + free(create_args[i]); + free(create_args); } - Py_RETURN_FALSE; + Py_INCREF(retval); + return retval; } static PyObject * @@ -222,23 +304,33 @@ Container_get_cgroup_item(Container *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"key", NULL}; char* key = NULL; int len = 0; + PyObject *ret = NULL; - if (! PyArg_ParseTupleAndKeywords(args, kwds, "s|", kwlist, + if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &key)) - Py_RETURN_FALSE; + return NULL; len = self->container->get_cgroup_item(self->container, key, NULL, 0); - if (len <= 0) { - Py_RETURN_FALSE; + if (len < 0) { + PyErr_SetString(PyExc_KeyError, "Invalid cgroup entry"); + return NULL; } char* value = (char*) malloc(sizeof(char)*len + 1); - if (self->container->get_cgroup_item(self->container, key, value, len + 1) != len) { - Py_RETURN_FALSE; + if (value == NULL) + return PyErr_NoMemory(); + + if (self->container->get_cgroup_item(self->container, + key, value, len + 1) != len) { + PyErr_SetString(PyExc_ValueError, "Unable to read config value"); + free(value); + return NULL; } - return PyUnicode_FromString(value); + ret = PyUnicode_FromString(value); + free(value); + return ret; } static PyObject * @@ -247,29 +339,40 @@ Container_get_config_item(Container *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"key", NULL}; char* key = NULL; int len = 0; + PyObject *ret = NULL; if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "s|", kwlist, &key)) - Py_RETURN_FALSE; + return NULL; len = self->container->get_config_item(self->container, key, NULL, 0); - if (len <= 0) { - Py_RETURN_FALSE; + if (len < 0) { + PyErr_SetString(PyExc_KeyError, "Invalid configuration key"); + return NULL; } char* value = (char*) malloc(sizeof(char)*len + 1); - if (self->container->get_config_item(self->container, key, value, len + 1) != len) { - Py_RETURN_FALSE; + if (value == NULL) + return PyErr_NoMemory(); + + if (self->container->get_config_item(self->container, + key, value, len + 1) != len) { + PyErr_SetString(PyExc_ValueError, "Unable to read config value"); + free(value); + return NULL; } - return PyUnicode_FromString(value); + ret = PyUnicode_FromString(value); + free(value); + return ret; } static PyObject * Container_get_config_path(Container *self, PyObject *args, PyObject *kwds) { - return PyUnicode_FromString(self->container->get_config_path(self->container)); + return PyUnicode_FromString( + self->container->get_config_path(self->container)); } static PyObject * @@ -278,39 +381,112 @@ Container_get_keys(Container *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"key", NULL}; char* key = NULL; int len = 0; + PyObject *ret = NULL; if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "|s", kwlist, &key)) - Py_RETURN_FALSE; + return NULL; len = self->container->get_keys(self->container, key, NULL, 0); - if (len <= 0) { - Py_RETURN_FALSE; + if (len < 0) { + PyErr_SetString(PyExc_KeyError, "Invalid configuration key"); + return NULL; } char* value = (char*) malloc(sizeof(char)*len + 1); - if (self->container->get_keys(self->container, key, value, len + 1) != len) { - Py_RETURN_FALSE; + if (value == NULL) + return PyErr_NoMemory(); + + if (self->container->get_keys(self->container, + key, value, len + 1) != len) { + PyErr_SetString(PyExc_ValueError, "Unable to read config keys"); + free(value); + return NULL; } - return PyUnicode_FromString(value); + ret = PyUnicode_FromString(value); + free(value); + return ret; +} + +static PyObject * +Container_get_ips(Container *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"interface", "family", "scope", NULL}; + char* interface = NULL; + char* family = NULL; + int scope = 0; + + int i = 0; + char** ips = NULL; + + PyObject* ret; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|ssi", kwlist, + &interface, &family, &scope)) + return NULL; + + /* Get the IPs */ + ips = self->container->get_ips(self->container, interface, family, scope); + if (!ips) + return PyTuple_New(0); + + /* Count the entries */ + while (ips[i]) + i++; + + /* Create the new tuple */ + ret = PyTuple_New(i); + if (!ret) + return NULL; + + /* Add the entries to the tuple and free the memory */ + i = 0; + while (ips[i]) { + PyObject *unicode = PyUnicode_FromString(ips[i]); + if (!unicode) { + Py_DECREF(ret); + ret = NULL; + break; + } + PyTuple_SET_ITEM(ret, i, unicode); + i++; + } + + /* Free the list of IPs */ + i = 0; + while (ips[i]) { + free(ips[i]); + i++; + } + free(ips); + + return ret; } static PyObject * Container_load_config(Container *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"path", NULL}; + PyObject *fs_path = NULL; char* path = NULL; - if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "|s", kwlist, - &path)) - Py_RETURN_FALSE; + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|O&", kwlist, + PyUnicode_FSConverter, &fs_path)) + return NULL; + + if (fs_path != NULL) { + path = PyBytes_AS_STRING(fs_path); + assert(path != NULL); + } if (self->container->load_config(self->container, path)) { + Py_XDECREF(fs_path); Py_RETURN_TRUE; } + Py_XDECREF(fs_path); Py_RETURN_FALSE; } @@ -318,16 +494,24 @@ static PyObject * Container_save_config(Container *self, PyObject *args, PyObject *kwds) { static char *kwlist[] = {"path", NULL}; + PyObject *fs_path = NULL; char* path = NULL; - if (! PyArg_ParseTupleAndKeywords(args, kwds, "|s", kwlist, - &path)) - Py_RETURN_FALSE; + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|O&", kwlist, + PyUnicode_FSConverter, &fs_path)) + return NULL; + + if (fs_path != NULL) { + path = PyBytes_AS_STRING(fs_path); + assert(path != NULL); + } if (self->container->save_config(self->container, path)) { + Py_XDECREF(fs_path); Py_RETURN_TRUE; } + Py_XDECREF(fs_path); Py_RETURN_FALSE; } @@ -338,9 +522,9 @@ Container_set_cgroup_item(Container *self, PyObject *args, PyObject *kwds) char *key = NULL; char *value = NULL; - if (! PyArg_ParseTupleAndKeywords(args, kwds, "ss|", kwlist, + if (! PyArg_ParseTupleAndKeywords(args, kwds, "ss", kwlist, &key, &value)) - Py_RETURN_FALSE; + return NULL; if (self->container->set_cgroup_item(self->container, key, value)) { Py_RETURN_TRUE; @@ -356,9 +540,9 @@ Container_set_config_item(Container *self, PyObject *args, PyObject *kwds) char *key = NULL; char *value = NULL; - if (! PyArg_ParseTupleAndKeywords(args, kwds, "ss|", kwlist, + if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "ss", kwlist, &key, &value)) - Py_RETURN_FALSE; + return NULL; if (self->container->set_config_item(self->container, key, value)) { Py_RETURN_TRUE; @@ -373,9 +557,9 @@ Container_set_config_path(Container *self, PyObject *args, PyObject *kwds) static char *kwlist[] = {"path", NULL}; char *path = NULL; - if (! PyArg_ParseTupleAndKeywords(args, kwds, "s|", kwlist, + if (! PyArg_ParseTupleAndKeywords(args, kwds, "s", kwlist, &path)) - Py_RETURN_FALSE; + return NULL; if (self->container->set_config_path(self->container, path)) { Py_RETURN_TRUE; @@ -392,7 +576,7 @@ Container_shutdown(Container *self, PyObject *args, PyObject *kwds) if (! PyArg_ParseTupleAndKeywords(args, kwds, "|i", kwlist, &timeout)) - Py_RETURN_FALSE; + return NULL; if (self->container->shutdown(self->container, timeout)) { Py_RETURN_TRUE; @@ -405,13 +589,13 @@ static PyObject * Container_start(Container *self, PyObject *args, PyObject *kwds) { char** init_args = {NULL}; - PyObject *useinit = NULL, *vargs = NULL; - int init_useinit = 0; + PyObject *useinit = NULL, *retval = NULL, *vargs = NULL; + int init_useinit = 0, i = 0; static char *kwlist[] = {"useinit", "cmd", NULL}; if (! PyArg_ParseTupleAndKeywords(args, kwds, "|OO", kwlist, &useinit, &vargs)) - Py_RETURN_FALSE; + return NULL; if (useinit && useinit == Py_True) { init_useinit = 1; @@ -426,11 +610,22 @@ Container_start(Container *self, PyObject *args, PyObject *kwds) self->container->want_daemonize(self->container); - if (self->container->start(self->container, init_useinit, init_args)) { - Py_RETURN_TRUE; + if (self->container->start(self->container, init_useinit, init_args)) + retval = Py_True; + else + retval = Py_False; + + if (vargs) { + /* We cannot have gotten here unless vargs was given and create_args + * was successfully allocated. 
+ */ + for (i = 0; i < PyTuple_GET_SIZE(vargs); i++) + free(init_args[i]); + free(init_args); } - Py_RETURN_FALSE; + Py_INCREF(retval); + return retval; } static PyObject * @@ -453,6 +648,40 @@ Container_unfreeze(Container *self, PyObject *args, PyObject *kwds) Py_RETURN_FALSE; } +static PyObject * +Container_console(Container *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"ttynum", "stdinfd", "stdoutfd", "stderrfd", "escape", NULL}; + int ttynum = -1, stdinfd = 0, stdoutfd = 1, stderrfd = 2, escape = 1; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|iiiii", kwlist, + &ttynum, &stdinfd, &stdoutfd, &stderrfd, + &escape)) + return NULL; + + if (self->container->console(self->container, ttynum, + stdinfd, stdoutfd, stderrfd, escape) == 0) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; +} + +static PyObject * +Container_console_getfd(Container *self, PyObject *args, PyObject *kwds) +{ + static char *kwlist[] = {"ttynum", NULL}; + int ttynum = -1, masterfd; + + if (! PyArg_ParseTupleAndKeywords(args, kwds, "|i", kwlist, &ttynum)) + return NULL; + + if (self->container->console_getfd(self->container, &ttynum, &masterfd) < 0) { + PyErr_SetString(PyExc_ValueError, "Unable to allocate tty"); + return NULL; + } + return PyLong_FromLong(masterfd); +} + static PyObject * Container_wait(Container *self, PyObject *args, PyObject *kwds) { @@ -462,7 +691,7 @@ Container_wait(Container *self, PyObject *args, PyObject *kwds) if (! 
PyArg_ParseTupleAndKeywords(args, kwds, "s|i", kwlist, &state, &timeout)) - Py_RETURN_FALSE; + return NULL; if (self->container->wait(self->container, state, timeout)) { Py_RETURN_TRUE; @@ -471,131 +700,426 @@ Container_wait(Container *self, PyObject *args, PyObject *kwds) Py_RETURN_FALSE; } +struct lxc_attach_python_payload { + PyObject *fn; + PyObject *arg; +}; + +static int lxc_attach_python_exec(void* _payload) +{ + struct lxc_attach_python_payload *payload = (struct lxc_attach_python_payload *)_payload; + PyObject *result = PyObject_CallFunctionObjArgs(payload->fn, payload->arg, NULL); + + if (!result) { + PyErr_Print(); + return -1; + } + if (PyLong_Check(result)) + return (int)PyLong_AsLong(result); + else + return -1; +} + +static void lxc_attach_free_options(lxc_attach_options_t *options); + +static lxc_attach_options_t *lxc_attach_parse_options(PyObject *kwds) +{ + static char *kwlist[] = {"attach_flags", "namespaces", "personality", "initial_cwd", "uid", "gid", "env_policy", "extra_env_vars", "extra_keep_env", "stdin", "stdout", "stderr", NULL}; + long temp_uid, temp_gid; + int temp_env_policy; + PyObject *extra_env_vars_obj = NULL; + PyObject *extra_keep_env_obj = NULL; + PyObject *stdin_obj = NULL; + PyObject *stdout_obj = NULL; + PyObject *stderr_obj = NULL; + PyObject *initial_cwd_obj = NULL; + PyObject *dummy; + bool parse_result; + + lxc_attach_options_t default_options = LXC_ATTACH_OPTIONS_DEFAULT; + lxc_attach_options_t *options = malloc(sizeof(*options)); + + if (!options) { + PyErr_SetNone(PyExc_MemoryError); + return NULL; + } + memcpy(options, &default_options, sizeof(*options)); + + /* we need some dummy variables because we can't be sure + * the data types match completely */ + temp_uid = -1; + temp_gid = -1; + temp_env_policy = options->env_policy; + + /* we need a dummy tuple */ + dummy = PyTuple_New(0); + + parse_result = PyArg_ParseTupleAndKeywords(dummy, kwds, "|iilO&lliOOOOO", kwlist, + &options->attach_flags, &options->namespaces, 
&options->personality, + PyUnicode_FSConverter, &initial_cwd_obj, &temp_uid, &temp_gid, + &temp_env_policy, &extra_env_vars_obj, &extra_keep_env_obj, + &stdin_obj, &stdout_obj, &stderr_obj); + + /* immediately get rid of the dummy tuple */ + Py_DECREF(dummy); + + if (!parse_result) { + lxc_attach_free_options(options); + return NULL; + } + + /* duplicate the string, so we don't depend on some random Python object */ + if (initial_cwd_obj != NULL) { + options->initial_cwd = strndup(PyBytes_AsString(initial_cwd_obj), PyBytes_Size(initial_cwd_obj)); + Py_DECREF(initial_cwd_obj); + } + + /* do the type conversion from the types that match the parse string */ + if (temp_uid != -1) options->uid = (uid_t)temp_uid; + if (temp_gid != -1) options->gid = (gid_t)temp_gid; + options->env_policy = (lxc_attach_env_policy_t)temp_env_policy; + + if (extra_env_vars_obj) + options->extra_env_vars = convert_tuple_to_char_pointer_array(extra_env_vars_obj); + if (extra_keep_env_obj) + options->extra_keep_env = convert_tuple_to_char_pointer_array(extra_keep_env_obj); + if (stdin_obj) { + options->stdin_fd = PyObject_AsFileDescriptor(stdin_obj); + if (options->stdin_fd < 0) { + lxc_attach_free_options(options); + return NULL; + } + } + if (stdout_obj) { + options->stdout_fd = PyObject_AsFileDescriptor(stdout_obj); + if (options->stdout_fd < 0) { + lxc_attach_free_options(options); + return NULL; + } + } + if (stderr_obj) { + options->stderr_fd = PyObject_AsFileDescriptor(stderr_obj); + if (options->stderr_fd < 0) { + lxc_attach_free_options(options); + return NULL; + } + } + + return options; +} + +void lxc_attach_free_options(lxc_attach_options_t *options) +{ + int i; + if (!options) + return; + if (options->initial_cwd) + free(options->initial_cwd); + if (options->extra_env_vars) { + for (i = 0; options->extra_env_vars[i]; i++) + free(options->extra_env_vars[i]); + free(options->extra_env_vars); + } + if (options->extra_keep_env) { + for (i = 0; options->extra_keep_env[i]; i++) + 
free(options->extra_keep_env[i]); + free(options->extra_keep_env); + } + free(options); +} + +static PyObject * +Container_attach_and_possibly_wait(Container *self, PyObject *args, PyObject *kwds, int wait) +{ + struct lxc_attach_python_payload payload = { NULL, NULL }; + lxc_attach_options_t *options = NULL; + long ret; + pid_t pid; + + if (!PyArg_ParseTuple(args, "O|O", &payload.fn, &payload.arg)) + return NULL; + if (!PyCallable_Check(payload.fn)) { + PyErr_Format(PyExc_TypeError, "attach: object not callable"); + return NULL; + } + + options = lxc_attach_parse_options(kwds); + if (!options) + return NULL; + + ret = self->container->attach(self->container, lxc_attach_python_exec, &payload, options, &pid); + if (ret < 0) + goto out; + + if (wait) { + ret = lxc_wait_for_pid_status(pid); + /* handle case where attach fails */ + if (WIFEXITED(ret) && WEXITSTATUS(ret) == 255) + ret = -1; + } else { + ret = (long)pid; + } + +out: + lxc_attach_free_options(options); + return PyLong_FromLong(ret); +} + +static PyObject * +Container_attach(Container *self, PyObject *args, PyObject *kwds) +{ + return Container_attach_and_possibly_wait(self, args, kwds, 0); +} + +static PyObject * +Container_attach_wait(Container *self, PyObject *args, PyObject *kwds) +{ + return Container_attach_and_possibly_wait(self, args, kwds, 1); +} + +static PyObject * +LXC_attach_run_shell(PyObject *self, PyObject *arg) +{ + int rv; + + rv = lxc_attach_run_shell(NULL); + + return PyLong_FromLong(rv); +} + +static PyObject * +LXC_arch_to_personality(PyObject *self, PyObject *arg) +{ + long rv = -1; + PyObject *pystr; + char *str; + + if (!PyUnicode_Check(arg)) { + PyErr_SetString(PyExc_ValueError, "Expected a string"); + return NULL; + } + + pystr = PyUnicode_AsUTF8String(arg); + if (!pystr) + return NULL; + + str = PyBytes_AsString(pystr); + if (!str) + goto out; + + rv = lxc_config_parse_arch(str); + if (rv == -1) + PyErr_SetString(PyExc_KeyError, "Failed to lookup architecture."); + +out: + 
Py_DECREF(pystr); + return rv == -1 ? NULL : PyLong_FromLong(rv); +} + +static PyObject * +LXC_attach_run_command(PyObject *self, PyObject *arg) +{ + PyObject *args_obj = NULL; + int i, rv; + lxc_attach_command_t cmd = { + NULL, /* program */ + NULL /* argv[] */ + }; + + if (!PyArg_ParseTuple(arg, "sO", (const char**)&cmd.program, &args_obj)) + return NULL; + if (args_obj && PyList_Check(args_obj)) { + cmd.argv = convert_tuple_to_char_pointer_array(args_obj); + } else { + PyErr_Format(PyExc_TypeError, "Second part of tuple passed to attach_run_command must be a list."); + return NULL; + } + + if (!cmd.argv) + return NULL; + + rv = lxc_attach_run_command(&cmd); + + for (i = 0; cmd.argv[i]; i++) + free(cmd.argv[i]); + free(cmd.argv); + + return PyLong_FromLong(rv); +} + static PyGetSetDef Container_getseters[] = { {"config_file_name", - (getter)Container_config_file_name, 0, + (getter)Container_config_file_name, NULL, "Path to the container configuration", NULL}, {"defined", - (getter)Container_defined, 0, + (getter)Container_defined, NULL, "Boolean indicating whether the container configuration exists", NULL}, {"init_pid", - (getter)Container_init_pid, 0, + (getter)Container_init_pid, NULL, "PID of the container's init process in the host's PID namespace", NULL}, {"name", - (getter)Container_name, 0, + (getter)Container_name, NULL, "Container name", NULL}, {"running", - (getter)Container_running, 0, + (getter)Container_running, NULL, "Boolean indicating whether the container is running or not", NULL}, {"state", - (getter)Container_state, 0, + (getter)Container_state, NULL, "Container state", NULL}, {NULL, NULL, NULL, NULL, NULL} }; static PyMethodDef Container_methods[] = { - {"clear_config_item", (PyCFunction)Container_clear_config_item, METH_VARARGS | METH_KEYWORDS, + {"clear_config_item", (PyCFunction)Container_clear_config_item, + METH_VARARGS|METH_KEYWORDS, "clear_config_item(key) -> boolean\n" "\n" "Clear the current value of a config key." 
}, - {"create", (PyCFunction)Container_create, METH_VARARGS | METH_KEYWORDS, + {"create", (PyCFunction)Container_create, + METH_VARARGS|METH_KEYWORDS, "create(template, args = (,)) -> boolean\n" "\n" "Create a new rootfs for the container, using the given template " "and passing some optional arguments to it." }, - {"destroy", (PyCFunction)Container_destroy, METH_NOARGS, + {"destroy", (PyCFunction)Container_destroy, + METH_NOARGS, "destroy() -> boolean\n" "\n" "Destroys the container." }, - {"freeze", (PyCFunction)Container_freeze, METH_NOARGS, + {"freeze", (PyCFunction)Container_freeze, + METH_NOARGS, "freeze() -> boolean\n" "\n" "Freezes the container and returns its return code." }, - {"get_cgroup_item", (PyCFunction)Container_get_cgroup_item, METH_VARARGS | METH_KEYWORDS, + {"get_cgroup_item", (PyCFunction)Container_get_cgroup_item, + METH_VARARGS|METH_KEYWORDS, "get_cgroup_item(key) -> string\n" "\n" "Get the current value of a cgroup entry." }, - {"get_config_item", (PyCFunction)Container_get_config_item, METH_VARARGS | METH_KEYWORDS, + {"get_config_item", (PyCFunction)Container_get_config_item, + METH_VARARGS|METH_KEYWORDS, "get_config_item(key) -> string\n" "\n" "Get the current value of a config key." }, - {"get_config_path", (PyCFunction)Container_get_config_path, METH_NOARGS, + {"get_config_path", (PyCFunction)Container_get_config_path, + METH_NOARGS, "get_config_path() -> string\n" "\n" "Return the LXC config path (where the containers are stored)." }, - {"get_keys", (PyCFunction)Container_get_keys, METH_VARARGS | METH_KEYWORDS, + {"get_keys", (PyCFunction)Container_get_keys, + METH_VARARGS|METH_KEYWORDS, "get_keys(key) -> string\n" "\n" "Get a list of valid sub-keys for a key." }, - {"load_config", (PyCFunction)Container_load_config, METH_VARARGS | METH_KEYWORDS, + {"get_ips", (PyCFunction)Container_get_ips, + METH_VARARGS|METH_KEYWORDS, + "get_ips(interface, family, scope) -> tuple\n" + "\n" + "Get a tuple of IPs for the container." 
+ }, + {"load_config", (PyCFunction)Container_load_config, + METH_VARARGS|METH_KEYWORDS, "load_config(path = DEFAULT) -> boolean\n" "\n" "Read the container configuration from its default " "location or from an alternative location if provided." }, - {"save_config", (PyCFunction)Container_save_config, METH_VARARGS | METH_KEYWORDS, + {"save_config", (PyCFunction)Container_save_config, + METH_VARARGS|METH_KEYWORDS, "save_config(path = DEFAULT) -> boolean\n" "\n" "Save the container configuration to its default " "location or to an alternative location if provided." }, - {"set_cgroup_item", (PyCFunction)Container_set_cgroup_item, METH_VARARGS | METH_KEYWORDS, + {"set_cgroup_item", (PyCFunction)Container_set_cgroup_item, + METH_VARARGS|METH_KEYWORDS, "set_cgroup_item(key, value) -> boolean\n" "\n" "Set a cgroup entry to the provided value." }, - {"set_config_item", (PyCFunction)Container_set_config_item, METH_VARARGS | METH_KEYWORDS, + {"set_config_item", (PyCFunction)Container_set_config_item, + METH_VARARGS|METH_KEYWORDS, "set_config_item(key, value) -> boolean\n" "\n" "Set a config key to the provided value." }, - {"set_config_path", (PyCFunction)Container_set_config_path, METH_VARARGS | METH_KEYWORDS, + {"set_config_path", (PyCFunction)Container_set_config_path, + METH_VARARGS|METH_KEYWORDS, "set_config_path(path) -> boolean\n" "\n" "Set the LXC config path (where the containers are stored)." }, - {"shutdown", (PyCFunction)Container_shutdown, METH_VARARGS | METH_KEYWORDS, + {"shutdown", (PyCFunction)Container_shutdown, + METH_VARARGS|METH_KEYWORDS, "shutdown(timeout = -1) -> boolean\n" "\n" "Sends SIGPWR to the container and wait for it to shutdown " "unless timeout is set to a positive value, in which case " "the container will be killed when the timeout is reached." 
}, - {"start", (PyCFunction)Container_start, METH_VARARGS | METH_KEYWORDS, + {"start", (PyCFunction)Container_start, + METH_VARARGS|METH_KEYWORDS, "start(useinit = False, cmd = (,)) -> boolean\n" "\n" "Start the container, optionally using lxc-init and " "an alternate init command, then returns its return code." }, - {"stop", (PyCFunction)Container_stop, METH_NOARGS, + {"stop", (PyCFunction)Container_stop, + METH_NOARGS, "stop() -> boolean\n" "\n" "Stop the container and returns its return code." }, - {"unfreeze", (PyCFunction)Container_unfreeze, METH_NOARGS, + {"unfreeze", (PyCFunction)Container_unfreeze, + METH_NOARGS, "unfreeze() -> boolean\n" "\n" "Unfreezes the container and returns its return code." }, - {"wait", (PyCFunction)Container_wait, METH_VARARGS | METH_KEYWORDS, + {"wait", (PyCFunction)Container_wait, + METH_VARARGS|METH_KEYWORDS, "wait(state, timeout = -1) -> boolean\n" "\n" "Wait for the container to reach a given state or timeout." }, + {"console", (PyCFunction)Container_console, + METH_VARARGS|METH_KEYWORDS, + "console(ttynum = -1, stdinfd = 0, stdoutfd = 1, stderrfd = 2, escape = 0) -> boolean\n" + "\n" + "Attach to container's console." + }, + {"console_getfd", (PyCFunction)Container_console_getfd, + METH_VARARGS|METH_KEYWORDS, + "console(ttynum = -1) -> boolean\n" + "\n" + "Attach to container's console." + }, + {"attach", (PyCFunction)Container_attach, + METH_VARARGS|METH_KEYWORDS, + "attach(run, payload) -> int\n" + "\n" + "Attach to the container. Returns the pid of the attached process." + }, + {"attach_wait", (PyCFunction)Container_attach_wait, + METH_VARARGS|METH_KEYWORDS, + "attach(run, payload) -> int\n" + "\n" + "Attach to the container. Returns the exit code of the process." 
+ }, {NULL, NULL, 0, NULL} }; @@ -642,6 +1166,12 @@ PyVarObject_HEAD_INIT(NULL, 0) }; static PyMethodDef LXC_methods[] = { + {"attach_run_shell", (PyCFunction)LXC_attach_run_shell, METH_O, + "Starts up a shell when attaching, to use as the run parameter for attach or attach_wait"}, + {"attach_run_command", (PyCFunction)LXC_attach_run_command, METH_O, + "Runs a command when attaching, to use as the run parameter for attach or attach_wait"}, + {"arch_to_personality", (PyCFunction)LXC_arch_to_personality, METH_O, + "Returns the process personality of the corresponding architecture"}, {"get_default_config_path", (PyCFunction)LXC_get_default_config_path, METH_NOARGS, "Returns the current LXC config path"}, {"get_version", (PyCFunction)LXC_get_version, METH_NOARGS, @@ -661,6 +1191,7 @@ PyMODINIT_FUNC PyInit__lxc(void) { PyObject* m; + PyObject* d; if (PyType_Ready(&_lxc_ContainerType) < 0) return NULL; @@ -671,5 +1202,37 @@ PyInit__lxc(void) Py_INCREF(&_lxc_ContainerType); PyModule_AddObject(m, "Container", (PyObject *)&_lxc_ContainerType); + + /* add constants */ + d = PyModule_GetDict(m); + + #define PYLXC_EXPORT_CONST(c) PyDict_SetItemString(d, #c, PyLong_FromLong(c)) + + /* environment variable handling */ + PYLXC_EXPORT_CONST(LXC_ATTACH_KEEP_ENV); + PYLXC_EXPORT_CONST(LXC_ATTACH_CLEAR_ENV); + + /* attach options */ + PYLXC_EXPORT_CONST(LXC_ATTACH_MOVE_TO_CGROUP); + PYLXC_EXPORT_CONST(LXC_ATTACH_DROP_CAPABILITIES); + PYLXC_EXPORT_CONST(LXC_ATTACH_SET_PERSONALITY); + PYLXC_EXPORT_CONST(LXC_ATTACH_APPARMOR); + PYLXC_EXPORT_CONST(LXC_ATTACH_REMOUNT_PROC_SYS); + PYLXC_EXPORT_CONST(LXC_ATTACH_DEFAULT); + + /* namespace flags (no other python lib exports this) */ + PYLXC_EXPORT_CONST(CLONE_NEWUTS); + PYLXC_EXPORT_CONST(CLONE_NEWIPC); + PYLXC_EXPORT_CONST(CLONE_NEWUSER); + PYLXC_EXPORT_CONST(CLONE_NEWPID); + PYLXC_EXPORT_CONST(CLONE_NEWNET); + PYLXC_EXPORT_CONST(CLONE_NEWNS); + + #undef PYLXC_EXPORT_CONST + return m; } + +/* + * kate: space-indent on; indent-width 4; 
mixedindent off; indent-mode cstyle; + */ diff --git a/src/python-lxc/lxc/__init__.py b/src/python-lxc/lxc/__init__.py index 118a08125..3fb76dc02 100644 --- a/src/python-lxc/lxc/__init__.py +++ b/src/python-lxc/lxc/__init__.py @@ -18,7 +18,7 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # import _lxc @@ -230,28 +230,6 @@ class Container(_lxc.Container): return _lxc.Container.set_config_item(self, key, value) - def attach(self, namespace="ALL", *cmd): - """ - Attach to a running container. - """ - - if not self.running: - return False - - attach = ["lxc-attach", "-n", self.name, - "-P", self.get_config_path()] - if namespace != "ALL": - attach += ["-s", namespace] - - if cmd: - attach += ["--"] + list(cmd) - - if subprocess.call( - attach, - universal_newlines=True) != 0: - return False - return True - def create(self, template, args={}): """ Create a new rootfs for the container. @@ -292,19 +270,26 @@ class Container(_lxc.Container): self.load_config() return True - def console(self, tty="1"): + def console(self, ttynum=-1, stdinfd=0, stdoutfd=1, stderrfd=2, escape=1): """ - Access the console of a container. + Attach to console of running container. """ if not self.running: return False - if subprocess.call(["lxc-console", "-n", self.name, "-t", "%s" % tty, - "-P", self.get_config_path()], - universal_newlines=True) != 0: + return _lxc.Container.console(self, ttynum, stdinfd, stdoutfd, + stderrfd, escape) + + def console_getfd(self, ttynum=-1): + """ + Attach to console of running container. 
+ """ + + if not self.running: return False - return True + + return _lxc.Container.console_getfd(self, ttynum) def get_cgroup_item(self, key): """ @@ -332,65 +317,6 @@ class Container(_lxc.Container): else: return value - def get_ips(self, timeout=60, interface=None, protocol=None): - """ - Returns the list of IP addresses for the container. - """ - - if not self.running: - return False - - ips = [] - - count = 0 - while count < timeout: - if count != 0: - time.sleep(1) - - base_cmd = ["lxc-attach", "-s", "NETWORK", "-n", self.name, "--", - "ip"] - - # Get IPv6 - if protocol in ("ipv6", None): - ip6_cmd = base_cmd + ["-6", "addr", "show", "scope", "global"] - if interface: - ip = subprocess.Popen(ip6_cmd + ["dev", interface], - stdout=subprocess.PIPE, - universal_newlines=True) - else: - ip = subprocess.Popen(ip6_cmd, stdout=subprocess.PIPE, - universal_newlines=True) - - ip.wait() - for line in ip.stdout.read().split("\n"): - fields = line.split() - if len(fields) > 2 and fields[0] == "inet6": - ips.append(fields[1].split('/')[0]) - - # Get IPv4 - if protocol in ("ipv4", None): - ip4_cmd = base_cmd + ["-4", "addr", "show", "scope", "global"] - if interface: - ip = subprocess.Popen(ip4_cmd + ["dev", interface], - stdout=subprocess.PIPE, - universal_newlines=True) - else: - ip = subprocess.Popen(ip4_cmd, stdout=subprocess.PIPE, - universal_newlines=True) - - ip.wait() - for line in ip.stdout.read().split("\n"): - fields = line.split() - if len(fields) > 2 and fields[0] == "inet": - ips.append(fields[1].split('/')[0]) - - if ips: - break - - count += 1 - - return ips - def get_keys(self, key=None): """ Returns a list of valid sub-keys. @@ -407,12 +333,41 @@ class Container(_lxc.Container): else: return value + def get_ips(self, interface=None, family=None, scope=None, timeout=0): + """ + Get a tuple of IPs for the container. 
+ """ + + kwargs = {} + if interface: + kwargs['interface'] = interface + if family: + kwargs['family'] = family + if scope: + kwargs['scope'] = scope + + ips = None + timeout = int(os.environ.get('LXC_GETIP_TIMEOUT', timeout)) + + while not ips: + ips = _lxc.Container.get_ips(self, **kwargs) + if timeout == 0: + break + + timeout -= 1 + time.sleep(1) + + return ips + def set_config_item(self, key, value): """ Set a config key to a provided value. The value can be a list for the keys supporting multiple values. """ - old_value = self.get_config_item(key) + try: + old_value = self.get_config_item(key) + except KeyError: + old_value = None # Check if it's a list def set_key(key, value): @@ -469,3 +424,52 @@ def list_containers(as_object=False, config_path=None): else: containers.append(entry.split("/")[-2]) return containers + +def attach_run_command(cmd): + """ + Run a command when attaching + + Please do not call directly, this will execvp the command. + This is to be used in conjunction with the attach method + of a container. + """ + if isinstance(cmd, tuple): + return _lxc.attach_run_command(cmd) + elif isinstance(cmd, list): + return _lxc.attach_run_command((cmd[0], cmd)) + else: + return _lxc.attach_run_command((cmd, [cmd])) + +def attach_run_shell(): + """ + Run a shell when attaching + + Please do not call directly, this will execvp the shell. + This is to be used in conjunction with the attach method + of a container. 
+ """ + return _lxc.attach_run_shell(None) + +def arch_to_personality(arch): + """ + Determine the process personality corresponding to the architecture + """ + if isinstance(arch, bytes): + arch = str(arch, 'utf-8') + return _lxc.arch_to_personality(arch) + +# Some constants for attach +LXC_ATTACH_KEEP_ENV = _lxc.LXC_ATTACH_KEEP_ENV +LXC_ATTACH_CLEAR_ENV = _lxc.LXC_ATTACH_CLEAR_ENV +LXC_ATTACH_MOVE_TO_CGROUP = _lxc.LXC_ATTACH_MOVE_TO_CGROUP +LXC_ATTACH_DROP_CAPABILITIES = _lxc.LXC_ATTACH_DROP_CAPABILITIES +LXC_ATTACH_SET_PERSONALITY = _lxc.LXC_ATTACH_SET_PERSONALITY +LXC_ATTACH_APPARMOR = _lxc.LXC_ATTACH_APPARMOR +LXC_ATTACH_REMOUNT_PROC_SYS = _lxc.LXC_ATTACH_REMOUNT_PROC_SYS +LXC_ATTACH_DEFAULT = _lxc.LXC_ATTACH_DEFAULT +CLONE_NEWUTS = _lxc.CLONE_NEWUTS +CLONE_NEWIPC = _lxc.CLONE_NEWIPC +CLONE_NEWUSER = _lxc.CLONE_NEWUSER +CLONE_NEWPID = _lxc.CLONE_NEWPID +CLONE_NEWNET = _lxc.CLONE_NEWNET +CLONE_NEWNS = _lxc.CLONE_NEWNS diff --git a/src/python-lxc/setup.py b/src/python-lxc/setup.py index 8c22961be..a1061f14e 100644 --- a/src/python-lxc/setup.py +++ b/src/python-lxc/setup.py @@ -1,3 +1,26 @@ +#!/usr/bin/python3 +# +# python-lxc: Python bindings for LXC +# +# (C) Copyright Canonical Ltd. 2012 +# +# Authors: +# Stéphane Graber +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + from distutils.core import setup, Extension module = Extension('_lxc', sources=['lxc.c'], libraries=['lxc']) diff --git a/src/tests/Makefile.am b/src/tests/Makefile.am index 4cbeeb3fe..76b38f946 100644 --- a/src/tests/Makefile.am +++ b/src/tests/Makefile.am @@ -13,17 +13,24 @@ lxc_test_get_item_SOURCES = get_item.c lxc_test_getkeys_SOURCES = getkeys.c lxc_test_lxcpath_SOURCES = lxcpath.c lxc_test_cgpath_SOURCES = cgpath.c +lxc_test_clonetest_SOURCES = clonetest.c +lxc_test_console_SOURCES = console.c +lxc_usernic_test_SOURCES = ../lxc/lxc_user_nic.c ../lxc/nl.c +lxc_usernic_test_CFLAGS = -DISTEST AM_CFLAGS=-I$(top_srcdir)/src \ -DLXCROOTFSMOUNT=\"$(LXCROOTFSMOUNT)\" \ -DLXCPATH=\"$(LXCPATH)\" \ -DLXC_GLOBAL_CONF=\"$(LXC_GLOBAL_CONF)\" \ - -DLXCINITDIR=\"$(LXCINITDIR)\" + -DLXCINITDIR=\"$(LXCINITDIR)\" \ + -DLXC_DEFAULT_CONFIG=\"$(LXC_DEFAULT_CONFIG)\" bin_PROGRAMS = lxc-test-containertests lxc-test-locktests lxc-test-startone \ lxc-test-destroytest lxc-test-saveconfig lxc-test-createtest \ lxc-test-shutdowntest lxc-test-get_item lxc-test-getkeys lxc-test-lxcpath \ - lxc-test-cgpath + lxc-test-cgpath lxc-test-clonetest lxc-test-console lxc-usernic-test + +bin_SCRIPTS = lxc-test-usernic endif @@ -38,4 +45,7 @@ EXTRA_DIST = \ lxcpath.c \ saveconfig.c \ shutdowntest.c \ - startone.c + clonetest.c \ + startone.c \ + console.c \ + lxc-test-usernic diff --git a/src/tests/cgpath.c b/src/tests/cgpath.c index f05c43592..6761af460 100644 --- a/src/tests/cgpath.c +++ b/src/tests/cgpath.c @@ -18,6 +18,7 @@ */ #include "../lxc/lxccontainer.h" +#include #include #include #include @@ -27,20 +28,180 @@ #include #include "../lxc/cgroup.h" #include "../lxc/lxc.h" +#include "../lxc/commands.h" #define MYNAME "lxctest1" -#define MYNAME2 "lxctest2" -#define TSTERR(x) 
do { \ - fprintf(stderr, "%d: %s\n", __LINE__, x); \ +#define TSTERR(fmt, ...) do { \ + fprintf(stderr, "%s:%d " fmt "\n", __FILE__, __LINE__, ##__VA_ARGS__); \ } while (0) +/* + * test_running_container: test cgroup functions against a running container + * + * @group : name of the container group or NULL for default "lxc" + * @name : name of the container + */ +static int test_running_container(const char *lxcpath, + const char *group, const char *name) +{ + int ret = -1; + struct lxc_container *c = NULL; + char *cgrelpath; + char *cgabspath; + char relpath[PATH_MAX+1]; + char abspath[PATH_MAX+1]; + char value[NAME_MAX], value_save[NAME_MAX]; + + sprintf(relpath, "%s/%s", group ? group : "lxc", name); + + if ((c = lxc_container_new(name, lxcpath)) == NULL) { + TSTERR("container %s couldn't instantiate", name); + goto err1; + } + if (!c->is_defined(c)) { + TSTERR("container %s does not exist", name); + goto err2; + } + + cgrelpath = lxc_cmd_get_cgroup_path(c->name, c->config_path, "freezer"); + if (!cgrelpath) { + TSTERR("lxc_cmd_get_cgroup_path returned NULL"); + goto err2; + } + if (!strstr(cgrelpath, relpath)) { + TSTERR("lxc_cmd_get_cgroup_path %s not in %s", relpath, cgrelpath); + goto err3; + } + + /* test get/set value using memory.swappiness file */ + ret = lxc_cgroup_get(c->name, "memory.swappiness", value, + sizeof(value), c->config_path); + if (ret < 0) { + TSTERR("lxc_cgroup_get failed"); + goto err3; + } + strcpy(value_save, value); + + ret = lxc_cgroup_set(c->name, "memory.swappiness", "100", c->config_path); + if (ret < 0) { + TSTERR("lxc_cgroup_set_bypath failed"); + goto err3; + } + ret = lxc_cgroup_get(c->name, "memory.swappiness", value, + sizeof(value), c->config_path); + if (ret < 0) { + TSTERR("lxc_cgroup_get failed"); + goto err3; + } + if (strcmp(value, "100\n")) { + TSTERR("lxc_cgroup_set_bypath failed to set value >%s<", value); + goto err3; + } + + /* restore original value */ + ret = lxc_cgroup_set(c->name, "memory.swappiness", 
value_save, + c->config_path); + if (ret < 0) { + TSTERR("lxc_cgroup_set failed"); + goto err3; + } + ret = lxc_cgroup_get(c->name, "memory.swappiness", value, + sizeof(value), c->config_path); + if (ret < 0) { + TSTERR("lxc_cgroup_get failed"); + goto err3; + } + if (strcmp(value, value_save)) { + TSTERR("lxc_cgroup_set failed to set value >%s<", value); + goto err3; + } + + cgabspath = lxc_cgroup_path_get("freezer", c->name, c->config_path); + if (!cgabspath) { + TSTERR("lxc_cgroup_path_get returned NULL"); + goto err3; + } + sprintf(abspath, "%s/%s/%s", "freezer", group ? group : "lxc", c->name); + if (!strstr(cgabspath, abspath)) { + TSTERR("lxc_cgroup_path_get %s not in %s", abspath, cgabspath); + goto err4; + } + + free(cgabspath); + cgabspath = lxc_cgroup_path_get("freezer.state", c->name, c->config_path); + if (!cgabspath) { + TSTERR("lxc_cgroup_path_get returned NULL"); + goto err3; + } + sprintf(abspath, "%s/%s/%s", "freezer", group ? group : "lxc", c->name); + if (!strstr(cgabspath, abspath)) { + TSTERR("lxc_cgroup_path_get %s not in %s", abspath, cgabspath); + goto err4; + } + + ret = 0; +err4: + free(cgabspath); +err3: + free(cgrelpath); +err2: + lxc_container_put(c); +err1: + return ret; +} + +static int test_container(const char *lxcpath, + const char *group, const char *name, + const char *template) +{ + int ret; + struct lxc_container *c = NULL; + + if (lxcpath) { + ret = mkdir(lxcpath, 0755); + if (ret < 0 && errno != EEXIST) { + TSTERR("failed to mkdir %s %s", lxcpath, strerror(errno)); + goto out1; + } + } + ret = -1; + + if ((c = lxc_container_new(name, lxcpath)) == NULL) { + TSTERR("instantiating container %s", name); + goto out1; + } + if (c->is_defined(c)) { + c->stop(c); + c->destroy(c); + c = lxc_container_new(name, lxcpath); + } + c->set_config_item(c, "lxc.network.type", "empty"); + if (!c->createl(c, template, NULL, NULL, 0, NULL)) { + TSTERR("creating container %s", name); + goto out2; + } + c->load_config(c, NULL); + 
c->want_daemonize(c); + if (!c->startl(c, 0, NULL)) { + TSTERR("starting container %s", name); + goto out3; + } + + ret = test_running_container(lxcpath, group, name); + + c->stop(c); +out3: + c->destroy(c); +out2: + lxc_container_put(c); +out1: + return ret; +} + int main() { - struct lxc_container *c = NULL, *c2 = NULL; - char *path; - int len; - int ret, retv = -1; + int ret = EXIT_FAILURE; /* won't require privilege necessarily once users are classified by * pam_cgroup */ @@ -49,116 +210,31 @@ int main() exit(0); } - printf("Basic cgroup path tests...\n"); - path = lxc_cgroup_path_create(NULL, MYNAME); - len = strlen(path); - if (!path || !len) { - TSTERR("zero result from lxc_cgroup_path_create"); - exit(1); - } - if (!strstr(path, "lxc/" MYNAME)) { - TSTERR("lxc_cgroup_path_create NULL lxctest1"); - exit(1); - } - free(path); + #if TEST_ALREADY_RUNNING_CT - path = lxc_cgroup_path_create("ab", MYNAME); - len = strlen(path); - if (!path || !len) { - TSTERR("zero result from lxc_cgroup_path_create"); - exit(1); - } - if (!strstr(path, "ab/" MYNAME)) { - TSTERR("lxc_cgroup_path_create ab lxctest1"); - exit(1); - } - free(path); - printf("... passed\n"); - - printf("Container creation tests...\n"); - - if ((c = lxc_container_new(MYNAME, NULL)) == NULL) { - TSTERR("instantiating first container"); - exit(1); - } - if (c->is_defined(c)) { - c->stop(c); - c->destroy(c); - c = lxc_container_new(MYNAME, NULL); - } - c->set_config_item(c, "lxc.network.type", "empty"); - if (!c->createl(c, "ubuntu", NULL)) { - TSTERR("creating first container"); - exit(1); - } - c->load_config(c, NULL); - c->want_daemonize(c); - if (!c->startl(c, 0, NULL)) { - TSTERR("starting first container"); + /* + * This is useful for running with valgrind to test for memory + * leaks. The container should already be running, we can't start + * the container ourselves because valgrind gets confused by lxc's + * internal calls to clone. 
+ */ + if (test_running_container(NULL, NULL, "bb01") < 0) goto out; - } - printf("first container passed. Now two containers...\n"); + printf("Running container cgroup tests...Passed\n"); - char *nsgroup; -#define ALTBASE "/var/lib/lxctest2" - ret = mkdir(ALTBASE, 0755); + #else - ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME, c->get_config_path(c)); - if (ret < 0 || !strstr(nsgroup, "lxc/" MYNAME)) { - TSTERR("getting first cgroup path from lxc_command"); + if (test_container(NULL, NULL, MYNAME, "busybox") < 0) goto out; - } + printf("Container creation tests...Passed\n"); - /* start second container */ - if ((c2 = lxc_container_new(MYNAME2, ALTBASE)) == NULL) { - TSTERR("instantiating first container"); + if (test_container("/var/lib/lxctest2", NULL, MYNAME, "busybox") < 0) goto out; - } - if (c2->is_defined(c2)) { - c2->stop(c2); - c2->destroy(c2); - c2 = lxc_container_new(MYNAME2, ALTBASE); - } - c2->set_config_item(c2, "lxc.network.type", "empty"); - if (!c2->createl(c2, "ubuntu", NULL)) { - TSTERR("creating first container"); - goto out; - } + printf("Container creation with LXCPATH tests...Passed\n"); - c2->load_config(c2, NULL); - c2->want_daemonize(c2); - if (!c2->startl(c2, 0, NULL)) { - TSTERR("starting first container"); - goto out; - } + #endif - ret = lxc_cgroup_path_get(&nsgroup, "freezer", MYNAME2, c2->get_config_path(c2)); - if (ret < 0 || !strstr(nsgroup, "lxc/" MYNAME2)) { - TSTERR("getting second cgroup path from lxc_command"); - goto out; - } - - const char *dirpath; - if (lxc_get_cgpath(&dirpath, NULL, c2->name, c2->config_path) < 0) { - TSTERR("getting second container's cgpath"); - return -1; - } - - if (lxc_cgroup_nrtasks(dirpath) < 1) { - TSTERR("getting nrtasks"); - goto out; - } - printf("...passed\n"); - - retv = 0; + ret = EXIT_SUCCESS; out: - if (c2) { - c2->stop(c2); - c2->destroy(c2); - } - if (c) { - c->stop(c); - c->destroy(c); - } - return retv; + return ret; } diff --git a/src/tests/clonetest.c b/src/tests/clonetest.c 
new file mode 100644 index 000000000..da3ce7526 --- /dev/null +++ b/src/tests/clonetest.c @@ -0,0 +1,178 @@ +/* liblxcapi + * + * Copyright © 2012 Serge Hallyn . + * Copyright © 2012 Canonical Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#include "../lxc/lxccontainer.h" + +#include +#include +#include +#include +#include +#include +#include + +#define MYNAME "clonetest1" +#define MYNAME2 "clonetest2" + +int main(int argc, char *argv[]) +{ + struct lxc_container *c = NULL, *c2 = NULL, *c3 = NULL; + int ret = 1; + + c = lxc_container_new(MYNAME, NULL); + c2 = lxc_container_new(MYNAME2, NULL); + if (c) { + c->destroy(c); + lxc_container_put(c); + c = NULL; + } + if (c2) { + c2->destroy(c2); + lxc_container_put(c2); + c2 = NULL; + } + + if ((c = lxc_container_new(MYNAME, NULL)) == NULL) { + fprintf(stderr, "%d: error opening lxc_container %s\n", __LINE__, MYNAME); + ret = 1; + goto out; + } + c->save_config(c, NULL); + if (!c->createl(c, "ubuntu", NULL, NULL, 0, NULL)) { + fprintf(stderr, "%d: failed to create a container\n", __LINE__); + goto out; + } + c->load_config(c, NULL); + + if (!c->is_defined(c)) { + fprintf(stderr, "%d: %s thought it was not defined\n", __LINE__, MYNAME); + goto out; + } + + c2 = c->clone(c, MYNAME2, NULL, 0, NULL, NULL, 0, NULL); + if (!c2) { + fprintf(stderr, "%d: %s clone returned NULL\n", __LINE__, MYNAME2); + goto 
out; + } + + if (!c2->is_defined(c2)) { + fprintf(stderr, "%d: %s not defined after clone\n", __LINE__, MYNAME2); + goto out; + } + + fprintf(stderr, "directory backing store tests passed\n"); + + // now test with lvm + // Only do this if clonetestlvm1 exists - user has to set this up + // in advance + //c2->destroy(c2); + lxc_container_put(c2); + //c->destroy(c); + lxc_container_put(c); + c = NULL; + + c2 = lxc_container_new("clonetestlvm2", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + } + c2 = lxc_container_new("clonetest-o1", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + } + c2 = lxc_container_new("clonetest-o2", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + } + c2 = NULL; + + // lvm-copied + c = lxc_container_new("clonetestlvm1", NULL); + if (!c) { + fprintf(stderr, "failed loading clonetestlvm1\n"); + goto out; + } + if (!c->is_defined(c)) { + fprintf(stderr, "clonetestlvm1 does not exist, skipping lvm tests\n"); + ret = 0; + goto out; + } + + if ((c2 = c->clone(c, "clonetestlvm2", NULL, 0, NULL, NULL, 0, NULL)) == NULL) { + fprintf(stderr, "lvm clone failed\n"); + goto out; + } + + lxc_container_put(c2); + + // lvm-snapshot + c2 = lxc_container_new("clonetestlvm3", NULL); + if (c2) { + if (c2->is_defined(c2)) + c2->destroy(c2); + lxc_container_put(c2); + c2 = NULL; + } + + if ((c2 = c->clone(c, "clonetestlvm3", NULL, LXC_CLONE_SNAPSHOT, NULL, NULL, 0, NULL)) == NULL) { + fprintf(stderr, "lvm clone failed\n"); + goto out; + } + lxc_container_put(c2); + lxc_container_put(c); + c = c2 = NULL; + + if ((c = lxc_container_new(MYNAME, NULL)) == NULL) { + fprintf(stderr, "error opening original container for overlay test\n"); + goto out; + } + + // Now create an overlayfs clone of a dir-backed container + if ((c2 = c->clone(c, "clonetest-o1", NULL, LXC_CLONE_SNAPSHOT, "overlayfs", NULL, 0, NULL)) == NULL) { + fprintf(stderr, 
"overlayfs clone of dir failed\n"); + goto out; + } + + // Now create an overlayfs clone of the overlayfs clone + if ((c3 = c2->clone(c2, "clonetest-o2", NULL, LXC_CLONE_SNAPSHOT, "overlayfs", NULL, 0, NULL)) == NULL) { + fprintf(stderr, "overlayfs clone of overlayfs failed\n"); + goto out; + } + + fprintf(stderr, "all clone tests passed for %s\n", c->name); + ret = 0; + +out: + if (c3) { + lxc_container_put(c3); + } + if (c2) { + //c2->destroy(c2); // keep around to verify manuall + lxc_container_put(c2); + } + if (c) { + //c->destroy(c); + lxc_container_put(c); + } + exit(ret); +} diff --git a/src/tests/console.c b/src/tests/console.c new file mode 100644 index 000000000..cd6398109 --- /dev/null +++ b/src/tests/console.c @@ -0,0 +1,177 @@ +/* liblxcapi + * + * Copyright © 2013 Oracle. + * + * Authors: + * Dwight Engen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "../lxc/lxccontainer.h" + +#include +#include + +#define TTYCNT 4 +#define TTYCNT_STR "4" +#define TSTNAME "lxcconsoletest" +#define MAXCONSOLES 512 + +#define TSTERR(fmt, ...) 
do { \ + fprintf(stderr, "%s:%d " fmt "\n", __FILE__, __LINE__, ##__VA_ARGS__); \ +} while (0) + +static void test_console_close_all(int ttyfd[MAXCONSOLES], + int masterfd[MAXCONSOLES]) +{ + int i; + + for (i = 0; i < MAXCONSOLES; i++) { + if (masterfd[i] != -1) { + close(masterfd[i]); + masterfd[i] = -1; + } + if (ttyfd[i] != -1) { + close(ttyfd[i]); + ttyfd[i] = -1; + } + } +} + +static int test_console_running_container(struct lxc_container *c) +{ + int nrconsoles, i, ret = -1; + int ttynum [MAXCONSOLES]; + int ttyfd [MAXCONSOLES]; + int masterfd[MAXCONSOLES]; + + for (i = 0; i < MAXCONSOLES; i++) + ttynum[i] = ttyfd[i] = masterfd[i] = -1; + + ttynum[0] = 1; + ret = c->console_getfd(c, &ttynum[0], &masterfd[0]); + if (ret < 0) { + TSTERR("console allocate failed"); + goto err1; + } + ttyfd[0] = ret; + if (ttynum[0] != 1) { + TSTERR("console allocate got bad ttynum %d", ttynum[0]); + goto err2; + } + + /* attempt to alloc same ttynum */ + ret = c->console_getfd(c, &ttynum[0], &masterfd[1]); + if (ret != -1) { + TSTERR("console allocate should fail for allocated ttynum %d", ttynum[0]); + goto err2; + } + close(masterfd[0]); masterfd[0] = -1; + close(ttyfd[0]); ttyfd[0] = -1; + + /* ensure we can allocate all consoles, we do this a few times to + * show that the closes are freeing up the allocated slots + */ + for (i = 0; i < 10; i++) { + for (nrconsoles = 0; nrconsoles < MAXCONSOLES; nrconsoles++) { + ret = c->console_getfd(c, &ttynum[nrconsoles], &masterfd[nrconsoles]); + if (ret < 0) + break; + ttyfd[nrconsoles] = ret; + } + if (nrconsoles != TTYCNT) { + TSTERR("didn't allocate all consoles %d != %d", nrconsoles, TTYCNT); + goto err2; + } + test_console_close_all(ttyfd, masterfd); + } + ret = 0; + +err2: + test_console_close_all(ttyfd, masterfd); +err1: + return ret; +} + +/* test_console: test console function + * + * @lxcpath : the lxcpath in which to create the container + * @group : name of the container group or NULL for default "lxc" + * @name : name of
the container + * @template : template to use when creating the container + */ +static int test_console(const char *lxcpath, + const char *group, const char *name, + const char *template) +{ + int ret; + struct lxc_container *c = NULL; + + if (lxcpath) { + ret = mkdir(lxcpath, 0755); + if (ret < 0 && errno != EEXIST) { + TSTERR("failed to mkdir %s %s", lxcpath, strerror(errno)); + goto out1; + } + } + ret = -1; + + if ((c = lxc_container_new(name, lxcpath)) == NULL) { + TSTERR("instantiating container %s", name); + goto out1; + } + if (c->is_defined(c)) { + c->stop(c); + c->destroy(c); + c = lxc_container_new(name, lxcpath); + } + if (!c->createl(c, template, NULL, NULL, 0, NULL)) { + TSTERR("creating container %s", name); + goto out2; + } + c->load_config(c, NULL); + c->set_config_item(c, "lxc.tty", TTYCNT_STR); + c->save_config(c, NULL); + c->want_daemonize(c); + if (!c->startl(c, 0, NULL)) { + TSTERR("starting container %s", name); + goto out3; + } + + ret = test_console_running_container(c); + + c->stop(c); +out3: + c->destroy(c); +out2: + lxc_container_put(c); +out1: + return ret; +} + +int main(int argc, char *argv[]) +{ + int ret; + ret = test_console(NULL, NULL, TSTNAME, "busybox"); + if (ret < 0) + goto err1; + + ret = test_console("/var/lib/lxctest2", NULL, TSTNAME, "busybox"); + if (ret < 0) + goto err1; + printf("All tests passed\n"); +err1: + return ret; +} diff --git a/src/tests/containertests.c b/src/tests/containertests.c index 8868faacb..8d1658fab 100644 --- a/src/tests/containertests.c +++ b/src/tests/containertests.c @@ -71,7 +71,7 @@ static int create_busybox(void) return -1; } if (pid == 0) { - ret = execlp("lxc-create", "lxc-create", "-t", "busybox", "-f", "/etc/lxc/default.conf", "-n", MYNAME, NULL); + ret = execlp("lxc-create", "lxc-create", "-t", "busybox", "-f", LXC_DEFAULT_CONFIG, "-n", MYNAME, NULL); // Should not return perror("execl"); exit(1); diff --git a/src/tests/createtest.c b/src/tests/createtest.c index c2abee233..879d0a122 
100644 --- a/src/tests/createtest.c +++ b/src/tests/createtest.c @@ -50,7 +50,7 @@ int main(int argc, char *argv[]) } c->set_config_item(c, "lxc.network.link", "lxcbr0"); c->set_config_item(c, "lxc.network.flags", "up"); - if (!c->createl(c, "ubuntu", "-r", "lucid", NULL)) { + if (!c->createl(c, "ubuntu", NULL, NULL, 0, "-r", "lucid", NULL)) { fprintf(stderr, "%d: failed to create a lucid container\n", __LINE__); goto out; } diff --git a/src/tests/destroytest.c b/src/tests/destroytest.c index 0552b4c05..428d09ef1 100644 --- a/src/tests/destroytest.c +++ b/src/tests/destroytest.c @@ -38,7 +38,7 @@ static int create_ubuntu(void) return -1; } if (pid == 0) { - ret = execlp("lxc-create", "lxc-create", "-t", "ubuntu", "-f", "/etc/lxc/default.conf", "-n", MYNAME, NULL); + ret = execlp("lxc-create", "lxc-create", "-t", "ubuntu", "-f", LXC_DEFAULT_CONFIG, "-n", MYNAME, NULL); // Should not return perror("execl"); exit(1); diff --git a/src/tests/get_item.c b/src/tests/get_item.c index d3e6d29ad..c4c95cbd4 100644 --- a/src/tests/get_item.c +++ b/src/tests/get_item.c @@ -170,7 +170,7 @@ int main(int argc, char *argv[]) ret = 1; goto out; } - if (!c->createl(c, "ubuntu", "-r", "lucid", NULL)) { + if (!c->createl(c, "ubuntu", NULL, NULL, 0, "-r", "lucid", NULL)) { fprintf(stderr, "%d: failed to create a lucid container\n", __LINE__); ret = 1; goto out; diff --git a/src/tests/locktests.c b/src/tests/locktests.c index f08555805..360851fdf 100644 --- a/src/tests/locktests.c +++ b/src/tests/locktests.c @@ -27,213 +27,115 @@ #define mycontainername "lxctest.sem" #define TIMEOUT_SECS 3 -int timedout; -int pid_to_kill; - -void timeouthandler(int sig) +void test_two_locks(void) { - // timeout received - timedout = 1; - kill(pid_to_kill, SIGTERM); -} + struct lxc_lock *l; + pid_t pid; + int ret, status; + int p[2]; + char c; -void starttimer(int secs) -{ - timedout = 0; - signal(SIGALRM, timeouthandler); - alarm(secs); -} -void stoptimer(void) -{ - alarm(0); - signal(SIGALRM, NULL); -} 
- -int test_one_lock(sem_t *lock) -{ - int ret; - starttimer(TIMEOUT_SECS); - ret = lxclock(lock, TIMEOUT_SECS*2); - stoptimer(); - if (ret == 0) { - lxcunlock(lock); - return 0; - } - if (timedout) - fprintf(stderr, "%d: timed out waiting for lock\n", __LINE__); - else - fprintf(stderr, "%d: failed to get single lock\n", __LINE__); - return 1; -} - -/* - * get one lock. Fork a second task to try to get a second lock, - * with infinite timeout. If our alarm hits, kill the second - * task. If second task does not - */ -int test_two_locks(sem_t *lock) -{ - int status; - int ret; - - ret = lxclock(lock, 1); - if (ret) { - fprintf(stderr, "%d: Error getting first lock\n", __LINE__); - return 2; - } - - pid_to_kill = fork(); - if (pid_to_kill < 0) { - fprintf(stderr, "%d: Failed to fork\n", __LINE__); - lxcunlock(lock); - return 3; - } - - if (pid_to_kill == 0) { // child - ret = lxclock(lock, TIMEOUT_SECS*2); - if (ret == 0) { - lxcunlock(lock); - exit(0); + if (pipe(p) < 0) + exit(1); + if ((pid = fork()) < 0) + exit(1); + if (pid == 0) { + if (read(p[0], &c, 1) < 0) { + perror("read"); + exit(1); } - fprintf(stderr, "%d: child, was not able to get lock\n", __LINE__); + l = lxc_newlock("/tmp", "lxctest-sem"); + if (!l) { + fprintf(stderr, "%d: child: failed to create lock\n", __LINE__); + exit(1); + } + if (lxclock(l, 0) < 0) { + fprintf(stderr, "%d: child: failed to grab lock\n", __LINE__); + exit(1); + } + fprintf(stderr, "%d: child: grabbed lock\n", __LINE__); + exit(0); + } + l = lxc_newlock("/tmp", "lxctest-sem"); + if (!l) { + fprintf(stderr, "%d: failed to create lock\n", __LINE__); exit(1); } - starttimer(TIMEOUT_SECS); - waitpid(pid_to_kill, &status, 0); - stoptimer(); - if (WIFEXITED(status)) { - // child exited normally - timeout didn't kill it - if (WEXITSTATUS(status) == 0) - fprintf(stderr, "%d: child was able to get the lock\n", __LINE__); - else - fprintf(stderr, "%d: child timed out too early\n", __LINE__); - lxcunlock(lock); - return 1; - } - 
lxcunlock(lock); - return 0; -} - -/* - * get one lock. try to get second lock, but asking for timeout. If - * should return failure. If our own alarm, set at twice the lock - * request's timeout, hits, then lxclock() did not properly time out. - */ -int test_with_timeout(sem_t *lock) -{ - int status; - int ret = 0; - - ret = lxclock(lock, 0); - if (ret) { - fprintf(stderr, "%d: Error getting first lock\n", __LINE__); - return 2; - } - pid_to_kill = fork(); - if (pid_to_kill < 0) { - fprintf(stderr, "%d: Error on fork\n", __LINE__); - lxcunlock(lock); - return 2; - } - if (pid_to_kill == 0) { - ret = lxclock(lock, TIMEOUT_SECS); - if (ret == 0) { - lxcunlock(lock); - exit(0); - } + if (lxclock(l, 0) < 0) { + fprintf(stderr, "%d; failed to get lock\n", __LINE__); exit(1); } - starttimer(TIMEOUT_SECS * 2); - waitpid(pid_to_kill, &status, 0); - stoptimer(); - if (!WIFEXITED(status)) { - fprintf(stderr, "%d: lxclock did not honor its timeout\n", __LINE__); - lxcunlock(lock); - return 1; + if (write(p[1], &c, 1) < 0) { + perror("write"); + exit(1); } - if (WEXITSTATUS(status) == 0) { - fprintf(stderr, "%d: child was able to get lock, should have failed with timeout\n", __LINE__); - ret = 1; + sleep(3); + ret = waitpid(pid, &status, WNOHANG); + if (ret == pid) { // task exited + if (WIFEXITED(status)) { + printf("%d exited normally with exit code %d\n", pid, + WEXITSTATUS(status)); + if (WEXITSTATUS(status) == 0) + exit(1); + } else + printf("%d did not exit normally\n", pid); + return; + } else if (ret < 0) { + perror("waitpid"); + exit(1); } - lxcunlock(lock); - return ret; + kill(pid, SIGKILL); + wait(&status); + close(p[1]); + close(p[0]); + lxcunlock(l); + lxc_putlock(l); } int main(int argc, char *argv[]) { - int ret, sval, r; - sem_t *lock; + int ret; + struct lxc_lock *lock; - lock = lxc_newlock(NULL); - if (!lock) { + lock = lxc_newlock(NULL, NULL); + if (!lock) { fprintf(stderr, "%d: failed to get unnamed lock\n", __LINE__); exit(1); - } - ret = lxclock(lock, 
0); - if (ret) { + } + ret = lxclock(lock, 0); + if (ret) { fprintf(stderr, "%d: failed to take unnamed lock (%d)\n", __LINE__, ret); exit(1); - } + } - ret = lxcunlock(lock); - if (ret) { + ret = lxcunlock(lock); + if (ret) { fprintf(stderr, "%d: failed to put unnamed lock (%d)\n", __LINE__, ret); exit(1); - } + } + lxc_putlock(lock); - sem_destroy(lock); - free(lock); - - lock = lxc_newlock(mycontainername); + lock = lxc_newlock("/var/lib/lxc", mycontainername); if (!lock) { fprintf(stderr, "%d: failed to get lock\n", __LINE__); exit(1); } - r = sem_getvalue(lock, &sval); - if (!r) { - fprintf(stderr, "%d: sem value at start is %d\n", __LINE__, sval); - } else { - fprintf(stderr, "%d: failed to get initial value\n", __LINE__); + struct stat sb; + // we don't create the file until the container is running, so this + // bit of the test needs to be changed + //char *pathname = "/run/lock/lxc/var/lib/lxc/" mycontainername; + char *pathname = "/run/lock/lxc/var/lib/lxc/"; + ret = stat(pathname, &sb); + if (ret != 0) { + fprintf(stderr, "%d: filename %s not created\n", __LINE__, + pathname); + exit(1); } + lxc_putlock(lock); - ret = test_one_lock(lock); - if (ret) { - fprintf(stderr, "%d: test failed\n", __LINE__); - goto out; - } - r = sem_getvalue(lock, &sval); - if (!r) { - fprintf(stderr, "%d: sem value is %d\n", __LINE__, sval); - } else { - fprintf(stderr, "%d: failed to get sem value\n", __LINE__); - } + test_two_locks(); - ret = test_two_locks(lock); - if (ret) { - fprintf(stderr, "%d: test failed\n", __LINE__); - goto out; - } - r = sem_getvalue(lock, &sval); - if (!r) { - fprintf(stderr, "%d: sem value is %d\n", __LINE__, sval); - } else { - fprintf(stderr, "%d: failed to get value\n", __LINE__); - } + fprintf(stderr, "all tests passed\n"); - ret = test_with_timeout(lock); - if (ret) { - fprintf(stderr, "%d: test failed\n", __LINE__); - goto out; - } - r = sem_getvalue(lock, &sval); - if (!r) { - fprintf(stderr, "%d: sem value is %d\n", __LINE__, sval); - } 
else { - fprintf(stderr, "%d: failed to get value\n", __LINE__); - } - - fprintf(stderr, "all tests passed\n"); - -out: exit(ret); } diff --git a/src/tests/lxc-test-usernic b/src/tests/lxc-test-usernic new file mode 100755 index 000000000..9e6d834b4 --- /dev/null +++ b/src/tests/lxc-test-usernic @@ -0,0 +1,67 @@ +#!/bin/bash + +# lxc: linux Container library + +# Authors: +# Serge Hallyn +# +# This is a test script for the lxc-user-nic program + +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. + +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. + +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +conffile="/tmp/lxc-usernet" +dbfile="/tmp/nics" +sysfsdir=/tmp/lxcnettest + +rm -f $conffile $dbfile + +rm -rf $sysfsdir +mkdir -p $sysfsdir + +# there is no conffile, so we have no permissions +lxc-usernic-test 1111 veth lxcbr0 > /dev/null 2>&1 +if [ $? -eq 0 ]; then + echo "Fail: empty conffile should not allow me a nic" + exit 1 +fi + +cat > $conffile << EOF +$(id -un) veth lxcbr0 1 +EOF + +# Should be allowed one but not two +lxc-usernic-test 1111 veth lxcbr0 > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "Failed to get one allowed nic" + exit 1 +fi + +lxc-usernic-test 1111 veth lxcbr0 > /dev/null 2>&1 +if [ $? 
-eq 0 ]; then + echo "Fail: was able to get a second nic" + exit 1 +fi + +# now remove the 'existing nic' and make sure we're allowed to create +# a new one +lxc-usernic-test 1111 veth lxcbr0 > /dev/null 2>&1 +rm -rf $sysfsdir +mkdir -p $sysfsdir +if [ $? -ne 0 ]; then + echo "Fail: was unable to get a replacement nic" + exit 1 +fi + +echo "All tests passed" diff --git a/src/tests/saveconfig.c b/src/tests/saveconfig.c index fa84e3214..649573f60 100644 --- a/src/tests/saveconfig.c +++ b/src/tests/saveconfig.c @@ -38,7 +38,7 @@ static int create_ubuntu(void) return -1; } if (pid == 0) { - ret = execlp("lxc-create", "lxc-create", "-t", "ubuntu", "-f", "/etc/lxc/default.conf", "-n", MYNAME, NULL); + ret = execlp("lxc-create", "lxc-create", "-t", "ubuntu", "-f", LXC_DEFAULT_CONFIG, "-n", MYNAME, NULL); // Should not return perror("execl"); exit(1); diff --git a/src/tests/shutdowntest.c b/src/tests/shutdowntest.c index a1a84e813..f67995e1d 100644 --- a/src/tests/shutdowntest.c +++ b/src/tests/shutdowntest.c @@ -51,7 +51,7 @@ int main(int argc, char *argv[]) } c->set_config_item(c, "lxc.network.link", "lxcbr0"); c->set_config_item(c, "lxc.network.flags", "up"); - if (!c->createl(c, "ubuntu", "-r", "lucid", NULL)) { + if (!c->createl(c, "ubuntu", NULL, NULL, 0, "-r", "lucid", NULL)) { fprintf(stderr, "%d: failed to create a lucid container\n", __LINE__); goto out; } diff --git a/src/tests/startone.c b/src/tests/startone.c index 1eb3e9915..d781e758b 100644 --- a/src/tests/startone.c +++ b/src/tests/startone.c @@ -70,7 +70,7 @@ static int create_ubuntu(void) return -1; } if (pid == 0) { - ret = execlp("lxc-create", "lxc-create", "-t", "ubuntu", "-f", "/etc/lxc/default.conf", "-n", MYNAME, NULL); + ret = execlp("lxc-create", "lxc-create", "-t", "ubuntu", "-f", LXC_DEFAULT_CONFIG, "-n", MYNAME, NULL); // Should not return perror("execl"); exit(1); @@ -222,6 +222,7 @@ int main(int argc, char *argv[]) c->stop(c); /* feh - multilib has moved the lxc-init crap */ +#if 0 goto ok; 
ret = system("mkdir -p " LXCPATH "/lxctest1/rootfs//usr/local/libexec/lxc"); @@ -251,6 +252,7 @@ int main(int argc, char *argv[]) // auto-check result? ('bobo' is printed on stdout) ok: +#endif fprintf(stderr, "all lxc_container tests passed for %s\n", c->name); ret = 0; diff --git a/templates/Makefile.am b/templates/Makefile.am index 0c3066710..98d6d72c3 100644 --- a/templates/Makefile.am +++ b/templates/Makefile.am @@ -11,4 +11,5 @@ templates_SCRIPTS = \ lxc-busybox \ lxc-sshd \ lxc-archlinux \ - lxc-alpine + lxc-alpine \ + lxc-cirros diff --git a/templates/lxc-alpine.in b/templates/lxc-alpine.in index 962d274e8..05aec7473 100644 --- a/templates/lxc-alpine.in +++ b/templates/lxc-alpine.in @@ -1,20 +1,99 @@ #!/bin/sh +key_sha256sums="9c102bcc376af1498d549b77bdbfa815ae86faa1d2d82f040e616b18ef2df2d4 alpine-devel@lists.alpinelinux.org-4a6a0840.rsa.pub +2adcf7ce224f476330b5360ca5edb92fd0bf91c92d83292ed028d7c4e26333ab alpine-devel@lists.alpinelinux.org-4d07755e.rsa.pub" + +get_static_apk () { + wget="wget -q -O -" + pkglist=alpine-keys:apk-tools-static + auto_repo_dir= + + if [ -z "$repository" ]; then + url=http://wiki.alpinelinux.org/cgi-bin/dl.cgi + if [ -z "$release" ]; then + echo -n "Determining the latest release... 
" + release=$($wget $url/.latest.$apk_arch.txt | \ + cut -d " " -f 3 | cut -d / -f 1 | uniq) + if [ -z "$release" ]; then + echo failed + return 1 + fi + echo $release + fi + auto_repo_dir=$release/main + repository=$url/$auto_repo_dir + pkglist=$pkglist:alpine-mirrors + fi + + rootfs="$1" + echo "Using static apk from $repository/$apk_arch" + wget="$wget $repository/$apk_arch" + + # parse APKINDEX to find the current versions + static_pkgs=$($wget/APKINDEX.tar.gz | \ + tar -Oxz APKINDEX | \ + awk -F: -v pkglist=$pkglist ' + BEGIN { split(pkglist,pkg) } + $0 != "" { f[$1] = $2 } + $0 == "" { for (i in pkg) + if (pkg[i] == f["P"]) + print(f["P"] "-" f["V"] ".apk") }') + [ "$static_pkgs" ] || return 1 + + mkdir -p "$rootfs" || return 1 + for pkg in $static_pkgs; do + echo "Downloading $pkg" + $wget/$pkg | tar -xz -C "$rootfs" + done + + # clean up .apk meta files + rm -f "$rootfs"/.[A-Z]* + + # verify checksum of the key + keyname=$(echo $rootfs/sbin/apk.static.*.pub | sed 's/.*\.SIGN\.RSA\.//') + checksum=$(echo "$key_sha256sums" | grep -w "$keyname") + if [ -z "$checksum" ]; then + echo "ERROR: checksum is missing for $keyname" + return 1 + fi + (cd $rootfs/etc/apk/keys && echo "$checksum" | sha256sum -c -) || return 1 + + # verify the static apk binary signature + APK=$rootfs/sbin/apk.static + openssl dgst -verify $rootfs/etc/apk/keys/$keyname \ + -signature "$APK.SIGN.RSA.$keyname" "$APK" || return 1 + + if [ "$auto_repo_dir" ]; then + mirror_list=$rootfs/usr/share/alpine-mirrors/MIRRORS.txt + mirror_count=$(wc -l $mirror_list | cut -d " " -f 1) + repository=$(sed $(expr $RANDOM % $mirror_count + 1)\!d \ + $mirror_list)$auto_repo_dir + echo "Selecting mirror $repository" + fi +} + install_alpine() { rootfs="$1" shift mkdir -p "$rootfs"/etc/apk || return 1 - cp -r ${keys_dir:-/etc/apk/keys} "$rootfs"/etc/apk/ + : ${keys_dir:=/etc/apk/keys} + if ! 
[ -d "$rootfs"/etc/apk/keys ] && [ -d "$keys_dir" ]; then + cp -r "$keys_dir" "$rootfs"/etc/apk/keys + fi if [ -n "$repository" ]; then echo "$repository" > "$rootfs"/etc/apk/repositories else cp /etc/apk/repositories "$rootfs"/etc/apk/repositories || return 1 + if [ -n "$release" ]; then + sed -i -e "s:/[^/]\+/\([^/]\+\)$:/$release/\1:" \ + "$rootfs"/etc/apk/repositories + fi fi opt_arch= if [ -n "$apk_arch" ]; then opt_arch="--arch $apk_arch" fi - ${APK:-apk} add -U --initdb --root $rootfs $opt_arch "$@" alpine-base + $APK add -U --initdb --root $rootfs $opt_arch "$@" alpine-base } configure_alpine() { @@ -47,6 +126,7 @@ EOF echo "Setting up device nodes" mkdir -p -m 755 "$rootfs/dev/pts" mkdir -p -m 1777 "$rootfs/dev/shm" + mknod -m 666 "$rootfs/dev/zero" c 1 5 mknod -m 666 "$rootfs/dev/full" c 1 7 mknod -m 666 "$rootfs/dev/random" c 1 8 mknod -m 666 "$rootfs/dev/urandom" c 1 9 @@ -93,9 +173,11 @@ lxc.network.flags = up EOF fi - # if there is exactly one veth network entry, make sure it has an - # associated mac address. - nics=$(grep -e '^lxc\.network\.type[ \t]*=[ \t]*veth' $path/config | wc -l) + # if there is exactly one veth or macvlan network entry, make sure + # it has an associated mac address. + nics=$(awk -F '[ \t]*=[ \t]*' \ + '$1=="lxc.network.type" && ($2=="veth" || $2=="macvlan") {print $2}' \ + $path/config | wc -l) if [ "$nics" -eq 1 ] && ! 
grep -q "^lxc.network.hwaddr" $path/config; then # see http://sourceforge.net/tracker/?func=detail&aid=3411497&group_id=163076&atid=826303 hwaddr="fe:$(dd if=/dev/urandom bs=8 count=1 2>/dev/null |od -t x8 | \ @@ -109,6 +191,7 @@ EOF lxc.tty = 4 lxc.pts = 1024 lxc.utsname = $hostname +lxc.cap.drop = sys_module mac_admin mac_override sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined @@ -129,7 +212,7 @@ lxc.cgroup.devices.allow = c 1:8 rwm lxc.cgroup.devices.allow = c 136:* rwm lxc.cgroup.devices.allow = c 5:2 rwm # rtc -lxc.cgroup.devices.allow = c 254:0 rwm +lxc.cgroup.devices.allow = c 254:0 rm # mounts point lxc.mount.entry=proc proc proc nodev,noexec,nosuid 0 0 @@ -148,8 +231,10 @@ die() { usage() { cat >&2 <] [-a|--arch ] - -p|--path -n|--name [PKG...] +Usage: $(basename $0) [-h|--help] [-r|--repository ] + [-R|--release ] [-a|--arch ] + [--rootfs ] -p|--path -n|--name + [PKG...] EOF } @@ -165,6 +250,14 @@ optarg_check() { } default_path=@LXCPATH@ +release= +arch=$(uname -m) + +# template mknods, requires root +if [ $(id -u) -ne 0 ]; then + echo "$(basename $0): must be run as root" >&2 + exit 1 +fi while [ $# -gt 0 ]; do opt="$1" @@ -179,6 +272,11 @@ while [ $# -gt 0 ]; do name=$1 shift ;; + --rootfs) + optarg_check $opt "$1" + rootfs=$1 + shift + ;; -p|--path) optarg_check $opt "$1" path=$1 @@ -189,6 +287,11 @@ while [ $# -gt 0 ]; do repository=$1 shift ;; + -R|--release) + optarg_check $opt "$1" + release=$1 + shift + ;; -a|--arch) optarg_check $opt "$1" arch=$1 @@ -217,9 +320,11 @@ if [ -z "${path}" ]; then path="${default_path}/${name}" fi -rootfs=`awk -F= '$1 ~ /^lxc.rootfs/ { print $2 }' "$path/config" 2>/dev/null` if [ -z "$rootfs" ]; then - rootfs="${path}/rootfs" + rootfs=`awk -F= '$1 ~ /^lxc.rootfs/ { print $2 }' "$path/config" 2>/dev/null` + if [ -z "$rootfs" ]; then + rootfs="${path}/rootfs" + fi fi lxc_arch=$arch @@ -234,6 +339,11 @@ case "$arch" in *) die "unsupported architecture: 
$arch";; esac +: ${APK:=apk} +if ! which $APK >/dev/null; then + get_static_apk "$rootfs" || die "Failed to download a valid static apk" +fi + install_alpine "$rootfs" "$@" || die "Failed to install rootfs for $name" configure_alpine "$rootfs" "$name" || die "Failed to configure $name" copy_configuration "$path" "$rootfs" "$name" diff --git a/templates/lxc-altlinux.in b/templates/lxc-altlinux.in index fac545cc3..c8de4e44f 100644 --- a/templates/lxc-altlinux.in +++ b/templates/lxc-altlinux.in @@ -22,10 +22,10 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #Configurations -arch=$(arch) +arch=$(uname -m) cache_base=@LOCALSTATEDIR@/cache/lxc/altlinux/$arch default_path=@LXCPATH@ default_profile=default @@ -243,6 +243,7 @@ lxc.utsname = $name lxc.tty = 4 lxc.pts = 1024 lxc.mount = $config_path/fstab +lxc.cap.drop = sys_module mac_admin mac_override sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined @@ -336,7 +337,7 @@ usage: [-p|--path=] [-c|--clean] [-R|--release=] [-4|--ipv4=] [-6|--ipv6=] [-g|--gw=] [-d|--dns=] - [-P|--profile=] + [-P|--profile=] [--rootfs=] [-A|--arch=] [-h|--help] Mandatory args: @@ -352,12 +353,13 @@ Optional args: -d,--dns specify the DNS server, eg. 192.168.1.2 -P,--profile Profile name is the file name in /etc/lxc/profiles contained packages name for install to cache. -A,--arch NOT USED YET. Define what arch the container will be [i686,x86_64] + ---rootfs rootfs path -h,--help print this help EOF return 0 } -options=$(getopt -o hp:n:P:cR:4:6:g:d: -l help,path:,name:,profile:,clean,release:ipv4:ipv6:gw:dns: -- "$@") +options=$(getopt -o hp:n:P:cR:4:6:g:d: -l help,rootfs:,path:,name:,profile:,clean,release:ipv4:ipv6:gw:dns: -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 @@ -369,6 +371,7 @@ do case "$1" in -h|--help) usage $0 && exit 0;; -p|--path) path=$2; shift 2;; + --rootfs) rootfs_path=$2; shift 2;; -n|--name) name=$2; shift 2;; -P|--profile) profile=$2; shift 2;; -c|--clean) clean=$2; shift 2;; @@ -421,7 +424,15 @@ if [ "$(id -u)" != "0" ]; then exit 1 fi -rootfs_path=$path/$name/rootfs +# check for 'lxc.rootfs' passed in through default config by lxc-create +if [ -z "$rootfs_path" ]; then + if grep -q '^lxc.rootfs' $path/config 2>/dev/null ; then + rootfs_path=`grep 'lxc.rootfs =' $path/config | awk -F= '{ print $2 }'` + else + rootfs_path=$path/$name/rootfs + fi +fi + config_path=$default_path/$name cache=$cache_base/$release/$profile @@ -430,11 +441,6 @@ if [ -f $config_path/config ]; then exit 1 fi -# check for 'lxc.rootfs' passed in through default config by lxc-create -if grep -q '^lxc.rootfs' $path/config 2>/dev/null ; then - rootfs_path=`grep 'lxc.rootfs =' $path/config | awk -F= '{ print $2 }'` -fi - install_altlinux if [ $? 
-ne 0 ]; then echo "failed to install altlinux" diff --git a/templates/lxc-archlinux.in b/templates/lxc-archlinux.in index ed5fb46ed..e3c01d51a 100644 --- a/templates/lxc-archlinux.in +++ b/templates/lxc-archlinux.in @@ -22,7 +22,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # defaults arch=$(uname -m) @@ -125,9 +125,8 @@ lxc.utsname=${name} lxc.autodev=1 lxc.tty=1 lxc.pts=1024 -lxc.rootfs=${rootfs_path} lxc.mount=${config_path}/fstab -lxc.cap.drop=mknod sys_module mac_admin mac_override +lxc.cap.drop=mknod sys_module mac_admin mac_override sys_time lxc.kmsg=0 lxc.stopsignal=SIGRTMIN+4 #networking @@ -153,6 +152,8 @@ lxc.cgroup.devices.allow = c 5:2 rwm lxc.cgroup.devices.allow = c 136:* rwm EOF + grep -q "^lxc.rootfs" ${config_path}/config 2>/dev/null || echo "lxc.rootfs = ${rootfs_path}" >> ${config_path}/config + cat > "${config_path}/fstab" << EOF sysfs sys sysfs ro,defaults 0 0 proc proc proc nodev,noexec,nosuid 0 0 @@ -191,7 +192,7 @@ EOF return 0 } -options=$(getopt -o hp:P:n:c:l:t: -l help,path:,packages:,name:,config:,network_type:,network_link: -- "${@}") +options=$(getopt -o hp:P:n:c:l:t: -l help,rootfs:,path:,packages:,name:,config:,network_type:,network_link: -- "${@}") if [ ${?} -ne 0 ]; then usage $(basename ${0}) exit 1 @@ -204,6 +205,7 @@ do -h|--help) usage ${0} && exit 0;; -p|--path) path=${2}; shift 2;; -n|--name) name=${2}; shift 2;; + --rootfs) rootfs_path=${2}; shift 2;; -P|--packages) additional_packages=${2}; shift 2;; -c|--config) pacman_config=${2}; shift 2;; -t|--network_type) lxc_network_type=${2}; shift 2;; @@ -238,7 +240,9 @@ if [ "${EUID}" != "0" ]; then exit 1 fi -rootfs_path="${path}/rootfs" +if [ -z "$rootfs_path" ]; then + rootfs_path="${path}/rootfs" +fi 
config_path="${default_path}/${name}" revert() { diff --git a/templates/lxc-busybox.in b/templates/lxc-busybox.in index cb425ecf8..12059f70a 100644 --- a/templates/lxc-busybox.in +++ b/templates/lxc-busybox.in @@ -18,7 +18,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA install_busybox() { @@ -189,9 +189,35 @@ configure_busybox() # passwd exec must be setuid chmod +s $rootfs/bin/passwd touch $rootfs/etc/shadow - echo "setting root passwd to root" - echo "root:root" | chroot $rootfs chpasswd + # setting passwd for root + CHPASSWD_FILE=$rootfs/root/chpasswd.sh + + cat <$CHPASSWD_FILE +echo "setting root password to \"root\"" + +mount --bind /lib $rootfs/lib +if [ \$? -ne 0 ]; then + echo "Failed bind-mounting /lib at $rootfs/lib" + exit 1 +fi + +chroot $rootfs chpasswd </dev/null +root:root +EOFF + + +if [ \$? -ne 0 ]; then + echo "Failed to change root password" + exit 1 +fi + +umount $rootfs/lib + +EOF + + lxc-unshare -s MOUNT -- /bin/sh < $CHPASSWD_FILE + rm $CHPASSWD_FILE # add ssh functionality if dropbear package available on host which dropbear >/dev/null 2>&1 @@ -235,18 +261,12 @@ cat <> $path/config lxc.utsname = $name lxc.tty = 1 lxc.pts = 1 +lxc.cap.drop = sys_module mac_admin mac_override sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined EOF -if [ -d "$rootfs/lib" ]; then -cat <> $path/config -lxc.mount.entry = /lib $rootfs/lib none ro,bind 0 0 -lxc.mount.entry = /usr/lib $rootfs/usr/lib none ro,bind 0 0 -EOF -fi - libdirs="\ lib \ usr/lib \ @@ -268,7 +288,7 @@ EOF return 0 } -options=$(getopt -o hp:n: -l help,path:,name: -- "$@") +options=$(getopt -o hp:n: -l help,rootfs:,path:,name: -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 @@ -280,6 +300,7 @@ do case "$1" in -h|--help) usage $0 && exit 0;; -p|--path) path=$2; shift 2;; + --rootfs) rootfs=$2; shift 2;; -n|--name) name=$2; shift 2;; --) shift 1; break ;; *) break ;; @@ -298,10 +319,12 @@ fi # detect rootfs config="$path/config" -if grep -q '^lxc.rootfs' $config 2>/dev/null ; then - rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` -else - rootfs=$path/rootfs +if [ -z "$rootfs" ]; then + if grep -q '^lxc.rootfs' $config 2>/dev/null ; then + rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` + else + rootfs=$path/rootfs + fi fi install_busybox $rootfs $name diff --git a/templates/lxc-cirros.in b/templates/lxc-cirros.in new file mode 100644 index 000000000..5934f4d10 --- /dev/null +++ b/templates/lxc-cirros.in @@ -0,0 +1,321 @@ +#!/bin/bash + +# template script for generating ubuntu container for LXC +# +# This script consolidates and extends the existing lxc ubuntu scripts +# + +# Copyright © 2013 Canonical Ltd. +# Author: Scott Moser +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2, as +# published by the Free Software Foundation. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+VERBOSITY=0 +DOWNLOAD_URL="http://download.cirros-cloud.net/" +CACHE_D="@LOCALSTATEDIR@/cache/lxc/cirros" + +UNAME_M=$(uname -m) +ARCHES=( i386 x86_64 amd64 arm ) +STREAMS=( released devel ) +SOURCES=( nocloud none ) +BUILD="standard" + +DEF_VERSION="released" +DEF_SOURCE="nocloud" +case "${UNAME_M}" in + i?86) DEF_ARCH="i386";; + x86_64) DEF_ARCH="x86_64";; + arm*) DEF_ARCH="arm";; + *) DEF_ARCH="i386";; +esac + +error() { echo "$@" 1>&2; } +inargs() { + local needle="$1" x="" + shift + for x in "$@"; do + [ "$needle" = "$x" ] && return 0 + done + return 1 +} + +Usage() { + cat <&2; [ $# -eq 0 ] || error "$@"; return 1; } + +debug() { + local level=${1}; shift; + [ "${level}" -gt "${VERBOSITY}" ] && return + error "${@}" +} +jsondict() { + local k="" v="" ret="{" + for arg in "$@"; do + k="${arg%%=*}" + v="${arg#*=}" + ret="${ret} \"${k}\": \"$v\"," + done + ret="${ret%,} }" + echo "$ret" +} + +copy_configuration() +{ + local path=$1 rootfs=$2 name=$3 arch=$4 release=$5 +cat >> "$path/config" < "$sdir/meta-data" || + { error "failed to write metadata to $sdir/meta-data"; return 1; } + + if [ -n "$udfile" ]; then + cat "$udfile" > "$sdir/user-data" || + { error "failed to write user-data to $sdir"; return 1; } + else + rm -f "$sdir/user-data" + fi +} + +insert_ds() { + local dstype="$1" root_d="$2" authkey="$3" udfile="$4" + case "$dstype" in + nocloud) insert_ds_nocloud "$root_d" "$authkey" "$udfile" + esac +} + +extract_rootfs() { + local tarball="$1" rootfs_d="$2" + mkdir -p "${rootfs_d}" || + { error "failed to make rootfs dir ${rootfs_d}"; return 1; } + + tar -C "${rootfs_d}" -Sxzf "${tarball}" || + { error "failed to populate ${rootfs_d}"; return 1; } + return 0 +} + +download_tarball() { + local arch="$1" ver="$2" cached="$3" baseurl="$4" + local out="" outd="" file="" dlpath="" + file="cirros-$ver-$arch-lxc.tar.gz" + dlpath="$ver/$file" + outd="${cached}/${dlpath%/*}" + if [ -f "$cached/$dlpath" ]; then + _RET="$cached/$dlpath" + return 0 + fi + + mkdir -p 
"${outd}" || + { error "failed to create ${outd}"; return 1; } + + debug 1 "downloading ${baseurl%/}/$dlpath" to "${cached}/$dlpath" + wget "${baseurl%/}/$dlpath" -O "$cached/${dlpath}.$$" && + mv "$cached/$dlpath.$$" "$cached/$dlpath" || { + rm -f "$cached/$dlpath.$$"; + error "failed to download $dlpath"; + return 1; + } + _RET="$cached/$dlpath" +} + +create_main() { + local short_opts="a:hn:p:S:uvV" + local long_opts="arch:,auth-key:,name:,path:,tarball:,userdata:,verbose,version:,rootfs:" + local getopt_out="" + getopt_out=$(getopt --name "${0##*/}" \ + --options "${short_opts}" --long "${long_opts}" -- "$@") && + eval set -- "${getopt_out}" || + { bad_Usage; return; } + + local arch="${DEF_ARCH}" dsource="${DEF_SOURCE}" version="${DEF_VERSION}" + local authkey_f="" authkeys="" userdata_f="" path="" tarball="" + local cur="" next="" + local rootfs_d="" + + while [ $# -ne 0 ]; do + cur=$1; next=$2; + case "$cur" in + -a|--arch) arch="$next"; shift;; + -h|--help) Usage ; return 0;; + -n|--name) name="$next"; shift;; + -v|--verbose) VERBOSITY=$((${VERBOSITY}+1));; + -S|--auth-key) authkey_f="$next"; shift;; + -p|--path) path=$next; shift;; + -v|--version) version=$next; shift;; + -u|--userdata) userdata_f="$next"; shift;; + --tarball) tarball="$next"; shift;; + --source) dsource="$next"; shift;; + --rootfs) rootfs_d="$next"; shift;; + --) shift; break;; + esac + shift; + done + + [ -n "$rootfs_d" ] || rootfs_d="$path/rootfs" + [ $# -eq 0 ] || { bad_Usage "unexpected arguments: $*"; return; } + [ -n "$path" ] || { error "'path' parameter is required"; return 1; } + + if [ "$(id -u)" != "0" ]; then + { error "must be run as root"; return 1; } + fi + + case "$arch" in + i?86) arch="i386";; + amd64) arch="x86_64";; + esac + + inargs "$arch" "${ARCHES[@]}" || + { error "bad arch '$arch'. allowed: ${ARCHES[*]}"; return 1; } + + inargs "$dsource" "${SOURCES[@]}" || + { error "bad source '$dsource'. 
allowed: ${SOURCES[*]}"; return 1; } + + if [ "$dsource" = "none" ] && [ -n "$userdata_f" -o -n "$authkey_f" ]; then + error "userdata and authkey are incompatible with --source=none"; + return 1; + fi + + if [ -n "$authkey_f" ]; then + if [ ! -f "$authkey_f" ]; then + error "--auth-key=${authkey_f} must reference a file" + return 1 + fi + authkeys=$(cat "$authkey_f") || + { error "failed to read ${authkey_f}"; return 1; } + fi + + if [ -n "$userdata_f" -a ! -f "${userdata_f}" ]; then + error "${userdata_f}: --userdata arg not a file" + return 1 + fi + + if [ -z "$tarball" ]; then + if inargs "$version" "${STREAMS[@]}"; then + out=$(wget -O - -q "${DOWNLOAD_URL%/}/version/$version") || + { error "failed to convert 'version=$version'"; return 1; } + version="$out" + fi + download_tarball "$arch" "$version" "${CACHE_D}" "${DOWNLOAD_URL}" || + return + tarball="$_RET" + fi + + extract_rootfs "${tarball}" "${rootfs_d}" || return + + # cirros 0.3.1 was broken for /dev/random and /dev/urandom + if [ -b "$rootfs_d/dev/random" ]; then + rm -f "$rootfs_d/dev/random" && + mknod --mode=666 "$rootfs_d/dev/random" c 1 8 || + { error "failed to fix /dev/random"; return 1; } + fi + if [ -b "$rootfs_d/dev/urandom" ]; then + rm -f "$rootfs_d/dev/urandom" && + mknod --mode=666 "$rootfs_d/dev/urandom" c 1 9 || + { error "failed to fix /dev/urandom"; return 1; } + fi + + if [ "$version" = "0.3.2~pre1" ]; then + debug 1 "fixing console for lxc and '$version'" + sed -i 's,^\(#console.* 115200 \)# /dev/console,\1 console,g' \ + "$rootfs_d/etc/inittab" || + { error "failed to fix console entry for $version"; return 1; } + fi + + if [ "$dsource" != "none" ]; then + insert_ds "$dsource" "$path/rootfs" "$authkeys" "$userdata_f" || { + error "failed to insert userdata to $path/rootfs" + return 1 + } + fi + + copy_configuration "$path" "$path/rootfs" "$name" "$arch" "$release" + return +} + +create_main "$@" + +# vi: ts=4 expandtab diff --git a/templates/lxc-debian.in b/templates/lxc-debian.in 
index 7bbc46b94..df464b94e 100644 --- a/templates/lxc-debian.in +++ b/templates/lxc-debian.in @@ -18,7 +18,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA SUITE=${SUITE:-squeeze} MIRROR=${MIRROR:-http://cdn.debian.net/debian} @@ -172,7 +172,18 @@ install_debian() return 1 fi - arch=$(dpkg --print-architecture) + if which dpkg >/dev/null 2>&1 ; then + arch=$(dpkg --print-architecture) + else + arch=$(uname -m) + if [ "$arch" = "i686" ]; then + arch="i386" + elif [ "$arch" = "x86_64" ]; then + arch="amd64" + elif [ "$arch" = "armv7l" ]; then + arch="armhf" + fi + fi echo "Checking cache download in $cache/rootfs-$SUITE-$arch ... " if [ ! -e "$cache/rootfs-$SUITE-$arch" ]; then @@ -207,6 +218,7 @@ copy_configuration() lxc.tty = 4 lxc.pts = 1024 lxc.utsname = $hostname +lxc.cap.drop = sys_module mac_admin mac_override sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined @@ -226,7 +238,7 @@ lxc.cgroup.devices.allow = c 1:8 rwm lxc.cgroup.devices.allow = c 136:* rwm lxc.cgroup.devices.allow = c 5:2 rwm # rtc -lxc.cgroup.devices.allow = c 254:0 rwm +lxc.cgroup.devices.allow = c 254:0 rm # mounts point lxc.mount.entry = proc proc proc nodev,noexec,nosuid 0 0 @@ -272,7 +284,7 @@ EOF return 0 } -options=$(getopt -o hp:n:c -l help,path:,name:,clean -- "$@") +options=$(getopt -o hp:n:c -l help,rootfs:,path:,name:,clean -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 @@ -284,6 +296,7 @@ do case "$1" in -h|--help) usage $0 && exit 0;; -p|--path) path=$2; shift 2;; + --rootfs) rootfs=$2; shift 2;; -n|--name) name=$2; shift 2;; -c|--clean) clean=$2; shift 2;; --) shift 1; break ;; @@ -314,10 +327,12 @@ fi # detect rootfs config="$path/config" -if grep -q '^lxc.rootfs' $config 2>/dev/null ; then - rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` -else - rootfs=$path/rootfs +if [ -z "$rootfs" ]; then + if grep -q '^lxc.rootfs' $config 2>/dev/null ; then + rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` + else + rootfs=$path/rootfs + fi fi diff --git a/templates/lxc-fedora.in b/templates/lxc-fedora.in index 684bb9cca..072b3c143 100644 --- a/templates/lxc-fedora.in +++ b/templates/lxc-fedora.in @@ -23,21 +23,72 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA #Configurations -arch=$(arch) +arch=$(uname -m) cache_base=@LOCALSTATEDIR@/cache/lxc/fedora/$arch default_path=@LXCPATH@ root_password=root # is this fedora? -[ -f /etc/fedora-release ] && is_fedora=true +# Allow for weird remixes like the Raspberry Pi +# +# Use the Mitre standard CPE identifier for the release ID if possible... +# This may be in /etc/os-release or /etc/system-release-cpe. We +# should be able to use EITHER. Give preference to /etc/os-release for now. -if [ "$arch" = "i686" ]; then - arch=i386 +if [ -e /etc/os-release ] +then +# This is a shell friendly configuration file. We can just source it. +# What we're looking for in here is the ID, VERSION_ID and the CPE_NAME + . 
/etc/os-release + echo "Host CPE ID from /etc/os-release: ${CPE_NAME}" fi +if [ "${CPE_NAME}" = "" -a -e /etc/system-release-cpe ] +then + CPE_NAME=$(head -n1 /etc/system-release-cpe) + CPE_URI=$(expr ${CPE_NAME} : '\([^:]*:[^:*]\)') + if [ "${CPE_URI}" != "cpe:/o" ] + then + CPE_NAME= + else + echo "Host CPE ID from /etc/system-release-cpe: ${CPE_NAME}" + # Probably a better way to do this but still remain POSIX + # compatible but this works, shrug... + # Must be nice and not introduce convenient bashisms here. + ID=$(expr ${CPE_NAME} : '[^:]*:[^:]*:[^:]*:\([^:]*\)') + VERSION_ID=$(expr ${CPE_NAME} : '[^:]*:[^:]*:[^:]*:[^:]*:\([^:]*\)') + fi +fi + +if [ "${CPE_NAME}" != "" -a "${ID}" = "fedora" -a "${VERSION_ID}" != "" ] +then + fedora_host_ver=${VERSION_ID} + is_fedora=true +elif [ -e /etc/redhat-release ] +then + # Only if all other methods fail, try to parse the redhat-release file. + fedora_host_ver=$( sed -e '/^Fedora /!d' -e 's/Fedora.*\srelease\s*\([0-9][0-9]*\)\s.*/\1/' < /etc/redhat-release ) + if [ "$fedora_host_ver" != "" ] + then + is_fedora=true + fi +fi + +# Map a few architectures to their generic Fedora repository archs. +# The two ARM archs are a bit of a guesstimate for the v5 and v6 +# archs. V6 should have hardware floating point (Raspberry Pi). +# The "arm" arch is safer (no hardware floating point). So +# there may be cases where we "get it wrong" for some v6 other +# than RPi. 
+case "$arch" in +i686) arch=i386 ;; +armv3l|armv4l|armv5l) arch=arm ;; +armv6l|armv7l|armv8l) arch=armhfp ;; +esac + configure_fedora() { @@ -50,7 +101,7 @@ configure_fedora() DEVICE=eth0 BOOTPROTO=dhcp ONBOOT=yes -HOSTNAME=${name} +HOSTNAME=${utsname} NM_CONTROLLED=no TYPE=Ethernet MTU=${MTU} @@ -59,12 +110,18 @@ EOF # set the hostname cat < ${rootfs_path}/etc/sysconfig/network NETWORKING=yes -HOSTNAME=${name} +HOSTNAME=${utsname} EOF + # set hostname on systemd Fedora systems + if [ $release -gt 14 ]; then + echo "${utsname}" > ${rootfs_path}/etc/hostname + fi + # set minimal hosts cat < $rootfs_path/etc/hosts -127.0.0.1 localhost $name +127.0.0.1 localhost.localdomain localhost $utsname +::1 localhost6.localdomain6 localhost6 EOF dev_path="${rootfs_path}/dev" @@ -118,10 +175,27 @@ configure_fedora_systemd() { unlink ${rootfs_path}/etc/systemd/system/default.target touch ${rootfs_path}/etc/fstab - chroot ${rootfs_path} ln -s /dev/null //etc/systemd/system/udev.service + chroot ${rootfs_path} ln -s /dev/null /etc/systemd/system/udev.service chroot ${rootfs_path} ln -s /lib/systemd/system/multi-user.target /etc/systemd/system/default.target #dependency on a device unit fails it specially that we disabled udev - sed -i 's/After=dev-%i.device/After=/' ${rootfs_path}/lib/systemd/system/getty\@.service + # sed -i 's/After=dev-%i.device/After=/' ${rootfs_path}/lib/systemd/system/getty\@.service + # + # Actually, the After=dev-%i.device line does not appear in the + # Fedora 17 or Fedora 18 systemd getty\@.service file. It may be left + # over from an earlier version and it's not doing any harm. We do need + # to disable the "ConditionPathExists=/dev/tty0" line or no gettys are + # started on the ttys in the container. Let's do it in an override copy of + # the service so it can still pass rpm verifies and not be automatically + # updated by a new systemd version. 
-- mhw /\/\|=mhw=|\/\/ + + sed -e 's/^ConditionPathExists=/# ConditionPathExists=/' \ + -e 's/After=dev-%i.device/After=/' \ + < ${rootfs_path}/lib/systemd/system/getty\@.service \ + > ${rootfs_path}/etc/systemd/system/getty\@.service + # Setup getty service on the 4 ttys we are going to allow in the + # default config. Number should match lxc.tty + ( cd ${rootfs_path}/etc/systemd/system/getty.target.wants + for i in 1 2 3 4 ; do ln -sf ../getty\@.service getty@tty${i}.service; done ) } download_fedora() @@ -142,27 +216,53 @@ download_fedora() MIRRORLIST_URL="http://mirrors.fedoraproject.org/mirrorlist?repo=fedora-$release&arch=$arch" DOWNLOAD_OK=no - for trynumber in 1 2 3; do + + # We're splitting the old loop into two loops plus a directory retrival. + # First loop... Try and retrive a mirror list with retries and a slight + # delay between attempts... + for trynumber in 1 2 3 4; do [ $trynumber != 1 ] && echo "Trying again..." - MIRROR_URL=$(curl -s -S -f "$MIRRORLIST_URL" | head -n2 | tail -n1) - if [ $? -ne 0 ] || [ -z "$MIRROR_URL" ]; then - echo "Failed to get a mirror" - continue - fi + # This code is mildly "brittle" in that it assumes a certain + # page format and parsing HTML. I've done worse. :-P + MIRROR_URLS=$(curl -s -S -f "$MIRRORLIST_URL" | sed -e '/^http:/!d' -e '2,6!d') + if [ $? -eq 0 ] && [ -n "$MIRROR_URLS" ] + then + break + fi + + echo "Failed to get a mirror on try $trynumber" + sleep 3 + done + + # This will fall through if we didn't get any URLS above + for MIRROR_URL in ${MIRROR_URLS} + do if [ "$release" -gt "16" ]; then - RELEASE_URL="$MIRROR_URL/Packages/f/fedora-release-$release-1.noarch.rpm" + RELEASE_URL="$MIRROR_URL/Packages/f" else - RELEASE_URL="$MIRROR_URL/Packages/fedora-release-$release-1.noarch.rpm" + RELEASE_URL="$MIRROR_URL/Packages/" fi - echo "Fetching from $RELEASE_URL" - curl -f "$RELEASE_URL" > $INSTALL_ROOT/fedora-release-$release.noarch.rpm - if [ $? 
-ne 0 ]; then - echo "Failed to download fedora release rpm" + + echo "Fetching rpm name from $RELEASE_URL..." + # This code is mildly "brittle" in that it assumes a certain directory + # page format and parsing HTML. I've done worse. :-P + RELEASE_RPM=$(curl -L -f "$RELEASE_URL" | sed -e "/fedora-release-${release}-/!d" -e 's/.*.*//' ) + if [ $? -ne 0 -o "${RELEASE_RPM}" = "" ]; then + echo "Failed to identify fedora release rpm." continue fi + + echo "Fetching fedora release rpm from ${RELEASE_URL}/${RELEASE_RPM}......" + curl -L -f "${RELEASE_URL}/${RELEASE_RPM}" > ${INSTALL_ROOT}/${RELEASE_RPM} + if [ $? -ne 0 ]; then + echo "Failed to download fedora release rpm ${RELEASE_RPM}." + continue + fi + DOWNLOAD_OK=yes break done + if [ $DOWNLOAD_OK != yes ]; then echo "Aborting" return 1 @@ -170,7 +270,7 @@ download_fedora() mkdir -p $INSTALL_ROOT/var/lib/rpm rpm --root $INSTALL_ROOT --initdb - rpm --root $INSTALL_ROOT -ivh $INSTALL_ROOT/fedora-release-$release.noarch.rpm + rpm --root $INSTALL_ROOT -ivh ${INSTALL_ROOT}/${RELEASE_RPM} $YUM install $PKG_LIST if [ $? 
-ne 0 ]; then @@ -248,10 +348,13 @@ copy_configuration() mkdir -p $config_path grep -q "^lxc.rootfs" $config_path/config 2>/dev/null || echo "lxc.rootfs = $rootfs_path" >> $config_path/config cat <> $config_path/config -lxc.utsname = $name +lxc.utsname = $utsname lxc.tty = 4 lxc.pts = 1024 lxc.mount = $config_path/fstab +lxc.cap.drop = sys_module mac_admin mac_override sys_time + +lxc.autodev = $auto_dev # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined @@ -272,7 +375,7 @@ lxc.cgroup.devices.allow = c 1:8 rwm lxc.cgroup.devices.allow = c 136:* rwm lxc.cgroup.devices.allow = c 5:2 rwm # rtc -lxc.cgroup.devices.allow = c 254:0 rwm +lxc.cgroup.devices.allow = c 254:0 rm EOF cat < $config_path/fstab @@ -313,21 +416,23 @@ usage() cat < - [-p|--path=] [-c|--clean] [-R|--release=] [-A|--arch=] + [-p|--path=] [-c|--clean] [-R|--release=] [--fqdn=] [-A|--arch=] [-h|--help] Mandatory args: -n,--name container name, used to as an identifier for that container from now on Optional args: - -p,--path path to where the container rootfs will be created, defaults to @LXCPATH@. The container config will go under @LXCPATH@ in that case + -p,--path path to where the container will be created, defaults to @LXCPATH@. The container config will go under @LXCPATH@ in that case + --rootfs path for actual rootfs. -c,--clean clean the cache - -R,--release Fedora release for the new container. if the host is Fedora, then it will defaultto the host's release. + -R,--release Fedora release for the new container. if the host is Fedora, then it will default to the host's release. + --fqdn fully qualified domain name (FQDN) for DNS and system naming -A,--arch NOT USED YET. Define what arch the container will be [i686,x86_64] -h,--help print this help EOF return 0 } -options=$(getopt -o hp:n:cR: -l help,path:,name:,clean,release: -- "$@") +options=$(getopt -o hp:n:cR: -l help,path:,rootfs:,name:,clean,release:,fqdn: -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 @@ -339,9 +444,11 @@ do case "$1" in -h|--help) usage $0 && exit 0;; -p|--path) path=$2; shift 2;; + --rootfs) rootfs=$2; shift 2;; -n|--name) name=$2; shift 2;; -c|--clean) clean=$2; shift 2;; -R|--release) release=$2; shift 2;; + --fqdn) utsname=$2; shift 2;; --) shift 1; break ;; *) break ;; esac @@ -352,6 +459,29 @@ if [ ! -z "$clean" -a -z "$path" ]; then exit 0 fi +if [ -z "${utsname}" ]; then + utsname=${name} +fi + +# This follows a standard "resolver" convention that an FQDN must have +# at least two dots or it is considered a local relative host name. +# If it doesn't, append the dns domain name of the host system. +# +# This changes one significant behavior when running +# "lxc_create -n Container_Name" without using the +# --fqdn option. +# +# Old behavior: +# utsname and hostname = Container_Name +# New behavior: +# utsname and hostname = Container_Name.Domain_Name + +if [ $(expr "$utsname" : '.*\..*\.') = 0 ]; then + if [ -n "$(dnsdomainname)" ]; then + utsname=${utsname}.$(dnsdomainname) + fi +fi + needed_pkgs="" type yum >/dev/null 2>&1 if [ $? -ne 0 ]; then @@ -365,33 +495,43 @@ fi if [ -n "$needed_pkgs" ]; then echo "Missing commands: $needed_pkgs" - echo "Please install these using \"sudo apt-get install $needed_pkgs\"" + echo "Please install these using \"sudo yum install $needed_pkgs\"" exit 1 fi if [ -z "$path" ]; then - path=$default_path + path=$default_path/$name fi if [ -z "$release" ]; then - if [ "$is_fedora" ]; then - release=$(cat /etc/fedora-release |awk '/^Fedora/ {print $3}') + if [ "$is_fedora" -a "$fedora_host_ver" ]; then + release=$fedora_host_ver else - echo "This is not a fedora host and release missing, defaulting to 14. use -R|--release to specify release" - release=14 + echo "This is not a fedora host and release missing, defaulting to 18. use -R|--release to specify release" + release=18 fi fi +# Fedora 15 and above run systemd. 
We need autodev enabled to keep +# systemd from causing problems. +if [ $release -gt 14 ]; then + auto_dev="1" +else + auto_dev="0" +fi + if [ "$(id -u)" != "0" ]; then echo "This script should be run as 'root'" exit 1 fi -rootfs_path=$path/$name/rootfs -# check for 'lxc.rootfs' passed in through default config by lxc-create -if grep -q '^lxc.rootfs' $path/config 2>/dev/null ; then - rootfs_path=`grep 'lxc.rootfs =' $path/config | awk -F= '{ print $2 }'` +if [ -z "$rootfs_path" ]; then + rootfs_path=$path/rootfs + # check for 'lxc.rootfs' passed in through default config by lxc-create + if grep -q '^lxc.rootfs' $path/config 2>/dev/null ; then + rootfs_path=`grep 'lxc.rootfs =' $path/config | awk -F= '{ print $2 }'` + fi fi config_path=$default_path/$name cache=$cache_base/$release @@ -427,13 +567,20 @@ if [ $? -ne 0 ]; then exit 1 fi -type /bin/systemd >/dev/null 2>&1 -if [ $? -ne 0 ]; then - configure_fedora_init -else +# If the systemd configuration directory exists - set it up for what we need. +if [ -d ${rootfs_path}/etc/systemd/system ] +then configure_fedora_systemd fi +# This configuration (rc.sysinit) is not inconsistent with the systemd stuff +# above and may actually coexist on some upgraded systems. Let's just make +# sure that, if it exists, we update this file, even if it's not used... +if [ -f ${rootfs_path}/etc/rc.sysinit ] +then + configure_fedora_init +fi + if [ ! 
-z $clean ]; then clean || exit 1 exit 0 diff --git a/templates/lxc-opensuse.in b/templates/lxc-opensuse.in index 77ef6b20e..1fc7e21a6 100644 --- a/templates/lxc-opensuse.in +++ b/templates/lxc-opensuse.in @@ -23,7 +23,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA DISTRO=12.3 @@ -212,7 +212,7 @@ install_opensuse() return 1 fi - arch=$(arch) + arch=$(uname -m) echo "Checking cache download in $cache/rootfs-$arch ... " if [ ! -e "$cache/rootfs-$arch" ]; then @@ -275,7 +275,7 @@ lxc.autodev=1 lxc.tty = 4 lxc.pts = 1024 lxc.mount = $path/fstab -lxc.cap.drop = sys_module mac_admin mac_override mknod +lxc.cap.drop = sys_module mac_admin mac_override mknod sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined @@ -295,7 +295,7 @@ lxc.cgroup.devices.allow = c 1:8 rwm lxc.cgroup.devices.allow = c 136:* rwm lxc.cgroup.devices.allow = c 5:2 rwm # rtc -lxc.cgroup.devices.allow = c 254:0 rwm +lxc.cgroup.devices.allow = c 254:0 rm EOF cat < $path/fstab @@ -342,7 +342,7 @@ EOF return 0 } -options=$(getopt -o hp:n:c -l help,path:,name:,clean -- "$@") +options=$(getopt -o hp:n:c -l help,rootfs:,path:,name:,clean -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 @@ -354,6 +354,7 @@ do case "$1" in -h|--help) usage $0 && exit 0;; -p|--path) path=$2; shift 2;; + --rootfs) rootfs=$2; shift 2;; -n|--name) name=$2; shift 2;; -c|--clean) clean=$2; shift 2;; --) shift 1; break ;; @@ -384,10 +385,12 @@ fi # detect rootfs config="$path/config" -if grep -q '^lxc.rootfs' $config 2>/dev/null ; then - rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` -else - rootfs=$path/rootfs +if [ -z "$rootfs" ]; then + if grep -q '^lxc.rootfs' $config 2>/dev/null ; then + rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` + else + rootfs=$path/rootfs + fi fi install_opensuse $rootfs diff --git a/templates/lxc-oracle.in b/templates/lxc-oracle.in index aad21e8a8..98ea609f0 100644 --- a/templates/lxc-oracle.in +++ b/templates/lxc-oracle.in @@ -24,7 +24,7 @@ # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # use virbr0 that is setup by default by libvirtd @@ -142,6 +142,17 @@ EOF sed -i 's|action $"Setting network parameters|# LXC action $"Setting network parameters|' $container_rootfs/etc/init.d/NetworkManager 2>/dev/null fi + # sem_open(3) checks that /dev/shm is SHMFS_SUPER_MAGIC, so make sure to mount /dev/shm (normally done by dracut initrd) as tmpfs + if [ $container_release_major = "4" -o $container_release_major = "5" ]; then + echo "mount -t tmpfs tmpfs /dev/shm" >>$container_rootfs/etc/rc.sysinit + echo "mount -t tmpfs tmpfs /dev/shm" >>$container_rootfs/etc/rc.d/rc.sysinit + fi + + if [ $container_release_major = "6" ]; then + sed -i 's|mount -n -o remount /dev/shm >/dev/null 2>&1$|mount -t tmpfs tmpfs /dev/shm # LXC|' $container_rootfs/etc/rc.sysinit + sed -i 's|mount -n -o remount /dev/shm >/dev/null 2>&1$|mount -t tmpfs tmpfs 
/dev/shm # LXC|' $container_rootfs/etc/rc.d/rc.sysinit + fi + # no need to attempt to mount / sed -i 's|mount -f /$|# LXC mount -f /|' $container_rootfs/etc/rc.sysinit sed -i 's|mount -f /$|# LXC mount -f /|' $container_rootfs/etc/rc.d/rc.sysinit @@ -337,8 +348,8 @@ lxc.utsname = $name lxc.devttydir = lxc lxc.tty = 4 lxc.pts = 1024 -lxc.rootfs = $container_rootfs lxc.mount = $cfg_dir/fstab +lxc.hook.clone = @DATADIR@/lxc/hooks/clonehostname # Uncomment these if you don't run anything that needs the capability, and # would like the container to run with less privilege. # @@ -359,6 +370,7 @@ lxc.cap.drop = mac_admin mac_override setfcap setpcap lxc.cap.drop = sys_module sys_nice sys_pacct lxc.cap.drop = sys_rawio sys_time EOF + grep -q "^lxc.rootfs" $cfg_dir/config 2>/dev/null || echo "lxc.rootfs = $container_rootfs" >> $cfg_dir/config if [ $container_release_major != "4" ]; then echo "lxc.cap.drop = sys_resource" >>$cfg_dir/config @@ -511,7 +523,7 @@ container_rootfs_create() # we unshare the mount namespace because yum installing the ol4 # packages causes $rootfs/proc to be mounted on - lxc-unshare -s MOUNT yum -- $yum_args install $min_pkgs + lxc-unshare -s MOUNT yum -- $yum_args install $min_pkgs $user_pkgs if [ $? -ne 0 ]; then die "Failed to download and install the rootfs, aborting." fi @@ -599,6 +611,8 @@ usage() cat < architecture (ie. i386, x86_64) -R|--release= release to download for the new container + --rootfs= rootfs path + -r|--rpms= additional rpms to install into container -u|--url= replace yum repo url (ie. local yum mirror) -t|--templatefs= copy/clone rootfs at path instead of downloading -h|--help @@ -608,23 +622,25 @@ EOF return 0 } -options=$(getopt -o hp:n:a:R:u:t: -l help,path:,name:,arch:,release:,url:,templatefs: -- "$@") +options=$(getopt -o hp:n:a:R:r:u:t: -l help,rootfs:,path:,name:,arch:,release:,rpms:,url:,templatefs: -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 fi -arch=$(arch) +arch=$(uname -m) eval set -- "$options" while true do case "$1" in -h|--help) usage $0 && exit 0;; -p|--path) cfg_dir=$2; shift 2;; + --rootfs) container_rootfs=$2; shift 2;; -n|--name) name=$2; shift 2;; -a|--arch) arch=$2; shift 2;; -R|--release) container_release_version=$2; shift 2;; - -u|--url) repourl=$2; shift;; + -r|--rpms) user_pkgs=$2; shift 2;; + -u|--url) repourl=$2; shift 2;; -t|--templatefs) template_rootfs=$2; shift 2;; --) shift 1; break ;; *) break ;; @@ -683,7 +699,9 @@ else fi echo "Host is $host_distribution $host_release_version" -container_rootfs="$cfg_dir/rootfs" +if [ -z "$container_rootfs" ]; then + container_rootfs="$cfg_dir/rootfs" +fi if [ -n "$template_rootfs" ]; then container_release_get $template_rootfs diff --git a/templates/lxc-sshd.in b/templates/lxc-sshd.in index b704723b4..0d4a50273 100644 --- a/templates/lxc-sshd.in +++ b/templates/lxc-sshd.in @@ -18,7 +18,7 @@ # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA install_sshd() { @@ -28,10 +28,14 @@ install_sshd() $rootfs/var/run/sshd \ $rootfs/var/empty/sshd \ $rootfs/var/lib/empty/sshd \ +$rootfs/etc/init.d \ +$rootfs/etc/rc.d \ $rootfs/etc/ssh \ +$rootfs/etc/sysconfig/network-scripts \ $rootfs/dev/shm \ $rootfs/run/shm \ $rootfs/proc \ +$rootfs/sys \ $rootfs/bin \ $rootfs/sbin \ $rootfs/usr \ @@ -63,8 +67,8 @@ root:x:0:root sshd:x:74: EOF -ssh-keygen -t rsa -f $rootfs/etc/ssh/ssh_host_rsa_key -ssh-keygen -t dsa -f $rootfs/etc/ssh/ssh_host_dsa_key +ssh-keygen -t rsa -N "" -f $rootfs/etc/ssh/ssh_host_rsa_key +ssh-keygen -t dsa -N "" -f $rootfs/etc/ssh/ssh_host_dsa_key # by default setup root password with no password cat < $rootfs/etc/ssh/sshd_config @@ -112,6 +116,8 @@ 
copy_configuration() cat <> $path/config lxc.utsname = $name lxc.pts = 1024 +lxc.kmsg = 0 +lxc.cap.drop = sys_module mac_admin mac_override sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined @@ -123,9 +129,24 @@ lxc.mount.entry = /usr usr none ro,bind 0 0 lxc.mount.entry = /sbin sbin none ro,bind 0 0 lxc.mount.entry = tmpfs var/run/sshd tmpfs mode=0644 0 0 lxc.mount.entry = @LXCTEMPLATEDIR@/lxc-sshd sbin/init none bind 0 0 -lxc.mount.entry = proc $rootfs/proc proc nodev,noexec,nosuid 0 0 +lxc.mount.entry = proc proc proc nodev,noexec,nosuid 0 0 +lxc.mount.entry = sysfs sys sysfs ro 0 0 +lxc.mount.entry = /etc/init.d etc/init.d none ro,bind 0 0 EOF + # Oracle Linux and Fedora need the following two bind mounted + if [ -d /etc/sysconfig/network-scripts ]; then + cat <> $path/config +lxc.mount.entry = /etc/sysconfig/network-scripts etc/sysconfig/network-scripts none ro,bind 0 0 +EOF + fi + + if [ -d /etc/rc.d ]; then + cat <> $path/config +lxc.mount.entry = /etc/rc.d etc/rc.d none ro,bind 0 0 +EOF + fi + # if no .ipv4 section in config, then have the container run dhcp grep -q "^lxc.network.ipv4" $path/config || touch $rootfs/run-dhcp @@ -139,12 +160,24 @@ EOF usage() { cat < +$1 -h|--help -p|--path= [--rootfs=] EOF return 0 } -options=$(getopt -o hp:n:S: -l help,path:,name:,auth-key: -- "$@") +check_for_cmd() +{ + cmd_path=`type $1` + if [ $? -ne 0 ]; then + echo "The command '$1' $cmd_path is not accessible on the system" + exit 1 + fi + # we use cut instead of awk because awk is alternatives symlink on ubuntu + # and /etc/alternatives isn't bind mounted + cmd_path=`echo $cmd_path |cut -d ' ' -f 3` +} + +options=$(getopt -o hp:n:S: -l help,rootfs:,path:,name:,auth-key: -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 @@ -156,6 +189,7 @@ do case "$1" in -h|--help) usage $0 && exit 0;; -p|--path) path=$2; shift 2;; + --rootfs) rootfs=$2; shift 2;; -n|--name) name=$2; shift 2;; -S|--auth-key) auth_key=$2; shift 2;; --) shift 1; break ;; @@ -170,25 +204,15 @@ fi if [ $0 == "/sbin/init" ]; then - type @LXCINITDIR@/lxc-init - if [ $? -ne 0 ]; then - echo "'lxc-init is not accessible on the system" - exit 1 - fi - - type sshd - if [ $? -ne 0 ]; then - echo "'sshd' is not accessible on the system " - exit 1 - fi + PATH="$PATH:/bin:/sbin:/usr/sbin" + check_for_cmd @LXCINITDIR@/lxc/lxc-init + check_for_cmd sshd + sshd_path=$cmd_path # run dhcp? if [ -f /run-dhcp ]; then - type dhclient - if [ $? -ne 0 ]; then - echo "can't find dhclient" - exit 1 - fi + check_for_cmd dhclient + check_for_cmd ifconfig touch /etc/fstab rm -f /dhclient.conf cat > /dhclient.conf << EOF @@ -196,9 +220,11 @@ send host-name ""; EOF ifconfig eth0 up dhclient eth0 -cf /dhclient.conf + echo "Container IP address:" + ifconfig eth0 |grep inet fi - exec @LXCINITDIR@/lxc-init -- /usr/sbin/sshd + exec @LXCINITDIR@/lxc/lxc-init -- $sshd_path exit 1 fi @@ -209,10 +235,12 @@ fi # detect rootfs config="$path/config" -if grep -q '^lxc.rootfs' $config 2>/dev/null ; then - rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` -else - rootfs=$path/rootfs +if [ -z "$rootfs" ]; then + if grep -q '^lxc.rootfs' $config 2>/dev/null ; then + rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` + else + rootfs=$path/rootfs + fi fi install_sshd $rootfs diff --git a/templates/lxc-ubuntu-cloud.in b/templates/lxc-ubuntu-cloud.in index 8673e4cbb..0abc9328a 100644 --- a/templates/lxc-ubuntu-cloud.in +++ b/templates/lxc-ubuntu-cloud.in @@ -1,30 +1,45 @@ #!/bin/bash -# template script for generating ubuntu container for LXC based on released cloud -# images +# template script for generating ubuntu container for LXC based on released +# cloud images. 
# # Copyright © 2012 Serge Hallyn # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2, as -# published by the Free Software Foundation. +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA set -e +STATE_DIR="@LOCALSTATEDIR@" +HOOK_DIR="@LXCHOOKDIR@" +CLONE_HOOK_FN="$HOOK_DIR/ubuntu-cloud-prep" + if [ -r /etc/default/lxc ]; then . 
/etc/default/lxc fi +am_in_userns() { + [ -e /proc/self/uid_map ] || { echo no; return; } + [ "$(wc -l /proc/self/uid_map | awk '{ print $1 }')" -eq 1 ] || { echo yes; return; } + line=$(awk '{ print $1 " " $2 " " $3 }' /proc/self/uid_map) + [ "$line" = "0 0 4294967295" ] && { echo no; return; } + echo yes +} + +in_userns=0 +[ $(am_in_userns) = "yes" ] && in_userns=1 + copy_configuration() { path=$1 @@ -55,11 +70,17 @@ lxc.pts = 1024 lxc.utsname = $name lxc.arch = $arch -lxc.cap.drop = sys_module mac_admin mac_override +lxc.cap.drop = sys_module mac_admin mac_override sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined +# To support container nesting on an Ubuntu host, uncomment next two lines: +#lxc.aa_profile = lxc-container-default-with-nesting +#lxc.hook.mount = /usr/share/lxc/hooks/mountcgroups + +lxc.hook.clone = ${CLONE_HOOK_FN} + lxc.cgroup.devices.deny = a # Allow any mknod (but not using the node) lxc.cgroup.devices.allow = c *:* m @@ -70,32 +91,43 @@ lxc.cgroup.devices.allow = c 1:5 rwm # consoles lxc.cgroup.devices.allow = c 5:1 rwm lxc.cgroup.devices.allow = c 5:0 rwm -#lxc.cgroup.devices.allow = c 4:0 rwm -#lxc.cgroup.devices.allow = c 4:1 rwm # /dev/{,u}random lxc.cgroup.devices.allow = c 1:9 rwm lxc.cgroup.devices.allow = c 1:8 rwm lxc.cgroup.devices.allow = c 136:* rwm lxc.cgroup.devices.allow = c 5:2 rwm # rtc -lxc.cgroup.devices.allow = c 254:0 rwm -#fuse +lxc.cgroup.devices.allow = c 254:0 rm +# fuse lxc.cgroup.devices.allow = c 10:229 rwm -#tun +# tun lxc.cgroup.devices.allow = c 10:200 rwm -#full +# full lxc.cgroup.devices.allow = c 1:7 rwm -#hpet +# hpet lxc.cgroup.devices.allow = c 10:228 rwm -#kvm +# kvm lxc.cgroup.devices.allow = c 10:232 rwm EOF cat < $path/fstab proc proc proc nodev,noexec,nosuid 0 0 sysfs sys sysfs defaults 0 0 +/sys/fs/fuse/connections sys/fs/fuse/connections none bind 0 0 +/sys/kernel/debug sys/kernel/debug none bind 0 0 +/sys/kernel/security 
sys/kernel/security none bind 0 0 EOF + # unprivileged user can't mknod these. One day we may allow + # that in the kernel, but not right now. So let's just bind + # mount the files from the host. + if [ $in_userns -eq 1 ]; then + for dev in null tty urandom console; do + touch $rootfs/dev/$dev + echo "/dev/$dev dev/$dev none bind 0 0" >> $path/fstab + done + fi + # rmdir /dev/shm for containers that have /run/shm # I'm afraid of doing rm -rf $rootfs/dev/shm, in case it did # get bind mounted to the host's /run/shm. So try to rmdir @@ -115,48 +147,44 @@ LXC Container configuration for Ubuntu Cloud images. Generic Options [ -r | --release ]: Release name of container, defaults to host -[ -a | --arch ]: Arhcitecture of container, defaults to host arcitecture -[ -C | --cloud ]: Configure container for use with meta-data service, defaults to no +[ --rootfs ]: Path in which rootfs will be placed +[ -a | --arch ]: Arhcitecture of container, defaults to host architecture [ -T | --tarball ]: Location of tarball [ -d | --debug ]: Run with 'set -x' to debug errors [ -s | --stream]: Use specified stream rather than 'released' -Options, mutually exclusive of "-C" and "--cloud": - [ -i | --hostid ]: HostID for cloud-init, defaults to random string - [ -u | --userdata ]: Cloud-init user-data file to configure container on start - [ -S | --auth-key ]: SSH Public key file to inject into container - [ -L | --nolocales ]: Do not copy host's locales into container - +Additionally, clone hooks can be passed through (ie, --userdata). For those, +see: + $CLONE_HOOK_FN --help EOF return 0 } -options=$(getopt -o a:hp:r:n:Fi:CLS:T:ds:u: -l arch:,help,path:,release:,name:,flush-cache,hostid:,auth-key:,cloud,no_locales,tarball:,debug,stream:,userdata: -- "$@") +options=$(getopt -o a:hp:r:n:Fi:CLS:T:ds:u: -l arch:,help,rootfs:,path:,release:,name:,flush-cache,hostid:,auth-key:,cloud,no_locales,tarball:,debug,stream:,userdata: -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 fi eval set -- "$options" -release=lucid +# default release is precise, or the systems release if recognized +release=precise if [ -f /etc/lsb-release ]; then . /etc/lsb-release - case "$DISTRIB_CODENAME" in - lucid|natty|oneiric|precise|quantal) - release=$DISTRIB_CODENAME - ;; - esac + rels=$(ubuntu-distro-info --supported 2>/dev/null) || + rels="lucid natty oneiric precise quantal raring saucy" + for r in $rels; do + [ "$DISTRIB_CODENAME" = "$r" ] && release="$r" + done fi -arch=$(arch) - # Code taken from debootstrap if [ -x /usr/bin/dpkg ] && /usr/bin/dpkg --print-architecture >/dev/null 2>&1; then arch=`/usr/bin/dpkg --print-architecture` elif type udpkg >/dev/null 2>&1 && udpkg --print-architecture >/dev/null 2>&1; then arch=`/usr/bin/udpkg --print-architecture` else - arch=$(arch) + arch=$(uname -m) if [ "$arch" = "i686" ]; then arch="i386" elif [ "$arch" = "x86_64" ]; then @@ -176,6 +204,7 @@ cloud=0 locales=1 flushcache=0 stream="released" +cloneargs=() while true do case "$1" in @@ -185,19 +214,22 @@ do -F|--flush-cache) flushcache=1; shift 1;; -r|--release) release=$2; shift 2;; -a|--arch) arch=$2; shift 2;; - -i|--hostid) host_id=$2; shift 2;; - -u|--userdata) userdata=$2; shift 2;; - -C|--cloud) cloud=1; shift 1;; - -S|--auth-key) auth_key=$2; shift 2;; - -L|--no_locales) locales=0; shift 1;; -T|--tarball) tarball=$2; shift 2;; -d|--debug) debug=1; shift 1;; -s|--stream) stream=$2; shift 2;; + --rootfs) rootfs=$2; shift 2;; + -L|--no?locales) cloneargs[${#cloneargs[@]}]="--no-locales"; shift 1;; + -i|--hostid) cloneargs[${#cloneargs[@]}]="--hostid=$2"; shift 2;; + -u|--userdata) cloneargs[${#cloneargs[@]}]="--userdata=$2"; shift 2;; + -C|--cloud) cloneargs[${#cloneargs[@]}]="--cloud"; shift 1;; + -S|--auth-key) cloneargs[${#cloneargs[@]}]="--auth-key=$2"; shift 2;; --) shift 1; break ;; *) break ;; esac done +cloneargs=( "--name=$name" "${cloneargs[@]}" ) + if [ $debug -eq 1 ]; then set -x fi @@ -237,24 +269,6 
@@ if [ "$stream" != "daily" -a "$stream" != "released" ]; then exit 1 fi -if [ -n "$userdata" ]; then - if [ ! -f "$userdata" ]; then - echo "Userdata ($userdata) does not exist" - exit 1 - else - userdata=`readlink -f $userdata` - fi -fi - -if [ -n "$auth_key" ]; then - if [ ! -f "$auth_key" ]; then - echo "--auth-key=${auth_key} must reference a file" - exit 1 - fi - auth_key=$(readlink -f "${auth_key}") || - { echo "failed to get full path for auth_key"; exit 1; } -fi - if [ -z "$path" ]; then echo "'path' parameter is required" exit 1 @@ -267,10 +281,12 @@ fi # detect rootfs config="$path/config" -if grep -q '^lxc.rootfs' $config 2>/dev/null ; then - rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` -else - rootfs=$path/rootfs +if [ -z "$rootfs" ]; then + if grep -q '^lxc.rootfs' $config 2>/dev/null ; then + rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` + else + rootfs=$path/rootfs + fi fi type ubuntu-cloudimg-query @@ -278,7 +294,7 @@ type wget # determine the url, tarball, and directory names # download if needed -cache="@LOCALSTATEDIR@/cache/lxc/cloud-$release" +cache="$STATE_DIR/cache/lxc/cloud-$release" mkdir -p $cache @@ -334,9 +350,7 @@ build_root_tgz() trap SIGTERM } -mkdir -p @LOCALSTATEDIR@/lock/subsys/ -( - flock -x 200 +do_extract_rootfs() { cd $cache if [ $flushcache -eq 1 ]; then @@ -357,64 +371,22 @@ mkdir -p @LOCALSTATEDIR@/lock/subsys/ mkdir -p $rootfs cd $rootfs tar -zxf $cache/$filename +} - - if [ $cloud -eq 0 ]; then - echo "Configuring for running outside of a cloud environment" - echo "If you want to configure for a cloud evironment, please use '-- -C' to create the container" - - seed_d=$rootfs/var/lib/cloud/seed/nocloud-net - rhostid=$(uuidgen | cut -c -8) - host_id=${hostid:-$rhostid} - mkdir -p $seed_d - - cat > "$seed_d/meta-data" <> "$seed_d/meta-data" - [ $? 
-eq 0 ] || - { echo "failed to write public keys to metadata"; exit 1; } - fi - - rm $rootfs/etc/hostname - - if [ $locales -eq 1 ]; then - cp /usr/lib/locale/locale-archive $rootfs/usr/lib/locale/locale-archive - fi - - if [ -f "$userdata" ]; then - echo "Using custom user-data" - cp $userdata $seed_d/user-data - else - - if [ -z "$MIRROR" ]; then - MIRROR="http://archive.ubuntu.com/ubuntu" - fi - - cat > "$seed_d/user-data" <@LOCALSTATEDIR@/lock/subsys/lxc-ubuntu-cloud +if [ -n "$tarball" ]; then + do_extract_rootfs +else + mkdir -p "$STATE_DIR/lock/subsys/" + ( + flock -x 200 + do_extract_rootfs + ) 200>"$STATE_DIR/lock/subsys/lxc-ubuntu-cloud" +fi copy_configuration $path $rootfs $name $arch $release +"$CLONE_HOOK_FN" "${cloneargs[@]}" "$rootfs" + echo "Container $name created." exit 0 diff --git a/templates/lxc-ubuntu.in b/templates/lxc-ubuntu.in index f01163326..6f6f3e131 100644 --- a/templates/lxc-ubuntu.in +++ b/templates/lxc-ubuntu.in @@ -10,19 +10,19 @@ # Copyright © 2010 Wilhelm Meier # Author: Wilhelm Meier # -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2, as -# published by the Free Software Foundation. +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
-# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA set -e @@ -36,7 +36,7 @@ configure_ubuntu() hostname=$2 release=$3 - # configure the network using the dhcp + # configure the network using the dhcp cat < $rootfs/etc/network/interfaces # This file describes the network interfaces available on your system # and how to activate them. For more information, see interfaces(5). @@ -81,7 +81,7 @@ EOF fi # make sure we have the current locale defined in the container - if [ -z "$LANG" ]; then + if [ -z "$LANG" ] || echo $LANG | grep -E -q "^C(\..+)*$"; then chroot $rootfs locale-gen en_US.UTF-8 chroot $rootfs update-locale LANG=en_US.UTF-8 else @@ -89,6 +89,22 @@ EOF chroot $rootfs update-locale LANG=$LANG fi + # generate new SSH keys + if [ -x $rootfs@LOCALSTATEDIR@/lib/dpkg/info/openssh-server.postinst ]; then + cat > $rootfs/usr/sbin/policy-rc.d << EOF +#!/bin/sh +exit 101 +EOF + chmod +x $rootfs/usr/sbin/policy-rc.d + + rm -f $rootfs/etc/ssh/ssh_host_*key* + mv $rootfs/etc/init/ssh.conf $rootfs/etc/init/ssh.conf.disabled + DPKG_MAINTSCRIPT_PACKAGE=openssh DPKG_MAINTSCRIPT_NAME=postinst chroot $rootfs @LOCALSTATEDIR@/lib/dpkg/info/openssh-server.postinst configure + mv $rootfs/etc/init/ssh.conf.disabled $rootfs/etc/init/ssh.conf + + rm -f $rootfs/usr/sbin/policy-rc.d + fi + return 0 } @@ -124,12 +140,48 @@ finalize_user() return 0 } +# +# Choose proxies for container +# http_proxy will be used by debootstrap on the host. +# APT_PROXY will be used to set /etc/apt/apt.conf.d/70proxy in the container. 
+# +choose_container_proxy() +{ + local rootfs=$1 + local arch=$2 + + if [ -z "$HTTP_PROXY" ]; then + HTTP_PROXY="none" + fi + case "$HTTP_PROXY" in + none) + APT_PROXY= + ;; + apt) + RES=`apt-config shell APT_PROXY Acquire::http::Proxy` + eval $RES + [ -z "$APT_PROXY" ] || export http_proxy=$APT_PROXY + ;; + *) + APT_PROXY=$HTTP_PROXY + export http_proxy=$HTTP_PROXY + ;; + esac +} + write_sourceslist() { # $1 => path to the rootfs # $2 => architecture we want to add # $3 => whether to use the multi-arch syntax or not + if [ -n "$APT_PROXY" ]; then + mkdir -p $rootfs/etc/apt/apt.conf.d + cat > $rootfs/etc/apt/apt.conf.d/70proxy << EOF +Acquire::http::Proxy "$APT_PROXY" ; +EOF + fi + case $2 in amd64|i386) MIRROR=${MIRROR:-http://archive.ubuntu.com/ubuntu} @@ -196,6 +248,7 @@ download_ubuntu() return 1 fi + choose_container_proxy $cache/partial-$arch/ $arch # download a mini ubuntu into a cache echo "Downloading ubuntu $release minimal ..." if [ -n "$(which qemu-debootstrap)" ]; then @@ -333,11 +386,15 @@ lxc.pts = 1024 lxc.utsname = $name lxc.arch = $arch -lxc.cap.drop = sys_module mac_admin mac_override +lxc.cap.drop = sys_module mac_admin mac_override sys_time # When using LXC with apparmor, uncomment the next line to run unconfined: #lxc.aa_profile = unconfined +# To support container nesting on an Ubuntu host, uncomment next two lines: +#lxc.aa_profile = lxc-container-default-with-nesting +#lxc.hook.mount = /usr/share/lxc/hooks/mountcgroups + lxc.cgroup.devices.deny = a # Allow any mknod (but not using the node) lxc.cgroup.devices.allow = c *:* m @@ -348,30 +405,31 @@ lxc.cgroup.devices.allow = c 1:5 rwm # consoles lxc.cgroup.devices.allow = c 5:1 rwm lxc.cgroup.devices.allow = c 5:0 rwm -#lxc.cgroup.devices.allow = c 4:0 rwm -#lxc.cgroup.devices.allow = c 4:1 rwm # /dev/{,u}random lxc.cgroup.devices.allow = c 1:9 rwm lxc.cgroup.devices.allow = c 1:8 rwm lxc.cgroup.devices.allow = c 136:* rwm lxc.cgroup.devices.allow = c 5:2 rwm # rtc -lxc.cgroup.devices.allow 
= c 254:0 rwm -#fuse +lxc.cgroup.devices.allow = c 254:0 rm +# fuse lxc.cgroup.devices.allow = c 10:229 rwm -#tun +# tun lxc.cgroup.devices.allow = c 10:200 rwm -#full +# full lxc.cgroup.devices.allow = c 1:7 rwm -#hpet +# hpet lxc.cgroup.devices.allow = c 10:228 rwm -#kvm +# kvm lxc.cgroup.devices.allow = c 10:232 rwm EOF cat < $path/fstab proc proc proc nodev,noexec,nosuid 0 0 sysfs sys sysfs defaults 0 0 +/sys/fs/fuse/connections sys/fs/fuse/connections none bind 0 0 +/sys/kernel/debug sys/kernel/debug none bind 0 0 +/sys/kernel/security sys/kernel/security none bind 0 0 EOF if [ $? -ne 0 ]; then @@ -519,8 +577,14 @@ post_process() write_sourceslist $rootfs $hostarch "multiarch" # Finally update the lists and install upstart using the host architecture + HOST_PACKAGES="upstart:${hostarch} mountall:${hostarch} isc-dhcp-client:${hostarch}" chroot $rootfs apt-get update - chroot $rootfs apt-get install --force-yes -y --no-install-recommends upstart:${hostarch} mountall:${hostarch} iproute:${hostarch} isc-dhcp-client:${hostarch} + if chroot $rootfs dpkg -l iproute2 | grep -q ^ii; then + HOST_PACKAGES="$HOST_PACKAGES iproute2:${hostarch}" + else + HOST_PACKAGES="$HOST_PACKAGES iproute:${hostarch}" + fi + chroot $rootfs apt-get install --force-yes -y --no-install-recommends $HOST_PACKAGES fi # rmdir /dev/shm for containers that have /run/shm @@ -576,6 +640,7 @@ usage() cat <] [--trim] [-d|--debug] [-F | --flush-cache] [-r|--release ] [ -S | --auth-key ] + [--rootfs ] release: the ubuntu release (e.g. precise): defaults to host release on ubuntu, otherwise uses latest LTS trim: make a minimal (faster, but not upgrade-safe) container bindhome: bind 's home into the container @@ -587,7 +652,7 @@ EOF return 0 } -options=$(getopt -o a:b:hp:r:xn:FS:d -l arch:,bindhome:,help,path:,release:,trim,name:,flush-cache,auth-key:,debug -- "$@") +options=$(getopt -o a:b:hp:r:xn:FS:d -l arch:,bindhome:,help,path:,release:,trim,name:,flush-cache,auth-key:,debug,rootfs: -- "$@") if [ $? 
-ne 0 ]; then usage $(basename $0) exit 1 @@ -603,7 +668,6 @@ if [ -f /etc/lsb-release ]; then fi bindhome= -arch=$(arch) # Code taken from debootstrap if [ -x /usr/bin/dpkg ] && /usr/bin/dpkg --print-architecture >/dev/null 2>&1; then @@ -611,7 +675,7 @@ if [ -x /usr/bin/dpkg ] && /usr/bin/dpkg --print-architecture >/dev/null 2>&1; t elif which udpkg >/dev/null 2>&1 && udpkg --print-architecture >/dev/null 2>&1; then arch=`/usr/bin/udpkg --print-architecture` else - arch=$(arch) + arch=$(uname -m) if [ "$arch" = "i686" ]; then arch="i386" elif [ "$arch" = "x86_64" ]; then @@ -629,6 +693,7 @@ while true do case "$1" in -h|--help) usage $0 && exit 0;; + --rootfs) rootfs=$2; shift 2;; -p|--path) path=$2; shift 2;; -n|--name) name=$2; shift 2;; -F|--flush-cache) flushcache=1; shift 1;; @@ -690,10 +755,13 @@ fi # detect rootfs config="$path/config" -if grep -q '^lxc.rootfs' $config 2>/dev/null ; then - rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` -else - rootfs=$path/rootfs +# if $rootfs exists here, it was passed in with --rootfs +if [ -z "$rootfs" ]; then + if grep -q '^lxc.rootfs' $config 2>/dev/null ; then + rootfs=`grep 'lxc.rootfs =' $config | awk -F= '{ print $2 }'` + else + rootfs=$path/rootfs + fi fi install_ubuntu $rootfs $release $flushcache