2011-03-31 09:44:30 -07:00
|
|
|
# Copyright (c) 2010, 2011 Nicira Networks.
|
2010-09-22 22:48:42 -07:00
|
|
|
#
|
2010-08-25 10:26:40 -07:00
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at:
|
2010-09-22 22:48:42 -07:00
|
|
|
#
|
2010-08-25 10:26:40 -07:00
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
2010-09-22 22:48:42 -07:00
|
|
|
#
|
2010-08-25 10:26:40 -07:00
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
import getopt
|
daemon: Avoid races on pidfile creation.
Until now, if two copies of one OVS daemon started up at the same time,
then due to races in pidfile creation it was possible for both of them to
start successfully, instead of just one. This was made worse when a
previous copy of the daemon had died abruptly, leaving a stale pidfile.
This commit implements a new pidfile creation and removal protocol that I
believe closes these races. Now, a pidfile is asserted with "link" instead
of "rename", which prevents the race on creation, and a stale pidfile may
only be deleted by a process after it has taken a lock on it.
This may solve mysterious problems seen occasionally on vswitch restart.
I'm still puzzled by these problems, however, because I don't see anything
in our test cases that would actually cause two copies of a daemon to
start at the same time, which as far as I can see is a necessary
precondition for the problem.
2011-04-04 10:59:19 -07:00
|
|
|
import logging
|
2010-09-22 21:59:02 -07:00
|
|
|
import signal
|
2010-08-25 10:26:40 -07:00
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
|
|
|
|
import ovs.daemon
|
|
|
|
import ovs.util
|
|
|
|
|
2011-09-16 15:48:33 -07:00
|
|
|
|
|
|
|
def handler(signum, _):
    """Signal handler that converts a delivered signal into an exception.

    main() installs this for SIGHUP.  Raising from the handler makes the
    exception surface in whatever code was running when the signal arrived.

    signum -- number of the signal that was delivered.
    _      -- second handler argument (unused).

    Always raises Exception; never returns.
    """
    message = "Signal handler called with %d" % signum
    raise Exception(message)
|
|
|
|
|
2011-09-16 15:48:33 -07:00
|
|
|
|
2010-08-25 10:26:40 -07:00
|
|
|
def main(argv):
    """Entry point of the daemonization test program.

    Configures DEBUG-level logging, installs handler() for SIGHUP, parses
    the command line, runs the ovs.daemon start/complete sequence and then
    sleeps forever.

    argv -- full argument vector; argv[0] is skipped when parsing.

    Recognized options:
      -h, --help   print usage and exit (usage() exits with status 0)
      -b, --bail   exit with status 1 right after daemonize_start()
      anything in ovs.daemon.LONG_OPTIONS is passed to
      ovs.daemon.parse_opt()

    Exits with status 1 on a command-line error, or when --bail was given.
    Otherwise this function does not return.
    """
    logging.basicConfig(level=logging.DEBUG)

    signal.signal(signal.SIGHUP, handler)

    try:
        # 'h' is accepted because usage() advertises "-h, --help".
        options, _args = getopt.gnu_getopt(
            argv[1:], 'bh', ["bail", "help"] + ovs.daemon.LONG_OPTIONS)
    except getopt.GetoptError as geo:
        sys.stderr.write("%s: %s\n" % (ovs.util.PROGRAM_NAME, geo.msg))
        sys.exit(1)

    bail = False
    for key, value in options:
        if key in ('-h', '--help'):
            usage()
        elif key in ('-b', '--bail'):
            bail = True
        elif not ovs.daemon.parse_opt(key, value):
            # parse_opt() returned a false value, so nobody claimed the
            # option.
            sys.stderr.write("%s: unhandled option %s\n"
                             % (ovs.util.PROGRAM_NAME, key))
            sys.exit(1)

    ovs.daemon.daemonize_start()
    if bail:
        # Deliberately fail between the two daemonization steps.
        sys.stderr.write("%s: exiting after daemonize_start() as requested\n"
                         % ovs.util.PROGRAM_NAME)
        sys.exit(1)
    ovs.daemon.daemonize_complete()

    # Idle forever; the process ends only through a signal or an exception.
    while True:
        time.sleep(1)
|
|
|
|
|
2011-09-16 15:48:33 -07:00
|
|
|
|
2010-08-25 10:26:40 -07:00
|
|
|
def usage():
    """Print the help text to stdout and exit with status 0.

    The output is the program banner and usage line, followed by whatever
    ovs.daemon.usage() prints, followed by the options specific to this
    program.  Never returns: ends with sys.exit(0).
    """
    program_name = ovs.util.PROGRAM_NAME
    # The banner has two %s placeholders, so it needs a 2-tuple; passing the
    # bare string raises "TypeError: not enough arguments for format string".
    sys.stdout.write("""\
%s: Open vSwitch daemonization test program for Python
usage: %s [OPTIONS]
""" % (program_name, program_name))
    ovs.daemon.usage()
    sys.stdout.write("""
Other options:
-h, --help display this help message
-b, --bail exit with an error after daemonize_start()
""")
    sys.exit(0)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    try:
        main(sys.argv)
    except SystemExit:
        # Explicit sys.exit() calls keep the exit status they asked for.
        raise
    except:
        # Every other failure, including the exception raised by the
        # SIGHUP handler, ends the process with RESTART_EXIT_CODE.
        raise SystemExit(ovs.daemon.RESTART_EXIT_CODE)
|