diff --git a/NEWS b/NEWS index 9640ee6d0..fc8ab05de 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,9 @@ Post-v2.10.0 It can be set with pmd-rxq-assign. - Add 'symmetric_l3' hash function. - OVS now honors 'updelay' and 'downdelay' for bonds with LACP configured. + - ovs-vswitchd: + * New configuration option "offload-rebalance", that enables dynamic + rebalancing of offloaded flows. v2.10.0 - 18 Aug 2018 --------------------- diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 5df4129fb..c44c417d3 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3705,7 +3705,8 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) } static void -dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) +dpif_netdev_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type OVS_UNUSED) { size_t i; diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 5a2ba2d5b..2b2bb01a4 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2133,7 +2133,8 @@ dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops, } static void -dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) +dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type) { struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_op *new_ops[OPERATE_MAX_OPS]; @@ -2141,7 +2142,12 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) int i = 0; int err = 0; - if (netdev_is_flow_api_enabled()) { + if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) { + VLOG_DBG("Invalid offload_type: %d", offload_type); + return; + } + + if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) { while (n_ops > 0) { count = 0; @@ -2150,6 +2156,23 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) err = try_send_to_netdev(dpif, op); if (err && err != EEXIST) 
{ + if (offload_type == DPIF_OFFLOAD_ALWAYS) { + /* We got an error while offloading an op. Since + * OFFLOAD_ALWAYS is specified, we stop further + * processing and return to the caller without + * invoking kernel datapath as fallback. But the + * interface requires us to process all n_ops; so + * return the same error in the remaining ops too. + */ + op->error = err; + n_ops--; + while (n_ops > 0) { + op = ops[i++]; + op->error = err; + n_ops--; + } + return; + } new_ops[count++] = op; } else { op->error = err; @@ -2160,7 +2183,7 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops) dpif_netlink_operate_chunks(dpif, new_ops, count); } - } else { + } else if (offload_type != DPIF_OFFLOAD_ALWAYS) { dpif_netlink_operate_chunks(dpif, ops, n_ops); } } diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 8d833b85f..78e153c86 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -288,12 +288,14 @@ struct dpif_class { int (*flow_dump_next)(struct dpif_flow_dump_thread *thread, struct dpif_flow *flows, int max_flows); - /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order * in which they are specified, placing each operation's results in the * "output" members documented in comments and the 'error' member of each - * dpif_op. */ - void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops); + * dpif_op. The offload_type argument tells the provider if 'ops' should + * be submitted to a netdev (only offload) or to the kernel datapath + * (never offload) or to both (offload if possible; software fallback). */ + void (*operate)(struct dpif *dpif, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type); /* Enables or disables receiving packets with dpif_recv() for 'dpif'. 
* Turning packet receive off and then back on is allowed to change Netlink diff --git a/lib/dpif.c b/lib/dpif.c index 4697a4dcd..7f981328e 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -49,6 +49,7 @@ #include "valgrind.h" #include "openvswitch/ofp-errors.h" #include "openvswitch/vlog.h" +#include "lib/netdev-provider.h" VLOG_DEFINE_THIS_MODULE(dpif); @@ -1006,7 +1007,7 @@ dpif_flow_get(struct dpif *dpif, op.flow_get.flow->key_len = key_len; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } @@ -1036,7 +1037,7 @@ dpif_flow_put(struct dpif *dpif, enum dpif_flow_put_flags flags, op.flow_put.stats = stats; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } @@ -1059,7 +1060,7 @@ dpif_flow_del(struct dpif *dpif, op.flow_del.terse = false; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } @@ -1317,7 +1318,7 @@ dpif_execute(struct dpif *dpif, struct dpif_execute *execute) op.execute = *execute; opp = &op; - dpif_operate(dpif, &opp, 1); + dpif_operate(dpif, &opp, 1, DPIF_OFFLOAD_AUTO); return op.error; } else { @@ -1328,10 +1329,21 @@ dpif_execute(struct dpif *dpif, struct dpif_execute *execute) /* Executes each of the 'n_ops' operations in 'ops' on 'dpif', in the order in * which they are specified. Places each operation's results in the "output" * members documented in comments, and 0 in the 'error' member on success or a - * positive errno on failure. */ + * positive errno on failure. 
+ */ void -dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) +dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type offload_type) { + if (offload_type == DPIF_OFFLOAD_ALWAYS && !netdev_is_flow_api_enabled()) { + size_t i; + for (i = 0; i < n_ops; i++) { + struct dpif_op *op = ops[i]; + op->error = EINVAL; + } + return; + } + while (n_ops > 0) { size_t chunk; @@ -1352,7 +1364,7 @@ dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops) * handle itself, without help. */ size_t i; - dpif->dpif_class->operate(dpif, ops, chunk); + dpif->dpif_class->operate(dpif, ops, chunk, offload_type); for (i = 0; i < chunk; i++) { struct dpif_op *op = ops[i]; @@ -1649,7 +1661,7 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id, log_operation(dpif, "queue_to_priority", error); return error; } - + void dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class, const char *name, diff --git a/lib/dpif.h b/lib/dpif.h index 1a35cc410..0df856d4f 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -606,6 +606,13 @@ enum dpif_op_type { DPIF_OP_FLOW_GET, }; +/* offload_type argument types to (*operate) interface */ +enum dpif_offload_type { + DPIF_OFFLOAD_AUTO, /* Offload if possible, fallback to software. */ + DPIF_OFFLOAD_NEVER, /* Never offload to hardware. */ + DPIF_OFFLOAD_ALWAYS, /* Always offload to hardware. */ +}; + /* Add or modify a flow. * * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in @@ -760,8 +767,9 @@ struct dpif_op { }; }; -void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops); - +void dpif_operate(struct dpif *, struct dpif_op **ops, size_t n_ops, + enum dpif_offload_type); + /* Upcalls. 
*/ enum dpif_upcall_type { diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index e320dad61..fb0c27e6e 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -38,10 +38,14 @@ struct netdev_tnl_build_header_params; /* Offload-capable (HW) netdev information */ struct netdev_hw_info { bool oor; /* Out of Offload Resources ? */ + int offload_count; /* Offloaded flow count */ + int pending_count; /* Pending (non-offloaded) flow count */ }; enum hw_info_type { - HW_INFO_TYPE_OOR = 1 /* OOR state */ + HW_INFO_TYPE_OOR = 1, /* OOR state */ + HW_INFO_TYPE_PEND_COUNT = 2, /* Pending(non-offloaded) flow count */ + HW_INFO_TYPE_OFFL_COUNT = 3 /* Offloaded flow count */ }; /* A network device (e.g. an Ethernet device). @@ -89,7 +93,7 @@ struct netdev { int n_rxq; struct shash_node *node; /* Pointer to element in global map. */ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */ struct netdev_hw_info hw_info; /* offload-capable netdev info */ }; diff --git a/lib/netdev.c b/lib/netdev.c index 708a4bbd7..84874408a 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -2261,11 +2261,23 @@ netdev_get_block_id(struct netdev *netdev) int netdev_get_hw_info(struct netdev *netdev, int type) { - if (type == HW_INFO_TYPE_OOR) { - return netdev->hw_info.oor; + int val = -1; + + switch (type) { + case HW_INFO_TYPE_OOR: + val = netdev->hw_info.oor; + break; + case HW_INFO_TYPE_PEND_COUNT: + val = netdev->hw_info.pending_count; + break; + case HW_INFO_TYPE_OFFL_COUNT: + val = netdev->hw_info.offload_count; + break; + default: + break; } - return -1; + return val; } /* @@ -2274,11 +2286,49 @@ netdev_get_hw_info(struct netdev *netdev, int type) void netdev_set_hw_info(struct netdev *netdev, int type, int val) { - if (type == HW_INFO_TYPE_OOR) { + switch (type) { + case HW_INFO_TYPE_OOR: + if (val == 0) { + VLOG_DBG("Offload rebalance: netdev: %s is not OOR", netdev->name); + } netdev->hw_info.oor = val; + break; + case HW_INFO_TYPE_PEND_COUNT: + 
netdev->hw_info.pending_count = val; + break; + case HW_INFO_TYPE_OFFL_COUNT: + netdev->hw_info.offload_count = val; + break; + default: + break; } } +/* + * Find if any netdev is in OOR state. Return true if there's at least + * one netdev that's in OOR state; otherwise return false. + */ +bool +netdev_any_oor(void) + OVS_EXCLUDED(netdev_mutex) +{ + struct shash_node *node; + bool oor = false; + + ovs_mutex_lock(&netdev_mutex); + SHASH_FOR_EACH (node, &netdev_shash) { + struct netdev *dev = node->data; + + if (dev->hw_info.oor) { + oor = true; + break; + } + } + ovs_mutex_unlock(&netdev_mutex); + + return oor; +} + bool netdev_is_flow_api_enabled(void) { @@ -2550,6 +2600,10 @@ netdev_set_flow_api_enabled(const struct smap *ovs_other_config) tc_set_policy(smap_get_def(ovs_other_config, "tc-policy", TC_POLICY_DEFAULT)); + if (smap_get_bool(ovs_other_config, "offload-rebalance", false)) { + netdev_offload_rebalance_policy = true; + } + netdev_ports_flow_init(); ovsthread_once_done(&once); diff --git a/lib/netdev.h b/lib/netdev.h index c527bd2e9..d94817fb6 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -230,6 +230,7 @@ int netdev_init_flow_api(struct netdev *); uint32_t netdev_get_block_id(struct netdev *); int netdev_get_hw_info(struct netdev *, int); void netdev_set_hw_info(struct netdev *, int, int); +bool netdev_any_oor(void); bool netdev_is_flow_api_enabled(void); void netdev_set_flow_api_enabled(const struct smap *ovs_other_config); bool netdev_is_offload_rebalance_policy_enabled(void); diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index efff537fd..28172cb05 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -22,6 +22,7 @@ #include "connmgr.h" #include "coverage.h" #include "cmap.h" +#include "lib/dpif-provider.h" #include "dpif.h" #include "openvswitch/dynamic-string.h" #include "fail-open.h" @@ -42,7 +43,6 @@ #include "tunnel.h" #include "unixctl.h" #include "openvswitch/vlog.h" -#include 
"lib/dpif-provider.h" #include "lib/netdev-provider.h" #define MAX_QUEUE_LENGTH 512 @@ -182,6 +182,8 @@ struct udpif { uint64_t conn_seq; /* Corresponds to 'dump_seq' when conns[n_conns-1] was stored. */ size_t n_conns; /* Number of connections waiting. */ + + long long int offload_rebalance_time; /* Time of last offload rebalance */ }; enum upcall_type { @@ -308,6 +310,7 @@ struct udpif_key { struct recirc_refs recircs; /* Action recirc IDs with references held. */ #define OFFL_REBAL_INTVL_MSEC 3000 /* dynamic offload rebalance freq */ + struct netdev *in_netdev; /* in_odp_port's netdev */ bool offloaded; /* True if flow is offloaded */ uint64_t flow_pps_rate; /* Packets-Per-Second rate */ long long int flow_time; /* last pps update time */ @@ -396,6 +399,12 @@ static int upcall_receive(struct upcall *, const struct dpif_backer *, const ovs_u128 *ufid, const unsigned pmd_id); static void upcall_uninit(struct upcall *); +static void udpif_flow_rebalance(struct udpif *udpif); +static int udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey, + enum dpif_offload_type offload_type); +static int udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey, + enum dpif_offload_type offload_type); + static upcall_callback upcall_cb; static dp_purge_callback dp_purge_cb; @@ -567,6 +576,7 @@ udpif_start_threads(struct udpif *udpif, size_t n_handlers_, ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1); udpif->reval_exit = false; udpif->pause = false; + udpif->offload_rebalance_time = time_msec(); udpif->revalidators = xzalloc(udpif->n_revalidators * sizeof *udpif->revalidators); for (size_t i = 0; i < udpif->n_revalidators; i++) { @@ -859,6 +869,26 @@ free_dupcall: return n_upcalls; } +static void +udpif_run_flow_rebalance(struct udpif *udpif) +{ + long long int now = 0; + + /* Don't rebalance if OFFL_REBAL_INTVL_MSEC have not elapsed */ + now = time_msec(); + if (now < udpif->offload_rebalance_time + OFFL_REBAL_INTVL_MSEC) { + return; + } + + 
if (!netdev_any_oor()) { + return; + } + + VLOG_DBG("Offload rebalance: Found OOR netdevs"); + udpif->offload_rebalance_time = now; + udpif_flow_rebalance(udpif); +} + static void * udpif_revalidator(void *arg) { @@ -933,6 +963,9 @@ udpif_revalidator(void *arg) dpif_flow_dump_destroy(udpif->dump); seq_change(udpif->dump_seq); + if (netdev_is_offload_rebalance_policy_enabled()) { + udpif_run_flow_rebalance(udpif); + } duration = MAX(time_msec() - start_time, 1); udpif->dump_duration = duration; @@ -977,7 +1010,7 @@ udpif_revalidator(void *arg) return NULL; } - + static enum upcall_type classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata, struct user_action_cookie *cookie) @@ -1578,7 +1611,7 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, for (i = 0; i < n_ops; i++) { opsp[n_opsp++] = &ops[i].dop; } - dpif_operate(udpif->dpif, opsp, n_opsp); + dpif_operate(udpif->dpif, opsp, n_opsp, DPIF_OFFLOAD_AUTO); for (i = 0; i < n_ops; i++) { struct udpif_key *ukey = ops[i].ukey; @@ -1670,13 +1703,13 @@ ukey_create__(const struct nlattr *key, size_t key_len, ukey->state = UKEY_CREATED; ukey->state_thread = ovsthread_id_self(); ukey->state_where = OVS_SOURCE_LOCATOR; - ukey->created = time_msec(); + ukey->created = ukey->flow_time = time_msec(); memset(&ukey->stats, 0, sizeof ukey->stats); ukey->stats.used = used; ukey->xcache = NULL; ukey->offloaded = false; - ukey->flow_time = 0; + ukey->in_netdev = NULL; ukey->flow_packets = ukey->flow_backlog_packets = 0; ukey->key_recirc_id = key_recirc_id; @@ -2328,7 +2361,7 @@ push_dp_ops(struct udpif *udpif, struct ukey_op *ops, size_t n_ops) for (i = 0; i < n_ops; i++) { opsp[i] = &ops[i].dop; } - dpif_operate(udpif->dpif, opsp, n_ops); + dpif_operate(udpif->dpif, opsp, n_ops, DPIF_OFFLOAD_AUTO); for (i = 0; i < n_ops; i++) { struct ukey_op *op = &ops[i]; @@ -2454,6 +2487,57 @@ reval_op_init(struct ukey_op *op, enum reval_result result, } } +static void +ukey_netdev_unref(struct udpif_key *ukey) +{ + 
if (!ukey->in_netdev) { + return; + } + netdev_close(ukey->in_netdev); + ukey->in_netdev = NULL; +} + +/* + * Given a udpif_key, get its input port (netdev) by parsing the flow keys + * and actions. The flow may not contain flow attributes if it is a terse + * dump; read its attributes from the ukey and then parse the flow to get + * the port info. Save them in udpif_key. + */ +static void +ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey) +{ + const struct dpif *dpif = udpif->dpif; + const struct dpif_class *dpif_class = dpif->dpif_class; + const struct nlattr *k; + unsigned int left; + + /* Remove existing references to netdev */ + ukey_netdev_unref(ukey); + + /* Find the input port and get a reference to its netdev */ + NL_ATTR_FOR_EACH (k, left, ukey->key, ukey->key_len) { + enum ovs_key_attr type = nl_attr_type(k); + + if (type == OVS_KEY_ATTR_IN_PORT) { + ukey->in_netdev = netdev_ports_get(nl_attr_get_odp_port(k), + dpif_class); + } else if (type == OVS_KEY_ATTR_TUNNEL) { + struct flow_tnl tnl; + enum odp_key_fitness res; + + if (ukey->in_netdev) { + netdev_close(ukey->in_netdev); + ukey->in_netdev = NULL; + } + res = odp_tun_key_from_attr(k, &tnl); + if (res != ODP_FIT_ERROR) { + ukey->in_netdev = flow_get_tunnel_netdev(&tnl); + break; + } + } + } +} + static uint64_t udpif_flow_packet_delta(struct udpif_key *ukey, const struct dpif_flow *f) { @@ -2467,6 +2551,16 @@ udpif_flow_time_delta(struct udpif *udpif, struct udpif_key *ukey) return (udpif->dpif->current_ms - ukey->flow_time) / 1000; } +/* + * Save backlog packet count while switching modes + * between offloaded and kernel datapaths. 
+ */ +static void +udpif_set_ukey_backlog_packets(struct udpif_key *ukey) +{ + ukey->flow_backlog_packets = ukey->flow_packets; +} + /* Gather pps-rate for the given dpif_flow and save it in its ukey */ static void udpif_update_flow_pps(struct udpif *udpif, struct udpif_key *ukey, @@ -2538,6 +2632,7 @@ revalidate(struct revalidator *revalidator) kill_them_all = n_dp_flows > flow_limit * 2; max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle; + udpif->dpif->current_ms = time_msec(); for (f = flows; f < &flows[n_dumped]; f++) { long long int used = f->stats.used; struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER; @@ -2914,3 +3009,342 @@ upcall_unixctl_purge(struct unixctl_conn *conn, int argc OVS_UNUSED, } unixctl_command_reply(conn, ""); } + +/* Flows are sorted in the following order: + * netdev, flow state (offloaded/kernel path), flow_pps_rate. + */ +static int +flow_compare_rebalance(const void *elem1, const void *elem2) +{ + const struct udpif_key *f1 = *(struct udpif_key **)elem1; + const struct udpif_key *f2 = *(struct udpif_key **)elem2; + int64_t diff; + + if (f1->in_netdev < f2->in_netdev) { + return -1; + } else if (f1->in_netdev > f2->in_netdev) { + return 1; + } + + if (f1->offloaded != f2->offloaded) { + return f2->offloaded - f1->offloaded; + } + + diff = (f1->offloaded == true) ? + f1->flow_pps_rate - f2->flow_pps_rate : + f2->flow_pps_rate - f1->flow_pps_rate; + + return (diff < 0) ? 
-1 : 1; +} + +/* Insert flows from pending array during rebalancing */ +static int +rebalance_insert_pending(struct udpif *udpif, struct udpif_key **pending_flows, + int pending_count, int insert_count, + uint64_t rate_threshold) +{ + int count = 0; + + for (int i = 0; i < pending_count; i++) { + struct udpif_key *flow = pending_flows[i]; + int err; + + /* Stop offloading pending flows if the insert count is + * reached and the flow rate is less than the threshold + */ + if (count >= insert_count && flow->flow_pps_rate < rate_threshold) { + break; + } + + /* Offload the flow to netdev */ + err = udpif_flow_program(udpif, flow, DPIF_OFFLOAD_ALWAYS); + + if (err == ENOSPC) { + /* Stop if we are out of resources */ + break; + } + + if (err) { + continue; + } + + /* Offload succeeded; delete it from the kernel datapath */ + udpif_flow_unprogram(udpif, flow, DPIF_OFFLOAD_NEVER); + + /* Change the state of the flow, adjust dpif counters */ + flow->offloaded = true; + + udpif_set_ukey_backlog_packets(flow); + count++; + } + + return count; +} + +/* Remove flows from offloaded array during rebalancing */ +static void +rebalance_remove_offloaded(struct udpif *udpif, + struct udpif_key **offloaded_flows, + int offload_count) +{ + for (int i = 0; i < offload_count; i++) { + struct udpif_key *flow = offloaded_flows[i]; + int err; + + /* Install the flow into kernel path first */ + err = udpif_flow_program(udpif, flow, DPIF_OFFLOAD_NEVER); + if (err) { + continue; + } + + /* Success; now remove offloaded flow from netdev */ + err = udpif_flow_unprogram(udpif, flow, DPIF_OFFLOAD_ALWAYS); + if (err) { + udpif_flow_unprogram(udpif, flow, DPIF_OFFLOAD_NEVER); + continue; + } + udpif_set_ukey_backlog_packets(flow); + flow->offloaded = false; + } +} + +/* + * Rebalance offloaded flows on a netdev that's in OOR state. + * + * The rebalancing is done in two phases. 
In the first phase, we check if + * the pending flows can be offloaded (if some resources became available + * in the meantime) by trying to offload each pending flow. If all pending + * flows get successfully offloaded, the OOR state is cleared on the netdev + * and there's nothing to rebalance. + * + * If some of the pending flows could not be offloaded, i.e, we still see + * the OOR error, then we move to the second phase of rebalancing. In this + * phase, the rebalancer compares pps-rate of an offloaded flow with the + * least pps-rate with that of a pending flow with the highest pps-rate from + * their respective sorted arrays. If pps-rate of the offloaded flow is less + * than the pps-rate of the pending flow, then it deletes the offloaded flow + * from the HW/netdev and adds it to kernel datapath and then offloads pending + * to HW/netdev. This process is repeated for every pair of offloaded and + * pending flows in the ordered list. The process stops when we encounter an + * offloaded flow that has a higher pps-rate than the corresponding pending + * flow. The entire rebalancing process is repeated in the next iteration. + */ +static bool +rebalance_device(struct udpif *udpif, struct udpif_key **offloaded_flows, + int offload_count, struct udpif_key **pending_flows, + int pending_count) +{ + + /* Phase 1 */ + int num_inserted = rebalance_insert_pending(udpif, pending_flows, + pending_count, pending_count, + 0); + if (num_inserted) { + VLOG_DBG("Offload rebalance: Phase1: inserted %d pending flows", + num_inserted); + } + + /* Adjust pending array */ + pending_flows = &pending_flows[num_inserted]; + pending_count -= num_inserted; + + if (!pending_count) { + /* + * Successfully offloaded all pending flows. The device + * is no longer in OOR state; done rebalancing this device. + */ + return false; + } + + /* + * Phase 2; determine how many offloaded flows to churn. 
+ */ +#define OFFL_REBAL_MAX_CHURN 1024 + int churn_count = 0; + while (churn_count < OFFL_REBAL_MAX_CHURN && churn_count < offload_count + && churn_count < pending_count) { + if (pending_flows[churn_count]->flow_pps_rate <= + offloaded_flows[churn_count]->flow_pps_rate) + break; + churn_count++; + } + + if (churn_count) { + VLOG_DBG("Offload rebalance: Phase2: removing %d offloaded flows", + churn_count); + } + + /* Bail early if nothing to churn */ + if (!churn_count) { + return true; + } + + /* Remove offloaded flows */ + rebalance_remove_offloaded(udpif, offloaded_flows, churn_count); + + /* Adjust offloaded array */ + offloaded_flows = &offloaded_flows[churn_count]; + offload_count -= churn_count; + + /* Replace offloaded flows with pending flows */ + num_inserted = rebalance_insert_pending(udpif, pending_flows, + pending_count, churn_count, + offload_count ? + offloaded_flows[0]->flow_pps_rate : + 0); + if (num_inserted) { + VLOG_DBG("Offload rebalance: Phase2: inserted %d pending flows", + num_inserted); + } + + return true; +} + +static struct udpif_key ** +udpif_add_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count, + size_t *alloc_flow_count, struct udpif_key *ukey) +{ + if (*total_flow_count >= *alloc_flow_count) { + sort_flows = x2nrealloc(sort_flows, alloc_flow_count, sizeof ukey); + } + sort_flows[(*total_flow_count)++] = ukey; + return sort_flows; +} + +/* + * Build sort_flows[] initially with flows that + * reference an 'OOR' netdev as their input port. + */ +static struct udpif_key ** +udpif_build_oor_flows(struct udpif_key **sort_flows, size_t *total_flow_count, + size_t *alloc_flow_count, struct udpif_key *ukey, + int *oor_netdev_count) +{ + struct netdev *netdev; + int count; + + /* Input netdev must be available for the flow */ + netdev = ukey->in_netdev; + if (!netdev) { + return sort_flows; + } + + /* Is the in-netdev for this flow in OOR state ? 
*/ + if (!netdev_get_hw_info(netdev, HW_INFO_TYPE_OOR)) { + ukey_netdev_unref(ukey); + return sort_flows; + } + + /* Add the flow to sort_flows[] */ + sort_flows = udpif_add_oor_flows(sort_flows, total_flow_count, + alloc_flow_count, ukey); + if (ukey->offloaded) { + count = netdev_get_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT); + ovs_assert(count >= 0); + if (count++ == 0) { + (*oor_netdev_count)++; + } + netdev_set_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT, count); + } else { + count = netdev_get_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT); + ovs_assert(count >= 0); + netdev_set_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT, ++count); + } + + return sort_flows; +} + +/* + * Rebalance offloaded flows on HW netdevs that are in OOR state. + */ +static void +udpif_flow_rebalance(struct udpif *udpif) +{ + struct udpif_key **sort_flows = NULL; + size_t alloc_flow_count = 0; + size_t total_flow_count = 0; + int oor_netdev_count = 0; + int offload_index = 0; + int pending_index; + + /* Collect flows (offloaded and pending) that reference OOR netdevs */ + for (size_t i = 0; i < N_UMAPS; i++) { + struct udpif_key *ukey; + struct umap *umap = &udpif->ukeys[i]; + + CMAP_FOR_EACH (ukey, cmap_node, &umap->cmap) { + ukey_to_flow_netdev(udpif, ukey); + sort_flows = udpif_build_oor_flows(sort_flows, &total_flow_count, + &alloc_flow_count, ukey, + &oor_netdev_count); + } + } + + /* Sort flows by OOR netdevs, state (offloaded/pending) and pps-rate */ + qsort(sort_flows, total_flow_count, sizeof(struct udpif_key *), + flow_compare_rebalance); + + /* + * We now have flows referencing OOR netdevs, that are sorted. We also + * have a count of offloaded and pending flows on each of the netdevs + * that are in OOR state. Now rebalance each oor-netdev. 
+ */ + while (oor_netdev_count) { + struct netdev *netdev; + int offload_count; + int pending_count; + bool oor; + + netdev = sort_flows[offload_index]->in_netdev; + ovs_assert(netdev_get_hw_info(netdev, HW_INFO_TYPE_OOR) == true); + VLOG_DBG("Offload rebalance: netdev: %s is OOR", netdev->name); + + offload_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT); + pending_count = netdev_get_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT); + pending_index = offload_index + offload_count; + + oor = rebalance_device(udpif, + &sort_flows[offload_index], offload_count, + &sort_flows[pending_index], pending_count); + netdev_set_hw_info(netdev, HW_INFO_TYPE_OOR, oor); + + offload_index = pending_index + pending_count; + netdev_set_hw_info(netdev, HW_INFO_TYPE_OFFL_COUNT, 0); + netdev_set_hw_info(netdev, HW_INFO_TYPE_PEND_COUNT, 0); + oor_netdev_count--; + } + + for (int i = 0; i < total_flow_count; i++) { + struct udpif_key *ukey = sort_flows[i]; + ukey_netdev_unref(ukey); + } + free(sort_flows); +} + +static int +udpif_flow_program(struct udpif *udpif, struct udpif_key *ukey, + enum dpif_offload_type offload_type) +{ + struct dpif_op *opsp; + struct ukey_op uop; + + opsp = &uop.dop; + put_op_init(&uop, ukey, DPIF_FP_CREATE); + dpif_operate(udpif->dpif, &opsp, 1, offload_type); + + return opsp->error; +} + +static int +udpif_flow_unprogram(struct udpif *udpif, struct udpif_key *ukey, + enum dpif_offload_type offload_type) +{ + struct dpif_op *opsp; + struct ukey_op uop; + + opsp = &uop.dop; + delete_op_init(udpif, &uop, ukey); + dpif_operate(udpif->dpif, &opsp, 1, offload_type); + + return opsp->error; +} diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 66a8a3b4c..6d1fc1c1c 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -543,6 +543,27 @@
++ Configures HW offload rebalancing, which allows dynamically + offloading and un-offloading flows while an offload-device is out of + resources (OOR). This policy allows flows to be selected for + offloading based on the packets-per-second (pps) rate of flows. +
+
+ Set this value to true
to enable this option.
+
+ The default value is false
. Changing this value requires
+ restarting the daemon.
+
+ This is only relevant if HW offloading is enabled (hw-offload). + When this policy is enabled, it also requires 'tc-policy' to + be set to 'skip_sw'. +
+