Skip to content

Commit eb5e56e

Browse files
author
Vijay Vasudevan
committed
TensorFlow: Upstream changes to git.
Change 109344341: Teach ./configure about Python 3 (and other minor Python 3 issues). ./configure now writes bazel.rc based on a bazel.rc.template, which gives us a place to tell bazel which version of Python we were using. Also fix a few tests whose Python 3 support had degraded. The only thing left before we have Python 3 support is protocolbuffers/protobuf#1023.
Change 109343002: Update ops.pbtxt to reflect 109321497.
Change 109342838: Do memory deallocation outside the critical section in gpu_event_mgr.cc.
Change 109334210: PTB LSTM example: use slicing instead of splitting the inputs.
Change 109332238: Clean up the TensorBoard local development environment.
Change 109331051: Use __all__ in __init__.py to restrict exported modules. Specifically, __all__ is now anything that (1) doesn't begin with an underscore and (2) isn't a non-whitelisted module. This fixes one tiny piece of b/25561952. Specifically, the following no longer exist: tf.np, tf.math_ops, and tf.variables. tf.ops and tf.tensor_util still exist but shouldn't; that will have to wait for a later CL.
Change 109327154: tf.tuple now allows Tensors to be passed in as control_inputs, like tf.control_dependencies.
Change 109324239: Make tf.control_dependencies(None) clear the control dependencies. Use that to prevent ops created for Variables from inheriting the current control dependencies. This fixes issues when using ExponentialMovingAverages with control dependencies.
Change 109323719: Added support for boolean tf.scatter_update.
Base CL: 109348398
1 parent a4806a3 commit eb5e56e

31 files changed

Lines changed: 346 additions & 184 deletions

tensorflow/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ filegroup(
4040
py_library(
4141
name = "tensorflow_py",
4242
srcs = ["__init__.py"],
43+
srcs_version = "PY2AND3",
4344
visibility = ["//visibility:public"],
4445
deps = ["//tensorflow/python"],
4546
)

tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,13 @@ EventMgr::~EventMgr() {
4040
delete e;
4141
}
4242
while (!used_events_.empty()) {
43-
delete used_events_[0].event;
44-
delete used_events_[0].mem;
45-
if (used_events_[0].bufrec.buf) {
46-
used_events_[0].bufrec.alloc->DeallocateRaw(used_events_[0].bufrec.buf);
43+
InUse* ue = &used_events_[0];
44+
delete ue->event;
45+
delete ue->mem;
46+
if (ue->bufrec.buf) {
47+
ue->bufrec.alloc->DeallocateRaw(ue->bufrec.buf);
4748
}
48-
if (used_events_[0].func != nullptr)
49-
threadpool_.Schedule(used_events_[0].func);
49+
if (ue->func != nullptr) threadpool_.Schedule(ue->func);
5050
used_events_.pop_front();
5151
}
5252
}
@@ -60,10 +60,12 @@ EventMgr::~EventMgr() {
6060
void EventMgr::PollLoop() {
6161
while (!stop_polling_.HasBeenNotified()) {
6262
Env::Default()->SleepForMicroseconds(1 * 1000);
63+
ToFreeVector to_free;
6364
{
6465
mutex_lock l(mu_);
65-
PollEvents(true);
66+
PollEvents(true, &to_free);
6667
}
68+
FreeMemory(to_free);
6769
}
6870
polling_stopped_.Notify();
6971
}
@@ -103,7 +105,8 @@ void EventMgr::QueueInUse(gpu::Stream* stream, InUse iu) {
103105
// GPU memory use to spike needlessly. An alternative strategy would
104106
// be to throttle new Op execution until the pending event queue
105107
// clears.
106-
void EventMgr::PollEvents(bool is_dedicated_poller) {
108+
void EventMgr::PollEvents(bool is_dedicated_poller,
109+
gtl::InlinedVector<InUse, 4>* to_free) {
107110
VLOG(2) << "PollEvents free_events_ " << free_events_.size()
108111
<< " used_events_ " << used_events_.size();
109112
// Sweep the remaining events in order. If this is the dedicated
@@ -123,11 +126,9 @@ void EventMgr::PollEvents(bool is_dedicated_poller) {
123126
if (!is_dedicated_poller) return; // quit processing queue
124127
break;
125128
case gpu::Event::Status::kComplete:
126-
delete iu.mem;
127-
if (iu.bufrec.buf) iu.bufrec.alloc->DeallocateRaw(iu.bufrec.buf);
128-
// The function must be called in another thread, outside of
129-
// the mutex held here.
130-
if (iu.func != nullptr) threadpool_.Schedule(iu.func);
129+
// Make a copy of the InUse record so we can free it after releasing
130+
// the lock
131+
to_free->push_back(iu);
131132
free_events_.push_back(iu.event);
132133
// Mark this InUse record as completed.
133134
iu.event = nullptr;

tensorflow/core/common_runtime/gpu/gpu_event_mgr.h

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ limitations under the License.
1818

1919
#include <deque>
2020
#include <vector>
21+
#include "tensorflow/stream_executor/stream.h"
2122
#include "tensorflow/core/lib/core/notification.h"
2223
#include "tensorflow/core/lib/core/threadpool.h"
24+
#include "tensorflow/core/lib/gtl/inlined_vector.h"
2325
#include "tensorflow/core/platform/port.h"
2426
#include "tensorflow/core/platform/thread_annotations.h"
2527
#include "tensorflow/core/public/tensor.h"
@@ -47,9 +49,13 @@ class EventMgr {
4749
// currently enqueued on *stream have completed.
4850
inline void ThenDeleteTensors(perftools::gputools::Stream* stream,
4951
std::vector<Tensor>* tensors) {
50-
mutex_lock l(mu_);
51-
QueueTensors(stream, tensors);
52-
PollEvents(false);
52+
ToFreeVector to_free;
53+
{
54+
mutex_lock l(mu_);
55+
QueueTensors(stream, tensors);
56+
PollEvents(false, &to_free);
57+
}
58+
FreeMemory(to_free);
5359
}
5460

5561
struct BufRec {
@@ -61,16 +67,24 @@ class EventMgr {
6167
// on it as soon as all events currently enqueued on *stream have completed.
6268
inline void ThenDeleteBuffer(perftools::gputools::Stream* stream,
6369
BufRec bufrec) {
64-
mutex_lock l(mu_);
65-
QueueBuffer(stream, bufrec);
66-
PollEvents(false);
70+
ToFreeVector to_free;
71+
{
72+
mutex_lock l(mu_);
73+
QueueBuffer(stream, bufrec);
74+
PollEvents(false, &to_free);
75+
}
76+
FreeMemory(to_free);
6777
}
6878

6979
inline void ThenExecute(perftools::gputools::Stream* stream,
7080
std::function<void()> func) {
71-
mutex_lock l(mu_);
72-
QueueFunc(stream, func);
73-
PollEvents(false);
81+
ToFreeVector to_free;
82+
{
83+
mutex_lock l(mu_);
84+
QueueFunc(stream, func);
85+
PollEvents(false, &to_free);
86+
}
87+
FreeMemory(to_free);
7488
}
7589

7690
private:
@@ -85,10 +99,22 @@ class EventMgr {
8599
std::function<void()> func;
86100
};
87101

102+
typedef gtl::InlinedVector<InUse, 4> ToFreeVector;
103+
104+
void FreeMemory(const ToFreeVector& to_free) {
105+
for (const auto& iu : to_free) {
106+
delete iu.mem;
107+
if (iu.bufrec.buf) iu.bufrec.alloc->DeallocateRaw(iu.bufrec.buf);
108+
// The function must be called in another thread.
109+
if (iu.func != nullptr) threadpool_.Schedule(iu.func);
110+
}
111+
}
112+
88113
// Stream-enqueue an unused Event and save with it a collection of
89114
// Tensors and/or a BufRec to be deleted only after the Event
90115
// records.
91116
void QueueInUse(perftools::gputools::Stream* stream, InUse in_use)
117+
92118
EXCLUSIVE_LOCKS_REQUIRED(mu_);
93119

94120
void QueueTensors(perftools::gputools::Stream* stream,
@@ -109,8 +135,11 @@ class EventMgr {
109135

110136
// This function should be called at roughly the same tempo as
111137
// QueueTensors() to check whether pending events have recorded,
112-
// and then retire them.
113-
void PollEvents(bool is_dedicated_poller) EXCLUSIVE_LOCKS_REQUIRED(mu_);
138+
// and then retire them. It appends InUse elements that need cleanup
139+
// to "*to_free". The caller should call FreeMemory(to_free)
140+
// when this returns.
141+
void PollEvents(bool is_dedicated_poller, ToFreeVector* to_free)
142+
EXCLUSIVE_LOCKS_REQUIRED(mu_);
114143

115144
// An internal polling loop that runs at a low frequency to clear
116145
// straggler Events.

tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,12 @@ class TEST_EventMgrHelper {
4747
}
4848

4949
void PollEvents(bool is_dedicated_poller) {
50-
mutex_lock l(em_->mu_);
51-
em_->PollEvents(is_dedicated_poller);
50+
EventMgr::ToFreeVector to_free;
51+
{
52+
mutex_lock l(em_->mu_);
53+
em_->PollEvents(is_dedicated_poller, &to_free);
54+
}
55+
em_->FreeMemory(to_free);
5256
}
5357

5458
private:

tensorflow/core/kernels/scatter_op.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ class ScatterUpdateOp : public OpKernel {
140140

141141
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_UPDATE_INT32);
142142
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_UPDATE_INT64);
143+
REGISTER_SCATTER_UPDATE_INT32(bool)
144+
REGISTER_SCATTER_UPDATE_INT64(bool)
143145

144146
#undef REGISTER_SCATTER_UPDATE_INT64
145147
#undef REGISTER_SCATTER_UPDATE_INT32

0 commit comments

Comments (0)