Skip to content

Commit

Permalink
feat(tianmu) Refactor the aggregation and the IN operator to multithr…
Browse files Browse the repository at this point in the history
…ead #472 (#422 #465 #466) (#472)

* Refactor the aggregation to multithread

Co-authored-by: hustjieke <[email protected]>
  • Loading branch information
adofsauron and hustjieke authored Sep 17, 2022
1 parent e3a9902 commit 208f79d
Show file tree
Hide file tree
Showing 15 changed files with 445 additions and 138 deletions.
8 changes: 8 additions & 0 deletions sql/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -3052,6 +3052,14 @@ inline bool is_perfschema_db(const char *name)
*/
inline bool belongs_to_p_s(TABLE_LIST *tl)
{
if (!tl->db) {
return false;
}

if (!tl->table_name) {
return false;
}

return (!strcmp("performance_schema", tl->db) &&
strcmp(tl->table_name, "threads") &&
strstr(tl->table_name, "setup_") == NULL);
Expand Down
172 changes: 97 additions & 75 deletions storage/tianmu/core/aggregation_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ void AggregationAlgorithm::Aggregate(bool just_distinct, int64_t &limit, int64_t
}
} else {
int64_t local_limit = limit == -1 ? upper_approx_of_groups : limit;
MultiDimensionalGroupByScan(gbw, local_limit, offset, sender, limit_less_than_no_groups);
MultiDimensionalGroupByScan(gbw, local_limit, offset, sender, limit_less_than_no_groups, true);
if (limit != -1) limit = local_limit;
}
t->ClearMultiIndexP(); // cleanup (i.e. regarded as materialized,
Expand All @@ -205,7 +205,8 @@ void AggregationAlgorithm::Aggregate(bool just_distinct, int64_t &limit, int64_t
}

void AggregationAlgorithm::MultiDimensionalGroupByScan(GroupByWrapper &gbw, int64_t &limit, int64_t &offset,
ResultSender *sender, bool limit_less_than_no_groups) {
ResultSender *sender, bool limit_less_than_no_groups,
bool force_parall) {
MEASURE_FET("TempTable::MultiDimensionalGroupByScan(...)");
bool first_pass = true;
// tuples are numbered according to tuple_left filter (not used, if tuple_left
Expand Down Expand Up @@ -234,9 +235,19 @@ void AggregationAlgorithm::MultiDimensionalGroupByScan(GroupByWrapper &gbw, int6
}
gbw.SetDistinctTuples(mit.NumOfTuples());

auto get_thd_cnt = []() {
int hardware_concurrency = std::thread::hardware_concurrency();
    // TODO: The original code used the number of CPU cores divided by 4; the rationale for this divisor still needs to be investigated.
return hardware_concurrency > 4 ? (hardware_concurrency / 4) : 1;
};

int thd_cnt = 1;
if (ParallelAllowed(gbw) && !limit_less_than_no_groups) {
thd_cnt = std::thread::hardware_concurrency() / 4; // For concurrence reason, don't swallow all cores once.
if (force_parall) {
thd_cnt = get_thd_cnt();
} else {
if (ParallelAllowed(gbw) && !limit_less_than_no_groups) {
thd_cnt = get_thd_cnt(); // For concurrence reason, don't swallow all cores once.
}
}

AggregationWorkerEnt ag_worker(gbw, mind, thd_cnt, this);
Expand Down Expand Up @@ -278,13 +289,13 @@ void AggregationAlgorithm::MultiDimensionalGroupByScan(GroupByWrapper &gbw, int6

// Grouping on a packrow
int64_t packrow_length = mit.GetPackSizeLeft();
int grouping_result = AggregatePackrow(gbw, &mit, cur_tuple);
AggregaGroupingResult grouping_result = AggregatePackrow(gbw, &mit, cur_tuple);
if (sender) {
sender->SetAffectRows(gbw.NumOfGroups());
}
if (grouping_result == 2) throw common::KilledException();
if (grouping_result != 5) packrows_found++; // for statistics
if (grouping_result == 1) break; // end of the aggregation
if (grouping_result == AggregaGroupingResult::AGR_KILLED) throw common::KilledException();
if (grouping_result != AggregaGroupingResult::AGR_NO_LEFT) packrows_found++; // for statistics
if (grouping_result == AggregaGroupingResult::AGR_FINISH) break; // end of the aggregation
if (!gbw.IsFull() && gbw.MemoryBlocksLeft() == 0) {
gbw.SetAsFull();
}
Expand Down Expand Up @@ -361,7 +372,7 @@ void AggregationAlgorithm::MultiDimensionalGroupByScan(GroupByWrapper &gbw, int6
displayed_no_groups = t->NumOfObj();
if (t->NumOfObj() >= limit) break;
if (gbw.AnyTuplesLeft()) gbw.ClearUsed(); // prepare for the next pass, if needed
} while (gbw.AnyTuplesLeft()); // do the next pass, if anything left
} while (gbw.AnyTuplesLeft() && (1 == thd_cnt)); // do the next pass, if anything left
} catch (...) {
ag_worker.Commit(false);
throw;
Expand Down Expand Up @@ -468,11 +479,12 @@ void AggregationAlgorithm::MultiDimensionalDistinctScan(GroupByWrapper &gbw, MII
}
}

int AggregationAlgorithm::AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit, int64_t cur_tuple) {
AggregaGroupingResult AggregationAlgorithm::AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit, int64_t cur_tuple) {
std::scoped_lock guard(mtx);
int64_t packrow_length = mit->GetPackSizeLeft();
if (!gbw.AnyTuplesLeft(cur_tuple, cur_tuple + packrow_length - 1)) {
mit->NextPackrow();
return 5;
return AggregaGroupingResult::AGR_NO_LEFT;
}
int64_t uniform_pos = common::NULL_VALUE_64;
bool skip_packrow = false;
Expand Down Expand Up @@ -509,7 +521,7 @@ int AggregationAlgorithm::AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit,
if (gbw.NumOfGroupingAttrs() == gbw.NumOfAttrs() // just DISTINCT without grouping
|| stop_all) { // or aggregation already done on rough level
gbw.TuplesResetAll(); // no more rows needed, just produce output
return 1; // aggregation finished
return AggregaGroupingResult::AGR_FINISH; // aggregation finished
}
}
if (skip_packrow)
Expand All @@ -522,15 +534,15 @@ int AggregationAlgorithm::AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit,

if (packrow_done || skip_packrow) {
mit->NextPackrow();
return 0; // success - roughly omitted
return AggregaGroupingResult::AGR_OK; // success - roughly omitted
}

// bool require_locking_ag = true; // a new packrow,
// so locking will be needed bool require_locking_gr = (uniform_pos ==
// common::NULL_VALUE_64); // do not lock if the grouping row is uniform

while (mit->IsValid()) { // becomes invalid on pack end
if (m_conn->Killed()) return 2; // killed
if (m_conn->Killed()) return AggregaGroupingResult::AGR_KILLED; // killed
if (gbw.TuplesGet(cur_tuple)) {
if (require_locking_gr) {
for (int gr_a = 0; gr_a < gbw.NumOfGroupingAttrs(); gr_a++)
Expand Down Expand Up @@ -560,7 +572,7 @@ int AggregationAlgorithm::AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit,
gbw.SetAllGroupsFound();
if (gbw.NumOfGroupingAttrs() == gbw.NumOfAttrs()) { // just DISTINCT without grouping
gbw.TuplesResetAll(); // no more rows needed, just produce output
return 1; // aggregation finished
return AggregaGroupingResult::AGR_FINISH; // aggregation finished
}
}
}
Expand All @@ -577,7 +589,9 @@ int AggregationAlgorithm::AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit,
for (int gr_a = gbw.NumOfGroupingAttrs(); gr_a < gbw.NumOfAttrs(); gr_a++)
if (gbw.ColumnNotOmitted(gr_a)) {
bool value_successfully_aggregated = gbw.PutAggregatedValue(gr_a, pos, *mit, factor);
if (!value_successfully_aggregated) gbw.DistinctlyOmitted(gr_a, cur_tuple);
if (!value_successfully_aggregated) {
gbw.DistinctlyOmitted(gr_a, cur_tuple);
}
}
}
}
Expand All @@ -587,7 +601,7 @@ int AggregationAlgorithm::AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit,
if (mit->PackrowStarted()) break;
}
gbw.CommitResets();
return 0; // success
return AggregaGroupingResult::AGR_OK; // success
}

void AggregationAlgorithm::AggregateFillOutput(GroupByWrapper &gbw, int64_t gt_pos, int64_t &omit_by_offset) {
Expand Down Expand Up @@ -813,23 +827,27 @@ void AggregationAlgorithm::TaskFillOutput(GroupByWrapper *gbw, Transaction *ci,
}
}

void AggregationWorkerEnt::TaskAggrePacks(MIUpdatingIterator *taskIterator, [[maybe_unused]] DimensionVector *dims,
[[maybe_unused]] MIIterator *mit, [[maybe_unused]] int pstart,
[[maybe_unused]] int pend, int tuple, GroupByWrapper *gbw, Transaction *ci) {
int i = 0;
int64_t cur_tuple = tuple;
common::SetMySQLTHD(ci->Thd());
current_txn_ = ci;
void AggregationWorkerEnt::TaskAggrePacks(MIIterator *taskIterator, [[maybe_unused]] DimensionVector *dims,
[[maybe_unused]] MIIterator *mit, [[maybe_unused]] CTask *task,
GroupByWrapper *gbw, Transaction *ci) {
taskIterator->Rewind();
int task_pack_num = 0;
while (taskIterator->IsValid()) {
int64_t packrow_length = taskIterator->GetPackSizeLeft();
int grouping_result = aa->AggregatePackrow(*gbw, taskIterator, cur_tuple);
if (grouping_result != 5) i++;
if (grouping_result == 1) break;
if (grouping_result == 2) throw common::KilledException();
if (grouping_result == 3 || grouping_result == 4) throw common::NotImplementedException("Aggregation overflow.");
cur_tuple += packrow_length;
}
TIANMU_LOG(LogCtl_Level::DEBUG, "TaskAggrePacks routine ends. Task id %d", taskIterator->GetTaskNum());
if ((task_pack_num >= task->dwStartPackno) && (task_pack_num <= task->dwEndPackno)) {

int cur_tuple = (*task->dwPack2cur)[task_pack_num];
MIInpackIterator mii(*taskIterator);
AggregaGroupingResult grouping_result = aa->AggregatePackrow(*gbw, &mii, cur_tuple);
if (grouping_result == AggregaGroupingResult::AGR_FINISH) break;
if (grouping_result == AggregaGroupingResult::AGR_KILLED) throw common::KilledException();
if (grouping_result == AggregaGroupingResult::AGR_OVERFLOW ||
grouping_result == AggregaGroupingResult::AGR_OTHER_ERROR)
throw common::NotImplementedException("Aggregation overflow.");
}

taskIterator->NextPackrow();
++task_pack_num;
}
}

void AggregationWorkerEnt::PrepShardingCopy(MIIterator *mit, GroupByWrapper *gb_sharding,
Expand All @@ -852,83 +870,87 @@ void AggregationWorkerEnt::DistributeAggreTaskAverage(MIIterator &mit) {
std::vector<std::unique_ptr<GroupByWrapper>> vGBW;
vGBW.reserve(m_threads);
vTask.reserve(m_threads);
if (rc_control_.isOn()) rc_control_.lock(conn->GetThreadID()) << "Prepare data for parallel aggreation" << system::unlock;
if (rc_control_.isOn())
rc_control_.lock(conn->GetThreadID()) << "Prepare data for parallel aggreation" << system::unlock;

int packnum = 0;
int curtuple_index = 0;
std::unordered_map<int, int> pack2cur;
while (mit.IsValid()) {
pack2cur.emplace(std::pair<int, int>(packnum, curtuple_index));

int64_t packrow_length = mit.GetPackSizeLeft();
curtuple_index += packrow_length;
packnum++;
mit.NextPackrow();
}

int loopcnt = (packnum < m_threads) ? packnum : m_threads;
pack2cur.emplace(std::pair<int, int>(packnum, curtuple_index));

int loopcnt = (packnum < m_threads) ? packnum : m_threads;
int mod = packnum % loopcnt;
int num = packnum / loopcnt;

utils::result_set<void> res;
for (int i = 0; i < loopcnt; ++i) {
res.insert(ha_rcengine_->query_thread_pool.add_task(&AggregationWorkerEnt::PrepShardingCopy, this, &mit, gb_main, &vGBW));
res.insert(
ha_rcengine_->query_thread_pool.add_task(&AggregationWorkerEnt::PrepShardingCopy, this, &mit, gb_main, &vGBW));

int pack_start = i * num;
int pack_end = 0;
int dwPackNum = 0;
if (i == (loopcnt - 1)) {
pack_end = packnum;
dwPackNum = packnum;
} else {
pack_end = (i + 1) * num - 1;
dwPackNum = pack_end + 1;
}

int cur_start = pack2cur[pack_start];
int cur_end = pack2cur[pack_end] - 1;

CTask tmp;
tmp.dwTaskId = i;
tmp.dwPackNum = num + mod + i * num;
tmp.dwPackNum = dwPackNum;
tmp.dwStartPackno = pack_start;
tmp.dwEndPackno = pack_end;
tmp.dwStartTuple = cur_start;
tmp.dwEndTuple = cur_end;
tmp.dwTuple = cur_start;
tmp.dwPack2cur = &pack2cur;

vTask.push_back(tmp);
}
res.get_all_with_except();

if (rc_control_.isOn())
rc_control_.lock(conn->GetThreadID()) << "Prepare data for parallel aggreation done. Total packnum " << packnum
<< system::unlock;
packnum = 0;
mit.Rewind();
int curtuple = 0;
while (mit.IsValid()) {
int64_t packrow_length = mit.GetPackSizeLeft();
packnum++;
curtuple += packrow_length;
for (auto &it : vTask) {
if (packnum == it.dwPackNum) {
it.dwEndPackno = mit.GetCurPackrow(0);
it.dwTuple = curtuple;
}
}

if (rc_control_.isOn())
rc_control_.lock(conn->GetThreadID()) << " GetCurPackrow: " << mit.GetCurPackrow(0) << " packnum: " << packnum
<< " cur_tuple: " << curtuple << system::unlock;

mit.NextPackrow();
}

std::vector<MultiIndex> mis;
mis.reserve(vTask.size());

std::vector<MIUpdatingIterator> taskIterator;
std::vector<MIIterator> taskIterator;
taskIterator.reserve(vTask.size());

utils::result_set<void> res1;
for (uint i = 0; i < vTask.size(); ++i) {
auto &mi = mis.emplace_back(*mind, true);
if (dims.NoDimsUsed() == 0) dims.SetAll();
auto &mii = taskIterator.emplace_back(&mi, dims);
auto &mii = taskIterator.emplace_back(mit, true);
mii.SetTaskNum(vTask.size());
mii.SetTaskId(i);
mii.SetNoPacksToGo(vTask[i].dwEndPackno);
mii.RewindToPack((i == 0) ? 0 : vTask[i - 1].dwEndPackno + 1);
}

res1.insert(ha_rcengine_->query_thread_pool.add_task(&AggregationWorkerEnt::TaskAggrePacks, this, &taskIterator[0], &dims,
&mit, 0, vTask[0].dwEndPackno, 0, gb_main, conn));
for (size_t i = 1; i < vTask.size(); ++i) {
res1.insert(ha_rcengine_->query_thread_pool.add_task(&AggregationWorkerEnt::TaskAggrePacks, this, &taskIterator[i], &dims,
&mit, vTask[i - 1].dwEndPackno + 1, vTask[i].dwEndPackno,
vTask[i - 1].dwTuple, vGBW[i].get(), conn));
for (size_t i = 0; i < vTask.size(); ++i) {
GroupByWrapper *gbw = i == 0 ? gb_main : vGBW[i].get();
res1.insert(ha_rcengine_->query_thread_pool.add_task(&AggregationWorkerEnt::TaskAggrePacks, this, &taskIterator[i],
&dims, &mit, &vTask[i], gbw, conn));
}
res1.get_all_with_except();

for (size_t i = 0; i < vTask.size(); ++i) {
    // Merge aggregation data together
if (i != 0) gb_main->Merge(*(vGBW[i]));
if (i != 0) {
aa->MultiDimensionalDistinctScan(*(vGBW[i]), mit);
gb_main->Merge(*(vGBW[i]));
}
}
}

} // namespace core
} // namespace Tianmu
22 changes: 16 additions & 6 deletions storage/tianmu/core/aggregation_algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,19 @@
#include "core/mi_iterator.h"
#include "core/query.h"
#include "core/temp_table.h"
#include "core/ctask.h"

namespace Tianmu {
namespace core {
// Result codes returned by AggregationAlgorithm::AggregatePackrow and
// inspected by the group-by scan and the parallel aggregation workers.
enum class AggregaGroupingResult {
  AGR_OK = 0,           // success: packrow aggregated (or roughly omitted)
  AGR_FINISH = 1,       // aggregation finished - no more rows needed, produce output
  AGR_KILLED = 2,       // query killed - caller throws common::KilledException
  AGR_OVERFLOW = 3,     // overflow - caller raises "Aggregation overflow."
  AGR_OTHER_ERROR = 4,  // other error - treated like overflow by the worker
  AGR_NO_LEFT = 5       // pack already aggregated - skipped (not counted in statistics)
};

class AggregationAlgorithm {
public:
AggregationAlgorithm(TempTable *tt)
Expand All @@ -37,13 +47,13 @@ class AggregationAlgorithm {
bool &ag_not_changeabe, bool &stop_all, int64_t &uniform_pos, int64_t rows_in_pack,
int64_t local_factor, int just_one_aggr = -1);
void MultiDimensionalGroupByScan(GroupByWrapper &gbw, int64_t &limit, int64_t &offset, ResultSender *sender,
bool limit_less_than_no_groups);
bool limit_less_than_no_groups, bool force_parall);
void MultiDimensionalDistinctScan(GroupByWrapper &gbw, MIIterator &mit);
void AggregateFillOutput(GroupByWrapper &gbw, int64_t gt_pos, int64_t &omit_by_offset);

// Return code for AggregatePackrow: 0 - success, 1 - stop aggregation
// (finished), 5 - pack already aggregated (skip)
int AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit, int64_t cur_tuple);
AggregaGroupingResult AggregatePackrow(GroupByWrapper &gbw, MIIterator *mit, int64_t cur_tuple);

// No parallel for subquery/join/distinct cases
bool ParallelAllowed(GroupByWrapper &gbw) {
Expand Down Expand Up @@ -77,18 +87,18 @@ class AggregationWorkerEnt {
// Return code for AggregatePackrow: 0 - success, 1 - stop aggregation
// (finished), 2 - killed, 3
// - overflow, 4 - other error, 5 - pack already aggregated (skip)
int AggregatePackrow(MIUpdatingIterator &lmit, int64_t cur_tuple) {
AggregaGroupingResult AggregatePackrow(MIIterator &lmit, int64_t cur_tuple) {
return aa->AggregatePackrow(*gb_main, &lmit, cur_tuple);
}
int AggregatePackrow(MIInpackIterator &lmit, int64_t cur_tuple) {
AggregaGroupingResult AggregatePackrow(MIInpackIterator &lmit, int64_t cur_tuple) {
return aa->AggregatePackrow(*gb_main, &lmit, cur_tuple);
}
void Commit([[maybe_unused]] bool do_merge = true) { gb_main->CommitResets(); }
void ReevaluateNumberOfThreads([[maybe_unused]] MIIterator &mit) {}
int ThreadsUsed() { return m_threads; }
void Barrier() {}
void TaskAggrePacks(MIUpdatingIterator *taskIterator, DimensionVector *dims, MIIterator *mit, int pstart, int pend,
int tuple, GroupByWrapper *gbw, Transaction *ci);
void TaskAggrePacks(MIIterator *taskIterator, DimensionVector *dims, MIIterator *mit, CTask *task,
GroupByWrapper *gbw, Transaction *ci);
void DistributeAggreTaskAverage(MIIterator &mit);
void PrepShardingCopy(MIIterator *mit, GroupByWrapper *gb_sharding,
std::vector<std::unique_ptr<GroupByWrapper>> *vGBW);
Expand Down
Loading

0 comments on commit 208f79d

Please sign in to comment.