Skip to content

Commit

Permalink
[scan](improvement) Adjust parallel scanners num (apache#38430)
Browse files Browse the repository at this point in the history
## Proposed changes

Before this change (the TPC-DS query below took 53 sec):
```
mysql [tpcds]>select  count(*) from (     select distinct c_last_name, c_first_name, d_date     from store_sales, date_dim, customer           where store_sales.ss_sold_date_sk = date_dim.d_date_sk       and store_sales.ss_customer_sk = customer.c_customer_sk       and d_month_seq between 1186 and 1186 + 11   intersect     select distinct c_last_name, c_first_name, d_date     from catalog_sales, date_dim, customer           where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk       and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk       and d_month_seq between 1186 and 1186 + 11   intersect     select distinct c_last_name, c_first_name, d_date     from web_sales, date_dim, customer           where web_sales.ws_sold_date_sk = date_dim.d_date_sk       and web_sales.ws_bill_customer_sk = customer.c_customer_sk       and d_month_seq between 1186 and 1186 + 11 ) hot_cust limit 100;
+----------+
| count(*) |
+----------+
| 24874049 |
+----------+
1 row in set (53 sec)
```

After this change (the same query took 40.45 sec):
```
mysql [tpcds]>select  count(*) from (     select distinct c_last_name, c_first_name, d_date     from store_sales, date_dim, customer           where store_sales.ss_sold_date_sk = date_dim.d_date_sk       and store_sales.ss_customer_sk = customer.c_customer_sk       and d_month_seq between 1186 and 1186 + 11   intersect     select distinct c_last_name, c_first_name, d_date     from catalog_sales, date_dim, customer           where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk       and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk       and d_month_seq between 1186 and 1186 + 11   intersect     select distinct c_last_name, c_first_name, d_date     from web_sales, date_dim, customer           where web_sales.ws_sold_date_sk = date_dim.d_date_sk       and web_sales.ws_bill_customer_sk = customer.c_customer_sk       and d_month_seq between 1186 and 1186 + 11 ) hot_cust limit 100;
+----------+
| count(*) |
+----------+
| 24874049 |
+----------+
1 row in set (40.45 sec)
```

<!--Describe your changes.-->
  • Loading branch information
HappenLee authored Jul 29, 2024
1 parent 9bab631 commit 11b332f
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions be/src/pipeline/exec/scan_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1223,9 +1223,8 @@ Status ScanLocalState<Derived>::_start_scanners(
state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(),
state()->scan_queue_mem_limit(), _scan_dependency,
// 1. If data distribution is ignored , we use 1 instance to scan.
- // 2. Else if this operator is not file scan operator, we use config::doris_scanner_thread_pool_thread_num scanners to scan.
- // 3. Else, file scanner will consume much memory so we use config::doris_scanner_thread_pool_thread_num / query_parallel_instance_num scanners to scan.
- p.ignore_data_distribution() || !p.is_file_scan_operator()
+ // 2. Else, file scanner will consume much memory so we use config::doris_scanner_thread_pool_thread_num / query_parallel_instance_num scanners to scan.
+ p.ignore_data_distribution() && !p.is_file_scan_operator()
? 1
: state()->query_parallel_instance_num());
return Status::OK();
Expand Down

0 comments on commit 11b332f

Please sign in to comment.