how to speed up query on a table with over 20million records?
Posted by:
li zheng
Date: March 25, 2018 07:27AM
server:
aws Aurora MySQL 5.7.12
db.t2.small (1 vCPU, 2GB memory)
table structure:
CREATE TABLE `jpstock_sma_1d` (
`symbol_id` int(11) NOT NULL,
`time_period` int(11) NOT NULL,
`datetime` datetime NOT NULL,
`high` double NOT NULL,
`low` double NOT NULL,
`close` double NOT NULL,
`updated` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`datetime`,`symbol_id`,`time_period`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
rows:
23,903,574
sql:
select symbol_id, time_period, max(datetime) from jpstock_sma_1d group by symbol_id, time_period;
explain:
"id": 1,
"select_type": "SIMPLE",
"table": "jpstock_sma_1d",
"partitions": null,
"type": "index",
"possible_keys": "PRIMARY",
"key": "PRIMARY",
"key_len": "13",
"ref": null,
"rows": 23903574,
"filtered": 100,
"Extra": "Using index; Using temporary; Using filesort"
Trace:
{
"steps": [
{
"join_preparation": {
"select#": 1,
"steps": [
{
"expanded_query": "/* select#1 */ select `jpstock_sma_1d`.`symbol_id` AS `symbol_id`,`jpstock_sma_1d`.`time_period` AS `time_period`,max(`jpstock_sma_1d`.`datetime`) AS `max(datetime)` from `jpstock_sma_1d` group by `jpstock_sma_1d`.`symbol_id`,`jpstock_sma_1d`.`time_period`"
}
]
}
},
{
"join_optimization": {
"select#": 1,
"steps": [
{
"substitute_generated_columns": {
}
},
{
"table_dependencies": [
{
"table": "`jpstock_sma_1d`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": [
]
}
]
},
{
"rows_estimation": [
{
"table": "`jpstock_sma_1d`",
"const_keys_added": {
"keys": [
"PRIMARY"
],
"cause": "group_by"
},
"range_analysis": {
"table_scan": {
"rows": 23903574,
"cost": 4.89e6
},
"potential_range_indexes": [
{
"index": "PRIMARY",
"usable": true,
"key_parts": [
"datetime",
"symbol_id",
"time_period"
]
}
],
"best_covering_index_scan": {
"index": "PRIMARY",
"cost": 4.86e6,
"chosen": true
},
"group_index_range": {
"potential_group_range_indexes": [
{
"index": "PRIMARY",
"covering": true,
"usable": false,
"cause": "group_attribute_not_prefix_in_index"
}
]
}
}
}
]
},
{
"considered_execution_plans": [
{
"plan_prefix": [
],
"table": "`jpstock_sma_1d`",
"best_access_path": {
"considered_access_paths": [
{
"rows_to_scan": 23903574,
"access_type": "scan",
"resulting_rows": 2.39e7,
"cost": 4.89e6,
"chosen": true,
"use_tmp_table": true
}
]
},
"condition_filtering_pct": 100,
"rows_for_plan": 2.39e7,
"cost_for_plan": 4.89e6,
"sort_cost": 2.39e7,
"new_cost_for_plan": 2.88e7,
"chosen": true
}
]
},
{
"attaching_conditions_to_tables": {
"original_condition": null,
"attached_conditions_computation": [
],
"attached_conditions_summary": [
{
"table": "`jpstock_sma_1d`",
"attached": null
}
]
}
},
{
"clause_processing": {
"clause": "GROUP BY",
"original_clause": "`jpstock_sma_1d`.`symbol_id`,`jpstock_sma_1d`.`time_period`",
"items": [
{
"item": "`jpstock_sma_1d`.`symbol_id`"
},
{
"item": "`jpstock_sma_1d`.`time_period`"
}
],
"resulting_clause_is_simple": true,
"resulting_clause": "`jpstock_sma_1d`.`symbol_id`,`jpstock_sma_1d`.`time_period`"
}
},
{
"reconsidering_access_paths_for_index_ordering": {
"clause": "GROUP BY",
"index_order_summary": {
"table": "`jpstock_sma_1d`",
"index_provides_order": false,
"order_direction": "undefined",
"index": "PRIMARY",
"plan_changed": false
}
}
},
{
"refine_plan": [
{
"table": "`jpstock_sma_1d`"
}
]
}
]
}
},
{
"join_execution": {
"select#": 1,
"steps": [
{
"creating_tmp_table": {
"tmp_table_info": {
"table": "intermediate_tmp_table",
"row_length": 14,
"key_length": 8,
"unique_constraint": false,
"location": "memory (heap)",
"row_limit_estimate": 1198372
}
}
},
{
"filesort_information": [
{
"direction": "asc",
"table": "intermediate_tmp_table",
"field": "symbol_id"
},
{
"direction": "asc",
"table": "intermediate_tmp_table",
"field": "time_period"
}
],
"filesort_priority_queue_optimization": {
"usable": false,
"cause": "not applicable (no LIMIT)"
},
"filesort_execution": [
],
"filesort_summary": {
"rows": 14013,
"examined_rows": 14013,
"number_of_tmp_files": 2,
"sort_buffer_size": 262128,
"sort_mode": "<sort_key, rowid>"
}
}
]
}
}
]
}
executing the sql above takes 8 minute.
is 20million records too much for mysql? or (1 vCPU, 2GB memory) is really not enough?
how can I speed up the query other than upgrading the hardware?
the only way I can think about is sharding the table. maybe partition?
please help!