優化前
mysql> use test_old;
Database changed
mysql> set profiling = 'ON';
Query OK, 0 rows affected
mysql> show variables like 'profiling';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| profiling | ON |
+---------------+-------+
1 row in set
mysql> show profiles;
+----------+------------+---------------------------------------------------------------------------------------------+
| Query_ID | Duration | Query |
+----------+------------+---------------------------------------------------------------------------------------------+
| 1 | 0.00419 | show variables like 'profiling' |
| 2 | 1.78590175 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') |
+----------+------------+---------------------------------------------------------------------------------------------+
2 rows in set
mysql> EXPLAIN SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824');
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+
| 1 | SIMPLE | table_test | NULL | range | test_id | test_id | 33 | NULL | 170496 | 100 | Using index condition; Using MRR |
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+
1 row in set
優化後
mysql> use test;
Database changed
mysql> set profiling = 'ON';
Query OK, 0 rows affected
mysql> show variables like 'profiling';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| profiling | ON |
+---------------+-------+
1 row in set
mysql> show profiles;
+----------+-----------+---------------------------------------------------------------------------------------------+
| Query_ID | Duration | Query |
+----------+-----------+---------------------------------------------------------------------------------------------+
| 1 | 0.0060565 | show variables like 'profiling' |
| 2 | 0.1755525 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') |
+----------+-----------+---------------------------------------------------------------------------------------------+
2 rows in set
mysql> EXPLAIN SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824');
+----+-------------+----------------------+--------------------------------------------------------------------------------------+-------+------------------+---------+---------+------+--------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------------------+--------------------------------------------------------------------------------------+-------+------------------+---------+---------+------+--------+----------+-------------+
| 1 | SIMPLE | table_test | p20221126,p20221127,p20221128,p20221129,p20221130,p20221201,p20221202,p20221203,pmax | range | PRIMARY,test_id | PRIMARY | 32 | NULL | 185501 | 100 | Using where |
+----+-------------+----------------------+--------------------------------------------------------------------------------------+-------+------------------+---------+---------+------+--------+----------+-------------+
1 row in set
table_test表資訊
-- ----------------------------
-- Table structure for table_test
-- ----------------------------
DROP TABLE IF EXISTS `table_test`;
CREATE TABLE `table_test` (
`id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
`test_id` varchar(10) DEFAULT NULL COMMENT '場景id',
`data_time` timestamp NULL DEFAULT NULL COMMENT '資料時間',
`service_id` varchar(50) DEFAULT NULL COMMENT '流量型別',
`total_traffic` varchar(50) DEFAULT NULL COMMENT '總流量',
`ul_traffic` varchar(50) DEFAULT NULL COMMENT '上行流量',
`dl_traffic` varchar(50) DEFAULT NULL COMMENT '下行流量',
`tcp_conn_req_times` varchar(50) DEFAULT NULL COMMENT 'TCP連線請求次數',
`tcp_conn_succ_times` varchar(50) DEFAULT NULL COMMENT 'TCP連線成功次數',
`tcp_conn_succ_rat` varchar(50) DEFAULT NULL COMMENT 'TCP連線成功次率',
`tcp_conn_total_delay` varchar(50) DEFAULT '0' COMMENT 'TCP連線建立總時長',
`tcp_conn_avg_delay` varchar(50) DEFAULT '0' COMMENT 'TCP連線建立平均時延',
`tcp_ul_rtt_total_delay` varchar(50) DEFAULT '0' COMMENT 'TCP上行RTT總時延',
`tcp_dl_rtt_total_delay` varchar(50) DEFAULT '0' COMMENT 'TCP下行RTT總時延',
`tcp_ul_rtt_stat_times` varchar(50) DEFAULT NULL COMMENT 'TCP上行RTT總次數',
`tcp_dl_rtt_stat_times` varchar(50) DEFAULT NULL COMMENT 'TCP下行RTT總次數',
`tcp_ul_rtt_avg_delay` varchar(50) DEFAULT '0' COMMENT 'TCP上行RTT平均時延',
`tcp_dl_rtt_avg_delay` varchar(50) DEFAULT '0' COMMENT 'TCP下行RTT平均時延',
`day_id` varchar(50) DEFAULT NULL COMMENT 'day_id',
PRIMARY KEY (`id`) USING BTREE,
KEY `test_id` (`test_id`) USING BTREE,
KEY `data_time` (`data_time`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=19671082 DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC COMMENT='熱門app';
開啟slow_query_log
mysql> set global slow_query_log = 'ON';
Query OK, 0 rows affected
mysql> show variables like 'slow_query_log%';
+---------------------+------------------------------------------+
| Variable_name | Value |
+---------------------+------------------------------------------+
| slow_query_log | ON |
| slow_query_log_file | /usr/local/mysql/data/hadoop102-slow.log |
+---------------------+------------------------------------------+
2 rows in set
修改long_query_time閾值
mysql> set global long_query_time = 0.5;
Query OK, 0 rows affected
-- 經過測試,發現設定global時,只針對新的對談有效,對當前對談無效。
-- 所以還需要針對當前對談設定一次。
mysql> set long_query_time = 0.5;
Query OK, 0 rows affected
mysql> show variables like 'long_query_time';
+-----------------+----------+
| Variable_name | Value |
+-----------------+----------+
| long_query_time | 0.500000 |
+-----------------+----------+
1 row in set
mysql>
[realeo@hadoop102 ~]$ cd /usr/local/mysql/data/
[realeo@hadoop102 data]$ sudo cat hadoop102-slow.log
/usr/sbin/mysqld, Version: 5.7.27 (MySQL Community Server (GPL)). started with:
Tcp port: 3306 Unix socket: /var/lib/mysql/mysql.sock
Time Id Command Argument
# Time: 2023-04-03T14:46:51.877547Z
# User@Host: root[root] @ [192.168.10.1] Id: 118
# Query_time: 1.284210 Lock_time: 0.000317 Rows_sent: 91767 Rows_examined: 91767
use test;
SET timestamp=1680533211;
SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824');
開啟show profile
-- 僅對當前對談開啟
mysql> set profiling = 'ON';
Query OK, 0 rows affected
mysql> show variables like 'profiling';
+---------------+-------+
| Variable_name | Value |
+---------------+-------+
| profiling | ON |
+---------------+-------+
1 row in set
檢視對談中sql執行情況
mysql> show profiles;
+----------+------------+---------------------------------------------------------------------------------------------+
| Query_ID | Duration | Query |
+----------+------------+---------------------------------------------------------------------------------------------+
| 1 | 0.00193025 | show variables like 'profiling' |
| 2 | 2.095192 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') |
+----------+------------+---------------------------------------------------------------------------------------------+
2 rows in set
檢視當前對談某條sql執行記錄的資源消耗情況
mysql> show profile cpu, block io for query 2;
+----------------------+----------+----------+------------+--------------+---------------+
| Status | Duration | CPU_user | CPU_system | Block_ops_in | Block_ops_out |
+----------------------+----------+----------+------------+--------------+---------------+
| starting | 0.000125 | 6.8E-5 | 4.5E-5 | 0 | 0 |
| checking permissions | 1E-5 | 5E-6 | 3E-6 | 0 | 0 |
| Opening tables | 0.00114 | 0.001145 | 0 | 0 | 0 |
| init | 5.1E-5 | 3.1E-5 | 1.6E-5 | 0 | 0 |
| System lock | 1E-5 | 6E-6 | 4E-6 | 0 | 0 |
| optimizing | 1E-5 | 6E-6 | 4E-6 | 0 | 0 |
| statistics | 0.000519 | 0.000521 | 0 | 0 | 0 |
| preparing | 1.8E-5 | 1.6E-5 | 0 | 0 | 0 |
| executing | 3E-6 | 2E-6 | 0 | 0 | 0 |
| Sending data | 2.093149 | 1.301995 | 0.998561 | 0 | 384 |
| end | 4.5E-5 | 0 | 1.9E-5 | 0 | 0 |
| query end | 1.2E-5 | 0 | 1.2E-5 | 0 | 0 |
| closing tables | 1.7E-5 | 0 | 1.7E-5 | 0 | 0 |
| freeing items | 2E-5 | 0 | 2E-5 | 0 | 0 |
| logging slow query | 5.1E-5 | 0 | 5.1E-5 | 0 | 0 |
| cleaning up | 1.5E-5 | 0 | 1.4E-5 | 0 | 0 |
+----------------------+----------+----------+------------+--------------+---------------+
16 rows in set
show profile常用查詢引數:
- all:顯示所有的開銷資訊。
- block io:顯示塊io開銷。
- context switches: 上下文切換開銷。
- cpu:顯示cpu開銷資訊。
- ipc:顯示傳送和接受開銷資訊。
- memory:顯示記憶體開銷資訊。
- page faults:顯示頁面錯誤開銷資訊。
- source:顯示和 source_function,source_file,source_line 相關的開銷資訊。
- swaps:顯示交換次數開銷資訊。
Hash索引與B+樹索引的區別
不能進行範圍性
的一個查詢,因為hash指向的資料是無序
的,而B+樹的葉子節點是個有序的連結串列。Hash索引僅能滿足(=、<>)和in查詢。如果進行範圍查詢,雜湊型索引,時間複雜化會退化為O(n)而樹型的有序特性,依然能保持O(log2n)的高效率不支援聯合索引的最左側原則
(即聯合索引的部分索引無法使用),而B+樹可以。對於聯合索引來說,Hash索引在計算Hash值得時候將索引鍵合併後再一起計算Hash值,所以不會針對每個索引單獨計算hash值。因此如果用到聯合索引的一個或者多個索引時,無法被利用。模糊查詢
,而B+樹使用模糊查詢的方式時,like後面後模糊查詢的話就可以起到優化作用。mysql> show profiles;
+----------+------------+---------------------------------------------------------------------------------------------+
| Query_ID | Duration | Query |
+----------+------------+---------------------------------------------------------------------------------------------+
| 1 | 0.00187275 | show variables like 'profiling' |
| 2 | 1.63446275 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') |
+----------+------------+---------------------------------------------------------------------------------------------+
2 rows in set
mysql> show index from `table_test`;
+----------------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
+----------------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
| table_test | 0 | PRIMARY | 1 | id | A | 1302941 | NULL | NULL | | BTREE | | |
| table_test | 1 | test_id | 1 | test_id | A | 94 | NULL | NULL | YES | BTREE | | |
| table_test | 1 | data_time | 1 | data_time | A | 75 | NULL | NULL | YES | BTREE | | |
+----------------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
3 rows in set
mysql> SELECT max(LENGTH(test_id)) FROM `table_test`;
+-----------------------+
| max(LENGTH(test_id)) |
+-----------------------+
| 8 |
+-----------------------+
1 row in set
mysql> show profiles;
+----------+------------+---------------------------------------------------------------------------------------------+
| Query_ID | Duration | Query |
+----------+------------+---------------------------------------------------------------------------------------------+
| 1 | 0.00324825 | show variables like 'profiling' |
| 2 | 12.979875 | alter table table_test modify test_id char(8) |
| 3 | 1.683254 | SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824') |
+----------+------------+---------------------------------------------------------------------------------------------+
3 rows in set
查詢語句的優化對於MySQL巨量資料查詢速度的提升非常重要。應該避免使用SELECT *,因為這會導致MySQL檢索整個表的所有列,從而降低查詢速度。應該只查詢需要的列,並使用WHERE子句限制檢索的行數。
MySQL組合索引(複合索引)的最左優先原則。最左優先就是說組合索引的第一個欄位必須出現在查詢組句中,這個索引才會被用到。只要組合索引最左邊第一個欄位出現在Where中,那麼不管後面的欄位出現與否或者出現順序如何,MySQL引擎都會自動呼叫索引來優化查詢效率。
在建立多列索引時,要根據業務需求,where 子句中使用最頻繁的一列放在最左邊。
-- 檢視當前表資訊
show create table table_test;
-- 建立新增索引
ALTER TABLE table_test ADD INDEX test_id_idx (test_id(8));
含義可參考:https://blog.csdn.net/jibaole/article/details/121293188
mysql> EXPLAIN SELECT * FROM `table_test` where test_id in ('99863885543', '99863900221', '99821363824');
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+
| 1 | SIMPLE | table_test | NULL | range | test_id | test_id | 33 | NULL | 163326 | 100 | Using index condition; Using MRR |
+----+-------------+----------------------+------------+-------+---------------+----------+---------+------+--------+----------+----------------------------------+
1 row in set
mysql> EXPLAIN SELECT * FROM `table_test` where test_id in (99863885543,99863900221,99821363824);
+----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+
| 1 | SIMPLE | table_test | NULL | ALL | test_id | NULL | NULL | NULL | 1397231 | 30 | Using where |
+----+-------------+----------------------+------------+------+---------------+------+---------+------+---------+----------+-------------+
1 row in set
分割區表是將大表分成小表的一種方法。在處理巨量資料時,使用分割區表可以大大提高查詢速度。分割區表將資料分成多個分割區,每個分割區可以獨立地進行查詢。當進行查詢時,MySQL只需要掃描需要的分割區,而不是整個表。
在進行自動增加分割區前一定得先對錶手動分幾個區
-- 建立複合主鍵
alter table table_test drop primary key,add primary key(`test_id`,`data_time`,`id`);
ALTER TABLE table_test PARTITION BY RANGE (UNIX_TIMESTAMP(data_time))(
PARTITION p20221126
VALUES
LESS THAN (
UNIX_TIMESTAMP('2022-11-27')
),
PARTITION p20221127
VALUES
LESS THAN (
UNIX_TIMESTAMP('2022-11-28')
),
PARTITION p20221128
VALUES
LESS THAN (
UNIX_TIMESTAMP('2022-11-29')
),
PARTITION p20221129
VALUES
LESS THAN (
UNIX_TIMESTAMP('2022-11-30')
)
)
-- 如果有大於分割區上限的值想插入表中,系統會返還錯誤,為了相容這種情況,我們可以新增一個分割區,上限為maxvalue。所有大於當前上限的值都會放入這個分割區:
alter table table_test add partition(partition pmax values less than(maxvalue));
ALTER TABLE table_test ADD PARTITION (PARTITION p20221130 VALUES LESS THAN (TO_DAYS ('2022-11-30')))
-- 刪除分割區,同時清除歷史資料
alter table table_test drop partition p20221127;
查詢表分割區資訊
mysql> SELECT PARTITION_NAME,PARTITION_METHOD,PARTITION_EXPRESSION,PARTITION_DESCRIPTION,
TABLE_ROWS,SUBPARTITION_NAME,SUBPARTITION_METHOD,SUBPARTITION_EXPRESSION
FROM information_schema.PARTITIONS
WHERE TABLE_SCHEMA=SCHEMA() AND TABLE_NAME='table_test';
+----------------+------------------+---------------------------+-----------------------+------------+-------------------+---------------------+-------------------------+
| PARTITION_NAME | PARTITION_METHOD | PARTITION_EXPRESSION | PARTITION_DESCRIPTION | TABLE_ROWS | SUBPARTITION_NAME | SUBPARTITION_METHOD | SUBPARTITION_EXPRESSION |
+----------------+------------------+---------------------------+-----------------------+------------+-------------------+---------------------+-------------------------+
| p20221126 | RANGE | UNIX_TIMESTAMP(data_time) | 1669536000 | 470450 | NULL | NULL | NULL |
| p20221127 | RANGE | UNIX_TIMESTAMP(data_time) | 1669622400 | 378562 | NULL | NULL | NULL |
| p20221128 | RANGE | UNIX_TIMESTAMP(data_time) | 1669708800 | 419724 | NULL | NULL | NULL |
| p20221129 | RANGE | UNIX_TIMESTAMP(data_time) | 1669795200 | 135171 | NULL | NULL | NULL |
+----------------+------------------+---------------------------+-----------------------+------------+-------------------+---------------------+-------------------------+
4 rows in set
-- 查詢指定分割區資料
SELECT * FROM `table_test` PARTITION(p20221129) where test_id in ('99863885543', '99863900221', '99821363824');
按天自動分割區儲存過程
DELIMITER $$
-- 切換資料庫test
USE `test`$$
DROP PROCEDURE IF EXISTS `create_partition_by_day`$$
CREATE DEFINER=`root`@`%` PROCEDURE `create_partition_by_day`()
BEGIN
/* 事務回滾,其實放這裡沒什麼作用,ALTER TABLE是隱式提交,回滾不了的。*/
DECLARE EXIT HANDLER FOR SQLEXCEPTION ROLLBACK;
START TRANSACTION;
/* 到系統表查出這個表的倒數第二大分割區,得到分割區的日期。在建立分割區的時候,名稱就以日期格式存放,方便後面維護 */
SELECT REPLACE(partition_name,'p','') INTO @P12_Name FROM INFORMATION_SCHEMA.PARTITIONS
WHERE table_name='table_test' ORDER BY partition_ordinal_position DESC LIMIT 1,1;
SET @Max_date= DATE(DATE_ADD(@P12_Name+0, INTERVAL 1 DAY))+0;
/* 修改表,在最大分割區的後面增加一個分割區,時間範圍加1天 */
SET @s1=CONCAT('ALTER TABLE table_test REORGANIZE PARTITION pmax INTO (PARTITION p',@Max_date,' VALUES LESS THAN (UNIX_TIMESTAMP (''',DATE(@Max_date+1),''')),partition pmax values less than(maxvalue))');
/* 輸出檢視增加分割區語句*/
SELECT @s1;
PREPARE stmt2 FROM @s1;
EXECUTE stmt2;
DEALLOCATE PREPARE stmt2;
/* 取出最小的分割區的名稱,並刪除掉 。
注意:刪除分割區會同時刪除分割區內的資料,慎重 */
/*select partition_name into @P0_Name from INFORMATION_SCHEMA.PARTITIONS
where table_name='table_test' order by partition_ordinal_position limit 1;
SET @s=concat('ALTER TABLE table_test DROP PARTITION ',@P0_Name);
PREPARE stmt1 FROM @s;
EXECUTE stmt1;
DEALLOCATE PREPARE stmt1; */
/* 提交 */
COMMIT ;
END$$
DELIMITER ;
增加事件執行
-- 開啟任務定時器
mysql> SET GLOBAL event_scheduler = ON;
Query OK, 0 rows affected
mysql> SHOW VARIABLES LIKE 'event_scheduler';
+-----------------+-------+
| Variable_name | Value |
+-----------------+-------+
| event_scheduler | ON |
+-----------------+-------+
1 row in set
-- 事件定義
DELIMITER ||
CREATE EVENT Partition_by_day_event
ON SCHEDULE
EVERY 1 day STARTS '2022-11-29 07:00:00'
DO
BEGIN
CALL test.`create_partition_by_day`;
END ||
DELIMITER ;
設定可參考:https://www.bbsmax.com/A/gAJG7rZJZR/
效能可參考:https://www.cnblogs.com/mzhaox/p/11201715.html
優化伺服器硬體可以提高MySQL巨量資料查詢速度。應該使用更快的CPU、更大的記憶體和更快的硬碟。MySQL可以更快地讀取和處理資料。