MySQL Forums
Forum List  »  Optimizer & Parser

Strange performance with JOIN/COUNT
Posted by: Tomas Pluskal
Date: February 09, 2012 12:30AM

Hi,

I am using mysql version:
mysql Ver 14.14 Distrib 5.1.52, for redhat-linux-gnu (x86_64) using readline 5.1


I have a database with the following 3 tables (among others):
CREATE TABLE `msruns` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`msrun_id` varchar(30) NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`created_by` varchar(200) NOT NULL,
`sample_id` varchar(30) NOT NULL,
`separation_protocol_id` varchar(30) NOT NULL,
`processing_method_id` varchar(30) NOT NULL,
`ms_protocol_id` varchar(30) NOT NULL,
`raw_data_file` varchar(200) DEFAULT NULL,
`mzmine_project_file` varchar(255) DEFAULT NULL,
`description` text NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `msrun_id` (`msrun_id`),
UNIQUE KEY `raw_data_file` (`raw_data_file`),
KEY `msruns_ibfk_1` (`sample_id`),
KEY `msrun_msprotocol` (`ms_protocol_id`),
KEY `msrun_sepprotocol` (`separation_protocol_id`),
KEY `msrun_method_ibfk_1` (`processing_method_id`),
CONSTRAINT `msruns_ibfk_1` FOREIGN KEY (`sample_id`) REFERENCES `samples` (`sample_id`) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `msrun_method_ibfk_1` FOREIGN KEY (`processing_method_id`) REFERENCES `data_processing_methods` (`processing_method_id`) ON UPDATE CASCADE,
CONSTRAINT `msrun_msprotocol` FOREIGN KEY (`ms_protocol_id`) REFERENCES `protocols` (`protocol_id`) ON UPDATE CASCADE,
CONSTRAINT `msrun_sepprotocol` FOREIGN KEY (`separation_protocol_id`) REFERENCES `protocols` (`protocol_id`) ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=453 DEFAULT CHARSET=utf8


CREATE TABLE `msrun_peaks` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`msrun_id` varchar(30) NOT NULL,
`mz` double NOT NULL,
`retention_time` double NOT NULL,
`normalized_mz` double DEFAULT NULL,
`normalized_retention_time` double DEFAULT NULL,
`peak_height` double NOT NULL,
`peak_area` double NOT NULL,
`global_peak_id` varchar(30) DEFAULT NULL,
`msms_pattern` blob,
`isotope_pattern` blob,
PRIMARY KEY (`id`),
KEY `msrun_peaks_ibfk_1` (`msrun_id`),
KEY `msrun_peaks_ibfk_2` (`global_peak_id`),
CONSTRAINT `msrun_peaks_ibfk_1` FOREIGN KEY (`msrun_id`) REFERENCES `msruns` (`msrun_id`) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `msrun_peaks_ibfk_2` FOREIGN KEY (`global_peak_id`) REFERENCES `global_peaklist` (`peak_id`) ON DELETE CASCADE ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=22751 DEFAULT CHARSET=utf8


CREATE TABLE `samples` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`sample_id` varchar(30) NOT NULL,
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
`experiment_id` varchar(30) NOT NULL,
`subject_id` varchar(30) NOT NULL,
`sample_name` varchar(200) NOT NULL,
`prepared_by` varchar(200) NOT NULL,
`sample_type_id` varchar(30) NOT NULL,
`subject_condition` text NOT NULL,
`preparation_protocol_id` varchar(30) NOT NULL,
`preparation_protocol_details` text NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `sample_id` (`sample_id`),
KEY `samples_ibfk_1` (`experiment_id`),
KEY `samples_ibfk_2` (`sample_type_id`),
KEY `samples_ibfk_3` (`subject_id`),
KEY `sample_protocol_ibfk_1` (`preparation_protocol_id`),
CONSTRAINT `sample_protocol_ibfk_1` FOREIGN KEY (`preparation_protocol_id`) REFERENCES `protocols` (`protocol_id`) ON UPDATE CASCADE,
CONSTRAINT `samples_ibfk_1` FOREIGN KEY (`experiment_id`) REFERENCES `experiments` (`experiment_id`) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `samples_ibfk_2` FOREIGN KEY (`sample_type_id`) REFERENCES `sample_types` (`sample_type_id`) ON UPDATE CASCADE,
CONSTRAINT `samples_ibfk_3` FOREIGN KEY (`subject_id`) REFERENCES `subjects` (`subject_id`) ON UPDATE CASCADE
) ENGINE=InnoDB AUTO_INCREMENT=308 DEFAULT CHARSET=utf8


As you can see from the AUTO_INCREMENT values, there are not so many rows in the tables yet.

I found certain queries to be very slow, and after the bit of digging I narrowed down the problem to the following:



SELECT msruns.description, COUNT(msrun_peaks.id) AS peaks_count FROM msruns LEFT JOIN msrun_peaks USING (msrun_id) JOIN samples USING (sample_id) GROUP BY msrun_id ORDER BY msruns.id;
446 rows in set (0.42 sec)

SELECT msruns.msrun_id, COUNT(msrun_peaks.id) AS peaks_count FROM msruns LEFT JOIN msrun_peaks USING (msrun_id) JOIN samples USING (sample_id) GROUP BY msrun_id ORDER BY msruns.id;
446 rows in set (0.05 sec)

SELECT msruns.description, (SELECT COUNT(id) FROM msrun_peaks WHERE msrun_peaks.msrun_id = msruns.msrun_id) AS peaks_count FROM msruns JOIN samples USING (sample_id) ORDER BY msruns.id;
446 rows in set (0.01 sec)


Note that the first query takes 0.4 s, while the other 2 queries return almost immediately.
The only difference between the first and second query is the column that is selected.
The only difference between the first and third query is that I moved the COUNT to a subselect.
Could someone explain me the reason why the performance is so different?

Thanks!

Options: ReplyQuote


Subject
Views
Written By
Posted
Strange performance with JOIN/COUNT
2852
February 09, 2012 12:30AM
1198
February 11, 2012 11:11PM


Sorry, you can't reply to this topic. It has been closed.

Content reproduced on this site is the property of the respective copyright holders. It is not reviewed in advance by Oracle and does not necessarily represent the opinion of Oracle or any other party.