Hi,
I have four versions of some SQL. One uses a stored function and the other three just use SQL (including one that basically emulates the function). However, the function version runs in under a second, whereas the other versions all take considerably longer (9 seconds is the fastest).
I'm basically trying to join tablea to tableb on fk_a = fk_b and dateb <= datea, but joining to a single date each time (the closest to datea).
Here is a script to generate the tables, data and function:
DELIMITER $$
DROP TABLE IF EXISTS `tablea` $$
CREATE TABLE `tablea` (
`oid` int(10) unsigned NOT NULL AUTO_INCREMENT,
`datea` datetime DEFAULT NULL,
`A` varchar(45) DEFAULT NULL,
`fk_a` int(10) unsigned NOT NULL,
PRIMARY KEY (`oid`),
KEY `index_2` (`fk_a`,`datea`) USING BTREE
) ENGINE=MyISAM AUTO_INCREMENT=130001 DEFAULT CHARSET=latin1 $$
DROP TABLE IF EXISTS `tableb` $$
CREATE TABLE `tableb` (
`oid` int(10) unsigned NOT NULL AUTO_INCREMENT,
`dateb` datetime DEFAULT NULL,
`B` varchar(45) DEFAULT NULL,
`fk_b` int(10) unsigned NOT NULL,
PRIMARY KEY (`oid`),
KEY `index_2` (`fk_b`,`dateb`) USING BTREE
) ENGINE=MyISAM AUTO_INCREMENT=130001 DEFAULT CHARSET=latin1 $$
DROP PROCEDURE IF EXISTS `populatea` $$
CREATE DEFINER=`root`@`%` PROCEDURE `populatea`(in nrows int)
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE d DATETIME;
DECLARE f INT DEFAULT 0;
WHILE i < nrows DO
SET d = from_unixtime(1104537600 + round(rand() * 86400 * 365 * 7));
SET f = round(rand() * 100);
INSERT INTO tablea(datea, A, fk_a)
SELECT d, concat('A: ', d), f FROM dual;
SET i = i + 1;
END WHILE;
END $$
DROP PROCEDURE IF EXISTS `populateb` $$
CREATE DEFINER=`root`@`%` PROCEDURE `populateb`(in nrows int)
BEGIN
DECLARE i INT DEFAULT 0;
DECLARE d DATETIME;
DECLARE f INT DEFAULT 0;
WHILE i < nrows DO
SET d = from_unixtime(1104537600 + round(rand() * 86400 * 365 * 7));
SET f = round(rand() * 100);
INSERT INTO tableb(dateb, B, fk_b)
SELECT d, concat('B: ', d), f FROM dual;
SET i = i + 1;
END WHILE;
END $$
DROP FUNCTION IF EXISTS `testsp` $$
CREATE DEFINER=`root`@`%` function `testsp`(this_adate datetime, this_f int(10)) returns varchar(45)
BEGIN
declare b_value varchar(45);
declare exit handler for 1329
begin
return null;
end;
select B into b_value from tableb where fk_b = this_f and dateb <= this_adate order by dateb desc limit 1;
return b_value;
END $$
call populatea(100000) $$
call populateb(100000) $$
and here are the various bits of SQL:
reset query cache;
-- sp (0.15 secs)
SELECT *, testsp(datea, a.fk_a) as B
from tablea as a
where fk_a = 9
order by fk_a, a.datea desc;
reset query cache;
-- sql 1 (9.1 secs)
SELECT *, (select B from tableb where fk_b = fk_a and dateb <= datea order by dateb desc limit 1) as B
from tablea as a
where fk_a = 9
order by fk_a, a.datea desc;
reset query cache;
-- sql 2 (352 secs)
select a.*, b1.B from tablea as a
left join tableb as b1 on a.datea >= b1.dateb and a.fk_a = b1.fk_b
left join tableb as b2 on a.datea >= b2.dateb and a.fk_a = b2.fk_b and b1.dateb < b2.dateb
where b2.dateb is null
and a.fk_a = 9
order by a.datea desc;
reset query cache;
-- sql 3 (18.5 secs)
SELECT a.*, b.B FROM tablea as a
JOIN tableb as b ON b.oid = (SELECT oid FROM tableb WHERE dateb <= datea and fk_b = fk_a ORDER BY dateb DESC LIMIT 1)
where fk_a = 9;
Any idea what's going on here? How can the function be so much faster?
Edited 1 time(s). Last edit at 05/10/2013 02:43AM by Tom Melly.