MySQL Forums
Forum List  »  Newbie

Best Query Rewrite?
Posted by: John Ringhofer
Date: December 03, 2012 02:32PM

Hello,

I'm trying to find the best rewrite for this 20-hour query:

select
T1.timestamp_date,
T1.timestamp_hour,
count(T2.dn_teid) as cdma_ctxid_cnt,
T1.location as cdma_location,
T1.service_area as cdma_service_area,
T1.tac as cdma_tac,
T1.imsi as cdma_imsi,
count(T1.moId) as cdma_count,
max(T1.start_time) as cdma_max_start,
max(T1.end_time) as cdma_max_end,
min(T1.start_time) as cdma_min_start,
min(T1.end_time) as cdma_min_end,
max(T1.timestamp) as cdma_max_ts,
sum(T1.s) as cdma_syn,
sum(T1.f) as cdma_fin,
sum(T1.downstream_bytes) as cdma_sdb,
sum(T1.upstream_bytes) as cdma_sub
from
T1,
T2
where
T2.timestamp_part = T1.timestamp_part and
T2.imsi = T1.imsi
group by
T1.timestamp_date,
T1.timestamp_hour,
T1.timestamp_part,
T1.location,
T1.service_area,
T1.tac,
T1.imsi
order by
count(T1.moId) asc limit 50;

One rewrite I tried took only 3 minutes and 28 seconds:

select
T1.timestamp_date,
T1.timestamp_hour,
T1.location as cdma_location,
T1.service_area as cdma_service_area,
T1.tac as cdma_tac,
T1.imsi as cdma_imsi,
count(T1.moId) as cdma_count,
max(T1.start_time) as cdma_max_start,
max(T1.end_time) as cdma_max_end,
min(T1.start_time) as cdma_min_start,
min(T1.end_time) as cdma_min_end,
max(T1.timestamp) as cdma_max_ts,
sum(T1.s) as cdma_syn,
sum(T1.f) as cdma_fin,
sum(T1.downstream_bytes) as cdma_sdb,
sum(T1.upstream_bytes) as cdma_sub
from
T1,
T2
where
T2.timestamp_part = (select timestamp_part from T1) and
T2.imsi = (select imsi from T1)
group by
T1.timestamp_date,
T1.timestamp_hour,
T1.timestamp_part,
T1.location,
T1.service_area,
T1.tac,
T1.imsi
order by
count(T1.moId) asc limit 50;

Note that the only two changes were (1) WHERE clause has two sub-selects; and (2) "count(T2.dn_teid) as cdma_ctxid_cnt," is not included. How can I get this latter count without making it run for almost 20 hours?

Here is the table DDL (Table T1 has 80 Million rows and T2 has 272 Million):

CREATE TABLE `T1` (
`moId` int(11) NOT NULL,
`cae_IpAddress` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`project_name` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timestamp` bigint(20) NOT NULL,
`policy` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timeunit` int(11) DEFAULT NULL,
`timestamp_date` date DEFAULT NULL,
`timestamp_hour` int(11) DEFAULT NULL,
`timestamp_part` bigint(20) DEFAULT NULL,
`id` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`location` bigint(20) DEFAULT NULL,
`service_area` bigint(20) DEFAULT NULL,
`tac` bigint(20) DEFAULT NULL,
`filter_string` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`vendor` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`model` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`user_group` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`imsi` bigint(20) DEFAULT NULL,
`frame` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ip_source` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tcp_source` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ip_dest` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tcp_dest` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`start_time` decimal(10,0) DEFAULT NULL,
`end_time` decimal(10,0) DEFAULT NULL,
`upstream_packets` bigint(20) DEFAULT NULL,
`downstream_packets` bigint(20) DEFAULT NULL,
`upstream_bytes` bigint(20) DEFAULT NULL,
`downstream_bytes` bigint(20) DEFAULT NULL,
`total_packets` bigint(20) DEFAULT NULL,
`total_bytes` bigint(20) DEFAULT NULL,
`retx_up_packets` bigint(20) DEFAULT NULL,
`retx_down_packets` bigint(20) DEFAULT NULL,
`s` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`sa` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`f` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`fa` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`r` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`p` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`pa` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`FQDN` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`close_reason` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ooo_events_up` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ooo_events_down` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`irtt` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ttfb` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt1k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt10k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt100k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt512k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt2m` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt5m` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`syn_ack_time` decimal(10,0) DEFAULT NULL,
`ttfb_time` decimal(10,0) DEFAULT NULL,
`hold_time` decimal(10,0) DEFAULT NULL,
`sector_congestion` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`rtt_oor` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`data_cause_code` bigint(20) DEFAULT NULL,
`data_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`voice_cause_code` bigint(20) DEFAULT NULL,
`voice_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL
);

CREATE TABLE `T2` (
`moId` int(11) NOT NULL,
`cae_IpAddress` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`project_name` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timestamp` bigint(20) NOT NULL,
`policy` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timeunit` int(11) DEFAULT NULL,
`timestamp_date` date DEFAULT NULL,
`timestamp_hour` int(11) DEFAULT NULL,
`timestamp_part` bigint(20) DEFAULT NULL,
`pcf` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`dn_teid` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`pdsn` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`up_teid` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`start_time` decimal(10,0) DEFAULT NULL,
`end_time` decimal(10,0) DEFAULT NULL,
`location` bigint(20) DEFAULT NULL,
`service_area` bigint(20) DEFAULT NULL,
`tac` bigint(20) DEFAULT NULL,
`mbr` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`upstream_packets` bigint(20) DEFAULT NULL,
`downstream_packets` bigint(20) DEFAULT NULL,
`upstream_bytes` bigint(20) DEFAULT NULL,
`downstream_bytes` bigint(20) DEFAULT NULL,
`total_packets` bigint(20) DEFAULT NULL,
`total_bytes` bigint(20) DEFAULT NULL,
`device_vendor` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`device_model` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`hold_time` decimal(10,0) DEFAULT NULL,
`imsi` bigint(20) DEFAULT NULL,
`user_group` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`sector_congestion` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`sqi` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`reason` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`data_cause_code` bigint(20) DEFAULT NULL,
`data_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`voice_cause_code` bigint(20) DEFAULT NULL,
`voice_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`event_time` decimal(10,0) DEFAULT NULL
);

Final note: I changed column "dn_teid" in T2 from "varchar" to "bigint" (thinking count would work faster on a "bigint"); however, I got a message stating "out of resources error: Aggregation is too large".

Options: ReplyQuote


Subject
Written By
Posted
Best Query Rewrite?
December 03, 2012 02:32PM
December 03, 2012 02:47PM
December 03, 2012 02:51PM
December 03, 2012 02:56PM
December 03, 2012 08:12PM


Sorry, you can't reply to this topic. It has been closed.

Content reproduced on this site is the property of the respective copyright holders. It is not reviewed in advance by Oracle and does not necessarily represent the opinion of Oracle or any other party.