Best Query Rewrite?
Hello,
I'm trying to find the best rewrite for this 20-hour query:
select
T1.timestamp_date,
T1.timestamp_hour,
count(T2.dn_teid) as cdma_ctxid_cnt,
T1.location as cdma_location,
T1.service_area as cdma_service_area,
T1.tac as cdma_tac,
T1.imsi as cdma_imsi,
count(T1.moId) as cdma_count,
max(T1.start_time) as cdma_max_start,
max(T1.end_time) as cdma_max_end,
min(T1.start_time) as cdma_min_start,
min(T1.end_time) as cdma_min_end,
max(T1.timestamp) as cdma_max_ts,
sum(T1.s) as cdma_syn,
sum(T1.f) as cdma_fin,
sum(T1.downstream_bytes) as cdma_sdb,
sum(T1.upstream_bytes) as cdma_sub
from
T1,
T2
where
T2.timestamp_part = T1.timestamp_part and
T2.imsi = T1.imsi
group by
T1.timestamp_date,
T1.timestamp_hour,
T1.timestamp_part,
T1.location,
T1.service_area,
T1.tac,
T1.imsi
order by
count(T1.moId) asc limit 50;
One rewrite I tried took only 3 minutes and 28 seconds:
select
T1.timestamp_date,
T1.timestamp_hour,
T1.location as cdma_location,
T1.service_area as cdma_service_area,
T1.tac as cdma_tac,
T1.imsi as cdma_imsi,
count(T1.moId) as cdma_count,
max(T1.start_time) as cdma_max_start,
max(T1.end_time) as cdma_max_end,
min(T1.start_time) as cdma_min_start,
min(T1.end_time) as cdma_min_end,
max(T1.timestamp) as cdma_max_ts,
sum(T1.s) as cdma_syn,
sum(T1.f) as cdma_fin,
sum(T1.downstream_bytes) as cdma_sdb,
sum(T1.upstream_bytes) as cdma_sub
from
T1,
T2
where
T2.timestamp_part = (select timestamp_part from T1) and
T2.imsi = (select imsi from T1)
group by
T1.timestamp_date,
T1.timestamp_hour,
T1.timestamp_part,
T1.location,
T1.service_area,
T1.tac,
T1.imsi
order by
count(T1.moId) asc limit 50;
Note that the only two changes were (1) WHERE clause has two sub-selects; and (2) "count(T2.dn_teid) as cdma_ctxid_cnt," is not included. How can I get this latter count without making it run for almost 20 hours?
Here is the table DDL (Table T1 has 80 Million rows and T2 has 272 Million):
CREATE TABLE `T1` (
`moId` int(11) NOT NULL,
`cae_IpAddress` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`project_name` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timestamp` bigint(20) NOT NULL,
`policy` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timeunit` int(11) DEFAULT NULL,
`timestamp_date` date DEFAULT NULL,
`timestamp_hour` int(11) DEFAULT NULL,
`timestamp_part` bigint(20) DEFAULT NULL,
`id` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`location` bigint(20) DEFAULT NULL,
`service_area` bigint(20) DEFAULT NULL,
`tac` bigint(20) DEFAULT NULL,
`filter_string` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`vendor` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`model` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`user_group` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`imsi` bigint(20) DEFAULT NULL,
`frame` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ip_source` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tcp_source` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ip_dest` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tcp_dest` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`start_time` decimal(10,0) DEFAULT NULL,
`end_time` decimal(10,0) DEFAULT NULL,
`upstream_packets` bigint(20) DEFAULT NULL,
`downstream_packets` bigint(20) DEFAULT NULL,
`upstream_bytes` bigint(20) DEFAULT NULL,
`downstream_bytes` bigint(20) DEFAULT NULL,
`total_packets` bigint(20) DEFAULT NULL,
`total_bytes` bigint(20) DEFAULT NULL,
`retx_up_packets` bigint(20) DEFAULT NULL,
`retx_down_packets` bigint(20) DEFAULT NULL,
`s` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`sa` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`f` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`fa` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`r` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`p` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`pa` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`FQDN` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`close_reason` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ooo_events_up` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ooo_events_down` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`irtt` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`ttfb` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt1k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt10k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt100k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt512k` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt2m` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`tt5m` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`syn_ack_time` decimal(10,0) DEFAULT NULL,
`ttfb_time` decimal(10,0) DEFAULT NULL,
`hold_time` decimal(10,0) DEFAULT NULL,
`sector_congestion` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`rtt_oor` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`data_cause_code` bigint(20) DEFAULT NULL,
`data_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`voice_cause_code` bigint(20) DEFAULT NULL,
`voice_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL
);
CREATE TABLE `T2` (
`moId` int(11) NOT NULL,
`cae_IpAddress` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`project_name` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timestamp` bigint(20) NOT NULL,
`policy` varchar(63) COLLATE latin1_bin DEFAULT NULL,
`timeunit` int(11) DEFAULT NULL,
`timestamp_date` date DEFAULT NULL,
`timestamp_hour` int(11) DEFAULT NULL,
`timestamp_part` bigint(20) DEFAULT NULL,
`pcf` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`dn_teid` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`pdsn` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`up_teid` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`start_time` decimal(10,0) DEFAULT NULL,
`end_time` decimal(10,0) DEFAULT NULL,
`location` bigint(20) DEFAULT NULL,
`service_area` bigint(20) DEFAULT NULL,
`tac` bigint(20) DEFAULT NULL,
`mbr` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`upstream_packets` bigint(20) DEFAULT NULL,
`downstream_packets` bigint(20) DEFAULT NULL,
`upstream_bytes` bigint(20) DEFAULT NULL,
`downstream_bytes` bigint(20) DEFAULT NULL,
`total_packets` bigint(20) DEFAULT NULL,
`total_bytes` bigint(20) DEFAULT NULL,
`device_vendor` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`device_model` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`hold_time` decimal(10,0) DEFAULT NULL,
`imsi` bigint(20) DEFAULT NULL,
`user_group` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`sector_congestion` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`sqi` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`reason` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`data_cause_code` bigint(20) DEFAULT NULL,
`data_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`voice_cause_code` bigint(20) DEFAULT NULL,
`voice_cc_initiator` varchar(255) COLLATE latin1_bin DEFAULT NULL,
`event_time` decimal(10,0) DEFAULT NULL
);
Final note: I changed column "dn_teid" in T2 from "varchar" to "bigint" (thinking count would work faster on a "bigint"); however, I got a message stating "out of resources error: Aggregation is too large".