Hello Mr. Peter Brawley,
I have made some changes in the Oracle to MySQL converted function. It is working to some extent. It is even working with all those failure cases when I tried with the original Jaro Winkler MySQL version. But I suspect the matching score it is generating is slightly on the higher side.
I have kept the return type as float, since I need a decimal score in the program.
Quote
DROP FUNCTION IF EXISTS Fn_Jws;
/
CREATE FUNCTION Fn_Jws (p_string1 VARCHAR(1000), p_string2 VARCHAR(1000)) RETURNS float
DETERMINISTIC
CONTAINS SQL
BEGIN
DECLARE v_string1 VARCHAR (1000);
DECLARE v_string2 VARCHAR (1000);
DECLARE v_closeness int default 0;
DECLARE v_temp VARCHAR (1000);
DECLARE v_comp1 VARCHAR (1000);
DECLARE v_comp2 VARCHAR (1000);
DECLARE v_matches int default 0;
DECLARE v_char VARCHAR(1);
DECLARE v_transpositions int default 0;
DECLARE v_d_jaro float default 0;
DECLARE v_leading int default 0;
DECLARE v_d_winkler float default 0;
DECLARE v_jws, i float default 0;
IF p_string1 IS NULL OR p_string2 IS NULL THEN
RETURN 0;
END IF;
SET v_closeness = (greatest(length(v_string1), length(v_string2)) / 2) - 1;
-- find matching characters and transpositions within closeness:
SET @v_temp = v_string2;
SET @vLength = length(v_string1);
WHILE i < @vLength DO
SET i = i + 1;
IF (instr(@v_temp, substr(v_string1, i, 1))) > 0 THEN
SET v_char = substr(v_string1, i, 1);
IF abs(instr(v_string1, v_char) - instr(v_string2, v_char)) <= v_closeness THEN
SET v_comp1 = concat(IFNULL(v_comp1, ''), substr(v_string1, i, 1));
SET @v_temp = concat(substr(@v_temp, 1, instr(@v_temp, substr(v_string1, i, 1)) - 1),
substr(@v_temp, instr(@v_temp, substr(v_string1, i, 1)) + 1));
END IF;
END IF;
END WHILE;
SET @v_temp = v_string1;
SET i = 0;
SET @vLength = length(v_string2);
WHILE i < @vLength DO
SET i = i + 1;
IF (instr(@v_temp, substr(v_string2, i, 1))) > 0 THEN
SET v_char = substr(v_string2, i, 1);
IF abs(instr(v_string2, v_char) - instr(v_string1, v_char)) <= v_closeness + 1 THEN
SET v_comp2 = concat(IFNULL(v_comp2, ''), substr(v_string2, i, 1));
SET @v_temp = concat(substr(@v_temp, 1, instr(@v_temp, substr(v_string2, i, 1)) - 1),
substr(@v_temp, instr(@v_temp, substr(v_string2, i, 1)) + 1));
END IF;
END IF;
END WHILE;
IF v_comp1 IS NULL OR v_comp2 IS NULL THEN
return 0;
END IF;
-- count matches and transpositions within closeness:
SET I = 0;
SET @vLength = least(length(v_comp1), length(v_comp2));
WHILE i < @vLength DO
SET i = i + 1;
IF substr(v_comp1, i, 1) = substr(v_comp2, i, 1) THEN
SET v_matches = v_matches + 1;
ELSE
SET v_char = substr(v_comp1, i, 1);
IF abs(instr(v_string1, v_char) - instr(v_string2, v_char)) <= v_closeness + 1 THEN
SET v_transpositions = v_transpositions;
SET v_matches = v_matches + 1;
END IF;
END IF;
END WHILE;
SET v_transpositions = v_transpositions / 2;
IF v_matches = 0 THEN
RETURN 0;
END IF;
SET v_d_jaro = ((v_matches / length(v_string1)) +
(v_matches / length(v_string2)) +
((v_matches - v_transpositions) / v_matches))
/ 3;
SET i = 0;
-- count matching leading characters (up to 2):
SET @vLength = least(length(v_string1), length(v_string2), 2);
LOOP1: WHILE i < @vLength DO
SET i = i + 1;
IF substr(v_string1, i, 1) = substr(v_string2, i, 1) THEN
SET v_leading = v_leading + 1;
ELSE
SET v_leading = v_leading;
LEAVE LOOP1;
END IF;
END WHILE;
SET v_d_winkler = v_d_jaro + ((v_leading * .1) * (1 - v_d_jaro));
-- SET v_jws = round(v_d_winkler * 100);
SET v_jws = v_d_winkler;
return v_jws;
END;
I will be grateful to you if you can review the above modified code and guide me if anything is wrong.