発生してしまうという......そのため,登録処理は単純に START TRANSACTION 〜 COMMIT で
挟むだけじゃダメですね.words テーブルの書き込みは AUTO-INC のロックと行ロックが交錯するし,
行ロックのかかる順番もまちまちなんで,GET_LOCK() 〜 RELEASE_LOCK() で挟んでデッドロック回避.
この GET_LOCK() はテーブルロックとは違うものなので,SELECT での読み出しには影響しません.
-- registurl: register, refresh, or remove one URL in the keyword index.
--
-- Parameters:
--   urlx         URL to process; matched against urls.url.
--   mtimex       Modification time as a Unix timestamp (stored via FROM_UNIXTIME()).
--   totalwordsx  Total word count of the document. NULL means "delete this URL".
--
-- Input table:
--   tmpwords     Rows (word, tf) for the document; its id and df columns are
--                filled in here. Presumably populated by the caller before the
--                call -- confirm against the calling code.
--
-- Behaviour:
--   1. If the URL already exists, its previous contribution is undone:
--      words.df is decremented for every word it referenced and its rows in
--      dispwords and regwords are deleted.
--   2. If totalwordsx IS NULL, the urls row is deleted and count_urls.n is
--      decremented (only when the URL existed).
--   3. Otherwise the urls row is inserted/updated, count_urls.n is incremented
--      for a new URL, words is upserted from tmpwords, regwords is filled, and
--      the 10 highest-scoring words are written to dispwords.
--
-- Locking:
--   Writes to words are serialized with the named lock 'keywords.words'
--   (10 second timeout). If the lock cannot be obtained, the transaction is
--   rolled back, tmpwords is emptied, and nothing is changed.
--
-- Errors:
--   On any SQL exception the transaction is rolled back, the named lock is
--   released if this call holds it, and the original error is re-raised.
--   tmpwords is left untouched in that case.
CREATE PROCEDURE registurl(urlx varchar(256), mtimex int, totalwordsx int unsigned)
BEGIN
    DECLARE urlid, totaldocs bigint unsigned;
    -- 1 while this call holds the named lock 'keywords.words'.
    DECLARE words_locked tinyint DEFAULT 0;

    -- Named locks are session-scoped and survive ROLLBACK, so an error raised
    -- while the lock is held must release it explicitly; otherwise every other
    -- session would time out on GET_LOCK() until this connection closes.
    DECLARE EXIT HANDLER FOR SQLEXCEPTION
    BEGIN
        ROLLBACK;
        IF words_locked THEN
            DO RELEASE_LOCK('keywords.words');
        END IF;
        RESIGNAL;
    END;

    START TRANSACTION;

    -- Lock the urls row (if any) for the duration of the transaction.
    -- urlid stays NULL when the URL is not registered yet.
    SELECT id INTO urlid FROM urls WHERE url = urlx FOR UPDATE;

    IF urlid IS NOT NULL THEN
        -- GET_LOCK() returns 1 on success, 0 on timeout, NULL on error.
        SET words_locked = COALESCE(GET_LOCK('keywords.words', 10), 0);
        IF words_locked THEN
            -- Undo the document-frequency contribution of the old version.
            UPDATE regwords
            INNER JOIN words ON words.id = regwords.word_id
            SET words.df = words.df - 1
            WHERE regwords.url_id = urlid;
            DO RELEASE_LOCK('keywords.words');
            SET words_locked = 0;
            DELETE FROM dispwords WHERE url_id = urlid;
            DELETE FROM regwords WHERE url_id = urlid;
        ELSE
            -- Lock timeout: give up. Clearing urlid and totalwordsx routes the
            -- call through the delete branch below, where both statements
            -- become no-ops, so nothing is modified.
            ROLLBACK;
            TRUNCATE tmpwords;
            SET urlid = NULL, totalwordsx = NULL;
            START TRANSACTION;
        END IF;
    END IF;

    IF totalwordsx IS NULL THEN
        -- Deletion request (or the give-up path above, where urlid is NULL).
        DELETE FROM urls WHERE id = urlid;
        -- No row filter on count_urls: the WHERE only tests the local variable.
        -- NOTE(review): count_urls looks like a single-row counter -- confirm.
        UPDATE count_urls SET n = n - 1 WHERE urlid IS NOT NULL;
        COMMIT;
    ELSE
        -- Reset LAST_INSERT_ID() so a stale value from an earlier insert in
        -- this session cannot be mistaken for the id of the new urls row.
        DO LAST_INSERT_ID(0);
        INSERT INTO urls (url, mtime, totalwords)
        VALUES (urlx, FROM_UNIXTIME(mtimex), totalwordsx)
        ON DUPLICATE KEY UPDATE mtime = VALUES(mtime), totalwords = VALUES(totalwords);

        IF urlid IS NULL THEN
            -- New URL: pick up its generated id and count it. If no id was
            -- generated, urlid is 0, the counter is not touched, and the
            -- registration below is skipped and rolled back.
            SET urlid = LAST_INSERT_ID();
            UPDATE count_urls SET n = n + 1 WHERE urlid;
        END IF;

        -- Acquire the lock in its own statement so that it is only ever taken
        -- when urlid is usable, independent of expression evaluation order.
        IF urlid THEN
            SET words_locked = COALESCE(GET_LOCK('keywords.words', 10), 0);
        END IF;

        IF words_locked THEN
            -- Add unseen words; bump df for words that already exist.
            INSERT INTO words (word)
            SELECT word FROM tmpwords
            ON DUPLICATE KEY UPDATE df = words.df + 1;
            DO RELEASE_LOCK('keywords.words');
            SET words_locked = 0;

            -- Copy the word ids and current document frequencies into tmpwords.
            UPDATE tmpwords
            INNER JOIN words USING (word)
            SET tmpwords.id = words.id, tmpwords.df = words.df;

            INSERT INTO regwords SELECT urlid, id, tf FROM tmpwords;

            SELECT n INTO totaldocs FROM count_urls;

            -- Score = tf / totalwords * (ln(totaldocs / df) + 1); keep the top 10.
            -- Words with totaldocs / df >= 100000 are skipped
            -- (presumably to drop extremely rare words -- confirm).
            INSERT INTO dispwords
            SELECT urlid, id, tf / totalwordsx * (LN(totaldocs / df) + 1) tfidf
            FROM tmpwords
            WHERE totaldocs / df < 100000
            ORDER BY tfidf DESC
            LIMIT 10;
            COMMIT;
        ELSE
            -- No usable url id or lock timeout: discard everything done in this
            -- transaction, including the clean-up of the previous version.
            ROLLBACK;
        END IF;

        -- Emptied outside the transaction, after COMMIT/ROLLBACK.
        TRUNCATE tmpwords;
    END IF;
END