Blog

  • FIND_IN_WILD_SET and FIND_WILD_IN_SET

    Two functions that work just like FIND_IN_SET, but support the % wildcard.

    SELECT FIND_WILD_IN_SET(‘%tack’, ‘haystack,bale,heap,needle,pile’);
    SELECT FIND_IN_WILD_SET(‘pin’, ‘haystack,bale,heap,needle,pi%’);

    DROP FUNCTION IF EXISTS FIND_IN_WILD_SET;
    DROP FUNCTION IF EXISTS FIND_WILD_IN_SET;
    
    DELIMITER //
    
    CREATE FUNCTION FIND_IN_WILD_SET(theString text, theSet text)
    RETURNS boolean
    DETERMINISTIC
    BEGIN
    
    DECLARE delimiterCount int;
    DECLARE pos int DEFAULT 0;
    DECLARE setElement text;
    DECLARE returnValue boolean DEFAULT FALSE;
    
    SET delimiterCount := CHARACTER_LENGTH(theSet) - CHARACTER_LENGTH(REPLACE(theSet, ',', ''));
    
    WHILE (pos <= delimiterCount) DO
        BEGIN 
            SET setElement := SUBSTRING_INDEX(SUBSTRING_INDEX(theSet, ',', pos+1), ',', -1);
            SET returnValue := returnValue OR (theString LIKE setElement);
            SET pos := pos + 1;
    
        END;    
    END WHILE;
    
    
    RETURN returnValue;
    
    END//
    
    CREATE FUNCTION FIND_WILD_IN_SET(theString text, theSet text)
    RETURNS boolean
    DETERMINISTIC
    BEGIN
    
    DECLARE delimiterCount int;
    DECLARE pos int DEFAULT 0;
    DECLARE setElement text;
    DECLARE returnValue boolean DEFAULT FALSE;
    
    SET delimiterCount := CHARACTER_LENGTH(theSet) - CHARACTER_LENGTH(REPLACE(theSet, ',', ''));
    
    WHILE (pos <= delimiterCount) DO
        BEGIN 
            SET setElement := SUBSTRING_INDEX(SUBSTRING_INDEX(theSet, ',', pos+1), ',', -1);
            SET returnValue := returnValue OR (setElement LIKE theString);
            SET pos := pos + 1;
    
        END;    
    END WHILE;
    
    
    RETURN returnValue;
    
    END//
    
    DELIMITER ;
  • whichLineTerminator

    A stored function to guess which line terminator (‘\r’ or ‘\r\n’) to use with LOAD DATA INFILE

    CREATE FUNCTION whichLineTerminator(fileName varchar(255))
    RETURNS varchar(20)
    NOT DETERMINISTIC
    BEGIN
      DECLARE cr_count int;
      DECLARE lf_Count int;
      DECLARE f text;
    
      SET f := LOAD_FILE(fileName);
    
      IF f IS NULL THEN RETURN 'Cannot read file.'; END IF;
    
      SET cr_count := CHARACTER_LENGTH(f) - CHARACTER_LENGTH(REPLACE(f, '\r', ''));
      SET lf_count := CHARACTER_LENGTH(f) - CHARACTER_LENGTH(REPLACE(f, '\n', ''));
    
      IF cr_count >= lf_count 
      THEN RETURN '\\r\\n';
      ELSE RETURN '\\n';
      END IF;
    END
  • Greatest Common Denominator

    Find the greatest common factor of two positive integers.

    CREATE FUNCTION gcd(x int, y int) RETURNS int DETERMINISTIC
    BEGIN
      DECLARE dividend int;
      DECLARE divisor int;
      DECLARE remainder int;
      SET dividend := GREATEST(x, y);
      SET remainder := LEAST(x, y);
    
      WHILE remainder != 0 DO
        SET divisor = remainder;
        SET remainder = MOD(dividend, divisor);
        SET dividend = divisor;
      END WHILE;
    
      RETURN divisor;
    END
  • explode string

    Explode a string around a delimiter without a loop.

    DROP PROCEDURE IF EXISTS explode;
    
    DELIMITER |
    
    CREATE PROCEDURE explode( pDelim VARCHAR(32), pStr TEXT)                                
    BEGIN                                
       DROP TABLE IF EXISTS temp_explode;                                
       CREATE TEMPORARY TABLE temp_explode (id INT AUTO_INCREMENT PRIMARY KEY NOT NULL, word VARCHAR(40));                                
       SET @sql := CONCAT('INSERT INTO temp_explode (word) VALUES (', REPLACE(QUOTE(pStr), pDelim, '\'), (\''), ')');                                
       PREPARE myStmt FROM @sql;                                
       EXECUTE myStmt;                                
    END |   
    
    DELIMITER ;
    
    -- example call:
    SET @str  = "The quick brown fox jumped over the lazy dog"; 
    SET @delim = " "; 
    
    CALL explode(@delim,@str);
    SELECT id,word FROM temp_explode;
  • Extract Regular Expression Match

    Extract the first, longest pattern that matches a regular expression.
    You could add a parameter or write a similar function to get the shortest match instead (by starting with e = s and then incrementing e, instead of starting with e = LENGTH(string) and decrementing).

    CREATE FUNCTION REGEXP_EXTRACT(string TEXT, exp TEXT)
    -- Extract the first longest string that matches the regular expression
    -- If the string is 'ABCD', check all strings and see what matches: 'ABCD', 'ABC', 'AB', 'A', 'BCD', 'BC', 'B', 'CD', 'C', 'D'
    -- It's not smart enough to handle things like (A)|(BCD) correctly in that it will return the whole string, not just the matching token.
    
    RETURNS TEXT
    DETERMINISTIC
    BEGIN
      DECLARE s INT DEFAULT 1;
      DECLARE e INT;
      DECLARE adjustStart TINYINT DEFAULT 1;
      DECLARE adjustEnd TINYINT DEFAULT 1;
    
      -- Because REGEXP matches anywhere in the string, and we only want the part that matches, adjust the expression to add '^' and '$'
      -- Of course, if those are already there, don't add them, but change the method of extraction accordingly.
    
      IF LEFT(exp, 1) = '^' THEN 
        SET adjustStart = 0;
      ELSE
        SET exp = CONCAT('^', exp);
      END IF;
    
      IF RIGHT(exp, 1) = '$' THEN
        SET adjustEnd = 0;
      ELSE
        SET exp = CONCAT(exp, '$');
      END IF;
    
      -- Loop through the string, moving the end pointer back towards the start pointer, then advance the start pointer and repeat
      -- Bail out of the loops early if the original expression started with '^' or ended with '$', since that means the pointers can't move
      WHILE (s <= LENGTH(string)) DO
        SET e = LENGTH(string);
        WHILE (e >= 1) DO
          IF SUBSTRING(string, s, e) REGEXP exp THEN
            RETURN SUBSTRING(string, s, e);
          END IF;
          IF adjustEnd THEN
            SET e = e - 1;
          ELSE
            SET e = s - 1; -- ugh, such a hack to end it early
          END IF;
        END WHILE;
        IF adjustStart THEN
          SET s = s + 1;
        ELSE
          SET s = LENGTH(string) + 1; -- ugh, such a hack to end it early
        END IF;
      END WHILE;
    
      RETURN NULL;
    
    END
  • UNION and ORDER and LIMIT

    From a discussion on Freenode (and don’t feel bad if you don’t get it; I had to ask Sinisa and Monty to explain it to me. Sinisa explained how it worked, and Monty found a bug.)–

    USE test;
    DROP TABLE IF EXISTS `t`;
    CREATE TABLE `t` (
    `a` int(11) DEFAULT NULL
    );

    INSERT INTO t VALUES (RAND() * 50);
    INSERT INTO t SELECT RAND() * 50 FROM t;

    Repeat that last INSERT a few times until you have about 1000 rows.

    What would you expect from the following?

    SELECT * FROM t WHERE false UNION SELECT * FROM t ORDER BY a LIMIT 19, 1;
    (SELECT * FROM t WHERE false) UNION (SELECT * FROM t ORDER BY a LIMIT 19, 1);
    (SELECT * FROM t WHERE false) UNION (SELECT * FROM t ORDER BY a) LIMIT 19, 1;

    Two things to remember:
    UNION is equivalent to UNION DISTINCT
    – an ORDER BY with no LIMIT is optimized away in UNIONed queries

    The first query:

    SELECT * FROM t WHERE false UNION SELECT * FROM t ORDER BY a LIMIT 19, 1;

    is equivalent to:

    (SELECT * FROM t WHERE false UNION SELECT * FROM t) ORDER BY a LIMIT 19, 1;

    Since the UNION is by default DISTINCT, get each distinct value from the table, then order by `a`. Take the row at offset 19. Assuming even distribution of our random data, your value should be ’19’.

    The second query:

    (SELECT * FROM t WHERE false) UNION (SELECT * FROM t ORDER BY a LIMIT 19, 1);

    Order all the rows by `a`. That’s roughly 20 rows of value ‘1’, then 20 rows of value ‘2’, then 20 rows of value ‘3’, etc. Take the row at offset 19. Assuming even distribution of our random data, your value should be ‘1’.

    The third query:

    (SELECT * FROM t WHERE false) UNION (SELECT * FROM t ORDER BY a) LIMIT 19, 1;

    The ORDER BY is discarded. We get distinct values in random order. Take the row at offset 19. Since the values are in random order, the value is not predictable.

  • Multi-master replication

    Implement multi-master to single-slave replication by periodically saving state and changing masters, and do it all in SQL.*

    Just a proof of concept. I suggest you add some error checking (like, don’t make the switch if the slave isn’t running).

    Remember that all this stuff goes on the SLAVE. If you absent-mindedly log in to the master, like I did a few times, you might wonder why your relay_log.info file isn’t read.

    Set up a table to keep track of each master file. Mine use the same user name and password for all hosts; you can add those fields and adjust the procedure accordingly if you like.

    USE mysql;
    DROP TABLE IF EXISTS rotate_master;
    CREATE TABLE rotate_master (
    id int auto_increment primary key,
    master_host varchar(255),
    master_port int unsigned,
    master_log_file varchar(255),
    master_log_pos int unsigned,
    in_use boolean DEFAULT 0
    );

    Insert the list of masters. The one that is currently master should have `in_use` set to 1; all the others should be 0. The order you insert them is the order they will rotate.

    INSERT INTO rotate_master VALUES (NULL, 'localhost', 3306, 'bin.000001', 0, 1);
    INSERT INTO rotate_master VALUES (NULL, 'localhost', 3308, 'bin.000001', 0, 0);

    Then, a procedure to grab the current position (by reading from the relay_log.info file, since the slave position is not otherwise exposed in any accessible manner), record it in the above table, and change masters to the next one in the list.

    DROP PROCEDURE IF EXISTS rotate_master;
    DELIMITER //

    CREATE PROCEDURE rotate_master()
    BEGIN
    DECLARE _info text;
    DECLARE _master_file varchar(255);
    DECLARE _master_pos int unsigned;
    DECLARE _master_host varchar(255);
    DECLARE _master_port int unsigned;
    DECLARE _id int;

    STOP SLAVE;

    -- fetch and store current position
    SELECT LOAD_FILE(@@relay_log_info_file) INTO _info;
    SELECT
    SUBSTRING_INDEX(SUBSTRING_INDEX(_info, '\n', 3), '\n', -1),
    SUBSTRING_INDEX(SUBSTRING_INDEX(_info, '\n', 4), '\n', -1)
    INTO _master_file, _master_pos;
    UPDATE mysql.rotate_master SET master_log_file = _master_file, master_log_pos = _master_pos, id = LAST_INSERT_ID(id) WHERE in_use = 1;

    -- fetch next host
    SELECT
    id,
    master_host,
    master_port,
    master_log_file,
    master_log_pos
    INTO _id, _master_host, _master_port, _master_file, _master_pos
    FROM rotate_master
    ORDER BY id <= LAST_INSERT_ID(), id LIMIT 1; -- advance to next host SET @sql := CONCAT( 'CHANGE MASTER TO master_host=', QUOTE(_master_host), ', master_port=', _master_port, ', master_log_file=', QUOTE(_master_file), ', master_log_pos=', _master_pos); PREPARE myStmt FROM @sql; EXECUTE myStmt; -- mark host as changed UPDATE mysql.rotate_master SET in_use = 0 WHERE in_use = 1; UPDATE mysql.rotate_master SET in_use = 1 WHERE id = _id; START SLAVE; END// DELIMITER ;

    Finally, schedule it to rotate. I used 10 seconds just to play with; I imagine a few minutes would be more practical. *If you don't have 5.1 or later, you'll have to schedule this part externally, with cron or task scheduler.

    DROP EVENT IF EXISTS rotate_master;
    CREATE EVENT rotate_master
    ON SCHEDULE EVERY 10 SECOND
    DO CALL mysql.rotate_master();

  • LOAD DATA and recovery

    A little two-part quiz. If you get the first one without peeking, you’re worth your pay as a DBA. If you get the second one without peeking, you may tell your boss that some random guy on the Internet says you deserve a raise.

    Start with a text file, ‘test.txt’, with these three lines:

    1
    1
    2

    Set up the test in MySQL:

    DROP TABLE IF EXISTS t1;
    CREATE TABLE t1 (id int primary key);
    LOAD DATA INFILE 'test.txt' INTO TABLE t1;

    (more…)

  • Restore from mysqldump --all-databases in parallel

    csplit backup.sql /"^-- Current Database"/ {*}; for f in xx*; do cat xx00 "$f" | mysql& done;

    and then rm xx* when the background jobs have finished.

    I haven’t done any benchmarking, and I haven’t worked out what to do if the number of databases exceeds the number of available connections.

  • mysql’s --xml and some XSLT (now with JSON!)

    Somebody asked in Freenode the other day how to get their data out of MySQL into a specific XML format.

    Both mysqldump and the mysql client have --xml options. The output from the cli looks like this:

    <resultset statement="SELECT here">
      <row>
        <field name="field1">value a</field>
        <field name="field2">value b</field>
      </row>
      <row>
        <field name="field1">value c</field>
        <field name="field2">value d</field>
      </row>
    </resultset>

    I wanted to change those <field name=”fieldName”> into <fieldName> tags, and all it takes is a little XSLT, which I’ve uploaded here.

    An XSLT processor lets you do something like this:

    mysql --xml -e "SELECT here" | xsltproc mysql.xsl -

    The result will be something like:

    <resultset statement="SELECT here">
      <row>
        <field1>value a</field1>
        <field2>value b</field2>
      </row>
      <row>
        <field1>value c</field1>
        <field2>value d</field2>
      </row>
    </resultset>

    If JSON is your thing, you can use xml2json to take that output a step farther:

    mysql --xml -e "SELECT here" | xsltproc mysql.xsl - | xsltproc xml2json.xsl -

    Result (whitespace added):

    {"resultset" : 
      {
        "statement" : "SELECT here", 
        "row" : [
          {"field1" : "value a", "field2" : "value b"},
          {"field1" : "value c", "field2" : "value d"}
        ]
      }
    }

    That will suffice until the mysql client has a –json parameter.