{"id":136,"date":"2011-10-12T11:08:40","date_gmt":"2011-10-12T16:08:40","guid":{"rendered":"http:\/\/thenoyes.com\/littlenoise\/?p=136"},"modified":"2013-06-07T13:56:34","modified_gmt":"2013-06-07T18:56:34","slug":"extract-regular-expression-match","status":"publish","type":"post","link":"https:\/\/thenoyes.com\/littlenoise\/?p=136","title":{"rendered":"Extract Regular Expression Match"},"content":{"rendered":"<p>Extract the first, longest pattern that matches a regular expression.<br \/>\nYou could add a parameter or write a similar function to get the shortest match instead (by starting with e = s and then incrementing e, instead of starting with e = LENGTH(string) and decrementing).<\/p>\n<pre>CREATE FUNCTION REGEXP_EXTRACT(string TEXT, exp TEXT)\r\n-- Extract the first longest string that matches the regular expression\r\n-- If the string is 'ABCD', check all strings and see what matches: 'ABCD', 'ABC', 'AB', 'A', 'BCD', 'BC', 'B', 'CD', 'C', 'D'\r\n-- It's not smart enough to handle things like (A)|(BCD) correctly in that it will return the whole string, not just the matching token.\r\n\r\nRETURNS TEXT\r\nDETERMINISTIC\r\nBEGIN\r\n  DECLARE s INT DEFAULT 1;\r\n  DECLARE e INT;\r\n  DECLARE adjustStart TINYINT DEFAULT 1;\r\n  DECLARE adjustEnd TINYINT DEFAULT 1;\r\n\r\n  -- Because REGEXP matches anywhere in the string, and we only want the part that matches, adjust the expression to add '^' and '$'\r\n  -- Of course, if those are already there, don't add them, but change the method of extraction accordingly.\r\n\r\n  IF LEFT(exp, 1) = '^' THEN \r\n    SET adjustStart = 0;\r\n  ELSE\r\n    SET exp = CONCAT('^', exp);\r\n  END IF;\r\n\r\n  IF RIGHT(exp, 1) = '$' THEN\r\n    SET adjustEnd = 0;\r\n  ELSE\r\n    SET exp = CONCAT(exp, '$');\r\n  END IF;\r\n\r\n  -- Loop through the string, moving the end pointer back towards the start pointer, then advance the start pointer and repeat\r\n  -- Bail out of the loops early if the original expression started with '^' or ended with '$', since that means the pointers can't move\r\n  WHILE (s <= LENGTH(string)) DO\r\n    SET e = LENGTH(string);\r\n    WHILE (e >= 1) DO\r\n      IF SUBSTRING(string, s, e) REGEXP exp THEN\r\n        RETURN SUBSTRING(string, s, e);\r\n      END IF;\r\n      IF adjustEnd THEN\r\n        SET e = e - 1;\r\n      ELSE\r\n        SET e = s - 1; -- ugh, such a hack to end it early\r\n      END IF;\r\n    END WHILE;\r\n    IF adjustStart THEN\r\n      SET s = s + 1;\r\n    ELSE\r\n      SET s = LENGTH(string) + 1; -- ugh, such a hack to end it early\r\n    END IF;\r\n  END WHILE;\r\n\r\n  RETURN NULL;\r\n\r\nEND<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>Extract the first, longest pattern that matches a regular expression. You could add a parameter or write a similar function to get the shortest match instead (by starting with e = s and then incrementing e, instead of starting with e = LENGTH(string) and decrementing). CREATE FUNCTION REGEXP_EXTRACT(string TEXT, exp TEXT) &#8212; Extract the first [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_feature_clip_id":0,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":false,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2},"jetpack_post_was_ever_published":false},"categories":[4],"tags":[],"class_list":["post-136","post","type-post","status-publish","format-standard","hentry","category-mysql"],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_shortlink":"https:\/\/wp.me\/p2IBF1-2c","jetpack_sharing_enabled":true,"jetpack-related-posts":[],"_links":{"self":[{"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=\/wp\/v2\/posts\/136","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=136"}],"version-history":[{"count":4,"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=\/wp\/v2\/posts\/136\/revisions"}],"predecessor-version":[{"id":229,"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=\/wp\/v2\/posts\/136\/revisions\/229"}],"wp:attachment":[{"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=136"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=136"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/thenoyes.com\/littlenoise\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=136"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}