Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
0.00% |
0 / 337 |
|
0.00% |
0 / 23 |
CRAP | |
0.00% |
0 / 1 |
| SqlFormatter | |
0.00% |
0 / 337 |
|
0.00% |
0 / 23 |
34040 | |
0.00% |
0 / 1 |
| getCacheStats | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
2 | |||
| init | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
6 | |||
| getNextToken | |
0.00% |
0 / 43 |
|
0.00% |
0 / 1 |
992 | |||
| getQuotedString | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
| tokenize | |
0.00% |
0 / 27 |
|
0.00% |
0 / 1 |
72 | |||
| format | |
0.00% |
0 / 141 |
|
0.00% |
0 / 1 |
6162 | |||
| highlight | |
0.00% |
0 / 5 |
|
0.00% |
0 / 1 |
6 | |||
| splitQuery | |
0.00% |
0 / 17 |
|
0.00% |
0 / 1 |
72 | |||
| removeComments | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
20 | |||
| compress | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
90 | |||
| highlightToken | |
0.00% |
0 / 25 |
|
0.00% |
0 / 1 |
182 | |||
| highlightQuote | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| highlightBacktickQuote | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| highlightReservedWord | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| highlightBoundary | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
20 | |||
| highlightNumber | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| highlightError | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| highlightComment | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| highlightWord | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| highlightVariable | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
| quote_regex | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
| output | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
12 | |||
| is_cli | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
6 | |||
| 1 | <?php |
| 2 | namespace App\Libs; |
| 3 | /** |
| 4 | * SQL Formatter is a collection of utilities for debugging SQL queries. |
| 5 | * It includes methods for formatting, syntax highlighting, removing comments, etc. |
| 6 | * |
| 7 | * @package SqlFormatter |
| 8 | * @author Jeremy Dorn <jeremy@jeremydorn.com> |
| 9 | * @author Florin Patan <florinpatan@gmail.com> |
| 10 | * @copyright 2013 Jeremy Dorn |
| 11 | * @license http://opensource.org/licenses/MIT |
| 12 | * @link http://github.com/jdorn/sql-formatter |
| 13 | * @version 1.2.17 |
| 14 | */ |
| 15 | class SqlFormatter |
| 16 | { |
| 17 | // Constants for token types |
| 18 | const TOKEN_TYPE_WHITESPACE = 0; |
| 19 | const TOKEN_TYPE_WORD = 1; |
| 20 | const TOKEN_TYPE_QUOTE = 2; |
| 21 | const TOKEN_TYPE_BACKTICK_QUOTE = 3; |
| 22 | const TOKEN_TYPE_RESERVED = 4; |
| 23 | const TOKEN_TYPE_RESERVED_TOPLEVEL = 5; |
| 24 | const TOKEN_TYPE_RESERVED_NEWLINE = 6; |
| 25 | const TOKEN_TYPE_BOUNDARY = 7; |
| 26 | const TOKEN_TYPE_COMMENT = 8; |
| 27 | const TOKEN_TYPE_BLOCK_COMMENT = 9; |
| 28 | const TOKEN_TYPE_NUMBER = 10; |
| 29 | const TOKEN_TYPE_ERROR = 11; |
| 30 | const TOKEN_TYPE_VARIABLE = 12; |
| 31 | |
| 32 | // Constants for different components of a token |
| 33 | const TOKEN_TYPE = 0; |
| 34 | const TOKEN_VALUE = 1; |
| 35 | |
| 36 | // Reserved words (for syntax highlighting) |
| 37 | protected static $reserved = array( |
| 38 | 'ACCESSIBLE', 'ACTION', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AS', 'ASC', |
| 39 | 'AUTOCOMMIT', 'AUTO_INCREMENT', 'BACKUP', 'BEGIN', 'BETWEEN', 'BINLOG', 'BOTH', 'CASCADE', 'CASE', 'CHANGE', 'CHANGED', 'CHARACTER SET', |
| 40 | 'CHARSET', 'CHECK', 'CHECKSUM', 'COLLATE', 'COLLATION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMMITTED', 'COMPRESSED', 'CONCURRENT', |
| 41 | 'CONSTRAINT', 'CONTAINS', 'CONVERT', 'CREATE', 'CROSS', 'CURRENT_TIMESTAMP', 'DATABASE', 'DATABASES', 'DAY', 'DAY_HOUR', 'DAY_MINUTE', |
| 42 | 'DAY_SECOND', 'DEFAULT', 'DEFINER', 'DELAYED', 'DELETE', 'DESC', 'DESCRIBE', 'DETERMINISTIC', 'DISTINCT', 'DISTINCTROW', 'DIV', |
| 43 | 'DO', 'DUMPFILE', 'DUPLICATE', 'DYNAMIC', 'ELSE', 'ENCLOSED', 'END', 'ENGINE', 'ENGINE_TYPE', 'ENGINES', 'ESCAPE', 'ESCAPED', 'EVENTS', 'EXECUTE', |
| 44 | 'EXISTS', 'EXPLAIN', 'EXTENDED', 'FAST', 'FIELDS', 'FILE', 'FIRST', 'FIXED', 'FLUSH', 'FOR', 'FORCE', 'FOREIGN', 'FULL', 'FULLTEXT', |
| 45 | 'FUNCTION', 'GLOBAL', 'GRANT', 'GRANTS', 'GROUP_CONCAT', 'HEAP', 'HIGH_PRIORITY', 'HOSTS', 'HOUR', 'HOUR_MINUTE', |
| 46 | 'HOUR_SECOND', 'IDENTIFIED', 'IF', 'IFNULL', 'IGNORE', 'IN', 'INDEX', 'INDEXES', 'INFILE', 'INSERT', 'INSERT_ID', 'INSERT_METHOD', 'INTERVAL', |
| 47 | 'INTO', 'INVOKER', 'IS', 'ISOLATION', 'KEY', 'KEYS', 'KILL', 'LAST_INSERT_ID', 'LEADING', 'LEVEL', 'LIKE', 'LINEAR', |
| 48 | 'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE', |
| 49 | 'MATCH','MAX_CONNECTIONS_PER_HOUR', 'MAX_QUERIES_PER_HOUR', 'MAX_ROWS', 'MAX_UPDATES_PER_HOUR', 'MAX_USER_CONNECTIONS', |
| 50 | 'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY', |
| 51 | 'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW()','NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY', |
| 52 | 'ON UPDATE', 'ON DELETE', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE', |
| 53 | 'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RANGE', 'RAID0', 'RAID_CHUNKS', 'RAID_CHUNKSIZE','RAID_TYPE', 'READ', 'READ_ONLY', |
| 54 | 'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT', |
| 55 | 'RETURN', 'RETURNS', 'REVOKE', 'RLIKE', 'ROLLBACK', 'ROW', 'ROWS', 'ROW_FORMAT', 'SECOND', 'SECURITY', 'SEPARATOR', |
| 56 | 'SERIALIZABLE', 'SESSION', 'SHARE', 'SHOW', 'SHUTDOWN', 'SLAVE', 'SONAME', 'SOUNDS', 'SQL', 'SQL_AUTO_IS_NULL', 'SQL_BIG_RESULT', |
| 57 | 'SQL_BIG_SELECTS', 'SQL_BIG_TABLES', 'SQL_BUFFER_RESULT', 'SQL_CALC_FOUND_ROWS', 'SQL_LOG_BIN', 'SQL_LOG_OFF', 'SQL_LOG_UPDATE', |
| 58 | 'SQL_LOW_PRIORITY_UPDATES', 'SQL_MAX_JOIN_SIZE', 'SQL_QUOTE_SHOW_CREATE', 'SQL_SAFE_UPDATES', 'SQL_SELECT_LIMIT', 'SQL_SLAVE_SKIP_COUNTER', |
| 59 | 'SQL_SMALL_RESULT', 'SQL_WARNINGS', 'SQL_CACHE', 'SQL_NO_CACHE', 'START', 'STARTING', 'STATUS', 'STOP', 'STORAGE', |
| 60 | 'STRAIGHT_JOIN', 'STRING', 'STRIPED', 'SUPER', 'TABLE', 'TABLES', 'TEMPORARY', 'TERMINATED', 'THEN', 'TO', 'TRAILING', 'TRANSACTIONAL', 'TRUE', |
| 61 | 'TRUNCATE', 'TYPE', 'TYPES', 'UNCOMMITTED', 'UNIQUE', 'UNLOCK', 'UNSIGNED', 'USAGE', 'USE', 'USING', 'VARIABLES', |
| 62 | 'VIEW', 'WHEN', 'WITH', 'WORK', 'WRITE', 'YEAR_MONTH' |
| 63 | ); |
| 64 | |
| 65 | // For SQL formatting |
| 66 | // These keywords will all be on their own line |
| 67 | protected static $reserved_toplevel = array( |
| 68 | 'SELECT', 'FROM', 'WHERE', 'SET', 'ORDER BY', 'GROUP BY', 'LIMIT', 'DROP', |
| 69 | 'VALUES', 'UPDATE', 'HAVING', 'ADD', 'AFTER', 'ALTER TABLE', 'DELETE FROM', 'UNION ALL', 'UNION', 'EXCEPT', 'INTERSECT' |
| 70 | ); |
| 71 | |
| 72 | protected static $reserved_newline = array( |
| 73 | 'LEFT OUTER JOIN', 'RIGHT OUTER JOIN', 'LEFT JOIN', 'RIGHT JOIN', 'OUTER JOIN', 'INNER JOIN', 'JOIN', 'XOR', 'OR', 'AND' |
| 74 | ); |
| 75 | |
| 76 | protected static $functions = array ( |
| 77 | 'ABS', 'ACOS', 'ADDDATE', 'ADDTIME', 'AES_DECRYPT', 'AES_ENCRYPT', 'AREA', 'ASBINARY', 'ASCII', 'ASIN', 'ASTEXT', 'ATAN', 'ATAN2', |
| 78 | 'AVG', 'BDMPOLYFROMTEXT', 'BDMPOLYFROMWKB', 'BDPOLYFROMTEXT', 'BDPOLYFROMWKB', 'BENCHMARK', 'BIN', 'BIT_AND', 'BIT_COUNT', 'BIT_LENGTH', |
| 79 | 'BIT_OR', 'BIT_XOR', 'BOUNDARY', 'BUFFER', 'CAST', 'CEIL', 'CEILING', 'CENTROID', 'CHAR', 'CHARACTER_LENGTH', 'CHARSET', 'CHAR_LENGTH', |
| 80 | 'COALESCE', 'COERCIBILITY', 'COLLATION', 'COMPRESS', 'CONCAT', 'CONCAT_WS', 'CONNECTION_ID', 'CONTAINS', 'CONV', 'CONVERT', 'CONVERT_TZ', |
| 81 | 'CONVEXHULL', 'COS', 'COT', 'COUNT', 'CRC32', 'CROSSES', 'CURDATE', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', |
| 82 | 'CURTIME', 'DATABASE', 'DATE', 'DATEDIFF', 'DATE_ADD', 'DATE_DIFF', 'DATE_FORMAT', 'DATE_SUB', 'DAY', 'DAYNAME', 'DAYOFMONTH', 'DAYOFWEEK', |
| 83 | 'DAYOFYEAR', 'DECODE', 'DEFAULT', 'DEGREES', 'DES_DECRYPT', 'DES_ENCRYPT', 'DIFFERENCE', 'DIMENSION', 'DISJOINT', 'DISTANCE', 'ELT', 'ENCODE', |
| 84 | 'ENCRYPT', 'ENDPOINT', 'ENVELOPE', 'EQUALS', 'EXP', 'EXPORT_SET', 'EXTERIORRING', 'EXTRACT', 'EXTRACTVALUE', 'FIELD', 'FIND_IN_SET', 'FLOOR', |
| 85 | 'FORMAT', 'FOUND_ROWS', 'FROM_DAYS', 'FROM_UNIXTIME', 'GEOMCOLLFROMTEXT', 'GEOMCOLLFROMWKB', 'GEOMETRYCOLLECTION', 'GEOMETRYCOLLECTIONFROMTEXT', |
| 86 | 'GEOMETRYCOLLECTIONFROMWKB', 'GEOMETRYFROMTEXT', 'GEOMETRYFROMWKB', 'GEOMETRYN', 'GEOMETRYTYPE', 'GEOMFROMTEXT', 'GEOMFROMWKB', 'GET_FORMAT', |
| 87 | 'GET_LOCK', 'GLENGTH', 'GREATEST', 'GROUP_CONCAT', 'GROUP_UNIQUE_USERS', 'HEX', 'HOUR', 'IF', 'IFNULL', 'INET_ATON', 'INET_NTOA', 'INSERT', 'INSTR', |
| 88 | 'INTERIORRINGN', 'INTERSECTION', 'INTERSECTS', 'INTERVAL', 'ISCLOSED', 'ISEMPTY', 'ISNULL', 'ISRING', 'ISSIMPLE', 'IS_FREE_LOCK', 'IS_USED_LOCK', |
| 89 | 'LAST_DAY', 'LAST_INSERT_ID', 'LCASE', 'LEAST', 'LEFT', 'LENGTH', 'LINEFROMTEXT', 'LINEFROMWKB', 'LINESTRING', 'LINESTRINGFROMTEXT', 'LINESTRINGFROMWKB', |
| 90 | 'LN', 'LOAD_FILE', 'LOCALTIME', 'LOCALTIMESTAMP', 'LOCATE', 'LOG', 'LOG10', 'LOG2', 'LOWER', 'LPAD', 'LTRIM', 'MAKEDATE', 'MAKETIME', 'MAKE_SET', |
| 91 | 'MASTER_POS_WAIT', 'MAX', 'MBRCONTAINS', 'MBRDISJOINT', 'MBREQUAL', 'MBRINTERSECTS', 'MBROVERLAPS', 'MBRTOUCHES', 'MBRWITHIN', 'MD5', 'MICROSECOND', |
| 92 | 'MID', 'MIN', 'MINUTE', 'MLINEFROMTEXT', 'MLINEFROMWKB', 'MOD', 'MONTH', 'MONTHNAME', 'MPOINTFROMTEXT', 'MPOINTFROMWKB', 'MPOLYFROMTEXT', 'MPOLYFROMWKB', |
| 93 | 'MULTILINESTRING', 'MULTILINESTRINGFROMTEXT', 'MULTILINESTRINGFROMWKB', 'MULTIPOINT', 'MULTIPOINTFROMTEXT', 'MULTIPOINTFROMWKB', 'MULTIPOLYGON', |
| 94 | 'MULTIPOLYGONFROMTEXT', 'MULTIPOLYGONFROMWKB', 'NAME_CONST', 'NULLIF', 'NUMGEOMETRIES', 'NUMINTERIORRINGS', 'NUMPOINTS', 'OCT', 'OCTET_LENGTH', |
| 95 | 'OLD_PASSWORD', 'ORD', 'OVERLAPS', 'PASSWORD', 'PERIOD_ADD', 'PERIOD_DIFF', 'PI', 'POINT', 'POINTFROMTEXT', 'POINTFROMWKB', 'POINTN', 'POINTONSURFACE', |
| 96 | 'POLYFROMTEXT', 'POLYFROMWKB', 'POLYGON', 'POLYGONFROMTEXT', 'POLYGONFROMWKB', 'POSITION', 'POW', 'POWER', 'QUARTER', 'QUOTE', 'RADIANS', 'RAND', |
| 97 | 'RELATED', 'RELEASE_LOCK', 'REPEAT', 'REPLACE', 'REVERSE', 'RIGHT', 'ROUND', 'ROW_COUNT', 'RPAD', 'RTRIM', 'SCHEMA', 'SECOND', 'SEC_TO_TIME', |
| 98 | 'SESSION_USER', 'SHA', 'SHA1', 'SIGN', 'SIN', 'SLEEP', 'SOUNDEX', 'SPACE', 'SQRT', 'SRID', 'STARTPOINT', 'STD', 'STDDEV', 'STDDEV_POP', 'STDDEV_SAMP', |
| 99 | 'STRCMP', 'STR_TO_DATE', 'SUBDATE', 'SUBSTR', 'SUBSTRING', 'SUBSTRING_INDEX', 'SUBTIME', 'SUM', 'SYMDIFFERENCE', 'SYSDATE', 'SYSTEM_USER', 'TAN', |
| 100 | 'TIME', 'TIMEDIFF', 'TIMESTAMP', 'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TIME_FORMAT', 'TIME_TO_SEC', 'TOUCHES', 'TO_DAYS', 'TRIM', 'TRUNCATE', 'UCASE', |
| 101 | 'UNCOMPRESS', 'UNCOMPRESSED_LENGTH', 'UNHEX', 'UNIQUE_USERS', 'UNIX_TIMESTAMP', 'UPDATEXML', 'UPPER', 'USER', 'UTC_DATE', 'UTC_TIME', 'UTC_TIMESTAMP', |
| 102 | 'UUID', 'VARIANCE', 'VAR_POP', 'VAR_SAMP', 'VERSION', 'WEEK', 'WEEKDAY', 'WEEKOFYEAR', 'WITHIN', 'X', 'Y', 'YEAR', 'YEARWEEK' |
| 103 | ); |
| 104 | |
| 105 | // Punctuation that can be used as a boundary between other tokens |
| 106 | protected static $boundaries = array(',', ';',':', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&', '#'); |
| 107 | |
| 108 | // For HTML syntax highlighting |
| 109 | // Styles applied to different token types |
| 110 | public static $quote_attributes = 'style="color: blue;"'; |
| 111 | public static $backtick_quote_attributes = 'style="color: purple;"'; |
| 112 | public static $reserved_attributes = 'style="font-weight:bold;"'; |
| 113 | public static $boundary_attributes = ''; |
| 114 | public static $number_attributes = 'style="color: green;"'; |
| 115 | public static $word_attributes = 'style="color: #333;"'; |
| 116 | public static $error_attributes = 'style="background-color: red;"'; |
| 117 | public static $comment_attributes = 'style="color: #aaa;"'; |
| 118 | public static $variable_attributes = 'style="color: orange;"'; |
| 119 | public static $pre_attributes = 'style="color: black; background-color: white;"'; |
| 120 | |
| 121 | // Boolean - whether or not the current environment is the CLI |
| 122 | // This affects the type of syntax highlighting |
| 123 | // If not defined, it will be determined automatically |
| 124 | public static $cli = true; |
| 125 | |
| 126 | // For CLI syntax highlighting |
| 127 | public static $cli_quote = "\x1b[34;1m"; |
| 128 | public static $cli_backtick_quote = "\x1b[35;1m"; |
| 129 | public static $cli_reserved = "\x1b[37m"; |
| 130 | public static $cli_boundary = ""; |
| 131 | public static $cli_number = "\x1b[32;1m"; |
| 132 | public static $cli_word = ""; |
| 133 | public static $cli_error = "\x1b[31;1;7m"; |
| 134 | public static $cli_comment = "\x1b[30;1m"; |
| 135 | public static $cli_functions = "\x1b[37m"; |
| 136 | public static $cli_variable = "\x1b[36;1m"; |
| 137 | |
| 138 | // The tab character to use when formatting SQL |
| 139 | public static $tab = ' '; |
| 140 | |
| 141 | // This flag tells us if queries need to be enclosed in <pre> tags |
| 142 | public static $use_pre = true; |
| 143 | |
| 144 | // This flag tells us if SqlFormatter has been initialized |
| 145 | protected static $init; |
| 146 | |
| 147 | // Regular expressions for tokenizing |
| 148 | protected static $regex_boundaries; |
| 149 | protected static $regex_reserved; |
| 150 | protected static $regex_reserved_newline; |
| 151 | protected static $regex_reserved_toplevel; |
| 152 | protected static $regex_function; |
| 153 | |
| 154 | // Cache variables |
| 155 | // Only tokens shorter than this size will be cached. Somewhere between 10 and 20 seems to work well for most cases. |
| 156 | public static $max_cachekey_size = 15; |
| 157 | protected static $token_cache = array(); |
| 158 | protected static $cache_hits = 0; |
| 159 | protected static $cache_misses = 0; |
| 160 | |
/**
 * Report usage statistics for the token cache.
 *
 * @return Array Map with the keys 'hits', 'misses', 'entries' (number of cached tokens)
 *               and 'size' (byte length of the serialized cache)
 */
public static function getCacheStats()
{
    $stats = array();

    $stats['hits'] = self::$cache_hits;
    $stats['misses'] = self::$cache_misses;
    $stats['entries'] = count(self::$token_cache);
    // Size is measured as the length of the serialized cache array
    $stats['size'] = strlen(serialize(self::$token_cache));

    return $stats;
}
| 174 | |
/**
 * One-time setup for the tokenizer.
 *
 * Reorders the reserved word list by word length (longest first) and builds the
 * alternation patterns stored in the $regex_* properties. Returns immediately
 * when the setup has already been done.
 */
protected static function init()
{
    if (self::$init) {
        return;
    }

    // Map every reserved word to its length, sort that map by length in
    // descending order, and keep the resulting key order as the new list
    $lengthByWord = array_combine(self::$reserved, array_map('strlen', self::$reserved));
    arsort($lengthByWord);
    self::$reserved = array_keys($lengthByWord);

    // Callback that escapes a single word for use inside a pattern
    $escape = array(__CLASS__, 'quote_regex');

    $boundaryWords = array_map($escape, self::$boundaries);
    self::$regex_boundaries = '(' . implode('|', $boundaryWords) . ')';

    $reservedWords = array_map($escape, self::$reserved);
    self::$regex_reserved = '(' . implode('|', $reservedWords) . ')';

    // In multi-word keywords every space is replaced by \s+, so any run of
    // whitespace between the words is accepted
    $toplevelWords = array_map($escape, self::$reserved_toplevel);
    self::$regex_reserved_toplevel = str_replace(' ', '\\s+', '(' . implode('|', $toplevelWords) . ')');

    $newlineWords = array_map($escape, self::$reserved_newline);
    self::$regex_reserved_newline = str_replace(' ', '\\s+', '(' . implode('|', $newlineWords) . ')');

    $functionWords = array_map($escape, self::$functions);
    self::$regex_function = '(' . implode('|', $functionWords) . ')';

    self::$init = true;
}
| 197 | |
/**
 * Return the next token and token type in a SQL string.
 * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
 *
 * The checks run in a fixed order: whitespace, comment, quoted string, user-defined
 * variable, number, boundary character, reserved word, function name, plain word.
 * The first one that matches at the start of $string decides the token.
 *
 * @param String $string   The SQL string
 * @param array  $previous The result of the previous getNextToken() call
 *
 * @return Array An array holding the token type under self::TOKEN_TYPE and the
 *               token text under self::TOKEN_VALUE.
 */
protected static function getNextToken($string, $previous = null)
{
    // Nothing left to read: report an empty word rather than indexing past the end of the string
    if ($string === '') {
        return array(
            self::TOKEN_VALUE => '',
            self::TOKEN_TYPE => self::TOKEN_TYPE_WORD
        );
    }

    // Whitespace
    if (preg_match('/^\s+/', $string, $matches)) {
        return array(
            self::TOKEN_VALUE => $matches[0],
            self::TOKEN_TYPE => self::TOKEN_TYPE_WHITESPACE
        );
    }

    $first = $string[0];
    // null when the string is a single character, so two-character openers cannot match
    $second = isset($string[1]) ? $string[1] : null;

    // Comment
    $is_line_comment = $first === '#' || ($first === '-' && $second === '-');
    $is_block_comment = $first === '/' && $second === '*';
    if ($is_line_comment || $is_block_comment) {
        if ($is_line_comment) {
            // Comment until end of line (the newline itself is not part of the token)
            $last = strpos($string, "\n");
            $type = self::TOKEN_TYPE_COMMENT;
        } else {
            // Comment until closing comment tag. Add the tag length only when the tag
            // was found; otherwise keep false so the fallback below takes the whole rest.
            $close = strpos($string, "*/", 2);
            $last = ($close === false) ? false : $close + 2;
            $type = self::TOKEN_TYPE_BLOCK_COMMENT;
        }

        // Unterminated comment: it extends to the end of the string
        if ($last === false) {
            $last = strlen($string);
        }

        return array(
            self::TOKEN_VALUE => substr($string, 0, $last),
            self::TOKEN_TYPE => $type
        );
    }

    // Quoted String
    if ($first === '"' || $first === '\'' || $first === '`') {
        return array(
            self::TOKEN_TYPE => ($first === '`' ? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE),
            self::TOKEN_VALUE => self::getQuotedString($string)
        );
    }

    // User-defined Variable
    if ($first === '@' && $second !== null) {
        $ret = array(
            self::TOKEN_VALUE => null,
            self::TOKEN_TYPE => self::TOKEN_TYPE_VARIABLE
        );

        if ($second === '"' || $second === '\'' || $second === '`') {
            // The variable name is quoted
            $ret[self::TOKEN_VALUE] = '@' . self::getQuotedString(substr($string, 1));
        } else {
            // Non-quoted variable name
            preg_match('/^(@[a-zA-Z0-9\._\$]+)/', $string, $matches);
            if ($matches) {
                $ret[self::TOKEN_VALUE] = $matches[1];
            }
        }

        // A lone '@' without a usable name falls through to the checks below
        if ($ret[self::TOKEN_VALUE] !== null) {
            return $ret;
        }
    }

    // Number (decimal, binary, or hex); it must be followed by the end of the string,
    // whitespace, a quote character or a boundary character
    if (preg_match('/^([0-9]+(\.[0-9]+)?|0x[0-9a-fA-F]+|0b[01]+)($|\s|["\'`]|'.self::$regex_boundaries.')/', $string, $matches)) {
        return array(
            self::TOKEN_VALUE => $matches[1],
            self::TOKEN_TYPE => self::TOKEN_TYPE_NUMBER
        );
    }

    // Boundary Character (punctuation and symbols)
    if (preg_match('/^('.self::$regex_boundaries.')/', $string, $matches)) {
        return array(
            self::TOKEN_VALUE => $matches[1],
            self::TOKEN_TYPE => self::TOKEN_TYPE_BOUNDARY
        );
    }

    // Keyword patterns are matched against an upper-cased copy; the token text itself
    // is cut from the original string so its case is preserved
    $upper = strtoupper($string);

    // A reserved word cannot be preceded by a '.'
    // this makes it so in "mytable.from", "from" is not considered a reserved word
    if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') {
        // Top Level Reserved Word
        if (preg_match('/^('.self::$regex_reserved_toplevel.')($|\s|'.self::$regex_boundaries.')/', $upper, $matches)) {
            return array(
                self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED_TOPLEVEL,
                self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
            );
        }
        // Newline Reserved Word
        if (preg_match('/^('.self::$regex_reserved_newline.')($|\s|'.self::$regex_boundaries.')/', $upper, $matches)) {
            return array(
                self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED_NEWLINE,
                self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
            );
        }
        // Other Reserved Word
        if (preg_match('/^('.self::$regex_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper, $matches)) {
            return array(
                self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED,
                self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
            );
        }
    }

    // A function must be succeeded by '('
    // this makes it so "count(" is considered a function, but "count" alone is not
    if (preg_match('/^('.self::$regex_function.'[(]|\s|[)])/', $upper, $matches)) {
        return array(
            self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED,
            // The match includes the trailing character, which is not part of the token
            self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]) - 1)
        );
    }

    // Non reserved word: everything up to the next whitespace, quote or boundary character
    preg_match('/^(.*?)($|\s|["\'`]|'.self::$regex_boundaries.')/', $string, $matches);

    return array(
        self::TOKEN_VALUE => $matches[1],
        self::TOKEN_TYPE => self::TOKEN_TYPE_WORD
    );
}
| 332 | |
/**
 * Extract the quoted string found at the very start of $string.
 *
 * Three forms are recognised:
 *  1. backtick quoted string using `` to escape
 *  2. double quoted string using "" or \" to escape
 *  3. single quoted string using '' or \' to escape
 * The pattern also accepts the end of the input in place of the closing quote.
 *
 * @param String $string Text to read the quoted string from
 *
 * @return String|null The quoted string including its quote characters, or null
 *                     when the pattern does not match
 */
protected static function getQuotedString($string)
{
    $pattern = '/^(((`[^`]*($|`))+)|(("[^"\\\\]*(?:\\\\.[^"\\\\]*)*("|$))+)|((\'[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*(\'|$))+))/s';

    if (!preg_match($pattern, $string, $found)) {
        return null;
    }

    // Group 1 spans whichever of the three alternatives matched
    return $found[1];
}
| 347 | |
/**
 * Takes a SQL string and breaks it into tokens.
 * Each token is an array holding its type under self::TOKEN_TYPE and its text under self::TOKEN_VALUE.
 *
 * Tokens are read from the front of the string one at a time. If a pass fails to
 * consume any characters, the remaining text is appended as a single
 * TOKEN_TYPE_ERROR token and tokenizing stops.
 *
 * @param String $string The SQL string
 *
 * @return Array An array of tokens.
 */
protected static function tokenize($string)
{
    self::init();

    $tokens = array();
    $token = null;

    $current_length = strlen($string);

    // Used to make sure the string keeps shrinking on each iteration; starts above
    // the real length so the first pass never trips the check
    $old_string_len = $current_length + 1;

    // Keep processing the string until it is empty
    while ($current_length) {
        // If the string stopped shrinking, there was a problem
        if ($old_string_len <= $current_length) {
            $tokens[] = array(
                self::TOKEN_VALUE => $string,
                self::TOKEN_TYPE => self::TOKEN_TYPE_ERROR
            );

            return $tokens;
        }
        $old_string_len = $current_length;

        // getNextToken() does not treat a word as reserved when the previous token is '.',
        // but the cache is keyed on the upcoming characters only. Bypass the cache in
        // that position so tokens are never shared between the two contexts.
        $after_dot = $token !== null
            && isset($token[self::TOKEN_VALUE])
            && $token[self::TOKEN_VALUE] === '.';

        // Determine if we can use caching
        if (!$after_dot && $current_length >= self::$max_cachekey_size) {
            $cacheKey = substr($string, 0, self::$max_cachekey_size);
        } else {
            $cacheKey = false;
        }

        if ($cacheKey !== false && isset(self::$token_cache[$cacheKey])) {
            // Retrieve from cache
            $token = self::$token_cache[$cacheKey];
            $token_length = strlen($token[self::TOKEN_VALUE]);
            self::$cache_hits++;
        } else {
            // Get the next token and the token type
            $token = self::getNextToken($string, $token);
            $token_length = strlen($token[self::TOKEN_VALUE]);
            self::$cache_misses++;

            // If the token is shorter than the max length, store it in cache
            if ($cacheKey !== false && $token_length < self::$max_cachekey_size) {
                self::$token_cache[$cacheKey] = $token;
            }
        }

        $tokens[] = $token;

        // Advance the string
        $string = substr($string, $token_length);
        $current_length -= $token_length;
    }

    return $tokens;
}
| 420 | |
| 421 | /** |
| 422 | * Format the whitespace in a SQL string to make it easier to read. |
| 423 | * |
| 424 | * @param String $string The SQL string |
| 425 | * @param boolean $highlight If true, syntax highlighting will also be performed |
| 426 | * |
| 427 | * @return String The SQL string with HTML styles and formatting wrapped in a <pre> tag |
| 428 | */ |
| 429 | public static function format($string, $highlight=true) |
| 430 | { |
| 431 | // This variable will be populated with formatted html |
| 432 | $return = ''; |
| 433 | |
| 434 | // Use an actual tab while formatting and then switch out with self::$tab at the end |
| 435 | $tab = "\t"; |
| 436 | |
| 437 | $indent_level = 0; |
| 438 | $newline = false; |
| 439 | $inline_parentheses = false; |
| 440 | $increase_special_indent = false; |
| 441 | $increase_block_indent = false; |
| 442 | $indent_types = array(); |
| 443 | $added_newline = false; |
| 444 | $inline_count = 0; |
| 445 | $inline_indented = false; |
| 446 | $clause_limit = false; |
| 447 | |
| 448 | // Tokenize String |
| 449 | $original_tokens = self::tokenize($string); |
| 450 | |
| 451 | // Remove existing whitespace |
| 452 | $tokens = array(); |
| 453 | foreach ($original_tokens as $i=>$token) { |
| 454 | if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) { |
| 455 | $token['i'] = $i; |
| 456 | $tokens[] = $token; |
| 457 | } |
| 458 | } |
| 459 | |
| 460 | // Format token by token |
| 461 | foreach ($tokens as $i=>$token) { |
| 462 | // Get highlighted token if doing syntax highlighting |
| 463 | if ($highlight) { |
| 464 | $highlighted = self::highlightToken($token); |
| 465 | } else { // If returning raw text |
| 466 | $highlighted = $token[self::TOKEN_VALUE]; |
| 467 | } |
| 468 | |
| 469 | // If we are increasing the special indent level now |
| 470 | if ($increase_special_indent) { |
| 471 | $indent_level++; |
| 472 | $increase_special_indent = false; |
| 473 | array_unshift($indent_types,'special'); |
| 474 | } |
| 475 | // If we are increasing the block indent level now |
| 476 | if ($increase_block_indent) { |
| 477 | $indent_level++; |
| 478 | $increase_block_indent = false; |
| 479 | array_unshift($indent_types,'block'); |
| 480 | } |
| 481 | |
| 482 | // If we need a new line before the token |
| 483 | if ($newline) { |
| 484 | $return .= "\n" . str_repeat($tab, $indent_level); |
| 485 | $newline = false; |
| 486 | $added_newline = true; |
| 487 | } else { |
| 488 | $added_newline = false; |
| 489 | } |
| 490 | |
| 491 | // Display comments directly where they appear in the source |
| 492 | if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) { |
| 493 | if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) { |
| 494 | $indent = str_repeat($tab,$indent_level); |
| 495 | $return .= "\n" . $indent; |
| 496 | $highlighted = str_replace("\n","\n".$indent,$highlighted); |
| 497 | } |
| 498 | |
| 499 | $return .= $highlighted; |
| 500 | $newline = true; |
| 501 | continue; |
| 502 | } |
| 503 | |
| 504 | if ($inline_parentheses) { |
| 505 | // End of inline parentheses |
| 506 | if ($token[self::TOKEN_VALUE] === ')') { |
| 507 | $return = rtrim($return,' '); |
| 508 | |
| 509 | if ($inline_indented) { |
| 510 | array_shift($indent_types); |
| 511 | $indent_level --; |
| 512 | $return .= "\n" . str_repeat($tab, $indent_level); |
| 513 | } |
| 514 | |
| 515 | $inline_parentheses = false; |
| 516 | |
| 517 | $return .= $highlighted . ' '; |
| 518 | continue; |
| 519 | } |
| 520 | |
| 521 | if ($token[self::TOKEN_VALUE] === ',') { |
| 522 | if ($inline_count >= 30) { |
| 523 | $inline_count = 0; |
| 524 | $newline = true; |
| 525 | } |
| 526 | } |
| 527 | |
| 528 | $inline_count += strlen($token[self::TOKEN_VALUE]); |
| 529 | } |
| 530 | |
| 531 | // Opening parentheses increase the block indent level and start a new line |
| 532 | if ($token[self::TOKEN_VALUE] === '(') { |
| 533 | // First check if this should be an inline parentheses block |
| 534 | // Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2) |
| 535 | // Allow up to 3 non-whitespace tokens inside inline parentheses |
| 536 | $length = 0; |
| 537 | for ($j=1;$j<=250;$j++) { |
| 538 | // Reached end of string |
| 539 | if (!isset($tokens[$i+$j])) break; |
| 540 | |
| 541 | $next = $tokens[$i+$j]; |
| 542 | |
| 543 | // Reached closing parentheses, able to inline it |
| 544 | if ($next[self::TOKEN_VALUE] === ')') { |
| 545 | $inline_parentheses = true; |
| 546 | $inline_count = 0; |
| 547 | $inline_indented = false; |
| 548 | break; |
| 549 | } |
| 550 | |
| 551 | // Reached an invalid token for inline parentheses |
| 552 | if ($next[self::TOKEN_VALUE]===';' || $next[self::TOKEN_VALUE]==='(') { |
| 553 | break; |
| 554 | } |
| 555 | |
| 556 | // Reached an invalid token type for inline parentheses |
| 557 | if ($next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_TOPLEVEL || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_RESERVED_NEWLINE || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_COMMENT || $next[self::TOKEN_TYPE]===self::TOKEN_TYPE_BLOCK_COMMENT) { |
| 558 | break; |
| 559 | } |
| 560 | |
| 561 | $length += strlen($next[self::TOKEN_VALUE]); |
| 562 | } |
| 563 | |
| 564 | if ($inline_parentheses && $length > 30) { |
| 565 | $increase_block_indent = true; |
| 566 | $inline_indented = true; |
| 567 | $newline = true; |
| 568 | } |
| 569 | |
| 570 | // Take out the preceding space unless there was whitespace there in the original query |
| 571 | if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) { |
| 572 | $return = rtrim($return,' '); |
| 573 | } |
| 574 | |
| 575 | if (!$inline_parentheses) { |
| 576 | $increase_block_indent = true; |
| 577 | // Add a newline after the parentheses |
| 578 | $newline = true; |
| 579 | } |
| 580 | |
| 581 | } |
| 582 | |
| 583 | // Closing parentheses decrease the block indent level |
| 584 | elseif ($token[self::TOKEN_VALUE] === ')') { |
| 585 | // Remove whitespace before the closing parentheses |
| 586 | $return = rtrim($return,' '); |
| 587 | |
| 588 | $indent_level--; |
| 589 | |
| 590 | // Reset indent level |
| 591 | while ($j=array_shift($indent_types)) { |
| 592 | if ($j==='special') { |
| 593 | $indent_level--; |
| 594 | } else { |
| 595 | break; |
| 596 | } |
| 597 | } |
| 598 | |
| 599 | if ($indent_level < 0) { |
| 600 | // This is an error |
| 601 | $indent_level = 0; |
| 602 | |
| 603 | if ($highlight) { |
| 604 | $return .= "\n".self::highlightError($token[self::TOKEN_VALUE]); |
| 605 | continue; |
| 606 | } |
| 607 | } |
| 608 | |
| 609 | // Add a newline before the closing parentheses (if not already added) |
| 610 | if (!$added_newline) { |
| 611 | $return .= "\n" . str_repeat($tab, $indent_level); |
| 612 | } |
| 613 | } |
| 614 | |
| 615 | // Top level reserved words start a new line and increase the special indent level |
| 616 | elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) { |
| 617 | $increase_special_indent = true; |
| 618 | |
| 619 | // If the last indent type was 'special', decrease the special indent for this round |
| 620 | reset($indent_types); |
| 621 | if (current($indent_types)==='special') { |
| 622 | $indent_level--; |
| 623 | array_shift($indent_types); |
| 624 | } |
| 625 | |
| 626 | // Add a newline after the top level reserved word |
| 627 | $newline = true; |
| 628 | // Add a newline before the top level reserved word (if not already added) |
| 629 | if (!$added_newline) { |
| 630 | $return .= "\n" . str_repeat($tab, $indent_level); |
| 631 | } |
| 632 | // If we already added a newline, redo the indentation since it may be different now |
| 633 | else { |
| 634 | $return = rtrim($return,$tab).str_repeat($tab, $indent_level); |
| 635 | } |
| 636 | |
| 637 | // If the token may have extra whitespace |
| 638 | if (strpos($token[self::TOKEN_VALUE],' ')!==false || strpos($token[self::TOKEN_VALUE],"\n")!==false || strpos($token[self::TOKEN_VALUE],"\t")!==false) { |
| 639 | $highlighted = preg_replace('/\s+/',' ',$highlighted); |
| 640 | } |
| 641 | // If this is the SQL 'LIMIT' clause (outside inline parentheses), set a flag so that its comma does not start a new line
| 642 | if ($token[self::TOKEN_VALUE] === 'LIMIT' && !$inline_parentheses) { |
| 643 | $clause_limit = true; |
| 644 | } |
| 645 | } |
| 646 | |
| 647 | // Checks if we are out of the limit clause |
| 648 | elseif ($clause_limit && $token[self::TOKEN_VALUE] !== "," && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_NUMBER && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) { |
| 649 | $clause_limit = false; |
| 650 | } |
| 651 | |
| 652 | // Commas start a new line (unless within inline parentheses or SQL 'LIMIT' clause) |
| 653 | elseif ($token[self::TOKEN_VALUE] === ',' && !$inline_parentheses) { |
| 654 | // If we are still inside a 'LIMIT' clause, suppress the newline and leave the clause
| 655 | if ($clause_limit === true) { |
| 656 | $newline = false; |
| 657 | $clause_limit = false; |
| 658 | } |
| 659 | // All other cases of commas |
| 660 | else { |
| 661 | $newline = true; |
| 662 | } |
| 663 | } |
| 664 | |
| 665 | // Newline reserved words start a new line |
| 666 | elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE) { |
| 667 | // Add a newline before the reserved word (if not already added) |
| 668 | if (!$added_newline) { |
| 669 | $return .= "\n" . str_repeat($tab, $indent_level); |
| 670 | } |
| 671 | |
| 672 | // If the token may have extra whitespace |
| 673 | if (strpos($token[self::TOKEN_VALUE],' ')!==false || strpos($token[self::TOKEN_VALUE],"\n")!==false || strpos($token[self::TOKEN_VALUE],"\t")!==false) { |
| 674 | $highlighted = preg_replace('/\s+/',' ',$highlighted); |
| 675 | } |
| 676 | } |
| 677 | |
| 678 | // Multiple boundary characters in a row should not have spaces between them (not including parentheses) |
| 679 | elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) { |
| 680 | if (isset($tokens[$i-1]) && $tokens[$i-1][self::TOKEN_TYPE] === self::TOKEN_TYPE_BOUNDARY) { |
| 681 | if (isset($original_tokens[$token['i']-1]) && $original_tokens[$token['i']-1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) { |
| 682 | $return = rtrim($return,' '); |
| 683 | } |
| 684 | } |
| 685 | } |
| 686 | |
| 687 | // If the token shouldn't have a space before it |
| 688 | if ($token[self::TOKEN_VALUE] === '.' || $token[self::TOKEN_VALUE] === ',' || $token[self::TOKEN_VALUE] === ';') { |
| 689 | $return = rtrim($return, ' '); |
| 690 | } |
| 691 | |
| 692 | $return .= $highlighted.' '; |
| 693 | |
| 694 | // If the token shouldn't have a space after it |
| 695 | if ($token[self::TOKEN_VALUE] === '(' || $token[self::TOKEN_VALUE] === '.') { |
| 696 | $return = rtrim($return,' '); |
| 697 | } |
| 698 | |
| 699 | // If this is the "-" of a negative number, it shouldn't have a space after it |
| 700 | if($token[self::TOKEN_VALUE] === '-' && isset($tokens[$i+1]) && $tokens[$i+1][self::TOKEN_TYPE] === self::TOKEN_TYPE_NUMBER && isset($tokens[$i-1])) { |
| 701 | $prev = $tokens[$i-1][self::TOKEN_TYPE]; |
| 702 | if($prev !== self::TOKEN_TYPE_QUOTE && $prev !== self::TOKEN_TYPE_BACKTICK_QUOTE && $prev !== self::TOKEN_TYPE_WORD && $prev !== self::TOKEN_TYPE_NUMBER) { |
| 703 | $return = rtrim($return,' '); |
| 704 | } |
| 705 | } |
| 706 | } |
| 707 | |
| 708 | // If there are unmatched parentheses |
| 709 | if ($highlight && array_search('block',$indent_types) !== false) { |
| 710 | $return .= "\n".self::highlightError("WARNING: unclosed parentheses or section"); |
| 711 | } |
| 712 | |
| 713 | // Replace tab characters with the configuration tab character |
| 714 | $return = trim(str_replace("\t",self::$tab,$return)); |
| 715 | |
| 716 | if ($highlight) { |
| 717 | $return = self::output($return); |
| 718 | } |
| 719 | |
| 720 | return $return; |
| 721 | } |
| 722 | |
| 723 | /** |
| 724 | * Add syntax highlighting to a SQL string |
| 725 | * |
| 726 | * @param String $string The SQL string |
| 727 | * |
| 728 | * @return String The SQL string with HTML styles applied |
| 729 | */ |
| 730 | public static function highlight($string) |
| 731 | { |
| 732 | $tokens = self::tokenize($string); |
| 733 | |
| 734 | $return = ''; |
| 735 | |
| 736 | foreach ($tokens as $token) { |
| 737 | $return .= self::highlightToken($token); |
| 738 | } |
| 739 | |
| 740 | return self::output($return); |
| 741 | } |
| 742 | |
| 743 | /** |
| 744 | * Split a SQL string into multiple queries. |
| 745 | * Uses ";" as a query delimiter. |
| 746 | * |
| 747 | * @param String $string The SQL string |
| 748 | * |
| 749 | * @return Array An array of individual query strings; each query that was terminated by ";" keeps its trailing semicolon
| 750 | */ |
| 751 | public static function splitQuery($string) |
| 752 | { |
| 753 | $queries = array(); |
| 754 | $current_query = ''; |
| 755 | $empty = true; |
| 756 | |
| 757 | $tokens = self::tokenize($string); |
| 758 | |
| 759 | foreach ($tokens as $token) { |
| 760 | // If this is a query separator |
| 761 | if ($token[self::TOKEN_VALUE] === ';') { |
| 762 | if (!$empty) { |
| 763 | $queries[] = $current_query.';'; |
| 764 | } |
| 765 | $current_query = ''; |
| 766 | $empty = true; |
| 767 | continue; |
| 768 | } |
| 769 | |
| 770 | // If this is a non-empty character |
| 771 | if ($token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_COMMENT && $token[self::TOKEN_TYPE] !== self::TOKEN_TYPE_BLOCK_COMMENT) { |
| 772 | $empty = false; |
| 773 | } |
| 774 | |
| 775 | $current_query .= $token[self::TOKEN_VALUE]; |
| 776 | } |
| 777 | |
| 778 | if (!$empty) { |
| 779 | $queries[] = trim($current_query); |
| 780 | } |
| 781 | |
| 782 | return $queries; |
| 783 | } |
| 784 | |
| 785 | /** |
| 786 | * Remove all comments from a SQL string |
| 787 | * |
| 788 | * @param String $string The SQL string |
| 789 | * |
| 790 | * @return String The SQL string with comments removed, after being passed through format()
| 791 | */ |
| 792 | public static function removeComments($string) |
| 793 | { |
| 794 | $result = ''; |
| 795 | |
| 796 | $tokens = self::tokenize($string); |
| 797 | |
| 798 | foreach ($tokens as $token) { |
| 799 | // Skip comment tokens |
| 800 | if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) { |
| 801 | continue; |
| 802 | } |
| 803 | |
| 804 | $result .= $token[self::TOKEN_VALUE]; |
| 805 | } |
| 806 | $result = self::format( $result,false); |
| 807 | |
| 808 | return $result; |
| 809 | } |
| 810 | |
| 811 | /** |
| 812 | * Compress a query by collapsing white space and removing comments |
| 813 | * |
| 814 | * @param String $string The SQL string |
| 815 | * |
| 816 | * @return String The SQL string with comments removed and runs of whitespace collapsed to single spaces
| 817 | */ |
| 818 | public static function compress($string) |
| 819 | { |
| 820 | $result = ''; |
| 821 | |
| 822 | $tokens = self::tokenize($string); |
| 823 | |
| 824 | $whitespace = true; |
| 825 | foreach ($tokens as $token) { |
| 826 | // Skip comment tokens |
| 827 | if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT) { |
| 828 | continue; |
| 829 | } |
| 830 | // Remove extra whitespace in reserved words (e.g. "OUTER   JOIN" becomes "OUTER JOIN")
| 831 | elseif ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_NEWLINE || $token[self::TOKEN_TYPE] === self::TOKEN_TYPE_RESERVED_TOPLEVEL) { |
| 832 | $token[self::TOKEN_VALUE] = preg_replace('/\s+/',' ',$token[self::TOKEN_VALUE]); |
| 833 | } |
| 834 | |
| 835 | if ($token[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) { |
| 836 | // If the last token was whitespace, don't add another one |
| 837 | if ($whitespace) { |
| 838 | continue; |
| 839 | } else { |
| 840 | $whitespace = true; |
| 841 | // Convert all whitespace to a single space |
| 842 | $token[self::TOKEN_VALUE] = ' '; |
| 843 | } |
| 844 | } else { |
| 845 | $whitespace = false; |
| 846 | } |
| 847 | |
| 848 | $result .= $token[self::TOKEN_VALUE]; |
| 849 | } |
| 850 | |
| 851 | return rtrim($result); |
| 852 | } |
| 853 | |
| 854 | /** |
| 855 | * Highlights a token depending on its type. |
| 856 | * |
| 857 | * @param Array $token An associative array containing type and value. |
| 858 | * |
| 859 | * @return String HTML code of the highlighted token. |
| 860 | */ |
| 861 | protected static function highlightToken($token) |
| 862 | { |
| 863 | $type = $token[self::TOKEN_TYPE]; |
| 864 | |
| 865 | if (self::is_cli()) { |
| 866 | $token = $token[self::TOKEN_VALUE]; |
| 867 | } else { |
| 868 | $token = htmlentities($token[self::TOKEN_VALUE],ENT_COMPAT,'UTF-8'); |
| 869 | } |
| 870 | |
| 871 | if ($type===self::TOKEN_TYPE_BOUNDARY) { |
| 872 | return self::highlightBoundary($token); |
| 873 | } elseif ($type===self::TOKEN_TYPE_WORD) { |
| 874 | return self::highlightWord($token); |
| 875 | } elseif ($type===self::TOKEN_TYPE_BACKTICK_QUOTE) { |
| 876 | return self::highlightBacktickQuote($token); |
| 877 | } elseif ($type===self::TOKEN_TYPE_QUOTE) { |
| 878 | return self::highlightQuote($token); |
| 879 | } elseif ($type===self::TOKEN_TYPE_RESERVED) { |
| 880 | return self::highlightReservedWord($token); |
| 881 | } elseif ($type===self::TOKEN_TYPE_RESERVED_TOPLEVEL) { |
| 882 | return self::highlightReservedWord($token); |
| 883 | } elseif ($type===self::TOKEN_TYPE_RESERVED_NEWLINE) { |
| 884 | return self::highlightReservedWord($token); |
| 885 | } elseif ($type===self::TOKEN_TYPE_NUMBER) { |
| 886 | return self::highlightNumber($token); |
| 887 | } elseif ($type===self::TOKEN_TYPE_VARIABLE) { |
| 888 | return self::highlightVariable($token); |
| 889 | } elseif ($type===self::TOKEN_TYPE_COMMENT || $type===self::TOKEN_TYPE_BLOCK_COMMENT) { |
| 890 | return self::highlightComment($token); |
| 891 | } |
| 892 | |
| 893 | return $token; |
| 894 | } |
| 895 | |
| 896 | /** |
| 897 | * Highlights a quoted string |
| 898 | * |
| 899 | * @param String $value The token's value |
| 900 | * |
| 901 | * @return String HTML code of the highlighted token. |
| 902 | */ |
| 903 | protected static function highlightQuote($value) |
| 904 | { |
| 905 | if (self::is_cli()) { |
| 906 | return self::$cli_quote . $value . "\x1b[0m"; |
| 907 | } else { |
| 908 | return '<span ' . self::$quote_attributes . '>' . $value . '</span>'; |
| 909 | } |
| 910 | } |
| 911 | |
| 912 | /** |
| 913 | * Highlights a backtick quoted string |
| 914 | * |
| 915 | * @param String $value The token's value |
| 916 | * |
| 917 | * @return String HTML code of the highlighted token. |
| 918 | */ |
| 919 | protected static function highlightBacktickQuote($value) |
| 920 | { |
| 921 | if (self::is_cli()) { |
| 922 | return self::$cli_backtick_quote . $value . "\x1b[0m"; |
| 923 | } else { |
| 924 | return '<span ' . self::$backtick_quote_attributes . '>' . $value . '</span>'; |
| 925 | } |
| 926 | } |
| 927 | |
| 928 | /** |
| 929 | * Highlights a reserved word |
| 930 | * |
| 931 | * @param String $value The token's value |
| 932 | * |
| 933 | * @return String HTML code of the highlighted token. |
| 934 | */ |
| 935 | protected static function highlightReservedWord($value) |
| 936 | { |
| 937 | if (self::is_cli()) { |
| 938 | return self::$cli_reserved . $value . "\x1b[0m"; |
| 939 | } else { |
| 940 | return '<span ' . self::$reserved_attributes . '>' . $value . '</span>'; |
| 941 | } |
| 942 | } |
| 943 | |
| 944 | /** |
| 945 | * Highlights a boundary token |
| 946 | * |
| 947 | * @param String $value The token's value |
| 948 | * |
| 949 | * @return String HTML code of the highlighted token. |
| 950 | */ |
| 951 | protected static function highlightBoundary($value) |
| 952 | { |
| 953 | if ($value==='(' || $value===')') return $value; |
| 954 | |
| 955 | if (self::is_cli()) { |
| 956 | return self::$cli_boundary . $value . "\x1b[0m"; |
| 957 | } else { |
| 958 | return '<span ' . self::$boundary_attributes . '>' . $value . '</span>'; |
| 959 | } |
| 960 | } |
| 961 | |
| 962 | /** |
| 963 | * Highlights a number |
| 964 | * |
| 965 | * @param String $value The token's value |
| 966 | * |
| 967 | * @return String HTML code of the highlighted token. |
| 968 | */ |
| 969 | protected static function highlightNumber($value) |
| 970 | { |
| 971 | if (self::is_cli()) { |
| 972 | return self::$cli_number . $value . "\x1b[0m"; |
| 973 | } else { |
| 974 | return '<span ' . self::$number_attributes . '>' . $value . '</span>'; |
| 975 | } |
| 976 | } |
| 977 | |
| 978 | /** |
| 979 | * Highlights an error |
| 980 | * |
| 981 | * @param String $value The token's value |
| 982 | * |
| 983 | * @return String HTML code of the highlighted token. |
| 984 | */ |
| 985 | protected static function highlightError($value) |
| 986 | { |
| 987 | if (self::is_cli()) { |
| 988 | return self::$cli_error . $value . "\x1b[0m"; |
| 989 | } else { |
| 990 | return '<span ' . self::$error_attributes . '>' . $value . '</span>'; |
| 991 | } |
| 992 | } |
| 993 | |
| 994 | /** |
| 995 | * Highlights a comment |
| 996 | * |
| 997 | * @param String $value The token's value |
| 998 | * |
| 999 | * @return String HTML code of the highlighted token. |
| 1000 | */ |
| 1001 | protected static function highlightComment($value) |
| 1002 | { |
| 1003 | if (self::is_cli()) { |
| 1004 | return self::$cli_comment . $value . "\x1b[0m"; |
| 1005 | } else { |
| 1006 | return '<span ' . self::$comment_attributes . '>' . $value . '</span>'; |
| 1007 | } |
| 1008 | } |
| 1009 | |
| 1010 | /** |
| 1011 | * Highlights a word token |
| 1012 | * |
| 1013 | * @param String $value The token's value |
| 1014 | * |
| 1015 | * @return String HTML code of the highlighted token. |
| 1016 | */ |
| 1017 | protected static function highlightWord($value) |
| 1018 | { |
| 1019 | if (self::is_cli()) { |
| 1020 | return self::$cli_word . $value . "\x1b[0m"; |
| 1021 | } else { |
| 1022 | return '<span ' . self::$word_attributes . '>' . $value . '</span>'; |
| 1023 | } |
| 1024 | } |
| 1025 | |
| 1026 | /** |
| 1027 | * Highlights a variable token |
| 1028 | * |
| 1029 | * @param String $value The token's value |
| 1030 | * |
| 1031 | * @return String HTML code of the highlighted token. |
| 1032 | */ |
| 1033 | protected static function highlightVariable($value) |
| 1034 | { |
| 1035 | if (self::is_cli()) { |
| 1036 | return self::$cli_variable . $value . "\x1b[0m"; |
| 1037 | } else { |
| 1038 | return '<span ' . self::$variable_attributes . '>' . $value . '</span>'; |
| 1039 | } |
| 1040 | } |
| 1041 | |
| 1042 | /** |
| 1043 | * Helper function for building regular expressions for reserved words and boundary characters |
| 1044 | * |
| 1045 | * @param String $a The string to be quoted |
| 1046 | * |
| 1047 | * @return String The quoted string |
| 1048 | */ |
| 1049 | private static function quote_regex($a) |
| 1050 | { |
| 1051 | return preg_quote($a,'/'); |
| 1052 | } |
| 1053 | |
| 1054 | /** |
| 1055 | * Helper function for building string output |
| 1056 | * |
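| 1057 | * @param String $string The formatted or highlighted string to output
| 1058 | *
| 1059 | * @return String The string with a trailing newline in CLI mode; otherwise trimmed and, when $use_pre is set, wrapped in a <pre> tag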
| 1060 | */ |
| 1061 | private static function output($string) |
| 1062 | { |
| 1063 | if (self::is_cli()) { |
| 1064 | return $string."\n"; |
| 1065 | } else { |
| 1066 | $string=trim($string); |
| 1067 | if (!self::$use_pre) { |
| 1068 | return $string; |
| 1069 | } |
| 1070 | |
| 1071 | return '<pre '.self::$pre_attributes.'>' . $string . '</pre>'; |
| 1072 | } |
| 1073 | } |
| 1074 | |
| 1075 | private static function is_cli() |
| 1076 | { |
| 1077 | if (isset(self::$cli)) return self::$cli; |
| 1078 | else return php_sapi_name() === 'cli'; |
| 1079 | } |
| 1080 | |
| 1081 | } |