1616
1717class Search
1818{
19- public function __construct ()
20- {
21-
22-
23- // Make a regex that will match CJK or Hangul characters
24- define ('FEATHER_CJK_HANGUL_REGEX ' , '[ ' .
25- '\x{1100}-\x{11FF} ' . // Hangul Jamo 1100-11FF (http://www.fileformat.info/info/unicode/block/hangul_jamo/index.htm)
26- '\x{3130}-\x{318F} ' . // Hangul Compatibility Jamo 3130-318F (http://www.fileformat.info/info/unicode/block/hangul_compatibility_jamo/index.htm)
27- '\x{AC00}-\x{D7AF} ' . // Hangul Syllables AC00-D7AF (http://www.fileformat.info/info/unicode/block/hangul_syllables/index.htm)
28-
29- // Hiragana
30- '\x{3040}-\x{309F} ' . // Hiragana 3040-309F (http://www.fileformat.info/info/unicode/block/hiragana/index.htm)
31-
32- // Katakana
33- '\x{30A0}-\x{30FF} ' . // Katakana 30A0-30FF (http://www.fileformat.info/info/unicode/block/katakana/index.htm)
34- '\x{31F0}-\x{31FF} ' . // Katakana Phonetic Extensions 31F0-31FF (http://www.fileformat.info/info/unicode/block/katakana_phonetic_extensions/index.htm)
35-
36- // CJK Unified Ideographs (http://en.wikipedia.org/wiki/CJK_Unified_Ideographs)
37- '\x{2E80}-\x{2EFF} ' . // CJK Radicals Supplement 2E80-2EFF (http://www.fileformat.info/info/unicode/block/cjk_radicals_supplement/index.htm)
38- '\x{2F00}-\x{2FDF} ' . // Kangxi Radicals 2F00-2FDF (http://www.fileformat.info/info/unicode/block/kangxi_radicals/index.htm)
39- '\x{2FF0}-\x{2FFF} ' . // Ideographic Description Characters 2FF0-2FFF (http://www.fileformat.info/info/unicode/block/ideographic_description_characters/index.htm)
40- '\x{3000}-\x{303F} ' . // CJK Symbols and Punctuation 3000-303F (http://www.fileformat.info/info/unicode/block/cjk_symbols_and_punctuation/index.htm)
41- '\x{31C0}-\x{31EF} ' . // CJK Strokes 31C0-31EF (http://www.fileformat.info/info/unicode/block/cjk_strokes/index.htm)
42- '\x{3200}-\x{32FF} ' . // Enclosed CJK Letters and Months 3200-32FF (http://www.fileformat.info/info/unicode/block/enclosed_cjk_letters_and_months/index.htm)
43- '\x{3400}-\x{4DBF} ' . // CJK Unified Ideographs Extension A 3400-4DBF (http://www.fileformat.info/info/unicode/block/cjk_unified_ideographs_extension_a/index.htm)
44- '\x{4E00}-\x{9FFF} ' . // CJK Unified Ideographs 4E00-9FFF (http://www.fileformat.info/info/unicode/block/cjk_unified_ideographs/index.htm)
45- '\x{20000}-\x{2A6DF} ' . // CJK Unified Ideographs Extension B 20000-2A6DF (http://www.fileformat.info/info/unicode/block/cjk_unified_ideographs_extension_b/index.htm)
46- '] ' );
47- }
48-
49-
5019 //
5120 // "Cleans up" a text string and returns an array of unique words
5221 // This function depends on the current locale setting
5322 //
54- public function split_words ($ text , $ idx )
23+ public static function split_words ($ text , $ idx )
5524 {
5625 // Remove BBCode
5726 $ text = preg_replace ('%\[/?(b|u|s|ins|del|em|i|h|colou?r|quote|code|img|url|email|list|topic|post|forum|user)(?:\=[^\]]*)?\]% ' , ' ' , $ text );
@@ -71,7 +40,7 @@ public function split_words($text, $idx)
7140 // Remove any words that should not be indexed
7241 foreach ($ words as $ key => $ value ) {
7342 // If the word shouldn't be indexed, remove it
74- if (!$ this -> validate_search_word ($ value , $ idx )) {
43+ if (!self :: validate_search_word ($ value , $ idx )) {
7544 unset($ words [$ key ]);
7645 }
7746 }
@@ -83,12 +52,12 @@ public function split_words($text, $idx)
8352 //
8453 // Checks if a word is a valid searchable word
8554 //
86- public function validate_search_word ($ word , $ idx )
55+ public static function validate_search_word ($ word , $ idx )
8756 {
8857 static $ stopwords ;
8958
9059 // If the word is a keyword we don't want to index it, but we do want to be allowed to search it
91- if ($ this -> is_keyword ($ word )) {
60+ if (self :: is_keyword ($ word )) {
9261 return !$ idx ;
9362 }
9463
@@ -105,7 +74,7 @@ public function validate_search_word($word, $idx)
10574 }
10675
10776 // If the word is CJK we don't want to index it, but we do want to be allowed to search it
108- if ($ this -> is_cjk ($ word )) {
77+ if (self :: is_cjk ($ word )) {
10978 return !$ idx ;
11079 }
11180
@@ -121,7 +90,7 @@ public function validate_search_word($word, $idx)
12190 //
12291 // Check a given word is a search keyword.
12392 //
/**
 * Tells whether a word is one of the boolean search operators.
 *
 * The comparison is case-sensitive: only the lower-case forms match.
 *
 * @param mixed $word Word to test.
 * @return bool True only when $word is exactly the string 'and', 'or' or 'not'.
 */
public static function is_keyword($word)
{
    // Strict membership test: with a loose == comparison, non-string
    // values (for example boolean true) compare equal to 'and' and
    // would wrongly be reported as keywords.
    return in_array($word, array('and', 'or', 'not'), true);
}
@@ -130,16 +99,39 @@ public function is_keyword($word)
13099 //
131100 // Check if a given word is CJK or Hangul.
132101 //
/**
 * Reports whether a word consists of CJK or Hangul characters.
 *
 * The word is matched, in UTF-8 mode, against a single character class
 * repeated one or more times between the start and end anchors.
 *
 * @param string $word Word to test.
 * @return bool True when the pattern matches; false otherwise, including
 *              for an empty string or when preg_match() reports an error.
 */
public static function is_cjk($word)
{
    // Unicode ranges that make up the character class
    $ranges = array(
        // Hangul
        '\x{1100}-\x{11FF}',   // Hangul Jamo
        '\x{3130}-\x{318F}',   // Hangul Compatibility Jamo
        '\x{AC00}-\x{D7AF}',   // Hangul Syllables

        // Hiragana
        '\x{3040}-\x{309F}',   // Hiragana

        // Katakana
        '\x{30A0}-\x{30FF}',   // Katakana
        '\x{31F0}-\x{31FF}',   // Katakana Phonetic Extensions

        // CJK Unified Ideographs and related blocks
        '\x{2E80}-\x{2EFF}',   // CJK Radicals Supplement
        '\x{2F00}-\x{2FDF}',   // Kangxi Radicals
        '\x{2FF0}-\x{2FFF}',   // Ideographic Description Characters
        '\x{3000}-\x{303F}',   // CJK Symbols and Punctuation
        '\x{31C0}-\x{31EF}',   // CJK Strokes
        '\x{3200}-\x{32FF}',   // Enclosed CJK Letters and Months
        '\x{3400}-\x{4DBF}',   // CJK Unified Ideographs Extension A
        '\x{4E00}-\x{9FFF}',   // CJK Unified Ideographs
        '\x{20000}-\x{2A6DF}', // CJK Unified Ideographs Extension B
    );

    $pattern = '%^[' . implode('', $ranges) . ']+$%u';

    // preg_match() yields 1, 0 or false; collapse that to a plain boolean
    return (bool) preg_match($pattern, $word);
}
137129
138130
139131 //
140132 // Strip [img] [url] and [email] out of the message so we don't index their contents
141133 //
142- public function strip_bbcode ($ text )
134+ public static function strip_bbcode ($ text )
143135 {
144136 static $ patterns ;
145137
@@ -159,17 +151,17 @@ public function strip_bbcode($text)
159151 //
160152 // Updates the search index with the contents of $post_id (and $subject)
161153 //
162- public function update_search_index ($ mode , $ post_id , $ message , $ subject = null )
154+ public static function update_search_index ($ mode , $ post_id , $ message , $ subject = null )
163155 {
164156 $ message = utf8_strtolower ($ message );
165157 $ subject = utf8_strtolower ($ subject );
166158
167159 // Remove any bbcode that we shouldn't index
168- $ message = $ this -> strip_bbcode ($ message );
160+ $ message = self :: strip_bbcode ($ message );
169161
170162 // Split old and new post/subject to obtain array of 'words'
171- $ words_message = $ this -> split_words ($ message , true );
172- $ words_subject = ($ subject ) ? $ this -> split_words ($ subject , true ) : array ();
163+ $ words_message = self :: split_words ($ message , true );
164+ $ words_subject = ($ subject ) ? self :: split_words ($ subject , true ) : array ();
173165
174166 if ($ mode == 'edit ' ) {
175167 $ select_update_search_index = array ('w.id ' , 'w.word ' , 'm.subject_match ' );
0 commit comments