Commit 185c2bea authored by serg@serg.mysql.com's avatar serg@serg.mysql.com

ft_* variables added

parent f751ab1a
...@@ -21342,6 +21342,9 @@ differ somewhat: ...@@ -21342,6 +21342,9 @@ differ somewhat:
| delayed_queue_size | 1000 | | delayed_queue_size | 1000 |
| flush | OFF | | flush | OFF |
| flush_time | 0 | | flush_time | 0 |
| ft_min_word_len | 4 |
| ft_max_word_len | 254 |
| ft_max_word_len_for_sort| 20 |
| have_bdb | YES | | have_bdb | YES |
| have_gemini | NO | | have_gemini | NO |
| have_innodb | YES | | have_innodb | YES |
...@@ -21525,6 +21528,31 @@ tables will be closed (to free up resources and sync things to disk). We ...@@ -21525,6 +21528,31 @@ tables will be closed (to free up resources and sync things to disk). We
only recommend this option on Win95, Win98, or on systems where you have only recommend this option on Win95, Win98, or on systems where you have
very little resources. very little resources.
@item @code{ft_min_word_len}
The minimum length of the word to be included in a @code{FULLTEXT} index.
@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing
this variable.}
@item @code{ft_max_word_len}
The maximum length of the word to be included in a @code{FULLTEXT} index.
@strong{Note: @code{FULLTEXT} indexes have to be rebuilt after changing
this variable.}
@item @code{ft_max_word_len_for_sort}
The maximum length of the word in a @code{FULLTEXT} index
to be used in the fast index recreation method in
@code{REPAIR}, @code{CREATE INDEX}, or
@code{ALTER TABLE}. Longer words are inserted the slow way.
The rule of thumb is as follows: with @code{ft_max_word_len_for_sort}
increasing, @strong{MySQL} will create bigger temporary files
(thus slowing the process down, due to disk I/O), and will put
fewer keys in one sort block (again, decreasing the efficiency).
When @code{ft_max_word_len_for_sort} is too small, instead,
@strong{MySQL} will insert a lot of words into the index the slow way -
but short words will be inserted very fast. It applies only to
index recreation during @code{REPAIR}, @code{CREATE INDEX}, or
@code{ALTER TABLE}.
@item @code{have_bdb} @item @code{have_bdb}
@code{YES} if @code{mysqld} supports Berkeley DB tables. @code{DISABLED} @code{YES} if @code{mysqld} supports Berkeley DB tables. @code{DISABLED}
if @code{--skip-bdb} is used. if @code{--skip-bdb} is used.
...@@ -28279,12 +28307,9 @@ unless you know what you are doing! ...@@ -28279,12 +28307,9 @@ unless you know what you are doing!
@itemize @itemize
@item @item
Minimal length of word to be indexed is defined in Minimal length of word to be indexed is defined by @strong{MySQL}
@code{myisam/ftdefs.h} file by the line variable @code{ft_min_word_len}. @xref{SHOW VARIABLES}.
@example Change it to the value you prefer, and rebuild
#define MIN_WORD_LEN 4
@end example
Change it to the value you prefer, recompile @strong{MySQL}, and rebuild
your @code{FULLTEXT} indexes. your @code{FULLTEXT} indexes.
@item @item
...@@ -42463,6 +42488,8 @@ Responsible for @strong{MySQL} configure. ...@@ -42463,6 +42488,8 @@ Responsible for @strong{MySQL} configure.
Full-text search. Full-text search.
@item @item
Added keys to the @code{MERGE} library. Added keys to the @code{MERGE} library.
@item
@code{HANDLER} command.
@end itemize @end itemize
@item Jeremy Cole @item Jeremy Cole
...@@ -42801,6 +42828,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. ...@@ -42801,6 +42828,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
@itemize @bullet @itemize @bullet
@item @item
Added @code{HANDLER} command.
@item
Added @code{SQL_CALC_FOUND_ROWS} and @code{FOUND_ROWS()}. This makes it Added @code{SQL_CALC_FOUND_ROWS} and @code{FOUND_ROWS()}. This makes it
possible to know how many rows a query would have returned if one hadn't possible to know how many rows a query would have returned if one hadn't
used @code{LIMIT}. used @code{LIMIT}.
...@@ -42903,6 +42932,9 @@ not yet 100% confident in this code. ...@@ -42903,6 +42932,9 @@ not yet 100% confident in this code.
@appendixsubsec Changes in release 3.23.37 @appendixsubsec Changes in release 3.23.37
@itemize @bullet @itemize @bullet
@item @item
Added variables @code{ft_min_word_len}, @code{ft_max_word_len}, and
@code{ft_max_word_len_for_sort}.
@item
Changed @code{INNOBASE} to @code{INNODB} (because the @code{INNOBASE} Changed @code{INNOBASE} to @code{INNODB} (because the @code{INNOBASE}
name was already used). Note that all @code{configure} options and name was already used). Note that all @code{configure} options and
@code{mysqld} start options are now using @code{innodb} instead of @code{mysqld} start options are now using @code{innodb} instead of
...@@ -27,6 +27,7 @@ extern "C" { ...@@ -27,6 +27,7 @@ extern "C" {
#endif #endif
#define FT_QUERY_MAXLEN 1024 #define FT_QUERY_MAXLEN 1024
#define HA_FT_MAXLEN 254
typedef struct ft_doc_rec { typedef struct ft_doc_rec {
my_off_t dpos; my_off_t dpos;
...@@ -42,6 +43,10 @@ typedef struct st_ft_doclist { ...@@ -42,6 +43,10 @@ typedef struct st_ft_doclist {
extern const char *ft_precompiled_stopwords[]; extern const char *ft_precompiled_stopwords[];
extern uint ft_min_word_len;
extern uint ft_max_word_len;
extern uint ft_max_word_len_for_sort;
int ft_init_stopwords(const char **); int ft_init_stopwords(const char **);
void ft_free_stopwords(void); void ft_free_stopwords(void);
......
...@@ -25,7 +25,7 @@ static void complain(int val); ...@@ -25,7 +25,7 @@ static void complain(int val);
static int count=0, stats=0, dump=0, verbose=0; static int count=0, stats=0, dump=0, verbose=0;
static char *query=NULL; static char *query=NULL;
#define MAX (MAX_WORD_LEN+10) #define MAX (HA_FT_MAXLEN+10)
#define HOW_OFTEN_TO_WRITE 1000 #define HOW_OFTEN_TO_WRITE 1000
int main(int argc,char *argv[]) int main(int argc,char *argv[])
......
...@@ -164,7 +164,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param) ...@@ -164,7 +164,7 @@ byte ft_get_word(byte **start, byte *end, FT_WORD *word, FTB_PARAM *param)
if ((param->trunc=(doc<end && *doc == FTB_TRUNC))) if ((param->trunc=(doc<end && *doc == FTB_TRUNC)))
doc++; doc++;
if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN && if (word->len >= ft_min_word_len && word->len < ft_max_word_len &&
!is_stopword(word->pos, word->len)) !is_stopword(word->pos, word->len))
{ {
*start=doc; *start=doc;
...@@ -195,7 +195,7 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word) ...@@ -195,7 +195,7 @@ byte ft_simple_get_word(byte **start, byte *end, FT_WORD *word)
word->len= (uint)(doc-word->pos) - mwc; word->len= (uint)(doc-word->pos) - mwc;
if (word->len >= MIN_WORD_LEN && word->len < MAX_WORD_LEN && if (word->len >= ft_min_word_len && word->len < ft_max_word_len &&
!is_stopword(word->pos, word->len)) !is_stopword(word->pos, word->len))
{ {
*start=doc; *start=doc;
......
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
#include "ftdefs.h" #include "ftdefs.h"
uint ft_min_word_len=4;
uint ft_max_word_len=HA_FT_MAXLEN;
uint ft_max_word_len_for_sort=20;
const MI_KEYSEG ft_keysegs[FT_SEGS]={ const MI_KEYSEG ft_keysegs[FT_SEGS]={
{ {
HA_KEYTYPE_VARTEXT, /* type */ HA_KEYTYPE_VARTEXT, /* type */
......
...@@ -48,7 +48,7 @@ int ft_init_stopwords(const char **sws) ...@@ -48,7 +48,7 @@ int ft_init_stopwords(const char **sws)
for(;*sws;sws++) for(;*sws;sws++)
{ {
if( (sw.len= (uint) strlen(sw.pos=*sws)) < MIN_WORD_LEN) continue; if( (sw.len= (uint) strlen(sw.pos=*sws)) < ft_min_word_len) continue;
if(!tree_insert(stopwords3, &sw, 0)) if(!tree_insert(stopwords3, &sw, 0))
{ {
delete_tree(stopwords3); /* purecov: inspected */ delete_tree(stopwords3); /* purecov: inspected */
......
...@@ -22,10 +22,6 @@ ...@@ -22,10 +22,6 @@
#include <m_ctype.h> #include <m_ctype.h>
#include <my_tree.h> #include <my_tree.h>
#define MIN_WORD_LEN 4
#define MAX_WORD_LEN HA_FT_MAXLEN
#define MAX_WORD_LEN_FOR_SORT 20
#define HYPHEN_IS_DELIM #define HYPHEN_IS_DELIM
#define HYPHEN_IS_CONCAT /* not used for now */ #define HYPHEN_IS_CONCAT /* not used for now */
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
/* shouldn't be def'ed when linking with mysql */ /* shouldn't be def'ed when linking with mysql */
#undef EVAL_RUN #undef EVAL_RUN
#define HA_FT_MAXLEN 254
#define HA_FT_WTYPE HA_KEYTYPE_FLOAT #define HA_FT_WTYPE HA_KEYTYPE_FLOAT
#define HA_FT_WLEN 4 #define HA_FT_WLEN 4
#ifdef EVAL_RUN #ifdef EVAL_RUN
......
...@@ -1875,10 +1875,10 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, ...@@ -1875,10 +1875,10 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info,
if (sort_info->keyinfo->flag & HA_FULLTEXT) if (sort_info->keyinfo->flag & HA_FULLTEXT)
{ {
sort_param.max_records=sort_info->max_records= sort_param.max_records=sort_info->max_records=
(ha_rows) (sort_info->filelength/MAX_WORD_LEN_FOR_SORT+1); (ha_rows) (sort_info->filelength/ft_max_word_len_for_sort+1);
sort_param.key_read=sort_ft_key_read; sort_param.key_read=sort_ft_key_read;
sort_param.key_length+=MAX_WORD_LEN_FOR_SORT-MAX_WORD_LEN; sort_param.key_length+=ft_max_word_len_for_sort-ft_max_word_len;
} }
else else
sort_param.key_read=sort_key_read; sort_param.key_read=sort_key_read;
......
...@@ -152,6 +152,12 @@ static CHANGEABLE_VAR changeable_vars[] = { ...@@ -152,6 +152,12 @@ static CHANGEABLE_VAR changeable_vars[] = {
{ "sort_key_blocks",(long*) &check_param.sort_key_blocks,BUFFERS_WHEN_SORTING,4L,100L,0L, { "sort_key_blocks",(long*) &check_param.sort_key_blocks,BUFFERS_WHEN_SORTING,4L,100L,0L,
1L }, 1L },
{ "decode_bits",(long*) &decode_bits,9L,4L,17L,0L,1L }, { "decode_bits",(long*) &decode_bits,9L,4L,17L,0L,1L },
{ "ft_min_word_len", (long*) &ft_min_word_len,
4, 1, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len", (long*) &ft_max_word_len,
HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort,
20, 4, HA_FT_MAXLEN, 0, 1 },
{ NullS,(long*) 0,0L,0L,0L,0L,0L,} }; { NullS,(long*) 0,0L,0L,0L,0L,0L,} };
enum options {OPT_CHARSETS_DIR=256, OPT_SET_CHARSET,OPT_START_CHECK_POS}; enum options {OPT_CHARSETS_DIR=256, OPT_SET_CHARSET,OPT_START_CHECK_POS};
......
Variable_name Value
ft_min_word_len 4
ft_max_word_len 254
ft_max_word_len_for_sort 20
#
# Fulltext configurable parameters
#
show variables like "ft\_%";
...@@ -2664,6 +2664,12 @@ CHANGEABLE_VAR changeable_vars[] = { ...@@ -2664,6 +2664,12 @@ CHANGEABLE_VAR changeable_vars[] = {
DELAYED_QUEUE_SIZE, 1, ~0L, 0, 1 }, DELAYED_QUEUE_SIZE, 1, ~0L, 0, 1 },
{ "flush_time", (long*) &flush_time, { "flush_time", (long*) &flush_time,
FLUSH_TIME, 0, ~0L, 0, 1 }, FLUSH_TIME, 0, ~0L, 0, 1 },
{ "ft_min_word_len", (long*) &ft_min_word_len,
4, 1, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len", (long*) &ft_max_word_len,
HA_FT_MAXLEN, 10, HA_FT_MAXLEN, 0, 1 },
{ "ft_max_word_len_for_sort",(long*) &ft_max_word_len_for_sort,
20, 4, HA_FT_MAXLEN, 0, 1 },
#ifdef HAVE_GEMINI_DB #ifdef HAVE_GEMINI_DB
{ "gemini_buffer_cache", (long*) &gemini_buffer_cache, { "gemini_buffer_cache", (long*) &gemini_buffer_cache,
128 * 8192, 16, LONG_MAX, 0, 1 }, 128 * 8192, 16, LONG_MAX, 0, 1 },
...@@ -2804,6 +2810,9 @@ struct show_var_st init_vars[]= { ...@@ -2804,6 +2810,9 @@ struct show_var_st init_vars[]= {
{"delayed_queue_size", (char*) &delayed_queue_size, SHOW_LONG}, {"delayed_queue_size", (char*) &delayed_queue_size, SHOW_LONG},
{"flush", (char*) &myisam_flush, SHOW_MY_BOOL}, {"flush", (char*) &myisam_flush, SHOW_MY_BOOL},
{"flush_time", (char*) &flush_time, SHOW_LONG}, {"flush_time", (char*) &flush_time, SHOW_LONG},
{"ft_min_word_len", (char*) &ft_min_word_len, SHOW_LONG},
{"ft_max_word_len", (char*) &ft_max_word_len, SHOW_LONG},
{"ft_max_word_len_for_sort",(char*) &ft_max_word_len_for_sort, SHOW_LONG},
#ifdef HAVE_GEMINI_DB #ifdef HAVE_GEMINI_DB
{"gemini_buffer_cache", (char*) &gemini_buffer_cache, SHOW_LONG}, {"gemini_buffer_cache", (char*) &gemini_buffer_cache, SHOW_LONG},
{"gemini_connection_limit", (char*) &gemini_connection_limit, SHOW_LONG}, {"gemini_connection_limit", (char*) &gemini_connection_limit, SHOW_LONG},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment