Class: Groonga::Normalizer

Inherits:
Object
  • Object
show all
Defined in:
ext/groonga/rb-grn-normalizer.c,
ext/groonga/rb-grn-normalizer.c

Overview

It normalizes strings.

Class Method Summary collapse

Class Method Details

.normalize(string, options = {:remove_blank => true}) ⇒ String

Normalizes the `string`.

Examples:

# Normalizes "AbC" with the default normalizer
Groonga::Normalizer.normalize("AbC") # => "abc"

Returns The normalized string

Parameters:

  • string (String)

    The original string

  • options (::Hash) (defaults to: {:remove_blank => true})

    The optional parameters.

Options Hash (options):

  • :remove_blank (Object) — default: true

    If it’s true, all blank characters are removed.

  • :remove_tokenized_delimiter (Object) — default: false

    If it’s true, all tokenized delimiter characters are removed. The character is U+FFFE.

Returns:

  • (String)

    The normalized string



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'ext/groonga/rb-grn-normalizer.c', line 50

/*
 * Implementation of Groonga::Normalizer.normalize(string, options).
 *
 * Encodes `string` for the current Groonga context, normalizes it with
 * GRN_NORMALIZER_AUTO and returns the result as a new Ruby String.
 *
 * Arguments (via argc/argv):
 *   string  - the string to normalize (required).
 *   options - optional Hash:
 *     :remove_blank                 - nil is treated as true; when truthy,
 *                                     GRN_STRING_REMOVE_BLANK is set.
 *     :remove_tokenized_delimiter   - nil is treated as false; when truthy,
 *                                     GRN_STRING_REMOVE_TOKENIZED_DELIMITER
 *                                     is set.
 *     :remove_tokenized_delimiter_p - legacy spelling of the option above;
 *                                     consulted only when the documented
 *                                     key is absent or nil.
 *
 * Returns the normalized String. An empty input yields an empty String
 * without calling the normalizer. Returns nil when grn_string_open()
 * produced no object and rb_grn_context_check() did not raise.
 */
static VALUE
rb_grn_normalizer_s_normalize (int argc, VALUE *argv, VALUE klass)
{
    VALUE rb_context = Qnil;
    VALUE rb_string;
    VALUE rb_options;
    VALUE rb_remove_blank_p;
    VALUE rb_remove_tokenized_delimiter_p;
    VALUE rb_legacy_remove_tokenized_delimiter_p;
    VALUE rb_encoded_string;
    VALUE rb_normalized_string;
    grn_ctx *context = NULL;
    grn_obj *grn_string;
    grn_obj *normalizer = GRN_NORMALIZER_AUTO;
    int flags = 0;
    const char *normalized_string;
    unsigned int normalized_string_length;

    rb_scan_args(argc, argv, "11", &rb_string, &rb_options);
    /* The documented key is "remove_tokenized_delimiter". The "_p" variant
     * was the only key accepted previously, so it is kept as an alias to
     * avoid breaking existing callers. */
    rb_grn_scan_options(rb_options,
                        "remove_blank", &rb_remove_blank_p,
                        "remove_tokenized_delimiter",
                        &rb_remove_tokenized_delimiter_p,
                        "remove_tokenized_delimiter_p",
                        &rb_legacy_remove_tokenized_delimiter_p,
                        NULL);

    context = rb_grn_context_ensure(&rb_context);
    rb_encoded_string = rb_grn_context_rb_string_encode(context, rb_string);
    if (RSTRING_LEN(rb_encoded_string) == 0) {
        /* Nothing to normalize: skip opening a grn_string altogether. */
        return rb_grn_context_rb_string_new(context, "", 0);
    }

    /* NOTE(review): assumes rb_grn_scan_options() stores nil for keys that
     * were not given; the nil checks below rely on that. */
    if (NIL_P(rb_remove_blank_p)) {
        rb_remove_blank_p = Qtrue;
    }
    if (RVAL2CBOOL(rb_remove_blank_p)) {
        flags |= GRN_STRING_REMOVE_BLANK;
    }
    /* The documented key wins; fall back to the legacy key, then to false. */
    if (NIL_P(rb_remove_tokenized_delimiter_p)) {
        rb_remove_tokenized_delimiter_p =
            rb_legacy_remove_tokenized_delimiter_p;
    }
    if (NIL_P(rb_remove_tokenized_delimiter_p)) {
        rb_remove_tokenized_delimiter_p = Qfalse;
    }
    if (RVAL2CBOOL(rb_remove_tokenized_delimiter_p)) {
        flags |= GRN_STRING_REMOVE_TOKENIZED_DELIMITER;
    }
    grn_string = grn_string_open(context,
                                 RSTRING_PTR(rb_encoded_string),
                                 RSTRING_LEN(rb_encoded_string),
                                 normalizer,
                                 flags);
    /* Presumably raises a Ruby exception when the context holds an error;
     * argv[0] (the input string) is passed as the related object. */
    rb_grn_context_check(context, argv[0]);
    if (!grn_string) {
        return Qnil;
    }
    grn_string_get_normalized(context, grn_string,
                              &normalized_string, &normalized_string_length,
                              NULL);
    /* Copy into a Ruby String before closing grn_string, which is where
     * normalized_string was obtained from. */
    rb_normalized_string =
        rb_grn_context_rb_string_new(context,
                                     normalized_string,
                                     normalized_string_length);
    grn_obj_close(context, grn_string);

    return rb_normalized_string;
}