Class: Groonga::VariableSizeColumn

Inherits:
DataColumn show all
Defined in:
ext/groonga/rb-grn-variable-size-column.c,
ext/groonga/rb-grn-variable-size-column.c

Overview

A column for variable size data like text family types and vector column.

Instance Method Summary collapse

Methods inherited from DataColumn

#apply_window_function

Methods inherited from Column

#clear_lock, #disk_usage, #find_indexes, #index?, #indexes, #local_name, #lock, #locked?, #reference?, #rename, #scalar?, #select, #table, #truncate, #unlock, #vector?, #with_weight?

Methods included from Flushable

#flush

Methods inherited from Object

#==, #accessor?, #append, #builtin?, #close, #closed?, #column?, #dirty?, #domain, #function_procedure?, #id, #index_column?, #inspect, #key_accessor?, #last_modified, #name, #path, #persistent?, #prepend, #procedure?, #range, #reference_column?, #remove, #scorer_procedure?, #selector_only_procedure?, #selector_procedure?, #table?, #temporary?, #touch, #unlink, #window_function_procedure?

Instance Method Details

#[](id) ⇒ Array<Hash<Symbol, String>>, ::Object

Gets the value of the variable size column for the record whose ID is id.

Examples:

Gets weight vector value

# Define a "Products" table whose "tags" column is a weight vector.
Groonga::Schema.define do |schema|
  schema.create_table("Products",
                      :type => :patricia_trie,
                      :key_type => "ShortText") do |table|
    # This is weight vector.
    # ":with_weight => true" is important to store weight value.
    table.short_text("tags",
                     :type => :vector,
                     :with_weight => true)
  end
end

# Add a record and assign two weighted tags to it.
products = Groonga["Products"]
rroonga = products.add("Rroonga")
rroonga.tags = [
  {
    :value  => "ruby",
    :weight => 100,
  },
  {
    :value  => "groonga",
    :weight => 10,
  },
]

# Reading the column returns an array of {:value, :weight} hashes.
p rroonga.tags
# => [
#      {:value => "ruby",    :weight => 100},
#      {:value => "groonga", :weight => 10}
#    ]

Parameters:

  • id (Integer, Record)

    The record ID.

Returns:

  • (Array<Hash<Symbol, String>>)

    An array of values if the column is a weight vector column. Each value is a Hash of the following form:

    
    {
      :value  => [KEY],
      :weight => [WEIGHT],
    }
    

    @[KEY]@ is the key of the table that is specified as range on creating the weight vector.

    @[WEIGHT]@ is a positive integer.

  • (::Object)

    See Object#[] for columns except weight vector column.

Since:

  • 4.0.1.



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'ext/groonga/rb-grn-variable-size-column.c', line 161

/*
 * Implementation of VariableSizeColumn#[].
 *
 * For a column without GRN_OBJ_WITH_WEIGHT the call is forwarded to the
 * superclass implementation. For a weight vector column the stored vector
 * is read and converted into a Ruby Array of Hashes, each holding the
 * element under :value and its weight under :weight.
 */
static VALUE
rb_grn_variable_size_column_array_reference (VALUE self, VALUE rb_id)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_obj *value;
    grn_id id;
    VALUE rb_range;
    VALUE rb_elements;
    unsigned int n_elements;
    unsigned int nth;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, NULL,
                                            NULL, &range);

    /* Only weight vector columns are handled here. */
    if ((grn_column_get_flags(context, column) & GRN_OBJ_WITH_WEIGHT) == 0) {
        return rb_call_super(1, &rb_id);
    }

    id = RVAL2GRNID(rb_id, context, range, self);

    /* Reset the value buffer as a vector before reading into it. */
    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    grn_obj_get_value(context, column, id, value);
    rb_grn_context_check(context, self);

    rb_range = GRNTABLE2RVAL(context, range, GRN_FALSE);

    n_elements = grn_vector_size(context, value);
    rb_elements = rb_ary_new2(n_elements);
    for (nth = 0; nth < n_elements; nth++) {
        unsigned int weight = 0;
        VALUE rb_element_value;
        VALUE rb_element;

        if (value->header.type != GRN_UVECTOR) {
            /* Plain vector: each element is returned as a Ruby String. */
            const char *content;
            unsigned int content_length;
            grn_id content_domain;

            content_length = grn_vector_get_element(context,
                                                    value,
                                                    nth,
                                                    &content,
                                                    &weight,
                                                    &content_domain);
            rb_element_value = rb_str_new(content, content_length);
        } else {
            /* uvector: each element is an ID wrapped by rb_grn_record_new(). */
            grn_id record_id;

            record_id = grn_uvector_get_element(context, value, nth, &weight);
            rb_element_value = rb_grn_record_new(rb_range, record_id, Qnil);
        }

        rb_element = rb_hash_new();
        rb_hash_aset(rb_element, RB_GRN_INTERN("value"), rb_element_value);
        rb_hash_aset(rb_element, RB_GRN_INTERN("weight"), UINT2NUM(weight));

        rb_ary_push(rb_elements, rb_element);
    }

    return rb_elements;
}

#[]=(id, elements) ⇒ Object #[]=(id, value) ⇒ Object

Updates the value of the variable size column for the record whose ID is id.

A weight vector column is a special variable size column. This description covers only weight vector columns. Other variable size columns work as you would expect.

Examples:

Use weight vector as matrix search result weight

# Define a "Products" table with a weight vector column and a "Tags"
# table that holds a weighted inverted index for that column.
Groonga::Schema.define do |schema|
  schema.create_table("Products",
                      :type => :patricia_trie,
                      :key_type => "ShortText") do |table|
    # This is weight vector.
    # ":with_weight => true" is important for matrix search result weight.
    table.short_text("tags",
                     :type => :vector,
                     :with_weight => true)
  end

  schema.create_table("Tags",
                      :type => :hash,
                      :key_type => "ShortText") do |table|
    # This is inverted index. It also needs ":with_weight => true".
    table.index("Products.tags", :with_weight => true)
  end
end

# Register two products with weighted tags.
products = Groonga["Products"]
groonga = products.add("Groonga")
groonga.tags = [
  {
    :value  => "groonga",
    :weight => 100,
  },
]
rroonga = products.add("Rroonga")
rroonga.tags = [
  {
    :value  => "ruby",
    :weight => 100,
  },
  {
    :value  => "groonga",
    :weight => 10,
  },
]

result = products.select do |record|
  # Search by "groonga"
  record.match("groonga") do |match_target|
    match_target.tags
  end
end

result.each do |record|
  p [record.key.key, record.score]
end
# Matches all records with weight.
# Each score is the stored weight + 1.
# => ["Groonga", 101]
#    ["Rroonga", 11]

# Increases score for "ruby" 10 times
products.select(# The previous search result. Required.
                :result => result,
                # It just adds score to existing records in the result. Required.
                :operator => Groonga::Operator::ADJUST) do |record|
  record.match("ruby") do |target|
    target.tags * 10 # 10 times
  end
end

result.each do |record|
  p [record.key.key, record.score]
end
# Weight is used for increasing score.
# => ["Groonga", 101]  <- Not changed.
#    ["Rroonga", 1021] <- Increased by 1010 (= 101 * 10): 11 + 1010 = 1021.

Overloads:

  • #[]=(id, elements) ⇒ Object

    This description is for weight vector column.

    Parameters:

    • id (Integer, Record)

      The record ID.

    • elements (Array<Hash<Symbol, String>>)

      An array of values for weight vector. Each value is a Hash like the following form:

      
      {
        :value  => [KEY],
        :weight => [WEIGHT],
      }
      

      @[KEY]@ must be the same type of the key of the table that is specified as range on creating the weight vector.

      @[WEIGHT]@ must be a positive integer. Note that the search score becomes @weight + 1@. This means that if you want to get 10 as the score, you should set 9 as the weight.

  • #[]=(id, value) ⇒ Object

    This description is for variable size columns except weight vector column.

    Parameters:

    • id (Integer, Record)

      The record ID.

    • value (::Object)

      A new value.

    See Also:

Since:

  • 4.0.1



375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'ext/groonga/rb-grn-variable-size-column.c', line 375

/*
 * Implementation of VariableSizeColumn#[]=.
 *
 * Columns without GRN_OBJ_WITH_WEIGHT are delegated to the superclass
 * implementation. For a weight vector column, rb_value must be either an
 * Array (each item is scanned for "value" and "weight" options) or a Hash
 * (handled by hash_element_to_vector_element()); anything else raises
 * ArgumentError. The assembled vector is stored with GRN_OBJ_SET.
 *
 * Returns rb_value.
 */
static VALUE
rb_grn_variable_size_column_array_set (VALUE self, VALUE rb_id, VALUE rb_value)
{
    grn_ctx *context = NULL;
    grn_obj *column, *range;
    grn_column_flags column_flags;
    grn_rc rc;
    grn_id id;
    grn_obj *value, *element_value;
    int flags = GRN_OBJ_SET;

    rb_grn_variable_size_column_deconstruct(SELF(self), &column, &context,
                                            NULL, NULL, &value, &element_value,
                                            NULL, &range);

    /* Non weight vector columns use the generic setter of the superclass. */
    column_flags = grn_column_get_flags(context, column);
    if (!(column_flags & GRN_OBJ_WITH_WEIGHT)) {
        VALUE args[2];
        args[0] = rb_id;
        args[1] = rb_value;
        return rb_call_super(2, args);
    }

    id = RVAL2GRNID(rb_id, context, range, self);

    /* Reset the value buffer as a vector and mark it as carrying weights. */
    grn_obj_reinit(context, value,
                   value->header.domain,
                   value->header.flags | GRN_OBJ_VECTOR);
    value->header.flags |= GRN_OBJ_WITH_WEIGHT;
    if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cArray))) {
        int i, n;
        n = RARRAY_LEN(rb_value);
        for (i = 0; i < n; i++) {
            /* The weight stays 0 when the element gives no "weight". */
            unsigned int weight = 0;
            VALUE rb_element_value, rb_weight;

            rb_grn_scan_options(RARRAY_PTR(rb_value)[i],
                                "value", &rb_element_value,
                                "weight", &rb_weight,
                                NULL);

            if (!NIL_P(rb_weight)) {
                weight = NUM2UINT(rb_weight);
            }

            if (value->header.type == GRN_UVECTOR) {
                /* uvector: the element value is converted to an ID. */
                grn_id id = RVAL2GRNID(rb_element_value, context, range, self);
                grn_uvector_add_element(context, value, id, weight);
            } else {
                /* element_value is reused for every element, so it is
                 * rewound first; a nil value skips the conversion. */
                GRN_BULK_REWIND(element_value);
                if (!NIL_P(rb_element_value)) {
                    RVAL2GRNBULK(rb_element_value, context, element_value);
                }

                grn_vector_add_element(context, value,
                                       GRN_BULK_HEAD(element_value),
                                       GRN_BULK_VSIZE(element_value),
                                       weight,
                                       element_value->header.domain);
            }
        }
    } else if (RVAL2CBOOL(rb_obj_is_kind_of(rb_value, rb_cHash))) {
        /* Hash form: each pair is handled by hash_element_to_vector_element()
         * (defined elsewhere in the file); per the error message below, the
         * key is the vector value and the value is its weight. */
        HashElementToVectorElementData data;
        data.self = self;
        data.context = context;
        data.vector = value;
        data.element_value = element_value;
        data.range = range;
        rb_hash_foreach(rb_value, hash_element_to_vector_element, (VALUE)&data);
    } else {
        rb_raise(rb_eArgError,
                 "<%s>: "
                 "weight vector value must be an array of index value or "
                 "a hash that key is vector value and value is vector weight: "
                 "<%s>",
                 rb_grn_inspect(self),
                 rb_grn_inspect(rb_value));
    }

    /* Store the assembled vector, then check the context and return code. */
    rc = grn_obj_set_value(context, column, id, value, flags);
    rb_grn_context_check(context, self);
    rb_grn_rc_check(rc, self);

    return rb_value;
}

#compressed?Boolean #compressed?(type) ⇒ Boolean

Returns whether the column is compressed or not. If @type@ is specified, it returns whether the column is compressed by @type@ or not.

Overloads:

  • #compressed?Boolean

    Returns whether the column is compressed or not.

    Returns:

    • (Boolean)

      whether the column is compressed or not.

  • #compressed?(type) ⇒ Boolean

    Returns whether specified compressed type is used or not.

    Parameters:

    • type (:zlib, :lz4, :zstd, :zstandard)

      (nil)

    Returns:

    • (Boolean)

      whether specified compressed type is used or not.

Returns:

  • (Boolean)

Since:

  • 1.3.1



472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
# File 'ext/groonga/rb-grn-variable-size-column.c', line 472

/*
 * Implementation of VariableSizeColumn#compressed?.
 *
 * With no argument (or nil) it reports whether the column is compressed by
 * any type. With a type it reports whether the column is compressed by that
 * type. Accepted types are "zlib", "lz4", "zstd" and "zstandard"; "lzo" is
 * also accepted and treated as "lz4" for backward compatibility. Any other
 * type raises ArgumentError.
 *
 * The result is true only when the column's compression flag matches and
 * grn_obj_get_info() reports support for that compression type.
 */
static VALUE
rb_grn_variable_size_column_compressed_p (int argc, VALUE *argv, VALUE self)
{
    RbGrnVariableSizeColumn *rb_grn_column;
    grn_ctx *context = NULL;
    grn_obj *column;
    grn_column_flags flags;
    VALUE type;
    grn_bool compressed_p = GRN_FALSE;
    grn_bool accept_any_type = GRN_FALSE;
    grn_bool need_zlib_check = GRN_FALSE;
    grn_bool need_lz4_check = GRN_FALSE;
    grn_bool need_zstd_check = GRN_FALSE;

    rb_scan_args(argc, argv, "01", &type);

    if (NIL_P(type)) {
        accept_any_type = GRN_TRUE;
    } else {
        if (rb_grn_equal_option(type, "zlib")) {
            need_zlib_check = GRN_TRUE;
        } else if (rb_grn_equal_option(type, "lzo")) {
            /* TODO: for backward compatibility */
            need_lz4_check = GRN_TRUE;
        } else if (rb_grn_equal_option(type, "lz4")) {
            need_lz4_check = GRN_TRUE;
        } else if (rb_grn_equal_option(type, "zstd")) {
            need_zstd_check = GRN_TRUE;
        } else if (rb_grn_equal_option(type, "zstandard")) {
            /* Long alias of "zstd", as listed in the error message below. */
            need_zstd_check = GRN_TRUE;
        } else {
            rb_raise(rb_eArgError,
                     "compressed type should be "
                     "<:zlib>, <:lz4>, <:zstd> or <:zstandard>: <%s>",
                     rb_grn_inspect(type));
        }
    }

    rb_grn_column = SELF(self);
    rb_grn_object_deconstruct(RB_GRN_OBJECT(rb_grn_column), &column, &context,
                              NULL, NULL,
                              NULL, NULL);

    /* Look only at the compression bits; a column with none of them set
     * falls through the switch and is reported as not compressed. */
    flags = grn_column_get_flags(context, column);
    switch (flags & GRN_OBJ_COMPRESS_MASK) {
      case GRN_OBJ_COMPRESS_ZLIB:
        if (accept_any_type || need_zlib_check) {
            grn_obj support_p;
            GRN_BOOL_INIT(&support_p, 0);
            grn_obj_get_info(context, NULL, GRN_INFO_SUPPORT_ZLIB, &support_p);
            compressed_p = GRN_BOOL_VALUE(&support_p);
        }
        break;
      case GRN_OBJ_COMPRESS_LZ4:
        if (accept_any_type || need_lz4_check) {
            grn_obj support_p;
            GRN_BOOL_INIT(&support_p, 0);
            grn_obj_get_info(context, NULL, GRN_INFO_SUPPORT_LZ4, &support_p);
            compressed_p = GRN_BOOL_VALUE(&support_p);
        }
        break;
      case GRN_OBJ_COMPRESS_ZSTD:
        if (accept_any_type || need_zstd_check) {
            grn_obj support_p;
            GRN_BOOL_INIT(&support_p, 0);
            grn_obj_get_info(context, NULL, GRN_INFO_SUPPORT_ZSTD, &support_p);
            compressed_p = GRN_BOOL_VALUE(&support_p);
        }
        break;
    }

    return CBOOL2RVAL(compressed_p);
}

#defrag(options = {}) ⇒ Integer

Defrags the column.

Parameters:

  • options (::Hash) (defaults to: {})

    The name and value pairs. Omitted names are initialized as the default value.

Options Hash (options):

  • :threshold (Integer) — default: 0

    the threshold that determines whether a segment is defragmented. Available values are -4..22. -4 means all segments are defragmented. 22 means no segment is defragmented.

Returns:

  • (Integer)

    the number of defraged segments

Since:

  • 1.2.6



559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
# File 'ext/groonga/rb-grn-variable-size-column.c', line 559

/*
 * Implementation of VariableSizeColumn#defrag.
 *
 * Accepts an optional options Hash. Its "threshold" entry, when not nil,
 * is converted to an int and passed to grn_obj_defrag(); otherwise 0 is
 * used. Returns the value reported by grn_obj_defrag() as a Ruby Integer.
 */
static VALUE
rb_grn_variable_size_column_defrag (int argc, VALUE *argv, VALUE self)
{
    grn_ctx *context = NULL;
    grn_obj *column;
    VALUE options;
    VALUE rb_threshold;
    int threshold;
    int n_defragged;

    rb_scan_args(argc, argv, "01", &options);
    rb_grn_scan_options(options,
                        "threshold", &rb_threshold,
                        NULL);
    threshold = NIL_P(rb_threshold) ? 0 : NUM2INT(rb_threshold);

    rb_grn_object_deconstruct(RB_GRN_OBJECT(SELF(self)), &column, &context,
                              NULL, NULL,
                              NULL, NULL);

    n_defragged = grn_obj_defrag(context, column, threshold);
    /* Check the context for an error left by the defrag call. */
    rb_grn_context_check(context, self);

    return INT2NUM(n_defragged);
}

#reindexvoid

Recreates all index columns for the column.

This method is useful when you have any broken index columns for the column. You don’t need to specify each index column. However, this method takes more time than reindexing only the index columns that need it.

You can use Database#reindex to recreate all index columns in a database.

You can use TableKeySupport#reindex to recreate all index columns in a table.

You can use IndexColumn#reindex to specify the reindex target index column.

Examples:

How to recreate all index columns for the column

# Define a "Memos" table with two text columns and two term tables,
# each of which indexes both columns.
Groonga::Schema.define do |schema|
  schema.create_table("Memos") do |table|
    table.short_text("title")
    table.text("content")
  end

  # Term table tokenized with TokenBigram.
  schema.create_table("BigramTerms",
                      :type => :patricia_trie,
                      :key_type => :short_text,
                      :normalizer => "NormalizerAuto",
                      :default_tokenizer => "TokenBigram") do |table|
    table.index("Memos.title")
    table.index("Memos.content")
  end

  # Term table tokenized with TokenMecab.
  schema.create_table("MeCabTerms",
                      :type => :patricia_trie,
                      :key_type => :short_text,
                      :normalizer => "NormalizerAuto",
                      :default_tokenizer => "TokenMecab") do |table|
    table.index("Memos.title")
    table.index("Memos.content")
  end
end

# Recreate only the index columns that target "Memos.content".
Groonga["Memos.content"].reindex
# They are called:
#   Groonga["BigramTerms.Memos_content"].reindex
#   Groonga["MeCabTerms.Memos_content"].reindex
#
# They aren't called:
#   Groonga["BigramTerms.Memos_title"].reindex
#   Groonga["MeCabTerms.Memos_title"].reindex

This method returns an undefined value.

See Also:

Since:

  • 5.1.1



649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
# File 'ext/groonga/rb-grn-variable-size-column.c', line 649

/*
 * Implementation of VariableSizeColumn#reindex.
 *
 * Calls grn_obj_reindex() on the column, then checks both the context and
 * the returned status code. Always returns nil.
 */
static VALUE
rb_grn_variable_size_column_reindex (VALUE self)
{
    grn_ctx *context;
    grn_obj *column;
    grn_rc status;

    rb_grn_variable_size_column_deconstruct(SELF(self),
                                            &column,
                                            &context,
                                            NULL,
                                            NULL,
                                            NULL,
                                            NULL,
                                            NULL,
                                            NULL);

    status = grn_obj_reindex(context, column);
    /* The context check runs first, then the status code check. */
    rb_grn_context_check(context, self);
    rb_grn_rc_check(status, self);

    return Qnil;
}