Class: RedAmber::Group
- Inherits:
-
Object
- Object
- RedAmber::Group
- Includes:
- Enumerable, Helper
- Defined in:
- lib/red_amber/group.rb
Overview
Group class
Instance Attribute Summary collapse
-
#dataframe ⇒ DataFrame
readonly
Source DataFrame.
-
#group_keys ⇒ Array
readonly
Keys for grouping by value.
Instance Method Summary collapse
-
#agg_sum(*summary_keys) ⇒ Object
private
Aggregating summary.
-
#all(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#any(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
- #count(*group_keys) ⇒ Object
-
#count_uniq(*group_keys) ⇒ DataFrame
Count the unique values in each group.
-
#each ⇒ Object
private
Iterates over each record group as a DataFrame, or returns an Enumerator when no block is given.
-
#filters ⇒ Array
private
Returns an Array of boolean filters, each selecting the records of one group.
-
#group_count ⇒ DataFrame
(also: #count_all)
Returns each record group size as a DataFrame.
-
#grouped_frame ⇒ DataFrame
(also: #none)
Return grouped DataFrame only for group keys.
-
#initialize(dataframe, *group_keys) ⇒ Group
constructor
Creates a new Group object.
-
#inspect ⇒ String
String representation of self.
-
#max(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#mean(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#median(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#min(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#one(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#product(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#stddev(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#sum(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
-
#summarize(*args, &block) ⇒ Object
Summarize Group by aggregation functions from the block.
-
#variance(*group_keys) ⇒ DataFrame
Aggregated DataFrame.
Constructor Details
#initialize(dataframe, *group_keys) ⇒ Group
Creates a new Group object.
65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/red_amber/group.rb', line 65 def initialize(dataframe, *group_keys) @dataframe = dataframe @group_keys = group_keys.flatten raise GroupArgumentError, 'group_keys are empty.' if @group_keys.empty? d = @group_keys - @dataframe.keys raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty? @group = @dataframe.table.group(*@group_keys) end |
Instance Attribute Details
#dataframe ⇒ DataFrame (readonly)
Source DataFrame.
16 17 18 |
# File 'lib/red_amber/group.rb', line 16 def dataframe @dataframe end |
#group_keys ⇒ Array (readonly)
Keys for grouping by value.
23 24 25 |
# File 'lib/red_amber/group.rb', line 23 def group_keys @group_keys end |
Instance Method Details
#agg_sum(*summary_keys) ⇒ Object
This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.
Aggregating summary.
604 605 606 |
# File 'lib/red_amber/group.rb', line 604 def agg_sum(*summary_keys) call_aggregating_function(:sum, summary_keys, _options = nil) end |
#all(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
111 |
# File 'lib/red_amber/group.rb', line 111 define_group_aggregation :all |
#any(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
128 |
# File 'lib/red_amber/group.rb', line 128 define_group_aggregation :any |
#count(*group_keys) ⇒ Object
162 163 164 165 166 167 168 169 |
# File 'lib/red_amber/group.rb', line 162 def count(*group_keys) df = __count(group_keys) if df.pick(@group_keys.size..).to_h.values.uniq.size == 1 df.pick(0..@group_keys.size).rename { [keys[-1], :count] } else df end end |
#count_uniq(*group_keys) ⇒ DataFrame
Count the unique values in each group.
Returns aggregated DataFrame.
208 |
# File 'lib/red_amber/group.rb', line 208 define_group_aggregation :count_distinct |
#each ⇒ Enumerator #each {|df| ... } ⇒ Integer
This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.
Iterates over each record group as a DataFrame, or returns an Enumerator when no block is given.
431 432 433 434 435 436 437 438 |
# File 'lib/red_amber/group.rb', line 431 def each return enum_for(:each) unless block_given? filters.each do |filter| yield @dataframe.filter(filter) end @filters.size end |
#filters ⇒ Array
This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.
Returns an Array of boolean filters, each selecting the records of one group.
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 |
# File 'lib/red_amber/group.rb', line 385 def filters @filters ||= begin group_values = group_table[group_keys].each_record.map(&:to_a) Enumerator.new(group_table.n_rows) do |yielder| group_values.each do |values| booleans = values.map.with_index do |value, i| column = @dataframe[group_keys[i]].data if value.nil? Arrow::Function.find('is_null').execute([column]) elsif value.is_a?(Float) && value.nan? Arrow::Function.find('is_nan').execute([column]) else Arrow::Function.find('equal').execute([column, value]) end end filter = booleans.reduce do |result, datum| Arrow::Function.find('and_kleene').execute([result, datum]) end yielder << Vector.create(filter.value) end end end end |
#group_count ⇒ DataFrame Also known as: count_all
Returns each record group size as a DataFrame.
188 189 190 |
# File 'lib/red_amber/group.rb', line 188 def group_count DataFrame.create(group_table) end |
#grouped_frame ⇒ DataFrame Also known as: none
Return grouped DataFrame only for group keys.
595 596 597 |
# File 'lib/red_amber/group.rb', line 595 def grouped_frame DataFrame.create(group_table[group_keys]) end |
#inspect ⇒ String
String representation of self.
455 456 457 |
# File 'lib/red_amber/group.rb', line 455 def inspect "#<#{self.class} : #{format('0x%016x', object_id)}>\n#{group_count}" end |
#max(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
158 |
# File 'lib/red_amber/group.rb', line 158 define_group_aggregation :count |
#mean(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
250 |
# File 'lib/red_amber/group.rb', line 250 define_group_aggregation :mean |
#median(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
267 |
# File 'lib/red_amber/group.rb', line 267 define_group_aggregation :approximate_median |
#min(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
292 |
# File 'lib/red_amber/group.rb', line 292 define_group_aggregation :min |
#one(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
309 |
# File 'lib/red_amber/group.rb', line 309 define_group_aggregation :one |
#product(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
326 |
# File 'lib/red_amber/group.rb', line 326 define_group_aggregation :product |
#stddev(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
343 |
# File 'lib/red_amber/group.rb', line 343 define_group_aggregation :stddev |
#sum(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
360 |
# File 'lib/red_amber/group.rb', line 360 define_group_aggregation :sum |
#summarize {|group| ... } ⇒ DataFrame #summarize {|group| ... } ⇒ DataFrame #summarize {|group| ... } ⇒ DataFrame
Summarize Group by aggregation functions from the block.
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 |
# File 'lib/red_amber/group.rb', line 549 def summarize(*args, &block) if block agg = instance_eval(&block) unless args.empty? agg = [agg] if agg.is_a?(DataFrame) agg = args.zip(agg).to_h end else agg = args end case agg when DataFrame agg when Array aggregations = agg.map do |df| v = df.vectors[-1] [v.key, v] end agg[0].assign(aggregations) when Hash aggregations = agg.map do |key, df| aggregated_keys = df.keys - @group_keys if aggregated_keys.size > 1 message = "accept only one column from the Hash: #{aggregated_keys.join(', ')}" raise GroupArgumentError, message end v = df.vectors[-1] [key, v] end agg.values[-1].drop(-1).assign(aggregations) else raise GroupArgumentError, "Unknown argument: #{agg}" end end |
#variance(*group_keys) ⇒ DataFrame
Returns aggregated DataFrame.
377 |
# File 'lib/red_amber/group.rb', line 377 define_group_aggregation :variance |