开发者

Optimizing daily ranking tabulation

开发者 https://www.devze.com 2022-12-20 15:22 出处:网络
Each Poem has two Votes: one record with poem_id, other_poem_id and wins, and a second record which is the inverse of the first. Maybe there is a better way, but I'm trying to find the poems with the highest wi…

Each Poem has two Votes: one record with poem_id, other_poem_id and wins, and a second record which is the inverse of the first. Maybe there is a better way, but I'm trying to find the poems with the highest win percentage over a period of time. It's confusing because of the double records for each comparison. Should I add another table, Results, which has a comparison_id for the two Vote records?

Here is a sample
poem_id:1 other_poem_id:2 wins:3
poem_id:2 other_poem_id:1 wins:3
so it is 50% rather than a running tally

   # Named scope `recent`: joins each poem to its votes rows, keeps poems
   # created within the last 8 days, orders by votes.wins (highest first) and
   # returns at most 10 rows.
   # The finder options are wrapped in a lambda so that 8.days.ago is
   # evaluated each time the scope is used, not once when the class is loaded.
   scope :recent, lambda {
    # Column names are table-qualified because the query spans two tables;
    # presumably both poems and votes carry created_at, in which case an
    # unqualified reference is rejected by SQLite as ambiguous.
    { :joins => "JOIN votes ON votes.poem_id = poems.id",
      :conditions => ["poems.created_at > ?", 8.days.ago],
      :order => "votes.wins DESC",
      :limit => 10 
    } 
   }

ActiveRecord::StatementInvalid: SQLite3::SQLException: ambiguous column name: created_at: SELECT

"poems".* FROM "poems" JOIN votes ON votes.poem_id = poems.id WHERE (created_at > '2010-02-12 15:12:35.764252') ORDER BY wins DESC LIMIT 10

edit: I changed the schema, here is what I'm working with now...

the following is a model keeping track of rankings for poems. I just wrote this 1st draft yesterday. It seems a bit clunky, but I don't know how to improve it just yet. DailyRanking.tabulate will be called every night by cron. (following the model is the schema for the Comparison.)

# == Schema Information
# Schema version: 20100221120442
#
# Table name: daily_rankings
#
#  id          :integer         not null, primary key
#  poem_id     :integer
#  rank        :integer
#  percentile  :integer
#  wins        :integer
#  losses      :integer
#  draws       :integer
#  comparisons :integer
#  created_at  :datetime
#  updated_at  :datetime
#

# Leaderboard rows for poems: one row per rank position, pointing at the poem
# that currently holds that rank.
class DailyRanking < ActiveRecord::Base
  belongs_to :poem

  class << self
    # Recomputes the leaderboard from the comparisons returned by
    # Comparison.day and stores it, rank 1 being the highest win percentage.
    #
    # Win percentage is wins / (wins + losses). Only poems with at least one
    # win are ranked. Comparisons whose winner_id / loser_id match neither
    # participant (presumably draws or skipped votes) contribute nothing.
    #
    # Each rank row is looked up (or created) by its rank number and updated
    # in place. Rows whose rank is larger than the number of poems ranked in
    # this run are left untouched.
    #
    # Returns the ranked Array of [poem_id, win_percentage] pairs, best first.
    # Raises whatever save! raises when a ranking row cannot be saved.
    def tabulate
      wins, losses = tally_outcomes(Comparison.day.all)

      percentages = wins.map do |poem_id, win_count|
        # losses is a Hash with a default of 0, so an undefeated poem scores
        # 1.0 instead of blowing up on a missing entry.
        [poem_id, win_count / (win_count + losses[poem_id]).to_f]
      end

      ranked = percentages.sort { |x, y| y[1] <=> x[1] }

      ranked.each_with_index do |(poem_id, _percentage), index|
        ranking = find_or_create_by_rank(index + 1)
        ranking.poem_id = poem_id
        ranking.save!
      end
    end

    private

    # Counts wins and losses per poem id in a single pass over +comparisons+.
    # A win (loss) is only counted when winner_id (loser_id) is one of the two
    # poems of that comparison.
    #
    # Returns [wins, losses], two Hashes of poem_id => count defaulting to 0.
    def tally_outcomes(comparisons)
      wins = Hash.new(0)
      losses = Hash.new(0)

      comparisons.each do |comparison|
        participants = [comparison.poem1_id, comparison.poem2_id]
        wins[comparison.winner_id] += 1 if participants.include?(comparison.winner_id)
        losses[comparison.loser_id] += 1 if participants.include?(comparison.loser_id)
      end

      [wins, losses]
    end
  end
end

# == Schema Information
# Schema version: 20100221120442
#
# Table name: comparisons
#
#  id         :integer         not null, primary key
#  poem1_id   :integer
#  poem2_id   :integer
#  response   :string(4)       default("none"), not null
#  winner_id  :integer
#  loser_id   :integer
#  user_id    :integer
#  session_id :integer
#  ip         :string(15)
#  created_at :datetime
#  updated_at :datetime
#

# One recorded comparison between two poems (poem1_id / poem2_id), with the
# outcome stored in winner_id / loser_id.
class Comparison < ActiveRecord::Base

  # Comparisons created within the last 24 hours. Wrapped in a lambda so that
  # 1.day.ago is evaluated on every use rather than once at class load.
  # The column is qualified with the table name so the scope stays valid when
  # it is combined with a join onto another table that also has created_at.
  scope :day,   lambda { { :conditions => ["comparisons.created_at > ?", 1.day.ago] } }

end


I think a SQL query that would work for you would be something like SELECT poems.*, ((SELECT wins FROM votes WHERE poem_id = poems.id AND created_at > 8.days.ago) / (SELECT wins FROM votes WHERE other_poem_id = poems.id AND created_at > 8.days.ago)) AS percentage FROM poems ORDER BY percentage DESC LIMIT 10 (with 8.days.ago standing in for the actual timestamp). As far as how to optimize that and translate it to a Rails scope, I'm not sure.

However, the error you are experiencing is due to the way that the ["poems.created_at > ?", 8.days.ago] condition is being translated to SQL. SQLite doesn't know whether you are looking for poems.created_at or votes.created_at (and by the way, from your description, I think you are wanting votes.created_at).

0

精彩评论

暂无评论...
验证码 换一张
取 消