关于C#:随机加权选择

关于C#:随机加权选择

Random weighted choice

考虑以下代表经纪人的类:

1
2
3
4
5
6
7
8
9
10
11
public class Broker
{
    public string Name = string.Empty;
    public int Weight = 0;

    public Broker(string n, int w)
    {
        this.Name = n;
        this.Weight = w;
    }
}

考虑到它们的权重,我想从一个数组中随机选择一个Broker。

您如何看待下面的代码?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class Program
    {
        private static Random _rnd = new Random();

        public static Broker GetBroker(List<Broker> brokers, int totalWeight)
        {
            // totalWeight is the sum of all brokers' weight

            int randomNumber = _rnd.Next(0, totalWeight);

            Broker selectedBroker = null;
            foreach (Broker broker in brokers)
            {
                if (randomNumber <= broker.Weight)
                {
                    selectedBroker = broker;
                    break;
                }

                randomNumber = randomNumber - broker.Weight;
            }

            return selectedBroker;
        }


        static void Main(string[] args)
        {
            List<Broker> brokers = new List<Broker>();
            brokers.Add(new Broker("A", 10));
            brokers.Add(new Broker("B", 20));
            brokers.Add(new Broker("C", 20));
            brokers.Add(new Broker("D", 10));

            // total the weigth
            int totalWeight = 0;
            foreach (Broker broker in brokers)
            {
                totalWeight += broker.Weight;
            }

            while (true)
            {
                Dictionary<string, int> result = new Dictionary<string, int>();

                Broker selectedBroker = null;

                for (int i = 0; i < 1000; i++)
                {
                    selectedBroker = GetBroker(brokers, totalWeight);
                    if (selectedBroker != null)
                    {
                        if (result.ContainsKey(selectedBroker.Name))
                        {
                            result[selectedBroker.Name] = result[selectedBroker.Name] + 1;
                        }
                        else
                        {
                            result.Add(selectedBroker.Name, 1);
                        }
                    }
                }


                Console.WriteLine("A\\t\\t" + result["A"]);
                Console.WriteLine("B\\t\\t" + result["B"]);
                Console.WriteLine("C\\t\\t" + result["C"]);
                Console.WriteLine("D\\t\\t" + result["D"]);

                result.Clear();
                Console.WriteLine();
                Console.ReadLine();
            }
        }
    }

我不太自信 当我运行此代码时,经纪人A总是比经纪人D获得更多的匹配,而且它们的权重相同。

有没有更准确的算法?

谢谢!


您的算法几乎是正确的。但是,测试应该是<而不是<=

1
if (randomNumber < broker.Weight)

这是因为随机数包括0,而totalWeight是排除的。换句话说,权重为0的经纪人仍然很少有机会被选中-根本不是您想要的。这说明经纪人A的点击率高于经纪人D。

除此之外,您的算法很好,而且实际上是解决此问题的规范方法。


可以用于任何数据类型的通用性怎么样?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;

public static class IEnumerableExtensions {

    public static T RandomElementByWeight< T >(this IEnumerable< T > sequence, Func<T, float> weightSelector) {
        float totalWeight = sequence.Sum(weightSelector);
        // The weight we are after...
        float itemWeightIndex =  new Random().NextDouble() * totalWeight;
        float currentWeightIndex = 0;

        foreach(var item in from weightedItem in sequence select new { Value = weightedItem, Weight = weightSelector(weightedItem) }) {
            currentWeightIndex += item.Weight;

            // If we've hit or passed the weight we are after for this item then it's the one we want....
            if(currentWeightIndex >= itemWeightIndex)
                return item.Value;

        }

        return default(T);

    }

}

只需致电

1
2
3
4
5
6
7
    Dictionary<string, float> foo = new Dictionary<string, float>();
    foo.Add("Item 25% 1", 0.5f);
    foo.Add("Item 25% 2", 0.5f);
    foo.Add("Item 50%", 1f);

    for(int i = 0; i < 10; i++)
        Console.WriteLine(this,"Item Chosen {0}", foo.RandomElementByWeight(e => e.Value));

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class Program
{
    static void Main(string[] args)
    {
        var books = new List<Book> {
        new Book{Isbn=1,Name="A",Weight=1},
        new Book{Isbn=2,Name="B",Weight=100},
        new Book{Isbn=3,Name="C",Weight=1000},
        new Book{Isbn=4,Name="D",Weight=10000},
        new Book{Isbn=5,Name="E",Weight=100000}};

        Book randomlySelectedBook = WeightedRandomization.Choose(books);
    }
}

public static class WeightedRandomization
{
    public static T Choose< T >(List< T > list) where T : IWeighted
    {
        if (list.Count == 0)
        {
            return default(T);
        }

        int totalweight = list.Sum(c => c.Weight);
        Random rand = new Random();
        int choice = rand.Next(totalweight);
        int sum = 0;

        foreach (var obj in list)
        {
            for (int i = sum; i < obj.Weight + sum; i++)
            {
                if (i >= choice)
                {
                    return obj;
                }
            }
            sum += obj.Weight;
        }

        return list.First();
    }
}

public interface IWeighted
{
    int Weight { get; set; }
}

public class Book : IWeighted
{
    public int Isbn { get; set; }
    public string Name { get; set; }
    public int Weight { get; set; }
}

由于这是Google的最佳结果:

我为随机选择的加权项目创建了一个C#库。

  • 它同时实现了树选择和Walker别名方法算法,以在所有用例中提供最佳性能。
  • 它已经过单元测试和优化。
  • 它具有LINQ支持。
  • 它是免费的并且是开源的,并根据MIT许可进行了许可。

一些示例代码:

1
2
3
4
5
6
7
8
9
10
IWeightedRandomizer<string> randomizer = new DynamicWeightedRandomizer<string>();
randomizer["Joe"] = 1;
randomizer["Ryan"] = 2;
randomizer["Jason"] = 2;

string name1 = randomizer.RandomWithReplacement();
//name1 has a 20% chance of being"Joe", 40% of"Ryan", 40% of"Jason"

string name2 = randomizer.RandomWithRemoval();
//Same as above, except whichever one was chosen has been removed from the list.

当选择代理而不是内存使用量时,另一种方法有利于提高速度。基本上,我们创建的列表包含与指定权重相同数量的对代理实例的引用。

1
2
3
4
5
6
7
8
9
List<Broker> brokers = new List<Broker>();
for (int i=0; i<10; i++)
    brokers.Add(new Broker("A", 10));
for (int i=0; i<20; i++)
    brokers.Add(new Broker("B", 20));
for (int i=0; i<20; i++)
    brokers.Add(new Broker("C", 20));
for (int i=0; i<10; i++)
    brokers.Add(new Broker("D", 10));

然后,要选择一个随机加权的实例是O(1)操作:

1
2
int randomNumber = _rnd.Next(0, brokers.length);
selectedBroker = brokers[randomNumber];

有点晚了,但这是C#7示例。它很小,可以正确分配。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
public static class RandomTools
{
    public static T PickRandomItemWeighted< T >(IList<(T Item, int Weight)> items)
    {
        if ((items?.Count ?? 0) == 0)
        {
            return default;
        }

        int offset = 0;
        (T Item, int RangeTo)[] rangedItems = items
            .OrderBy(item => item.Weight)
            .Select(entry => (entry.Item, RangeTo: offset += entry.Weight))
            .ToArray();

        int randomNumber = new Random().Next(items.Sum(item => item.Weight)) + 1;
        return rangedItems.First(item => randomNumber <= item.RangeTo).Item;
    }
}

如果您想提高速度,则可以考虑使用加权储层采样,而不必提前找到总权重(但可以从随机数生成器中采样更多)。该代码可能看起来像

1
2
3
4
5
6
7
8
Broker selected = null;
int s = 0;
foreach(Broker broker in brokers) {
    s += broker.Weight;
    if (broker.Weight <= _rnd.Next(0,s)) {
        selected = broker;
    }
}

这需要通过列表代理进行一次。但是,如果经纪人列表是固定的或没有变化,那么您通常可以保留一组累加总和,即A [i]是所有经纪人的权重之和0,..,i-1。那么A [n]是总权重,如果您选择一个介于1到A [n-1]之间的数字,则说x,您会找到经纪人js.t。 A [j-1] <= x


原始问题中的实现对我来说有点奇怪;

列表的总权重为60,因此随机数为0-59。
它总是对照权重检查随机数,然后将其递减。
在我看来,它会根据顺序对列表中的事物表示青睐。

这是我正在使用的通用实现-关键在Random属性中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
using System;
using System.Collections.Generic;
using System.Linq;

public class WeightedList< T >
{
    private readonly Dictionary<T,int> _items = new Dictionary<T,int>();

    // Doesn't allow items with zero weight; to remove an item, set its weight to zero
    public void SetWeight(T item, int weight)
    {
        if (_items.ContainsKey(item))
        {
            if (weight != _items[item])
            {
                if (weight > 0)
                {
                    _items[item] = weight;
                }
                else
                {
                    _items.Remove(item);
                }

                _totalWeight = null; // Will recalculate the total weight later
            }
        }
        else if (weight > 0)
        {
            _items.Add(item, weight);

            _totalWeight = null; // Will recalculate the total weight later
        }
    }

    public int GetWeight(T item)
    {
        return _items.ContainsKey(item) ? _items[item] : 0;
    }

    private int? _totalWeight;
    public int totalWeight
    {
        get
        {
            if (!_totalWeight.HasValue) _totalWeight = _items.Sum(x => x.Value);

            return _totalWeight.Value;
        }
    }

    public T Random
    {
        get
        {
            var temp = 0;
            var random = new Random().Next(totalWeight);

            foreach (var item in _items)
            {
                temp += item.Value;

                if (random < temp) return item.Key;
            }

            throw new Exception($"unable to determine random {typeof(T)} at {random} in {totalWeight}");
        }
    }
}

只是分享我自己的实现。希望您会发现它有用。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
    // Author: Giovanni Costagliola <giovanni.costagliola@gmail.com>

    using System;
    using System.Collections.Generic;
    using System.Linq;

    namespace Utils
    {
    /// <summary>
    /// Represent a Weighted Item.
    /// </summary>
    public interface IWeighted
    {
        /// <summary>
        /// A positive weight. It's up to the implementer ensure this requirement
        /// </summary>
        int Weight { get; }
    }

    /// <summary>
    /// Pick up an element reflecting its weight.
    /// </summary>
    /// <typeparam name="T"></typeparam>
    public class RandomWeightedPicker< T > where T:IWeighted
    {
        private readonly IEnumerable< T > items;
        private readonly int totalWeight;
        private Random random = new Random();

        /// <summary>
        /// Initiliaze the structure. O(1) or O(n) depending by the options, default O(n).
        /// </summary>
        /// <param name="items">The items</param>
        /// <param name="checkWeights">If <c>true</c> will check that the weights are positive. O(N)</param>
        /// <param name="shallowCopy">If <c>true</c> will copy the original collection structure (not the items). Keep in mind that items lifecycle is impacted.</param>
        public RandomWeightedPicker(IEnumerable< T > items, bool checkWeights = true, bool shallowCopy = true)
        {
            if (items == null) throw new ArgumentNullException("items");
            if (!items.Any()) throw new ArgumentException("items cannot be empty");
            if (shallowCopy)
                this.items = new List< T >(items);
            else
                this.items = items;
            if (checkWeights && this.items.Any(i => i.Weight <= 0))
            {
                throw new ArgumentException("There exists some items with a non positive weight");
            }
            totalWeight = this.items.Sum(i => i.Weight);
        }
        /// <summary>
        /// Pick a random item based on its chance. O(n)
        /// </summary>
        /// <param name="defaultValue">The value returned in case the element has not been found</param>
        /// <returns></returns>
        public T PickAnItem()
        {
            int rnd = random.Next(totalWeight);
            return items.First(i => (rnd -= i.Weight) < 0);
        }

        /// <summary>
        /// Resets the internal random generator. O(1)
        /// </summary>
        /// <param name="seed"></param>
        public void ResetRandomGenerator(int? seed)
        {
            random = seed.HasValue ? new Random(seed.Value) : new Random();
        }
    }
}

要点:https://gist.github.com/MrBogomips/ae6f6c9af8032392e4b93aaa393df447


我想出了该解决方案的通用版本:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
public static class WeightedEx
{
    /// <summary>
    /// Select an item from the given sequence according to their respective weights.
    /// </summary>
    /// <typeparam name="TItem">Type of item item in the given sequence.</typeparam>
    /// <param name="a_source">Given sequence of weighted items.</param>
    /// <returns>Randomly picked item.</returns>
    public static TItem PickWeighted<TItem>(this IEnumerable<TItem> a_source)
        where TItem : IWeighted
    {
        if (!a_source.Any())
            return default(TItem);

        var source= a_source.OrderBy(i => i.Weight);

        double dTotalWeight = source.Sum(i => i.Weight);

        Random rand = new Random();

        while (true)
        {
            double dRandom = rand.NextDouble() * dTotalWeight;

            foreach (var item in source)
            {
                if (dRandom < item.Weight)
                    return item;

                dRandom -= item.Weight;
            }
        }
    }
}

/// <summary>
/// IWeighted: Implementation of an item that is weighted.
/// </summary>
public interface IWeighted
{
    double Weight { get; }
}


推荐阅读