
Sunday, September 25, 2016

.NET Asynchronous Parallel Batch Processor

Last year, I wrote about how to handle dynamically sized batches of data in an asynchronous manner. That original implementation used an abstract base class, and only supported a single background processing thread. I recently updated that implementation to accept a lambda rather than require inheritance, and to support a configurable number of background threads.

...basically, this is a ConcurrentQueue that takes a lambda and a thread count, and uses them to asynchronously process enqueued items.

Unit Tests

public class ParallelProcessorTests
{
    [Fact]
    public async Task NoDisposeTimeout()
    {
        var results = new ConcurrentQueue<int>();
 
        using (var processor = new ParallelProcessor<int>(2, async (i, token) =>
        {
            await Task.Delay(200, token).ConfigureAwait(false);
            results.Enqueue(i);
        }, disposeTimeoutMs: 0))
        {
            processor.Enqueue(1);
            processor.Enqueue(2);
            processor.Enqueue(3);
            processor.Enqueue(4);
            processor.Enqueue(5);
 
            await Task.Delay(300).ConfigureAwait(false);
        }
 
        Assert.Equal(2, results.Count);
    }
 
    [Fact]
    public void MaxParallelizationLimit()
    {
        const int parallelism = 3;
        var results = new ConcurrentQueue<Tuple<int, int>>();
        var active = 0;
 
        using (var processor = new ParallelProcessor<int>(parallelism, async (i, token) =>
        {
            Interlocked.Increment(ref active);
            await Task.Delay(200, token).ConfigureAwait(false);
            // Decrement returns the updated count; adding one back yields
            // how many handlers were running while this one was active.
            var currentActive = Interlocked.Decrement(ref active) + 1;
 
            var tuple = Tuple.Create(currentActive, i);
            results.Enqueue(tuple);
        }))
        {
            processor.Enqueue(1);
            processor.Enqueue(2);
            processor.Enqueue(3);
            processor.Enqueue(4);
            processor.Enqueue(5);
        }
 
        Assert.Equal(5, results.Count);
 
        var maxParallelism = results.Max(t => t.Item1);
        Assert.Equal(parallelism, maxParallelism);
    }
 
    [Fact]
    public void BatchProcessor()
    {
        var results = new List<Tuple<long, List<int>>>();
        var sw = Stopwatch.StartNew();
 
        using (var processor = new BatchParallelProcessor<int>(1, 2, async (ints, token) =>
        {
            await Task.Delay(100, token).ConfigureAwait(false);
            var tuple = Tuple.Create(sw.ElapsedMilliseconds, ints);
            results.Add(tuple);
        }))
        {
            processor.Enqueue(1);
            processor.Enqueue(2);
            processor.Enqueue(3);
            processor.Enqueue(4);
            processor.Enqueue(5);
        }
 
        Assert.Equal(3, results.Count);
 
        Assert.Equal(2, results[0].Item2.Count);
        Assert.Equal(1, results[0].Item2[0]);
        Assert.Equal(2, results[0].Item2[1]);
 
        Assert.True(results[0].Item1 < results[1].Item1);
        Assert.Equal(2, results[1].Item2.Count);
        Assert.Equal(3, results[1].Item2[0]);
        Assert.Equal(4, results[1].Item2[1]);
 
        Assert.True(results[1].Item1 < results[2].Item1);
        Assert.Equal(1, results[2].Item2.Count);
        Assert.Equal(5, results[2].Item2[0]);
    }
}
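
Those tests cover the class's whole public surface, so it is worth sketching how such a class might be built. The following is a minimal illustration inferred from the tests above, not the actual implementation; in particular, the constructor signature and the Timeout.Infinite default are assumptions.

using System;
using System.Collections.Concurrent;
using System.Threading;
using System.Threading.Tasks;

public class ParallelProcessor<T> : IDisposable
{
    private readonly BlockingCollection<T> _queue = new BlockingCollection<T>();
    private readonly CancellationTokenSource _cancelSource = new CancellationTokenSource();
    private readonly Task[] _workers;
    private readonly int _disposeTimeoutMs;

    public ParallelProcessor(
        int maxParallelism,
        Func<T, CancellationToken, Task> processAsync,
        int disposeTimeoutMs = Timeout.Infinite)
    {
        _disposeTimeoutMs = disposeTimeoutMs;
        _workers = new Task[maxParallelism];

        // Start one long-running consumer loop per degree of parallelism.
        for (var i = 0; i < maxParallelism; i++)
        {
            _workers[i] = Task.Run(async () =>
            {
                foreach (var item in _queue.GetConsumingEnumerable())
                {
                    try
                    {
                        await processAsync(item, _cancelSource.Token).ConfigureAwait(false);
                    }
                    catch (OperationCanceledException)
                    {
                        return; // Dispose canceled the in-flight work.
                    }
                }
            });
        }
    }

    public void Enqueue(T item)
    {
        _queue.Add(item);
    }

    public void Dispose()
    {
        // Stop accepting new items, give in-flight work a grace period,
        // and then cancel anything that is still running.
        _queue.CompleteAdding();
        if (!Task.WaitAll(_workers, _disposeTimeoutMs))
            _cancelSource.Cancel();
    }
}

A BatchParallelProcessor<T> can follow the same shape, with each worker draining up to a maximum batch size from the queue into a list before invoking the handler; that is what produces the [1, 2], [3, 4], [5] grouping asserted in the BatchProcessor test.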

Sunday, June 28, 2015

.NET Asynchronous Batch Processor

The .NET Framework offers a series of Thread-Safe Collections that allow you to consume collections across threads. Processing the contents of these collections still requires a dedicated thread, and while there is a BlockingCollection, there is unfortunately no equivalent class that supports consumption in an asynchronous fashion. (Please note that the always awesome Stephen Cleary did actually implement an AsyncCollection.)
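
To see why a thread is required, consider the standard blocking consumer pattern below. This is a small illustrative snippet, not from the original post; the class name is arbitrary.

using System;
using System.Collections.Concurrent;
using System.Threading.Tasks;

public static class BlockingConsumerExample
{
    public static void Run()
    {
        var queue = new BlockingCollection<int>();

        // GetConsumingEnumerable blocks while it waits for items, so this
        // consumer ties up a thread even when the queue sits empty; there
        // is no built-in way to await the next item instead.
        var consumer = Task.Run(() =>
        {
            foreach (var item in queue.GetConsumingEnumerable())
                Console.WriteLine(item);
        });

        queue.Add(1);
        queue.Add(2);

        queue.CompleteAdding();
        consumer.Wait();
    }
}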

What if you want to handle dynamically sized batches of data in an asynchronous manner?

You could use a series of TPL Dataflow blocks, as sketched below, or if you are looking for a simple solution you can write a small class that uses an async loop to process a ConcurrentQueue. The abstract base class that follows the sketch can help you implement this:
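
To make the Dataflow option concrete, here is a minimal sketch (an illustration, assuming the System.Threading.Tasks.Dataflow package is referenced): a BatchBlock groups items into fixed-size batches and a linked ActionBlock processes each batch asynchronously.

using System;
using System.Threading.Tasks;
using System.Threading.Tasks.Dataflow;

public static class DataflowBatchExample
{
    public static async Task RunAsync()
    {
        // Group incoming items into batches of up to 10.
        var batchBlock = new BatchBlock<int>(10);

        // Process each completed batch asynchronously.
        var actionBlock = new ActionBlock<int[]>(async batch =>
        {
            await Task.Delay(100).ConfigureAwait(false); // simulate async work
            Console.WriteLine(string.Join(", ", batch));
        });

        // Flow batches into the processor and propagate completion.
        batchBlock.LinkTo(actionBlock,
            new DataflowLinkOptions { PropagateCompletion = true });

        for (var i = 0; i < 25; i++)
            batchBlock.Post(i);

        batchBlock.Complete();
        await actionBlock.Completion.ConfigureAwait(false);
    }
}

Note that BatchBlock emits fixed-size batches (a trailing partial batch only on completion, or when TriggerBatch is called), which is part of why a hand-rolled queue loop is attractive when batches should size themselves to whatever has accumulated.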

Base Class

public abstract class BatchProcessorBase<T> : IDisposable
{
    protected readonly int MaxBatchSize;
    private readonly ConcurrentQueue<T> _queue;
    private readonly CancellationTokenSource _cancelSource;
    private readonly object _queueTaskLock;
    private Task _queueTask;
    private bool _isDisposed;
 
    protected BatchProcessorBase(int maxBatchSize)
    {
        MaxBatchSize = maxBatchSize;
        _queue = new ConcurrentQueue<T>();
        _cancelSource = new CancellationTokenSource();
        _queueTaskLock = new object();
        _queueTask = Task.FromResult(true);
    }
        
    public void Enqueue(T item)
    {
        _queue.Enqueue(item);
        TryStartProcessLoop();
    }
 
    public void Dispose()
    {
        if (_isDisposed)
            return;

        _cancelSource.Cancel();
        _isDisposed = true;
    }
 
    protected abstract Task ProcessBatchAsync(
        IList<T> list, 
        CancellationToken cancelToken);
 
    private void TryStartProcessLoop()
    {
        // Lock so only one thread can manipulate the queue task.
        lock (_queueTaskLock)
        {
            // If cancellation has been requested, do not start.
            if (_cancelSource.IsCancellationRequested)
                return;
 
            // If the loop is still active, do not start.
            if (!_queueTask.IsCompleted)
                return;
 
            // If the queue is empty, do not start.
            if (_queue.Count == 0)
                return;
 
            // Start a new task to process the queue.
            _queueTask = Task.Run(() => ProcessQueue(), _cancelSource.Token);
 
            // When the process queue task completes, check to see if
            // the queue has been populated again and needs to restart.
            _queueTask.ContinueWith(t => TryStartProcessLoop());
        }
    }
 
    private async Task ProcessQueue()
    {
        // Stay alive until the queue is empty or cancellation is requested.
        while (!_cancelSource.IsCancellationRequested && _queue.Count > 0)
        {
            var list = new List<T>();
            T item;
 
            // Dequeue up to a full batch from the queue.
            while (list.Count < MaxBatchSize && _queue.TryDequeue(out item))
                list.Add(item);
 
            // Process the dequeued items.
            await ProcessBatchAsync(list, _cancelSource.Token).ConfigureAwait(false);
        }
    }
}
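
To use the base class, inherit from it and implement ProcessBatchAsync. The following hypothetical subclass simply writes each batch to the console; the class name and the simulated delay are illustrative only.

using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

public class ConsoleBatchProcessor : BatchProcessorBase<string>
{
    public ConsoleBatchProcessor()
        : base(maxBatchSize: 10)
    {
    }

    protected override async Task ProcessBatchAsync(
        IList<string> list,
        CancellationToken cancelToken)
    {
        // Simulate asynchronous work, such as a network call.
        await Task.Delay(10, cancelToken).ConfigureAwait(false);
        Console.WriteLine(string.Join(", ", list));
    }
}

Usage is then just a matter of enqueueing items:

using (var processor = new ConsoleBatchProcessor())
{
    processor.Enqueue("hello");
    processor.Enqueue("world");
}

Keep in mind that Dispose cancels immediately rather than draining the queue, so in a real application the processor should live as long as its producers.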