I need fastest way to convert byte array to short array of audio data.
Audio data byte array contains data from two audio channels placed in such way:
C1C1C2C2 C1C1C2C2 C1C1C2C2 ...
where
C1C1 - two bytes of first channel
C2C2 - two bytes of second channel
Currently I use such algorithm, but I feel there is better way to perform this task.
byte[] rawData = //from audio device
short[] shorts = new short[rawData.Length / 2];
short[] channel1 = new short[rawData.Length / 4];
short[] channel2 = new short[rawData.Length / 4];
System.Buffer.BlockCopy(rawData, 0, shorts, 0, rawData.Length);
for (int i = 0, j = 0; i < shorts.Length; i+=2, ++j)
{
channel1[j] = shorts[i];
channel2[j] = 开发者_运维知识库shorts[i+1];
}
You can leave out copying the buffer:
byte[] rawData = //from audio device
short[] channel1 = new short[rawData.Length / 4];
short[] channel2 = new short[rawData.Length / 4];
for (int i = 0, j = 0; i < rawData.Length; i+=4, ++j)
{
channel1[j] = (short)(((ushort)rawData[i + 1]) << 8 | (ushort)rawData[i]);
channel2[j] = (short)(((ushort)rawData[i + 3]) << 8 | (ushort)rawData[i + 2]);
}
To get the loop faster, you can have a look at the Task Parralel Library, exspecially Parallel.For:
[EDIT]
System.Threading.Tasks.Parallel.For( 0, shorts.Length/2, ( i ) =>
{
channel1[i] = shorts[i*2];
channel2[i] = shorts[i*2+1];
} );
[/EDIT]
Another way is loop unrolling, but I think the TPL will boost this up as well.
You could use unsafe code to avoid array addressing or bit shifts. But as PVitt said on new PCs you are better using standard managed code and the TPL if your data size is important.
short[] channel1 = new short[rawData.Length / 4];
short[] channel2 = new short[rawData.Length / 4];
fixed(byte* pRawData = rawData)
fixed(short* pChannel1 = channel1)
fixed(short* pChannel2 = channel2)
{
byte* end = pRawData + rawData.Length;
while(pRawData < end)
{
(*(pChannel1++)) = *((short*)pRawData);
pRawData += sizeof(short);
(*(pChannel2++)) = *((short*)pRawData);
pRawData += sizeof(short);
}
}
As with all optimization problems you need to time carefully, pay special attention to your buffer allocations, channel1 and channel2 could be static (big) buffers growing automatically and you could use only the nth first bytes. You will be able to skip 2 big arrays allocations for each executions of this function. and will make the GC work less (always better when timing is important)
As noted by CodeInChaos the endianness could be important, if your data is not in the correct endianness you will need to do the conversion, for example to convert between big and little endian assuming 8bit atomic elements the code will look like :
short[] channel1 = new short[rawData.Length / 4];
short[] channel2 = new short[rawData.Length / 4];
fixed(byte* pRawData = rawData)
fixed(byte* pChannel1 = (byte*)channel1)
fixed(byte* pChannel2 = (byte*)channel2)
{
byte* end = pRawData + rawData.Length;
byte* pChannel1High = pChannel1 + 1;
byte* pChannel2High = pChannel2 + 1;
while(pRawData < end)
{
*pChannel1High = *pRawData;
pChannel1High += 2 * sizeof(short);
*pChannel1 = *pRawData;
pChannel1 += 2 * sizeof(short);
*pChannel2High = *pRawData;
pChannel2High += 2 * sizeof(short);
*pChannel2 = *pRawData;
pChannel2 += 2 * sizeof(short);
}
}
I didn't compile any code in this post with an actual compiler so if you find errors feel free to edit it.
You can benchmark it by yourself! remember to use Release Mode and run without Debug (Ctrl+F5)
class Program
{
[StructLayout(LayoutKind.Explicit)]
struct UnionArray
{
[FieldOffset(0)]
public byte[] Bytes;
[FieldOffset(0)]
public short[] Shorts;
}
unsafe static void Main(string[] args)
{
Process.GetCurrentProcess().PriorityClass = ProcessPriorityClass.High;
byte[] rawData = new byte[10000000];
new Random().NextBytes(rawData);
Stopwatch sw1 = Stopwatch.StartNew();
short[] shorts = new short[rawData.Length / 2];
short[] channel1 = new short[rawData.Length / 4];
short[] channel2 = new short[rawData.Length / 4];
System.Buffer.BlockCopy(rawData, 0, shorts, 0, rawData.Length);
for (int i = 0, j = 0; i < shorts.Length; i += 2, ++j)
{
channel1[j] = shorts[i];
channel2[j] = shorts[i + 1];
}
sw1.Stop();
Stopwatch sw2 = Stopwatch.StartNew();
short[] channel1b = new short[rawData.Length / 4];
short[] channel2b = new short[rawData.Length / 4];
for (int i = 0, j = 0; i < rawData.Length; i += 4, ++j)
{
channel1b[j] = BitConverter.ToInt16(rawData, i);
channel2b[j] = BitConverter.ToInt16(rawData, i + 2);
}
sw2.Stop();
Stopwatch sw3 = Stopwatch.StartNew();
short[] shortsc = new UnionArray { Bytes = rawData }.Shorts;
short[] channel1c = new short[rawData.Length / 4];
short[] channel2c = new short[rawData.Length / 4];
for (int i = 0, j = 0; i < shorts.Length; i += 2, ++j)
{
channel1c[j] = shortsc[i];
channel2c[j] = shortsc[i + 1];
}
sw3.Stop();
Stopwatch sw4 = Stopwatch.StartNew();
short[] channel1d = new short[rawData.Length / 4];
short[] channel2d = new short[rawData.Length / 4];
for (int i = 0, j = 0; i < rawData.Length; i += 4, ++j)
{
channel1d[j] = (short)((short)(rawData[i + 1]) << 8 | (short)rawData[i]);
channel2d[j] = (short)((short)(rawData[i + 3]) << 8 | (short)rawData[i + 2]);
//Equivalent warning-less version
//channel1d[j] = (short)(((ushort)rawData[i + 1]) << 8 | (ushort)rawData[i]);
//channel2d[j] = (short)(((ushort)rawData[i + 3]) << 8 | (ushort)rawData[i + 2]);
}
sw4.Stop();
Stopwatch sw5 = Stopwatch.StartNew();
short[] channel1e = new short[rawData.Length / 4];
short[] channel2e = new short[rawData.Length / 4];
fixed (byte* pRawData = rawData)
fixed (short* pChannel1 = channel1e)
fixed (short* pChannel2 = channel2e)
{
byte* pRawData2 = pRawData;
short* pChannel1e = pChannel1;
short* pChannel2e = pChannel2;
byte* end = pRawData2 + rawData.Length;
while (pRawData2 < end)
{
(*(pChannel1e++)) = *((short*)pRawData2);
pRawData2 += sizeof(short);
(*(pChannel2e++)) = *((short*)pRawData2);
pRawData2 += sizeof(short);
}
}
sw5.Stop();
Stopwatch sw6 = Stopwatch.StartNew();
short[] shortse = new short[rawData.Length / 2];
short[] channel1f = new short[rawData.Length / 4];
short[] channel2f = new short[rawData.Length / 4];
System.Buffer.BlockCopy(rawData, 0, shortse, 0, rawData.Length);
System.Threading.Tasks.Parallel.For(0, shortse.Length / 2, (i) =>
{
channel1f[i] = shortse[i * 2];
channel2f[i] = shortse[i * 2 + 1];
});
sw6.Stop();
if (!channel1.SequenceEqual(channel1b) || !channel1.SequenceEqual(channel1c) || !channel1.SequenceEqual(channel1d) || !channel1.SequenceEqual(channel1e) || !channel1.SequenceEqual(channel1f))
{
throw new Exception();
}
if (!channel2.SequenceEqual(channel2b) || !channel2.SequenceEqual(channel2c) || !channel2.SequenceEqual(channel2d) || !channel2.SequenceEqual(channel2e) || !channel2.SequenceEqual(channel2f))
{
throw new Exception();
}
Console.WriteLine("Original: {0}ms", sw1.ElapsedMilliseconds);
Console.WriteLine("BitConverter: {0}ms", sw2.ElapsedMilliseconds);
Console.WriteLine("Super-unsafe struct: {0}ms", sw3.ElapsedMilliseconds);
Console.WriteLine("PVitt shifts: {0}ms", sw4.ElapsedMilliseconds);
Console.WriteLine("unsafe VirtualBlackFox: {0}ms", sw5.ElapsedMilliseconds);
Console.WriteLine("TPL: {0}ms", sw6.ElapsedMilliseconds);
Console.ReadKey();
return;
}
}
- On x86 fastest is the unsafe code of VirtualBlackFox, second the "super unsafe"
struct
"trick" of C# unsafe value type array to byte array conversions, third PVitt. - On x64 fastest is the unsafe code of VirtualBlackFox, second PVitt.
精彩评论