use std::fs::File;
use std::io::BufReader;
use crate::io::byteio::*;
use crate::io::bitreader::*;
use super::super::*;

/// Frame rate reported as the video time base denominator; it is also used to
/// work out how many audio samples are handed out per video frame.
const FPS: u32 = 12;

/// Decodes `bsize` bytes of delta-coded mono audio from `br` and appends one
/// sample per byte to `audio`.
///
/// Each input byte selects a magnitude from `DELTAS` with its low seven bits
/// and a direction with its top bit (clear = add, set = subtract). `pred` is
/// the running predictor and is left updated so the next call continues from
/// the last sample.
///
/// Returns an error when the reader fails or when a byte refers to an entry
/// beyond the end of `DELTAS`.
fn unpack_mono(audio: &mut Vec<i16>, br: &mut dyn ByteIO, bsize: usize, pred: &mut i16) -> DecoderResult<()> {
    for _ in 0..bsize {
        let code = usize::from(br.read_byte()?);
        let magnitude = code & 0x7F;
        validate!(magnitude < DELTAS.len());
        let step = DELTAS[magnitude];
        // The deltas come straight from the file, so the predictor may run past
        // the i16 range. Wrap explicitly (what release builds already did)
        // instead of panicking in overflow-checked builds.
        *pred = if (code & 0x80) == 0 {
            pred.wrapping_add(step)
        } else {
            pred.wrapping_sub(step)
        };
        audio.push(*pred);
    }
    Ok(())
}

/// Decodes `bsize` bytes of delta-coded stereo audio from `br` and appends the
/// samples to `audio` in the order they are read (first channel, second
/// channel, first channel, ...).
///
/// Bytes are consumed in pairs, one per channel. Each byte selects a magnitude
/// from `DELTAS` with its low seven bits and a direction with its top bit
/// (clear = add, set = subtract). `pred` holds one running predictor per
/// channel and is left updated for the next call.
///
/// Callers are expected to pass an even `bsize`; an odd value still consumes a
/// whole final pair.
///
/// Returns an error when the reader fails or when a byte refers to an entry
/// beyond the end of `DELTAS`.
fn unpack_stereo(audio: &mut Vec<i16>, br: &mut dyn ByteIO, bsize: usize, pred: &mut [i16; 2]) -> DecoderResult<()> {
    for _ in (0..bsize).step_by(2) {
        for channel_pred in pred.iter_mut() {
            let code = usize::from(br.read_byte()?);
            let magnitude = code & 0x7F;
            validate!(magnitude < DELTAS.len());
            let step = DELTAS[magnitude];
            // The deltas come straight from the file, so the predictor may run
            // past the i16 range. Wrap explicitly (what release builds already
            // did) instead of panicking in overflow-checked builds.
            *channel_pred = if (code & 0x80) == 0 {
                channel_pred.wrapping_add(step)
            } else {
                channel_pred.wrapping_sub(step)
            };
            audio.push(*channel_pred);
        }
    }
    Ok(())
}

/// Precomputed chroma contributions for converting decoded pixels to RGB.
///
/// Every table is indexed by an 8-bit chroma sample and stores the amount to
/// add to the luma value for one output component.
struct YUV2RGB {
    /// Blue contribution of the `u` sample.
    u2b: [i16; 256],
    /// Red contribution of the `v` sample.
    v2r: [i16; 256],
    /// Green contribution of the `u` sample.
    u2g: [i16; 256],
    /// Green contribution of the `v` sample.
    v2g: [i16; 256],
}

impl YUV2RGB {
    /// Builds all four tables as linear ramps evaluated in 16.16 fixed point.
    fn new() -> Self {
        /// Fills a table with `(start + step * index) >> 16` for every index.
        fn ramp(start: i32, step: i32) -> [i16; 256] {
            let mut table = [0i16; 256];
            for (index, slot) in table.iter_mut().enumerate() {
                *slot = ((start + step * index as i32) >> 16) as i16;
            }
            table
        }

        Self {
            v2r: ramp(-0xB2F480, 0x166E9),
            u2b: ramp(-0xE25100, 0x1C5A2),
            v2g: ramp(0x5B6900, -0xB2D2),
            u2g: ramp(0x2C8D00, -0x581A),
        }
    }
}

/// State of the DERF video file decoder, which outputs interleaved video and
/// audio frames.
struct DerfVideoDecoder {
    /// Buffered file reader the chunks are read from.
    fr:         FileReader<BufReader<File>>,
    /// Current decoded picture, three bytes per pixel.
    frame:      Vec<u8>,
    /// Previously decoded picture, used as the reference for block copies.
    pframe:     Vec<u8>,
    /// Payload of the video chunk being decoded.
    data:       Vec<u8>,
    /// Decoded audio samples waiting to be returned.
    abuf:       Vec<i16>,
    /// Number of samples handed out in one audio frame.
    aframe_len: usize,
    /// Set after a video frame is returned so that the next call may output audio.
    audio:      bool,
    /// Whether the audio stream has two channels.
    stereo:     bool,
    /// Running audio predictors, one per channel (mono uses only the first).
    pred:       [i16; 2],
    /// Picture width in pixels.
    width:      usize,
    /// Picture height in pixels.
    height:     usize,
    /// Colour conversion tables used when outputting DCT-coded blocks.
    yuv2rgb:    YUV2RGB,
}

/// Convenience methods for pulling 4-bit values out of a bit reader.
trait ReadNibble {
    /// Reads the next four bits as an unsigned value.
    fn read_nibble(&mut self) -> BitReaderResult<u8>;
    /// Reads the next four bits as a sign-extended two's complement value.
    fn read_nibble_s(&mut self) -> BitReaderResult<i16>;
}

impl<'a> ReadNibble for BitReader<'a> {
    fn read_nibble(&mut self) -> BitReaderResult<u8> {
        let raw = self.read(4)?;
        Ok(raw as u8)
    }
    fn read_nibble_s(&mut self) -> BitReaderResult<i16> {
        let raw = self.read(4)?;
        // Move the nibble into the top half of an i8 so that the arithmetic
        // shift back down replicates its sign bit.
        let top_aligned = (raw as i8) << 4;
        Ok(i16::from(top_aligned >> 4))
    }
}

/// Borrowed view of everything needed to decode the blocks of one video frame.
struct FrameDecoder<'a> {
    /// Picture being written, three bytes per pixel.
    frame:      &'a mut [u8],
    /// Previous picture used as the source for copies from the reference frame.
    pframe:     &'a [u8],
    /// Byte-oriented stream with block codes and byte/word-sized coefficients.
    br:         &'a mut dyn ByteIO,
    /// Bit stream with the block coding/split decisions.
    flags:      BitReader<'a>,
    /// Bit stream read four bits at a time for DCT opcodes and small values.
    nibbles:    BitReader<'a>,
    /// Picture width in pixels.
    width:      usize,
    /// Picture height in pixels.
    height:     usize,
    /// Distance in bytes between the starts of consecutive picture rows.
    stride:     usize,
    /// Colour conversion tables for DCT-coded blocks.
    yuv2rgb:    &'a YUV2RGB,
}

// Multipliers for the inverse transform in fixed point with eight fractional
// bits (every product is rounded with `+ 0x80` and shifted right by 8).
const W1: i32 = -669;
const W2: i32 = 473;
const W4: i32 = 362;
const W6: i32 = 277;

/// Runs the 8-point inverse transform butterfly shared by the column and row
/// passes and returns the eight output values.
fn idct_1d(input: [i32; 8]) -> [i32; 8] {
    let [c0, c1, c2, c3, c4, c5, c6, c7] = input;

    // Even part.
    let tmp00 = c0 + c4;
    let tmp01 = c0 - c4;
    let tmp02 = c2 + c6;
    let tmp03 = ((c2 - c6) * W4 + 0x80) >> 8;
    let tmp04 = tmp03 - tmp02;
    let tmp05 = tmp01 + tmp04;
    let tmp06 = tmp01 - tmp04;
    // Odd part.
    let tmp07 = c5 + c3;
    let tmp08 = c5 - c3;
    let tmp09 = c1 + c7;
    let tmp10 = c1 - c7;
    let tmp11 = tmp09 + tmp07;
    let tmp12 = ((tmp08 + tmp10) * W2 + 0x80) >> 8;
    let tmp13 = ((tmp08 * W1 + 0x80) >> 8) - tmp11 + tmp12;
    let tmp14 = (((tmp09 - tmp07) * W4 + 0x80) >> 8) - tmp13;
    let tmp15 = ((tmp10 * W6 + 0x80) >> 8) - tmp12 + tmp14;

    [
        tmp00 + tmp02 + tmp11,
        tmp05 + tmp13,
        tmp06 + tmp14,
        tmp00 - tmp02 - tmp15,
        tmp00 - tmp02 + tmp15,
        tmp06 - tmp14,
        tmp05 - tmp13,
        tmp00 + tmp02 - tmp11,
    ]
}

/// Applies the inverse transform in place to column `col` (0..8) of the
/// row-major 8x8 block `blk`.
fn idct_col(blk: &mut [i32; 64], col: usize) {
    let mut column = [0; 8];
    for (i, el) in column.iter_mut().enumerate() {
        *el = blk[i * 8 + col];
    }
    for (i, &val) in idct_1d(column).iter().enumerate() {
        blk[i * 8 + col] = val;
    }
}

/// Applies the inverse transform in place to one row of eight values.
fn idct_row(row: &mut [i32; 8]) {
    *row = idct_1d(*row);
}

/// Dequantises `block` with `qmat` and inverse-transforms it in place.
///
/// Each coefficient is multiplied by its quantiser, which is first combined
/// with the matching `QUANT_MOD` entry (rounded, then shifted right by 12).
/// After the column and row passes the values are scaled down by 32 with
/// rounding, offset by 128 and limited to `0..=255`.
fn idct(block: &mut [i16; 64], qmat: &[i16; 64]) {
    let mut work = [0; 64];
    for (pos, slot) in work.iter_mut().enumerate() {
        let scale = (i32::from(qmat[pos]) * i32::from(QUANT_MOD[pos]) + 0x800) >> 12;
        *slot = i32::from(block[pos]) * scale;
    }

    // Columns first, then rows.
    for col in 0..8 {
        idct_col(&mut work, col);
    }
    for line in work.chunks_exact_mut(8) {
        let line: &mut [i32; 8] = line.try_into().unwrap();
        idct_row(line);
    }

    for (out, &val) in block.iter_mut().zip(work.iter()) {
        let sample = ((val + 0x10) >> 5) + 0x80;
        *out = sample.min(255).max(0) as i16;
    }
}

impl<'a> FrameDecoder<'a> {
    /// Decodes one DCT-coded 8x8 block and writes it as RGB at pixel position
    /// (`xpos`, `ypos`) of the current frame.
    ///
    /// The block carries 192 coefficients: three planes of 64, interleaved so
    /// that each zigzag position holds one value per plane. Opcodes and small
    /// values come from the nibble stream, larger values from the byte stream.
    /// Returns an error on a read failure, an unknown opcode or a run that would
    /// go past the end of the coefficient array.
    fn decode_dct(&mut self, xpos: usize, ypos: usize) -> DecoderResult<()> {
        let mut coeffs = [0; 192];

        // The first four coefficients are always present; one flag bit per
        // coefficient selects a 16-bit value (bit clear) or a signed byte (bit set).
        let flags = self.nibbles.read_nibble()?;
        for (i, coef) in coeffs[..4].iter_mut().enumerate() {
            *coef = if (flags & (1 << i)) == 0 {
                    self.br.read_u16le()? as i16
                } else {
                    i16::from(self.br.read_byte()? as i8)
                };
        }

        // The rest is driven by nibble opcodes until all positions are accounted for.
        let mut idx = 4;
        while idx < coeffs.len() {
            match self.nibbles.read_nibble()? {
                // single coefficient stored as a signed byte
                0x0 => {
                    coeffs[idx] = i16::from(self.br.read_byte()? as i8);
                    idx += 1;
                },
                // single coefficient stored as a signed nibble
                0x1 => {
                    coeffs[idx] = self.nibbles.read_nibble_s()?;
                    idx += 1;
                },
                // (byte skip, signed byte value) pairs until a zero skip; the rest of the block stays zero
                0x2 => {
                    loop {
                        let skip = usize::from(self.br.read_byte()?);
                        if skip == 0 {
                            break;
                        }
                        validate!(idx + skip < coeffs.len());
                        idx += skip;
                        coeffs[idx] = i16::from(self.br.read_byte()? as i8);
                        idx += 1;
                    }
                    idx = coeffs.len();
                },
                // (byte skip, signed nibble value) pairs until a zero skip; the rest of the block stays zero
                0x3 => {
                    loop {
                        let skip = usize::from(self.br.read_byte()?);
                        if skip == 0 {
                            break;
                        }
                        validate!(idx + skip < coeffs.len());
                        idx += skip;
                        coeffs[idx] = self.nibbles.read_nibble_s()?;
                        idx += 1;
                    }
                    idx = coeffs.len();
                },
                // (nibble skip, signed byte value) pairs until a zero skip; the rest of the block stays zero
                0x4 => {
                    loop {
                        let skip = usize::from(self.nibbles.read_nibble()?);
                        if skip == 0 {
                            break;
                        }
                        validate!(idx + skip < coeffs.len());
                        idx += skip;
                        coeffs[idx] = i16::from(self.br.read_byte()? as i8);
                        idx += 1;
                    }
                    idx = coeffs.len();
                },
                // (nibble skip, signed nibble value) pairs until a zero skip; the rest of the block stays zero
                0x5 => {
                    loop {
                        let skip = usize::from(self.nibbles.read_nibble()?);
                        if skip == 0 {
                            break;
                        }
                        validate!(idx + skip < coeffs.len());
                        idx += skip;
                        coeffs[idx] = self.nibbles.read_nibble_s()?;
                        idx += 1;
                    }
                    idx = coeffs.len();
                },
                // skip 1-4 positions (upper two bits), then 1-4 signed nibbles (lower two bits)
                0x6 => {
                    let nib = usize::from(self.nibbles.read_nibble()?);
                    let count = (nib & 3) + 1;
                    let skip = (nib >> 2) + 1;
                    validate!(idx + skip + count <= coeffs.len());
                    idx += skip;
                    for _ in 0..count {
                        coeffs[idx] = self.nibbles.read_nibble_s()?;
                        idx += 1;
                    }
                },
                // skip 1-8 positions (upper three bits), then 1-2 signed nibbles (lowest bit)
                0x7 => {
                    let nib = usize::from(self.nibbles.read_nibble()?);
                    let count = (nib & 1) + 1;
                    let skip = (nib >> 1) + 1;
                    validate!(idx + skip + count <= coeffs.len());
                    idx += skip;
                    for _ in 0..count {
                        coeffs[idx] = self.nibbles.read_nibble_s()?;
                        idx += 1;
                    }
                },
                // skip 1-2 positions (top bit), then 1-8 signed nibbles (lower three bits)
                0x8 => {
                    let nib = usize::from(self.nibbles.read_nibble()?);
                    let count = (nib & 7) + 1;
                    let skip = (nib >> 3) + 1;
                    validate!(idx + skip + count <= coeffs.len());
                    idx += skip;
                    for _ in 0..count {
                        coeffs[idx] = self.nibbles.read_nibble_s()?;
                        idx += 1;
                    }
                },
                // 1-16 consecutive signed nibbles without any skip
                0x9 => {
                    let count = usize::from(self.nibbles.read_nibble()?) + 1;
                    validate!(idx + count <= coeffs.len());
                    for _ in 0..count {
                        coeffs[idx] = self.nibbles.read_nibble_s()?;
                        idx += 1;
                    }
                },
                // (nibble skip, 16-bit value) pairs until a zero skip; the rest of the block stays zero
                0xA => {
                    loop {
                        let skip = usize::from(self.nibbles.read_nibble()?);
                        if skip == 0 {
                            break;
                        }
                        validate!(idx + skip < coeffs.len());
                        idx += skip;
                        coeffs[idx] = self.br.read_u16le()? as i16;
                        idx += 1;
                    }
                    idx = coeffs.len();
                },
                // (byte skip, 16-bit value) pairs until a zero skip; the rest of the block stays zero
                0xB => {
                    loop {
                        let skip = usize::from(self.br.read_byte()?);
                        if skip == 0 {
                            break;
                        }
                        validate!(idx + skip < coeffs.len());
                        idx += skip;
                        coeffs[idx] = self.br.read_u16le()? as i16;
                        idx += 1;
                    }
                    idx = coeffs.len();
                },
                // single coefficient stored as a 16-bit value
                0xC => {
                    coeffs[idx] = self.br.read_u16le()? as i16;
                    idx += 1;
                },
                _ => return Err(DecoderError::InvalidData),
            }
        }

        // De-interleave the three planes while undoing the zigzag scan.
        let mut blk0 = [0; 64];
        let mut blk1 = [0; 64];
        let mut blk2 = [0; 64];
        for (&idx, src) in ZIGZAG.iter().zip(coeffs.chunks_exact(3)) {
            blk0[idx] = src[0];
            blk1[idx] = src[1];
            blk2[idx] = src[2];
        }
        idct(&mut blk0, &QMAT_Y);
        idct(&mut blk1, &QMAT_C);
        idct(&mut blk2, &QMAT_C);

        // Convert to RGB; the chroma values were limited to 0..=255 by `idct`,
        // so they can index the conversion tables directly.
        for (y, row) in self.frame[xpos * 3 + ypos * self.stride..].chunks_mut(self.stride)
                .take(8).enumerate() {
            for (x, dst) in row.chunks_exact_mut(3).take(8).enumerate() {
                let luma = blk0[x + y * 8];
                let v = blk1[x + y * 8] as usize;
                let u = blk2[x + y * 8] as usize;

                dst[0] = (luma + self.yuv2rgb.v2r[v]).max(0).min(255) as u8;
                dst[1] = (luma + self.yuv2rgb.u2g[u] + self.yuv2rgb.v2g[v]).max(0).min(255) as u8;
                dst[2] = (luma + self.yuv2rgb.u2b[u]).max(0).min(255) as u8;
            }
        }

        Ok(())
    }
    /// Decodes the `bw` x `bh` pixel block at (`x`, `y`), recursively splitting
    /// it into halves when the flag stream says the block is not coded as a whole.
    ///
    /// A coded block is either a copy from the previous or the current frame
    /// (optionally flipped, mirrored and/or transposed) or, for full 8x8 blocks
    /// only, a DCT-coded block. 2x2 blocks are always coded and never split.
    /// Returns an error on a read failure or when the copy source lies outside
    /// the frame.
    fn do_block(&mut self, x: usize, y: usize, bw: usize, bh: usize) -> DecoderResult<()> {
        if (bw == 2 && bh == 2) || self.flags.read_bool()? {
            // Block code word: top three bits = mode, then six bits of vertical
            // offset, six bits of horizontal offset and the "copy from current
            // frame" flag in the lowest bit.
            let word = self.br.read_u16le()?;
            let mode = word >> 13;
            let mv_y = ((word >> 7) & 0x3F) as isize;
            let mv_x = ((word >> 1) & 0x3F) as isize - 0x1F;
            let use_cur = (word & 1) != 0;
            if mode < 7 {
                let mut dst_idx = x * 3 + y * self.stride;
                // The source position is relative to the origin of the enclosing
                // 8x8-aligned block; the vertical bias is larger when copying
                // from the current frame.
                let src_idx = ((x & !7) as isize) * 3 + mv_x * 3 + (((y & !7) as isize) + (if use_cur { mv_y - 0x3E } else { mv_y - 0x1F })) * (self.stride as isize);
                validate!(src_idx >= 0);
                let mut src_idx = src_idx as usize;
                // Transposing modes read a source area with swapped dimensions.
                // NOTE(review): the limit is taken from `frame` even when the source
                // is `pframe` - the visible constructor allocates both with the same size.
                if mode < 4 {
                    validate!(src_idx + bw * 3 + (bh - 1) * self.stride <= self.frame.len());
                } else {
                    validate!(src_idx + bh * 3 + (bw - 1) * self.stride <= self.frame.len());
                }
                match mode {
                    0 => { // normal copy
                        if !use_cur {
                            for (dline, sline) in self.frame[dst_idx..].chunks_mut(self.stride)
                                    .zip(self.pframe[src_idx..].chunks(self.stride).take(bh)) {
                                dline[..bw * 3].copy_from_slice(&sline[..bw * 3]);
                            }
                        } else {
                            // source and destination share one buffer, so copy byte by byte
                            for _ in 0..bh {
                                for i in 0..bw * 3 {
                                    self.frame[dst_idx + i] = self.frame[src_idx + i];
                                }
                                src_idx += self.stride;
                                dst_idx += self.stride;
                            }
                        }
                    },
                    1 => { // flipped copy
                        if !use_cur {
                            for (dline, sline) in self.frame[dst_idx..].chunks_mut(self.stride)
                                    .zip(self.pframe[src_idx..].chunks(self.stride).take(bh).rev()) {
                                dline[..bw * 3].copy_from_slice(&sline[..bw * 3]);
                            }
                        } else {
                            // start past the last source row and walk upwards
                            src_idx += bh * self.stride;
                            for _ in 0..bh {
                                src_idx -= self.stride;
                                for i in 0..bw * 3 {
                                    self.frame[dst_idx + i] = self.frame[src_idx + i];
                                }
                                dst_idx += self.stride;
                            }
                        }
                    },
                    2 => { // mirrored copy
                        if !use_cur {
                            for (dline, sline) in self.frame[dst_idx..].chunks_mut(self.stride)
                                    .zip(self.pframe[src_idx..].chunks(self.stride).take(bh)) {
                                for (dst, src) in dline.chunks_exact_mut(3).take(bw)
                                        .zip(sline.chunks_exact(3).take(bw).rev()) {
                                    dst.copy_from_slice(src);
                                }
                            }
                        } else {
                            for _ in 0..bh {
                                for i in 0..bw {
                                    for k in 0..3 {
                                        self.frame[dst_idx + i * 3 + k] = self.frame[src_idx + (bw - 1 - i) * 3 + k];
                                    }
                                }
                                src_idx += self.stride;
                                dst_idx += self.stride;
                            }
                        }
                    },
                    3 => { // flip+mirror copy
                        if !use_cur {
                            for (dline, sline) in self.frame[dst_idx..].chunks_mut(self.stride)
                                    .zip(self.pframe[src_idx..].chunks(self.stride).take(bh).rev()) {
                                for (dst, src) in dline.chunks_exact_mut(3).take(bw)
                                        .zip(sline.chunks_exact(3).take(bw).rev()) {
                                    dst.copy_from_slice(src);
                                }
                            }
                        } else {
                            src_idx += bh * self.stride;
                            for _ in 0..bh {
                                src_idx -= self.stride;
                                for i in 0..bw {
                                    for k in 0..3 {
                                        self.frame[dst_idx + i * 3 + k] = self.frame[src_idx + (bw - 1 - i) * 3 + k];
                                    }
                                }
                                dst_idx += self.stride;
                            }
                        }
                    },
                    4 => { // transpose
                        // each destination row is read from one source column
                        if !use_cur {
                            for dline in self.frame[dst_idx..].chunks_mut(self.stride).take(bh){
                                for (i, el) in dline.chunks_exact_mut(3).take(bw).enumerate() {
                                    el.copy_from_slice(&self.pframe[src_idx + i * self.stride..][..3]);
                                }
                                src_idx += 3;
                            }
                        } else {
                            for _ in 0..bh {
                                for i in 0..bw {
                                    for k in 0..3 {
                                        self.frame[dst_idx + i * 3 + k] = self.frame[src_idx + i * self.stride + k];
                                    }
                                }
                                dst_idx += self.stride;
                                src_idx += 3;
                            }
                        }
                    },
                    5 => { // transpose+mirror
                        if !use_cur {
                            for dline in self.frame[dst_idx..].chunks_mut(self.stride).take(bh) {
                                for (i, el) in dline.chunks_exact_mut(3).take(bw).enumerate() {
                                    el.copy_from_slice(&self.pframe[src_idx + (bw - 1 - i) * self.stride..][..3]);
                                }
                                src_idx += 3;
                            }
                        } else {
                            for _ in 0..bh {
                                for i in 0..bw {
                                    for k in 0..3 {
                                        self.frame[dst_idx + i * 3 + k] = self.frame[src_idx + (bw - 1 - i) * self.stride + k];
                                    }
                                }
                                dst_idx += self.stride;
                                src_idx += 3;
                            }
                        }
                    },
                    6 => { // transpose+mirror+flip
                        // start past the last source column and walk backwards
                        src_idx += bh * 3;
                        if !use_cur {
                            for dline in self.frame[dst_idx..].chunks_mut(self.stride).take(bh) {
                                src_idx -= 3;
                                for (i, el) in dline.chunks_exact_mut(3).take(bw).enumerate() {
                                    el.copy_from_slice(&self.pframe[src_idx + i * self.stride..][..3]);
                                }
                            }
                        } else {
                            for _ in 0..bh {
                                src_idx -= 3;
                                for i in 0..bw {
                                    for k in 0..3 {
                                        self.frame[dst_idx + i * 3 + k] = self.frame[src_idx + i * self.stride + k];
                                    }
                                }
                                dst_idx += self.stride;
                            }
                        }
                    }
                    _ => unreachable!(),
                }
            } else {
                // mode 7 is a DCT-coded block, which exists only at full 8x8 size
                validate!(bw == 8 && bh == 8);
                self.decode_dct(x, y)?;
            }
        } else if bw == bh {
            // Square block: a flag bit picks top/bottom halves (set) or
            // left/right halves (clear).
            if self.flags.read_bool()? {
                self.do_block(x, y,          bw, bh / 2)?;
                self.do_block(x, y + bh / 2, bw, bh / 2)?;
            } else {
                self.do_block(x,          y, bw / 2, bh)?;
                self.do_block(x + bw / 2, y, bw / 2, bh)?;
            }
        } else if bw < bh {
            // Non-square blocks are always split along their longer side.
            self.do_block(x, y,          bw, bh / 2)?;
            self.do_block(x, y + bh / 2, bw, bh / 2)?;
        } else {
            self.do_block(x,          y, bw / 2, bh)?;
            self.do_block(x + bw / 2, y, bw / 2, bh)?;
        }
        Ok(())
    }
}

impl DerfVideoDecoder {
    /// Decodes the video chunk stored in `self.data` into `self.frame`.
    ///
    /// An empty chunk is an error for an intra frame and a no-op otherwise (the
    /// previous picture is kept). The payload starts with two 32-bit offsets
    /// locating the flag and nibble streams; the byte stream follows them.
    fn decode_video(&mut self, is_intra: bool) -> DecoderResult<()> {
        if self.data.is_empty() {
            validate!(!is_intra);
            return Ok(());
        }

        // The picture decoded last time becomes the reference for this one.
        std::mem::swap(&mut self.frame, &mut self.pframe);

        let payload = self.data.as_slice();
        validate!(payload.len() > 8);
        let flg_offset = read_u32le(payload)? as usize;
        let nib_offset = read_u32le(&payload[4..])? as usize;
        validate!(nib_offset <= payload.len() && flg_offset < payload.len());

        let (width, height) = (self.width, self.height);
        let mut br = MemoryReader::new_read(&payload[8..]);
        let mut fdec = FrameDecoder {
                frame:      &mut self.frame,
                pframe:     &self.pframe,
                br:         &mut br,
                flags:      BitReader::new(&payload[flg_offset..], BitReaderMode::LE32MSB),
                nibbles:    BitReader::new(&payload[nib_offset..], BitReaderMode::LE),
                width,
                height,
                stride:     width * 3,
                yuv2rgb:    &self.yuv2rgb,
            };

        // Walk the picture in 8x8 blocks, left to right, top to bottom.
        for y in (0..height).step_by(8) {
            for x in (0..width).step_by(8) {
                fdec.do_block(x, y, 8, 8)?;
            }
        }

        Ok(())
    }
}

impl InputSource for DerfVideoDecoder {
    /// Stream 0 is video, stream 1 is audio.
    fn get_num_streams(&self) -> usize { 2 }
    fn get_stream_info(&self, stream_no: usize) -> StreamInfo {
        if stream_no == 0 {
            return StreamInfo::Video(VideoInfo{
                    width:  self.width,
                    height: self.height,
                    bpp:    24,
                    tb_num: 1,
                    tb_den: FPS,
                 });
        }
        if stream_no == 1 {
            let channels = if self.stereo { 2 } else { 1 };
            return StreamInfo::Audio(AudioInfo{
                    sample_rate: 22050,
                    channels,
                    sample_type: AudioSample::S16,
                });
        }
        StreamInfo::None
    }
    /// Returns the next frame together with its stream number.
    ///
    /// Audio chunks are decoded into an internal buffer as they are met. After
    /// every video frame one audio frame is handed out as soon as enough
    /// samples are buffered.
    fn decode_frame(&mut self) -> DecoderResult<(usize, Frame)> {
        let br = &mut self.fr;
        loop {
            let audio_ready = self.audio && self.abuf.len() >= self.aframe_len;
            if audio_ready {
                let audio: Vec<i16> = self.abuf.drain(..self.aframe_len).collect();
                self.audio = false;
                return Ok((1, Frame::AudioS16(audio)));
            }

            // Chunk header: two-byte tag, two skipped bytes, 32-bit size that
            // includes the 16-byte header, then eight more skipped bytes.
            let mut tag = [0; 2];
            br.read_buf(&mut tag).map_err(|_| DecoderError::EOF)?;
            br.read_skip(2)?;
            let chunk_size = br.read_u32le()? as usize;
            validate!(chunk_size >= 16);
            let payload_size = chunk_size - 16;
            br.read_skip(8)?;

            if &tag == b"CM" {
                unpack_mono(&mut self.abuf, &mut *br, payload_size, &mut self.pred[0])?;
            } else if &tag == b"CS" {
                validate!((payload_size & 1) == 0);
                unpack_stereo(&mut self.abuf, &mut *br, payload_size, &mut self.pred)?;
            } else if &tag == b"FK" || &tag == b"KB" {
                self.data.resize(payload_size, 0);
                br.read_buf(&mut self.data)?;
                let is_intra = &tag == b"FK";
                self.decode_video(is_intra).map_err(|_| DecoderError::InvalidData)?;
                self.audio = true;
                return Ok((0, Frame::VideoRGB24(self.frame.clone())));
            } else {
                // unknown chunk type - skip its payload
                br.read_skip(payload_size)?;
            }
        }
    }
}

/// Opens a DERF video file and returns a decoder for it.
///
/// Fails with `InputNotFound` when the file cannot be opened, and with a
/// validation error when the signature is wrong or the dimensions are zero or
/// not multiples of eight.
pub fn open_video(name: &str) -> DecoderResult<Box<dyn InputSource>> {
    let file = File::open(name).map_err(|_| DecoderError::InputNotFound(name.to_owned()))?;
    let mut br = FileReader::new_read(BufReader::new(file));

    let tag = br.read_tag()?;
    validate!(&tag == b"DERF");
    let _size = br.read_u32le()?; // often less than actual file size
    let _nframes = br.read_u16le()?;
    let width = br.read_u16le()? as usize;
    let height = br.read_u16le()? as usize;
    validate!(width != 0 && height != 0 && (width % 8) == 0 && (height % 8) == 0);
    let _fps = br.read_u16le()?; // should be 24
    br.read_skip(20)?;

    // The tag of the first chunk tells whether the audio is stereo.
    let mut first_tag = [0; 2];
    br.peek_buf(&mut first_tag)?;
    let stereo = &first_tag == b"CS";

    let channels = if stereo { 2 } else { 1 };
    let frame_size = width * height * 3;
    let decoder = DerfVideoDecoder {
        fr: br,
        frame:  vec![0; frame_size],
        pframe: vec![0; frame_size],
        data: Vec::new(),
        abuf: Vec::with_capacity(32000),
        aframe_len: (22050 / (FPS as usize)) * channels,
        audio: false,
        stereo,
        pred: [0; 2],
        width,
        height,
        yuv2rgb: YUV2RGB::new(),
    };
    Ok(Box::new(decoder))
}

/// State of the decoder for standalone DERF audio files.
struct DerfAudioDecoder {
    /// Buffered file reader the audio bytes are read from.
    fr:         FileReader<BufReader<File>>,
    /// Whether the file holds two interleaved channels.
    stereo:     bool,
    /// File position at which the audio data ends.
    size:       u32,
    /// Running audio predictors, one per channel (mono uses only the first).
    pred:       [i16; 2],
}

impl InputSource for DerfAudioDecoder {
    /// The file holds a single audio stream.
    fn get_num_streams(&self) -> usize { 1 }
    fn get_stream_info(&self, stream_no: usize) -> StreamInfo {
        if stream_no != 0 {
            return StreamInfo::None;
        }
        let channels = if self.stereo { 2 } else { 1 };
        StreamInfo::Audio(AudioInfo{
            sample_rate: 22050,
            channels,
            sample_type: AudioSample::S16,
        })
    }
    /// Decodes up to 512 input bytes into one audio frame, or reports `EOF`
    /// once the read position has reached the end of the audio data.
    fn decode_frame(&mut self) -> DecoderResult<(usize, Frame)> {
        const BLOCK_SIZE: usize = 512;

        let br = &mut self.fr;
        let end = u64::from(self.size);
        let pos = br.tell();
        if pos >= end {
            return Err(DecoderError::EOF);
        }

        // The final block may be shorter than a full one.
        let bsize = BLOCK_SIZE.min((end - pos) as usize);
        let mut audio = Vec::with_capacity(bsize);
        if self.stereo {
            validate!((bsize & 1) == 0);
            unpack_stereo(&mut audio, br, bsize, &mut self.pred)?;
        } else {
            unpack_mono(&mut audio, br, bsize, &mut self.pred[0])?;
        }

        Ok((0, Frame::AudioS16(audio)))
    }
}

/// Opens a standalone DERF audio file and returns a decoder for it.
///
/// The header consists of the `DERF` signature, a 32-bit channel count (1 or 2)
/// and the 32-bit size of the audio data that follows.
///
/// Fails with `InputNotFound` when the file cannot be opened, and with a
/// validation error when the signature or channel count is wrong, the data
/// size is zero, or the data size is so large that the end offset would not
/// fit into 32 bits.
pub fn open_audio(name: &str) -> DecoderResult<Box<dyn InputSource>> {
    // Signature, channel count and data size: three 32-bit fields.
    const HEADER_SIZE: u32 = 12;

    let file = File::open(name).map_err(|_| DecoderError::InputNotFound(name.to_owned()))?;
    let mut br = FileReader::new_read(BufReader::new(file));

    let tag = br.read_tag()?;
    validate!(&tag == b"DERF");
    let nchannels = br.read_u32le()?;
    validate!(nchannels == 1 || nchannels == 2);
    let size = br.read_u32le()?;
    validate!(size > 0);
    // The size comes from the file, so reject values whose end offset would
    // overflow instead of panicking (debug) or wrapping to a bogus end (release).
    validate!(size <= u32::MAX - HEADER_SIZE);

    Ok(Box::new(DerfAudioDecoder {
        fr: br,
        size: size + HEADER_SIZE,
        stereo: nchannels == 2,
        pred: [0; 2],
    }))
}

/// Step sizes for the delta-coded audio, indexed by the low seven bits of an
/// input byte; the byte's top bit selects whether the step is added to or
/// subtracted from the predictor. Indices past the end of the table are
/// rejected by the unpackers.
const DELTAS: [i16; 96] = [
       0x0,    0x1,    0x2,    0x3,    0x4,    0x5,    0x6,    0x7,
       0x8,    0x9,    0xA,    0xB,    0xC,    0xD,    0xE,   0x10,
      0x11,   0x13,   0x15,   0x17,   0x19,   0x1C,   0x1F,   0x22,
      0x25,   0x29,   0x2D,   0x32,   0x37,   0x3C,   0x42,   0x49,
      0x50,   0x58,   0x61,   0x6B,   0x76,   0x82,   0x8F,   0x9D,
      0xAD,   0xBE,   0xD1,   0xE6,   0xFD,  0x117,  0x133,  0x151,
     0x173,  0x198,  0x1C1,  0x1EE,  0x220,  0x256,  0x292,  0x2D4,
     0x31C,  0x36C,  0x3C3,  0x424,  0x48E,  0x502,  0x583,  0x610,
     0x6AB,  0x756,  0x812,  0x8E0,  0x9C3,  0xABD,  0xBD0,  0xCFF,
     0xE4C,  0xFBA, 0x114C, 0x1307, 0x14EE, 0x1706, 0x1954, 0x1BDC,
    0x1EA5, 0x21B6, 0x2515, 0x28CA, 0x2CDF, 0x315B, 0x364B, 0x3BB9,
    0x41B2, 0x4844, 0x4F7E, 0x5771, 0x602F, 0x69CE, 0x7462, 0x7FFF
];

/// Zigzag scan order: entry `n` is the position inside the row-major 8x8 block
/// that receives the `n`-th coded coefficient.
const ZIGZAG: [usize; 64] = [
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
];

// default libjpeg matrices derived at quality=90
/// Quantisation matrix for the luma plane, in row-major block order.
const QMAT_Y: [i16; 64] = [
     3,  2,  2,  3,  5,  8, 10, 12,
     2,  2,  3,  4,  5, 12, 12, 11,
     3,  3,  3,  5,  8, 11, 14, 11,
     3,  3,  4,  6, 10, 17, 16, 12,
     4,  4,  7, 11, 14, 22, 21, 15,
     5,  7, 11, 13, 16, 21, 23, 18,
    10, 13, 16, 17, 21, 24, 24, 20,
    14, 18, 19, 20, 22, 20, 21, 20
];
/// Quantisation matrix shared by both chroma planes, in row-major block order.
const QMAT_C: [i16; 64] = [
     3,  4,  5,  9, 20, 20, 20, 20,
     4,  4,  5, 13, 20, 20, 20, 20,
     5,  5, 11, 20, 20, 20, 20, 20,
     9, 13, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20
];
/// Per-position scale factors folded into the quantiser before the inverse
/// transform: the effective quantiser is `(qmat * QUANT_MOD + 0x800) >> 12`.
// NOTE(review): the values look like the products of the scale factors used by
// libjpeg's fast integer IDCT, with 16384 standing for 1.0 - confirm.
const QUANT_MOD: [i16; 64] = [
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
];
