use std::fs::File;
use std::io::BufReader;
use crate::io::byteio::*;
use crate::io::bitreader::*;
use super::super::*;

/// State of one opened ACF file: the chunk reader, the video frame buffers
/// and the audio bytes waiting to be handed out.
struct ACFDecoder {
    /// Reader over the opened file; chunks are pulled from it in `decode_frame`.
    fr:         FileReader<BufReader<File>>,
    /// Current palette bytes, returned alongside every video frame.
    pal:        [u8; 768],
    /// Frame width in pixels (also used as the row stride of the frame buffers).
    width:      usize,
    /// Frame height in pixels.
    height:     usize,
    /// Frame rate from the header; reported as the time base denominator.
    fps:        u32,
    /// Frame being reconstructed / most recently reconstructed (`width * height` bytes).
    frame:      Vec<u8>,
    /// Previous frame, used as the reference for copy and motion operations.
    pframe:     Vec<u8>,
    /// Payload of the video chunk currently being unpacked.
    data:       Vec<u8>,
    /// Audio sample rate from the header; zero means no audio stream is reported.
    arate:      u32,
    /// Number of audio channels from the header.
    channels:   u32,
    /// Audio bytes collected from sound chunks and not yet returned.
    audio:      Vec<u8>,
    /// Set after a video frame when buffered audio should be returned by the next call.
    anext:      bool,
}

/// Patches individual pixels of an 8x8 block that has already been reconstructed.
///
/// * `blk` starts at the top-left pixel of the block; rows are `stride` bytes apart.
/// * `part1` and `part2` are the two data streams of the frame being decoded.
/// * `op` selects the refinement: `0` is a bitmask-driven update, `1` does nothing,
///   `2` replaces four pixels and any other value replaces eight pixels.
///
/// Returns an error when either stream runs out of data.
fn refine_block(blk: &mut [u8], stride: usize, part1: &mut dyn ByteIO, part2: &mut dyn ByteIO, op: u32) -> DecoderResult<()> {
    // nothing to refine
    if op == 1 {
        return Ok(());
    }

    // one flag bit per pixel, a fresh 32-bit flag word every four rows
    if op == 0 {
        let mut flags: usize = 0;
        for (row_no, row) in blk.chunks_mut(stride).enumerate() {
            if row_no % 4 == 0 {
                flags = part1.read_u32le()? as usize;
            }
            for pix in row[..8].iter_mut() {
                let replace = (flags & 1) == 1;
                flags >>= 1;
                if replace {
                    *pix = part2.read_byte()?;
                }
            }
        }
        return Ok(());
    }

    // explicit positions: each round carries four 6-bit in-block positions
    // (packed into 24 bits in part2) and four replacement colours (in part1)
    let rounds = if op == 2 { 1 } else { 2 };
    for _ in 0..rounds {
        let packed = part2.read_u24le()? as usize;
        let mut colours = [0u8; 4];
        part1.read_buf(&mut colours)?;
        for (n, &colour) in colours.iter().enumerate() {
            let pos = (packed >> (6 * n)) & 0x3F;
            let (col, row) = (pos % 8, pos / 8);
            blk[col + row * stride] = colour;
        }
    }
    Ok(())
}

/// Turns a packed motion vector into a validated offset inside the reference frame.
///
/// The low nibble of `mv` is the horizontal and the high nibble the vertical
/// displacement, both read as signed 4-bit values (-8..=7). The displacement is
/// applied to `cur_pos` moved by half a tile in both directions.
///
/// * `cur_pos` - offset of the tile's top-left pixel in the frame
/// * `stride` - distance between rows
/// * `end` - length of the reference buffer
/// * `tile_size` - side of the square tile to be copied
///
/// Validation fails when the resulting tile would start before the buffer or
/// extend past `end`.
fn calc_addr(mv: u8, cur_pos: usize, stride: usize, end: usize, tile_size: usize) -> DecoderResult<usize> {
    // interpret a 4-bit value as two's complement
    let sign_extend = |nibble: u8| -> isize {
        if nibble < 8 {
            isize::from(nibble)
        } else {
            isize::from(nibble) - 16
        }
    };
    let dx = sign_extend(mv & 0xF);
    let dy = sign_extend(mv >> 4);

    let half = tile_size / 2;
    let anchor = (cur_pos + half * (stride + 1)) as isize;
    let target = anchor + dx + dy * (stride as isize);
    validate!(target >= 0);

    let target = target as usize;
    // one past the last pixel of the tile's bottom row
    let tile_end = target + tile_size + (tile_size - 1) * stride;
    validate!(tile_end <= end);

    Ok(target)
}

impl ACFDecoder {
    /// Reconstructs one video frame from the chunk payload held in `self.data`.
    ///
    /// The current and previous frame buffers are swapped first, so the frame
    /// decoded last becomes the reference (`pframe`) and `frame` is overwritten.
    ///
    /// Payload layout as read by this function:
    /// * a 32-bit little-endian offset to the second data part,
    /// * one 6-bit opcode per 8x8 block, packed for a little-endian bit reader,
    /// * "part1": the bytes between the opcodes and that offset,
    /// * "part2": the bytes from that offset to the end of the payload.
    ///
    /// Blocks are visited row by row in strips of eight lines. Each opcode
    /// selects how the block is produced and from which part its parameters come.
    /// Opcode families that end with `refine_block` use the low two bits of the
    /// (suitably shifted) opcode to pick the refinement mode.
    ///
    /// Returns an error when the payload is too short, an offset points outside
    /// the reference frame, or a stream runs out of data. On error the buffers
    /// stay swapped and `frame` may be partially written.
    fn unpack_frame(&mut self) -> DecoderResult<()> {
        std::mem::swap(&mut self.frame, &mut self.pframe);

        let src = &self.data;
        // six bits per 8x8 block, rounded up to whole bytes
        let op_size = ((self.width / 8) * (self.height / 8) * 6 + 7) / 8;
        validate!(src.len() >= op_size + 4);
        let part2_off = read_u32le(src)? as usize;
        validate!(part2_off >= op_size + 4 && part2_off <= src.len());

        let mut ops = BitReader::new(&src[4..][..op_size], BitReaderMode::LE);
        let mut part1 = MemoryReader::new_read(&src[op_size + 4..part2_off]);
        let mut part2 = MemoryReader::new_read(&src[part2_off..]);

        // offset of the current strip's first pixel inside the whole frame
        let mut blk_y = 0;
        for strip in self.frame.chunks_exact_mut(self.width * 8) {
            for blk_x in (0..self.width).step_by(8) {
                let op = ops.read(6)?;
                match op {
                    0 => { // raw: 64 pixels stored row by row in part1
                        for line in strip[blk_x..].chunks_mut(self.width) {
                            part1.read_buf(&mut line[..8])?;
                        }
                    },
                    1..=4 => { // skip: copy the co-located block of the previous frame
                        let src = &mut self.pframe[blk_x + blk_y..];
                        for (dline, sline) in strip[blk_x..].chunks_mut(self.width)
                                .zip(src.chunks(self.width)) {
                            dline[..8].copy_from_slice(&sline[..8]);
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, op & 3)?;
                    },
                    5..=8 => { // mv: one packed 4+4 bit motion vector from part2
                        let mv = part2.read_byte()?;
                        let pos = calc_addr(mv, blk_x + blk_y, self.width, self.pframe.len(), 8)?;
                        let src = &self.pframe[pos..];
                        for (dline, sline) in strip[blk_x..].chunks_mut(self.width)
                                .zip(src.chunks(self.width)) {
                            dline[..8].copy_from_slice(&sline[..8]);
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, op & 3)?;
                    },
                    9..=12 => { // mv with absolute offset: 16-bit position in the previous frame
                        let saddr = part2.read_u16le()? as usize;
                        // the whole 8x8 source block must lie inside the previous frame
                        validate!(saddr + 8 + 7 * self.width <= self.pframe.len());

                        let src = &self.pframe[saddr..];
                        for (dline, sline) in strip[blk_x..].chunks_mut(self.width)
                                .zip(src.chunks(self.width)) {
                            dline[..8].copy_from_slice(&sline[..8]);
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, op & 3)?;
                    },
                    13..=16 => { // fourmv: one packed motion vector per 4x4 quadrant
                        for quad in 0..4 {
                            let mv = part1.read_byte()?;
                            // bit 0 of quad selects the right half, bit 1 the bottom half
                            let pos = calc_addr(mv, blk_x + blk_y + (quad & 1) * 4 + (quad & 2) * 2 * self.width, self.width, self.pframe.len(), 4)?;
                            let dst = &mut strip[blk_x + (quad & 1) * 4 + (quad & 2) * 2 * self.width..];
                            let src = &self.pframe[pos..];
                            for (dline, sline) in dst.chunks_mut(self.width)
                                    .zip(src.chunks(self.width)).take(4) {
                                dline[..4].copy_from_slice(&sline[..4]);
                            }
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, op & 3)?;
                    },
                    17..=20 => { // fourmv with absoff: one 16-bit absolute position per 4x4 quadrant
                        for quad in 0..4 {
                            let pos = part1.read_u16le()? as usize;
                            validate!(pos + 4 + 3 * self.width <= self.pframe.len());
                            let dst = &mut strip[blk_x + (quad & 1) * 4 + (quad & 2) * 2 * self.width..];
                            let src = &self.pframe[pos..];
                            for (dline, sline) in dst.chunks_mut(self.width)
                                    .zip(src.chunks(self.width)).take(4) {
                                dline[..4].copy_from_slice(&sline[..4]);
                            }
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, op & 3)?;
                    },
                    21..=24 => { // fill: the whole block gets one colour from part2
                        let clr = part2.read_byte()?;
                        for line in strip[blk_x..].chunks_mut(self.width) {
                            for el in line[..8].iter_mut() {
                                *el = clr;
                            }
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, op & 3)?;
                    },
                    25..=28 => { // scaled 2x2 block: one colour per 4x4 quadrant
                        let mut clr = [0; 4];
                        part1.read_buf(&mut clr)?;
                        for (y, line) in strip[blk_x..].chunks_mut(self.width).enumerate() {
                            for (x, el) in line[..8].iter_mut().enumerate() {
                                *el = clr[(y / 4) * 2 + (x / 4)];
                            }
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, op & 3)?;
                    },
                    29 => { // 2-clr pattern: one selector bit per pixel, 32 bits per four rows
                        let mut clr = [0; 2];
                        part2.read_buf(&mut clr)?;
                        let mut mask = 0;
                        for (y, line) in strip[blk_x..].chunks_mut(self.width).enumerate() {
                            if (y & 3) == 0 {
                                mask = part1.read_u32le()? as usize;
                            }
                            for el in line[..8].iter_mut() {
                                *el = clr[mask & 1];
                                mask >>= 1;
                            }
                        }
                    },
                    30 => { // 4-clr pattern: two selector bits per pixel, 32 bits per two rows
                        let mut clr = [0; 4];
                        part1.read_buf(&mut clr)?;
                        let mut mask = 0;
                        for (y, line) in strip[blk_x..].chunks_mut(self.width).enumerate() {
                            if (y & 1) == 0 {
                                mask = part1.read_u32le()? as usize;
                            }
                            for el in line[..8].iter_mut() {
                                *el = clr[mask & 3];
                                mask >>= 2;
                            }
                        }
                    },
                    31 => { // 8-clr pattern: three selector bits per pixel, 24 bits per row
                        let mut clr = [0; 8];
                        part2.read_buf(&mut clr)?;
                        for line in strip[blk_x..].chunks_mut(self.width) {
                            let mut mask = part1.read_u24le()? as usize;
                            for el in line[..8].iter_mut() {
                                *el = clr[mask & 7];
                                mask >>= 3;
                            }
                        }
                    },
                    32 => { // 16-clr pattern: four selector bits per pixel, 32 bits per row
                        let mut clr = [0; 16];
                        part2.read_buf(&mut clr)?;
                        for line in strip[blk_x..].chunks_mut(self.width) {
                            let mut mask = part1.read_u32le()? as usize;
                            for el in line[..8].iter_mut() {
                                *el = clr[mask & 0xF];
                                mask >>= 4;
                            }
                        }
                    },
                    33 => { // 2-clr pattern per 4x4 block: 16-bit mask followed by two colours
                        let mut clrs = [0; 2];
                        for quad_no in 0..4 {
                            let pos = blk_x + (quad_no & 1) * 4 + (quad_no & 2) * 2 * self.width;
                            let mut mask = part1.read_u16le()? as usize;
                            part1.read_buf(&mut clrs)?;
                            for line in strip[pos..].chunks_mut(self.width).take(4) {
                                for el in line[..4].iter_mut() {
                                    *el = clrs[mask & 1];
                                    mask >>= 1;
                                }
                            }
                        }
                    },
                    34 => { // 4-clr pattern per 4x4 block: 32-bit mask followed by four colours
                        let mut clrs = [0; 4];
                        for quad_no in 0..4 {
                            let pos = blk_x + (quad_no & 1) * 4 + (quad_no & 2) * 2 * self.width;
                            let mut mask = part1.read_u32le()? as usize;
                            part1.read_buf(&mut clrs)?;
                            for line in strip[pos..].chunks_mut(self.width).take(4) {
                                for el in line[..4].iter_mut() {
                                    *el = clrs[mask & 3];
                                    mask >>= 2;
                                }
                            }
                        }
                    },
                    35 => { // 8-clr pattern per 4x4 block: colours in part2, 24 mask bits per two rows
                        let mut clrs = [0; 8];
                        for quad_no in 0..4 {
                            let pos = blk_x + (quad_no & 1) * 4 + (quad_no & 2) * 2 * self.width;
                            part2.read_buf(&mut clrs)?;
                            let mut mask = 0;
                            for (y, line) in strip[pos..].chunks_mut(self.width).take(4).enumerate() {
                                if (y & 1) == 0 {
                                    mask = part1.read_u24le()? as usize;
                                }
                                for el in line[..4].iter_mut() {
                                    *el = clrs[mask & 7];
                                    mask >>= 3;
                                }
                            }
                        }
                    },
                    36 => { // partial 4-colour pattern for 4x4 blocks
                        // small table for convenience:
                        // low nibble is the default index,
                        // top nibble is the alternative index ORed with 8 if applicable
                        const INDICES: [u8; 16] = [
                            0x90, 0x00, 0x00, 0xB0,
                            0x01, 0x90, 0xB0, 0x03,
                            0x01, 0xA1, 0xB2, 0x03,
                            0xA1, 0x02, 0x02, 0xB2
                        ];
                        // one 32-bit mask serves all four quadrants:
                        // each quadrant has eight positions with an alternative
                        let mut mask = part1.read_u32le()?;
                        let mut clrs = [0; 4];
                        for quad_no in 0..4 {
                            let pos = blk_x + (quad_no & 1) * 4 + (quad_no & 2) * 2 * self.width;
                            part1.read_buf(&mut clrs)?;
                            for (idc, line) in INDICES.chunks_exact(4)
                                    .zip(strip[pos..].chunks_mut(self.width)) {
                                for (&idx, el) in idc.iter().zip(line.iter_mut()) {
                                    if (idx & 0x80) == 0 {
                                        // fixed colour index, no mask bit consumed
                                        *el = clrs[usize::from(idx)];
                                    } else {
                                        if (mask & 1) == 0 {
                                            *el = clrs[usize::from(idx & 0xF)];
                                        } else {
                                            *el = clrs[usize::from((idx >> 4) & 0x7)];
                                        }
                                        mask >>= 1;
                                    }
                                }
                            }
                        }
                    },
                    37 => { // mostly uniform: base colour with per-pixel exceptions
                        let clr = part2.read_byte()?;
                        for line in strip[blk_x..].chunks_mut(self.width) {
                            // a set bit means the pixel has its own colour in part2
                            let mut mask = part1.read_byte()?;
                            for el in line[..8].iter_mut() {
                                *el = if (mask & 1) == 0 {
                                        clr
                                    } else {
                                        part2.read_byte()?
                                    };
                                mask >>= 1;
                            }
                        }
                    },
                    38 => { // average plus differences: base colour plus a 4-bit offset per pixel
                        let base_clr = part2.read_byte()?;
                        for line in strip[blk_x..].chunks_mut(self.width) {
                            let mut mask = part1.read_u32le()?;
                            for el in line[..8].iter_mut() {
                                *el = base_clr.wrapping_add((mask & 0xF) as u8);
                                mask >>= 4;
                            }
                        }
                    },
                    39 => { // masked nibble colour pattern
                        // both nibbles of the byte serve as alternative top nibbles of the colour
                        let b = part2.read_byte()?;
                        let base_clr = [b << 4, b & 0xF0];
                        for line in strip[blk_x..].chunks_mut(self.width) {
                            // five bits per pixel (four for the low nibble, one selector),
                            // so a row needs 40 bits: 32 up front and 8 more later
                            let mut mask = part1.read_u32le()?;
                            for (i, el) in line[..8].iter_mut().enumerate() {
                                if i == 6 {
                                    // 30 bits are used up by now, two remain in the low bits
                                    mask |= u32::from(part1.read_byte()?) << 2;
                                }
                                let nib = (mask & 0xF) as u8;
                                let idx = if (mask & 0x10) != 0 { 1 } else { 0 };
                                *el = base_clr[idx] | nib;
                                mask >>= 5;
                            }
                        }
                    },
                    40..=43 => { // RLE+scan: a set mask bit loads a new colour, otherwise the last one repeats
                        // 0 = row order, 1 = column order, 2 = ZIGZAG table, 3 = ZIGZAG with x mirrored
                        let scan_id = op & 3;

                        let mut mask = 0;
                        let mut clr = 0;
                        for (idx, zz) in ZIGZAG.iter().enumerate() {
                            // a fresh 32-bit mask for each half of the block
                            if (idx & 0x1F) == 0 {
                                mask = part1.read_u32le()?;
                            }
                            let (x, y) = match scan_id {
                                    0 => (idx & 7, idx / 8),
                                    1 => (idx / 8, idx & 7),
                                    2 => (usize::from(zz.0), usize::from(zz.1)),
                                    _ => (usize::from(zz.0) ^ 7, usize::from(zz.1)),
                                };
                            if (mask & 1) != 0 {
                                clr = part2.read_byte()?;
                            }
                            mask >>= 1;
                            strip[blk_x + x + y * self.width] = clr;
                        }
                    },
                    44..=47 => { // special pattern: like RLE+scan but only the low nibble of the colour changes
                        let scan_id = op & 3;

                        let mut mask = 0;
                        // low nibble of the first byte is the fixed top nibble of the colour,
                        // its high nibble is the first low-nibble update
                        let mut cur_nibs = part2.read_byte()?;
                        let mut nib_hi = true;
                        let mut clr = cur_nibs << 4;
                        for (idx, zz) in ZIGZAG.iter().enumerate() {
                            if (idx & 0x1F) == 0 {
                                mask = part1.read_u32le()?;
                            }
                            let (x, y) = match scan_id {
                                    0 => (idx & 7, idx / 8),
                                    1 => (idx / 8, idx & 7),
                                    2 => (usize::from(zz.0), usize::from(zz.1)),
                                    _ => (usize::from(zz.0) ^ 7, usize::from(zz.1)),
                                };
                            if (mask & 1) != 0 {
                                // nibbles are taken low first, then high, from each new byte
                                let nib = if nib_hi {
                                        cur_nibs >> 4
                                    } else {
                                        cur_nibs = part2.read_byte()?;
                                        cur_nibs & 0xF
                                    };
                                nib_hi = !nib_hi;
                                clr = (clr & 0xF0) | nib;
                            }
                            mask >>= 1;
                            strip[blk_x + x + y * self.width] = clr;
                        }
                    },
                    48..=51 => { // copy with signed offset: 16-bit offset relative to the block centre
                        let offs = part2.read_u16le()? as i16;
                        let pos = (blk_x + 4 + blk_y + 4 * self.width) as isize + (offs as isize);
                        validate!(pos >= 0);
                        let pos = pos as usize;
                        validate!(pos + 8 + 7 * self.width <= self.pframe.len());

                        let src = &self.pframe[pos..];
                        for (dline, sline) in strip[blk_x..].chunks_mut(self.width)
                                .zip(src.chunks(self.width)) {
                            dline[..8].copy_from_slice(&sline[..8]);
                        }
                        // shifted so that the first opcode of the family means "no refinement"
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, (op - 47) & 3)?;
                    },
                    52..=55 => { // large mv: signed 8-bit x and y displacements from the block centre
                        let mv_x = part2.read_byte()? as i8;
                        let mv_y = part2.read_byte()? as i8;

                        let pos = ((blk_x + blk_y + 4 * (self.width + 1)) as isize) + isize::from(mv_x) + isize::from(mv_y) * (self.width as isize);
                        validate!(pos >= 0);
                        let pos = pos as usize;
                        validate!(pos + 8 + 7 * self.width <= self.pframe.len());

                        let src = &self.pframe[pos..];
                        for (dline, sline) in strip[blk_x..].chunks_mut(self.width)
                                .zip(src.chunks(self.width)) {
                            dline[..8].copy_from_slice(&sline[..8]);
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, (op - 51) & 3)?;
                    },
                    56..=59 => { // fourmv with signed offset: 16-bit offset per 4x4 quadrant, relative to its centre
                        for quad in 0..4 {
                            let offs = part1.read_u16le()? as i16;
                            let pos = (blk_x + (quad & 1) * 4 + 2 + blk_y + 2 * self.width + (quad & 2) * 2 * self.width) as isize + (offs as isize);
                            validate!(pos >= 0);
                            let pos = pos as usize;
                            validate!(pos + 4 + 3 * self.width <= self.pframe.len());
                            let dst = &mut strip[blk_x + (quad & 1) * 4 + (quad & 2) * 2 * self.width..];
                            let src = &self.pframe[pos..];
                            for (dline, sline) in dst.chunks_mut(self.width)
                                    .zip(src.chunks(self.width)).take(4) {
                                dline[..4].copy_from_slice(&sline[..4]);
                            }
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, (op - 55) & 3)?;
                    },
                    60..=63 => { // fourmv with large mvs: signed 8-bit x and y per 4x4 quadrant
                        for quad in 0..4 {
                            let mv_x = part1.read_byte()? as i8;
                            let mv_y = part1.read_byte()? as i8;
                            let pos = (blk_x + (quad & 1) * 4 + 2 + blk_y + 2 * self.width + (quad & 2) * 2 * self.width) as isize + isize::from(mv_x) + isize::from(mv_y) * (self.width as isize);
                            validate!(pos >= 0);
                            let pos = pos as usize;
                            validate!(pos + 4 + 3 * self.width <= self.pframe.len());
                            let dst = &mut strip[blk_x + (quad & 1) * 4 + (quad & 2) * 2 * self.width..];
                            let src = &self.pframe[pos..];
                            for (dline, sline) in dst.chunks_mut(self.width)
                                    .zip(src.chunks(self.width)).take(4) {
                                dline[..4].copy_from_slice(&sline[..4]);
                            }
                        }
                        refine_block(&mut strip[blk_x..], self.width, &mut part1, &mut part2, (op - 59) & 3)?;
                    },
                    // a 6-bit value cannot exceed 63
                    _ => unreachable!(),
                }
            }
            blk_y += self.width * 8;
        }

        Ok(())
    }
}

impl InputSource for ACFDecoder {
    /// Video is always present; an audio stream is reported only when the
    /// header declared a non-zero sample rate.
    fn get_num_streams(&self) -> usize { if self.arate > 0 { 2 } else { 1 } }

    /// Describes stream 0 (8-bit paletted video) and, when audio is declared,
    /// stream 1 (unsigned 8-bit audio). Any other index yields `StreamInfo::None`.
    fn get_stream_info(&self, stream_no: usize) -> StreamInfo {
        match stream_no {
            0 => StreamInfo::Video(VideoInfo{
                    width:  self.width,
                    height: self.height,
                    bpp:    8,
                    tb_num: 1,
                    tb_den: self.fps,
                 }),
            1 if self.arate > 0 => StreamInfo::Audio(AudioInfo{
                    sample_rate: self.arate,
                    channels:    self.channels as u8,
                    sample_type: AudioSample::U8,
                }),
            _ => StreamInfo::None
        }
    }

    /// Returns the next frame together with the index of the stream it belongs to.
    ///
    /// Chunks are read until a video chunk has been decoded. Sound chunks seen on
    /// the way are buffered, and the call following a video frame hands out about
    /// one video frame's worth of the buffered audio.
    ///
    /// Sound chunks are skipped when the header declared no audio: buffering them
    /// would produce empty packets for a stream that `get_num_streams` does not
    /// report, and the buffer would never be drained.
    ///
    /// Returns `DecoderError::EOF` when the chunk header cannot be read or an
    /// `End` chunk is met, and `DecoderError::InvalidData` when a video chunk
    /// fails to unpack.
    fn decode_frame(&mut self) -> DecoderResult<(usize, Frame)> {
        if self.anext {
            self.anext = false;
            // audio bytes per video frame, but never less than one sample per channel
            let per_frame = (self.arate * self.channels / self.fps).max(self.channels) as usize;
            let len = self.audio.len().min(per_frame);
            let ret: Vec<u8> = self.audio.drain(..len).collect();
            return Ok((1, Frame::AudioU8(ret)));
        }
        let br = &mut self.fr;
        let mut name = [0; 8];
        loop {
            br.read_buf(&mut name).map_err(|_| DecoderError::EOF)?;
            let size = br.read_u32le()? as usize;
            match &name {
                b"Palette " => {
                    validate!(size <= self.pal.len());
                    br.read_buf(&mut self.pal[..size])?;
                },
                b"KeyFrame" | b"DltFrame" => {
                    self.data.resize(size, 0);
                    br.read_buf(&mut self.data)?;
                    self.unpack_frame().map_err(|_| DecoderError::InvalidData)?;
                    self.anext = !self.audio.is_empty();
                    return Ok((0, Frame::VideoPal(self.frame.clone(), self.pal)));
                },
                b"SoundBuf" | b"SoundFrm" => {
                    if self.arate > 0 {
                        br.read_extend(&mut self.audio, size)?;
                    } else {
                        // no audio stream was declared, so there is nowhere to send this
                        br.read_skip(size)?;
                    }
                },
                b"End     " => return Err(DecoderError::EOF),
                // known chunks that carry nothing the decoder needs;
                // for "Format  " let's hope the format does not really change
                b"NulChunk" | b"SoundEnd" |
                b"Recouvre" | b"Camera  " | b"SAL_STRT" | b"SAL_PART" | b"SAL_END " |
                b"Format  " => {
                    br.read_skip(size)?;
                },
                _ => {
                    // 12 = size of the chunk header that has just been read
                    println!("unknown chunk @ {:X}", br.tell() - 12);
                    br.read_skip(size)?;
                },
            }
        }
    }
}

/// Opens an ACF file and prepares a decoder for it.
///
/// Header chunks are scanned until the first frame, sound or null chunk, and the
/// reader is then rewound to the start of that chunk so decoding begins there.
/// A `Format` chunk supplying the frame size and rate must be seen before that
/// point; it may also carry the audio parameters. A palette met on the way is kept.
///
/// Fails with `DecoderError::InputNotFound` when the file cannot be opened, and
/// with a validation or read error when the header is malformed.
///
/// NOTE(review): the width has to be a multiple of 64 while the height only has
/// to be a multiple of 8 - confirm that this asymmetry is intended.
pub fn open(name: &str) -> DecoderResult<Box<dyn InputSource>> {
    let file = File::open(name).map_err(|_| DecoderError::InputNotFound(name.to_owned()))?;
    let mut br = FileReader::new_read(BufReader::new(file));

    let mut width: usize = 0;
    let mut height: usize = 0;
    let mut fps: u32 = 0;
    let mut arate: u32 = 0;
    let mut channels: u32 = 0;
    let mut pal = [0u8; 768];

    let mut tag = [0u8; 8];
    loop {
        // every chunk starts with an 8-byte tag and a 32-bit payload size
        br.read_buf(&mut tag)?;
        let chunk_len = br.read_u32le()? as usize;
        match &tag {
            b"FrameLen" => {
                // allowed only as the very first chunk of the file
                validate!(br.tell() == 12);
                br.read_skip(chunk_len)?;
            },
            b"Format  " => {
                let chunk_end = br.tell() + (chunk_len as u64);
                // only one format description is accepted
                validate!(width == 0 && height == 0);
                let actual_size = br.read_u32le()? as usize;
                validate!(actual_size >= 0x1C && actual_size + 4 <= chunk_len);

                width = br.read_u32le()? as usize;
                validate!(width > 0 && width <= 320 && width % 64 == 0);
                height = br.read_u32le()? as usize;
                validate!(height > 0 && height <= 240 && height % 8 == 0);

                // three fields the decoder has no use for
                for _ in 0..3 {
                    br.read_u32le()?;
                }

                fps = br.read_u32le()?;
                validate!(fps > 1 && fps <= 30);

                // a longer header also describes the audio
                if actual_size > 0x1C {
                    validate!(actual_size >= 0x2C);
                    arate = br.read_u32le()?;
                    validate!(arate <= 48000);
                    channels = br.read_u32le()?;
                    // either no audio at all or mono/stereo at a non-zero rate
                    let audio_ok = match arate {
                        0 => channels == 0,
                        _ => channels == 1 || channels == 2,
                    };
                    validate!(audio_ok);
                }

                br.seek(SeekFrom::Start(chunk_end))?;
            },
            b"Palette " => { // it may occur before format sometimes
                validate!(br.tell() > 12);
                validate!(chunk_len > 0 && chunk_len % 3 == 0 && chunk_len <= pal.len());
                br.read_buf(&mut pal[..chunk_len])?;
            },
            b"NulChunk" | b"SoundBuf" | b"SoundFrm" | b"KeyFrame" | b"DltFrame" => {
                validate!(br.tell() > 12);
                // step back over the chunk header so that decoding starts with this chunk
                br.seek(SeekFrom::Current(-12))?;
                break;
            },
            _ => {
                validate!(br.tell() > 12);
                br.read_skip(chunk_len)?;
            },
        }
    }
    // no usable format description was found before the first frame
    validate!(width > 0);

    let frame_size = width * height;
    let decoder = ACFDecoder {
        fr: br,
        pal,
        width,
        height,
        fps,
        frame: vec![0; frame_size],
        pframe: vec![0; frame_size],
        data: Vec::new(),
        arate,
        channels,
        audio: Vec::new(),
        anext: false,
    };
    Ok(Box::new(decoder))
}

/// Zigzag traversal of an 8x8 block as `(x, y)` coordinate pairs.
///
/// Used by the scan-based block modes: scan mode 2 takes the pairs as they
/// are and scan mode 3 mirrors the x coordinate (`x ^ 7`).
const ZIGZAG: [(u8, u8); 64] = [
    (0,0), (1,0), (0,1), (0,2), (1,1), (2,0), (3,0), (2,1),
    (1,2), (0,3), (0,4), (1,3), (2,2), (3,1), (4,0), (5,0),
    (4,1), (3,2), (2,3), (1,4), (0,5), (0,6), (1,5), (2,4),
    (3,3), (4,2), (5,1), (6,0), (7,0), (6,1), (5,2), (4,3),
    (3,4), (2,5), (1,6), (0,7), (1,7), (2,6), (3,5), (4,4),
    (5,3), (6,2), (7,1), (7,2), (6,3), (5,4), (4,5), (3,6),
    (2,7), (3,7), (4,6), (5,5), (6,4), (7,3), (7,4), (6,5),
    (5,6), (4,7), (5,7), (6,6), (7,5), (7,6), (6,7), (7,7)
];
