1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
//! Custom 7-bit ASCII encoding restricted to graphic characters (plus tab and space).

use encoding::types::{ByteWriter, CodecError, Encoding, RawDecoder, RawEncoder, StringWriter};
use std::convert::Into;

/// A constant reference to the unit value [`AsciiGraphicEncoding`].
///
/// The reference can be cast to the `encoding` crate's trait-object alias.
/// Usage: `let enc = ASCII_GRAPHIC as encoding::EncodingRef`.
pub const ASCII_GRAPHIC: &self::AsciiGraphicEncoding = &self::AsciiGraphicEncoding;

/// This custom encoding is derived from `encoding::ASCIIEncoding`.
///
/// The only difference is that it represents graphic characters only: the encoder and
/// decoder it creates accept tab (0x09) and the range 0x20 (space) through 0x7E.
/// Every other control character (newline included), DEL (0x7F) and anything outside
/// 7-bit ASCII is reported as an error.
#[derive(Clone, Copy)]
pub struct AsciiGraphicEncoding;

impl Encoding for AsciiGraphicEncoding {
    fn name(&self) -> &'static str {
        "ascii"
    }
    fn whatwg_name(&self) -> Option<&'static str> {
        None
    }
    fn raw_encoder(&self) -> Box<dyn RawEncoder> {
        AsciiGraphicEncoder::new()
    }
    fn raw_decoder(&self) -> Box<dyn RawDecoder> {
        AsciiGraphicDecoder::new()
    }
}

/// An encoder for graphic ASCII.
///
/// It passes through tab (0x09) and the bytes 0x20 through 0x7E unchanged and reports
/// every other character as unrepresentable.
#[derive(Clone, Copy)]
pub struct AsciiGraphicEncoder;

impl AsciiGraphicEncoder {
    /// Builds a new encoder, already boxed as a `RawEncoder` trait object.
    // The constructor intentionally returns a boxed trait object instead of `Self`,
    // which is what the lint below would otherwise complain about.
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> Box<dyn RawEncoder> {
        Box::new(Self)
    }
}

impl RawEncoder for AsciiGraphicEncoder {
    fn from_self(&self) -> Box<dyn RawEncoder> {
        AsciiGraphicEncoder::new()
    }
    fn is_ascii_compatible(&self) -> bool {
        true
    }

    fn raw_feed(
        &mut self,
        input: &str,
        output: &mut dyn ByteWriter,
    ) -> (usize, Option<CodecError>) {
        output.writer_hint(input.len());

        // all non graphic is unrepresentable
        match input
            .as_bytes()
            .iter()
            .position(|&ch| ch >= 0x7F || (ch < 0x20) && (ch != 0x09))
        {
            Some(first_error) => {
                output.write_bytes(&input.as_bytes()[..first_error]);
                let len = input[first_error..].chars().next().unwrap().len_utf8();
                (
                    first_error,
                    Some(CodecError {
                        upto: (first_error + len) as isize,
                        cause: "non-graphic character".into(),
                    }),
                )
            }
            None => {
                output.write_bytes(input.as_bytes());
                (input.len(), None)
            }
        }
    }

    fn raw_finish(&mut self, _output: &mut dyn ByteWriter) -> Option<CodecError> {
        None
    }
}

/// A decoder for graphic ASCII.
///
/// It accepts tab (0x09) and the bytes 0x20 through 0x7E and reports every other
/// byte as an error.
#[derive(Clone, Copy)]
pub struct AsciiGraphicDecoder;

impl AsciiGraphicDecoder {
    /// Builds a new decoder, already boxed as a `RawDecoder` trait object.
    // The constructor intentionally returns a boxed trait object instead of `Self`,
    // which is what the lint below would otherwise complain about.
    #[allow(clippy::new_ret_no_self)]
    pub fn new() -> Box<dyn RawDecoder> {
        Box::new(Self)
    }
}

impl RawDecoder for AsciiGraphicDecoder {
    fn from_self(&self) -> Box<dyn RawDecoder> {
        AsciiGraphicDecoder::new()
    }
    fn is_ascii_compatible(&self) -> bool {
        true
    }

    fn raw_feed(
        &mut self,
        input: &[u8],
        output: &mut dyn StringWriter,
    ) -> (usize, Option<CodecError>) {
        output.writer_hint(input.len());

        fn write_ascii_bytes(output: &mut dyn StringWriter, buf: &[u8]) {
            output.write_str(std::str::from_utf8(buf).unwrap());
        }

        // all non graphic is error
        match input
            .iter()
            .position(|&ch| ch >= 0x7F || (ch < 0x20) && (ch != 0x09))
        {
            Some(first_error) => {
                write_ascii_bytes(output, &input[..first_error]);
                (
                    first_error,
                    Some(CodecError {
                        upto: first_error as isize + 1,
                        cause: "non graphic character".into(),
                    }),
                )
            }
            None => {
                write_ascii_bytes(output, input);
                (input.len(), None)
            }
        }
    }

    fn raw_finish(&mut self, _output: &mut dyn StringWriter) -> Option<CodecError> {
        None
    }
}

#[cfg(test)]
mod tests {
    use super::ASCII_GRAPHIC;
    use encoding::EncodingRef;

    /// Decoding stops at the first control byte and flags exactly that one byte.
    #[test]
    fn test_decoder() {
        let encoding = ASCII_GRAPHIC as EncodingRef;
        let mut decoder = encoding.raw_decoder();

        // "abc", three ETX bytes, a NUL, then more text containing a newline.
        let bytes: &[u8] = b"abc\x03\x03\x03\x00def\nghijk";
        let mut decoded = String::new();

        let (consumed, error) = decoder.raw_feed(bytes, &mut decoded);

        assert_eq!(decoded, "abc");
        assert_eq!(consumed, 3);
        assert_eq!(error.unwrap().upto, 4);
    }
}