1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
//! This module deals with command-line arguments and directly related data
//! structures.

use crate::input::ByteCounter;
use clap::arg_enum;
use lazy_static::lazy_static;
use std::path::PathBuf;
use structopt::StructOpt;

/// Encoding name literal used when simulating the non-built-in
/// ASCII-decoder.
pub const ASCII_ENC_LABEL: &str = "ascii";

/// Encoding to use when none is specified with the `--encoding`
/// command-line-argument.
/// Must be one of the names listed by `--list-encodings`.
// NOTE(review): the previous wording referred to `--chars_min` here, which
// looks like a copy-paste slip from `CHARS_MIN_DEFAULT` below — confirm.
pub const ENCODING_DEFAULT: &str = "UTF-8";

/// Default value, when no `--chars-min` command-line-argument
/// is given. Must be `u8`.
pub const CHARS_MIN_DEFAULT: u8 = 4;

/// Default value, when no `--counter-offset` command-line-argument
/// is given.
pub const COUNTER_OFFSET_DEFAULT: ByteCounter = 0;

/// Default value when no `--output-line-len`
/// command-line-argument is given.
pub const OUTPUT_LINE_CHAR_NB_MAX_DEFAULT: usize = 64;

/// Lower bound for the output line length.
/// There must be space for at least 3 long Unicode characters,
/// to guarantee progress in streaming. You want much longer lines.
pub const OUTPUT_LINE_CHAR_NB_MIN: usize = 6;

/// This structure holds the command-line-options and is populated by
/// `structopt`. See man-page and the output of `--list-encodings` and
/// `--help` for more information about their meaning.
// Maintainer notes (plain comments, so they do not leak into `--help`):
// * `structopt` turns the `///` doc comment of every field into the help
//   text of the corresponding option; always document fields with `///`.
// * Keep the struct-level doc comment a single paragraph; the explicit
//   `about = "..."` below is what `--help` is meant to show.
// * Numeric options (`chars_min`, `output_line_len`, `counter_offset`, ...)
//   are stored as raw strings here; presumably they are parsed and validated
//   elsewhere — not visible in this file.
#[derive(Debug, PartialEq, StructOpt)]
#[structopt(
    name = "stringsext",
    about = "Find multi-byte encoded strings in binary data."
)]
pub struct Args {
    /// filter applied after decoding (see
    /// `--list-encodings` for AF examples)
    #[structopt(long, short = "a")]
    pub ascii_filter: Option<String>,
    /// never print byte-counter, encoding or filter
    #[structopt(long, short = "c")]
    pub no_metadata: bool,
    /// show how command-line-options are interpreted
    #[structopt(long, short = "d")]
    pub debug_option: bool,
    /// paths to files to scan (or `-` for stdin)
    #[structopt(name = "FILE", parse(from_os_str))]
    pub inputs: Vec<PathBuf>,
    /// set (multiple) encodings to search for
    #[structopt(long, short = "e")]
    pub encoding: Vec<String>,
    /// grep for characters with ASCII-code in output lines
    #[structopt(long, short = "g")]
    pub grep_char: Option<String>,
    /// list predefined encoding and filter names for ENC
    #[structopt(long, short = "l")]
    pub list_encodings: bool,
    /// minimum characters of printed strings
    #[structopt(long, short = "n")]
    pub chars_min: Option<String>,
    /// require chars in finding to be in the same Unicode-block
    #[structopt(long, short = "r")]
    pub same_unicode_block: bool,
    /// print not to stdout but in file
    #[structopt(long, short = "p", parse(from_os_str))]
    pub output: Option<PathBuf>,
    /// output line length in Unicode-codepoints
    #[structopt(long, short = "q")]
    pub output_line_len: Option<String>,
    /// start counting input bytes with NUM
    #[structopt(long, short = "s")]
    pub counter_offset: Option<String>,
    // This used to be a plain `//` comment, which left `--radix` as the only
    // option without any help text.
    /// enable byte-counter with radix `o`, `x` or `d`
    #[structopt(long, short = "t", possible_values = &Radix::variants(), case_insensitive = true)]
    pub radix: Option<Radix>,
    /// filter applied after decoding
    /// (see `--list-encodings` for UBF examples)
    #[structopt(long, short = "u")]
    pub unicode_block_filter: Option<String>,
    /// print version and exit
    #[structopt(long, short = "V")]
    pub version: bool,
}

// `arg_enum!` (imported from `clap`) wraps the enum so that it can be used
// as a command-line value: `Args::radix` passes the generated
// `Radix::variants()` to `possible_values` and matches case-insensitively.
arg_enum! {
#[derive(Debug, PartialEq)]
/// radix of the `byte-counter` when printed
pub enum Radix {
    // NOTE(review): the variant descriptions are kept as plain `//` comments
    // on purpose; `arg_enum!` does not appear to accept attributes (which
    // includes `///` doc comments) on variants — confirm before converting.

    // octal
    O,
    // hexadecimal
    X,
    // decimal
    D,
}
}

lazy_static! {
/// Structure to hold the parsed command-line arguments.
///
/// The value is produced by `Args::from_args()` and, being a `lazy_static`,
/// is evaluated on first access rather than at program start.
// NOTE(review): per the structopt documentation `from_args` reads the
// process arguments and, on invalid input, prints the error and exits the
// process — confirm for the pinned structopt version. Tests should therefore
// build their own `Args` with `Args::from_iter` instead of touching `ARGS`.
pub static ref ARGS : Args = Args::from_args();
}

#[cfg(test)]
mod tests {
    use super::{Args, Radix};
    use std::path::PathBuf;
    use structopt::StructOpt;

    /// Are the command-line options read and processed correctly?
    ///
    /// Feeds a hand-written `argv` through the parser and checks every field
    /// of the resulting `Args`, including the options that were *not* given.
    #[test]
    fn test_arg_parser() {
        // The argv. Normally you'd just use `from_args`, which automatically
        // reads the arguments of the running process.
        let argv = vec![
            "stringsext",
            "-d",
            "-n",
            "10",
            "-g",
            "64",
            "-e",
            "ascii",
            "-e",
            "utf-8",
            "-V",
            "-l",
            "-s",
            "1500",
            "-p",
            "outfile",
            "-q",
            "40",
            "-t",
            "o",
            "-r",
            "infile1",
            "infile2",
        ];
        let args = Args::from_iter(argv);

        // Positional arguments: compare the whole vector so that a surplus or
        // missing entry is reported as a failed assertion, not an index panic.
        assert_eq!(
            args.inputs,
            vec![PathBuf::from("infile1"), PathBuf::from("infile2")]
        );

        // Boolean flags that were given.
        assert!(args.debug_option);
        assert!(args.version);
        assert!(args.list_encodings);
        assert!(args.same_unicode_block);

        // Options carrying a value.
        assert_eq!(
            args.encoding,
            vec!["ascii".to_string(), "utf-8".to_string()]
        );
        assert_eq!(args.chars_min, Some("10".to_string()));
        assert_eq!(args.grep_char, Some("64".to_string()));
        // `-t o` is lower case: relies on `case_insensitive = true`.
        assert_eq!(args.radix, Some(Radix::O));
        assert_eq!(args.counter_offset, Some("1500".to_string()));
        assert_eq!(args.output, Some(PathBuf::from("outfile")));
        assert_eq!(args.output_line_len, Some("40".to_string()));

        // Options that were not given must stay at their "unset" value.
        assert!(!args.no_metadata);
        assert_eq!(args.ascii_filter, None);
        assert_eq!(args.unicode_block_filter, None);
    }
}