1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
use std::cmp;
use std::collections::HashMap;
use super::Code;
use super::Sink;
use super::Lz77Encode;
#[derive(Debug)]
pub struct DefaultLz77Encoder {
window_size: u16,
buf: Vec<u8>,
}
impl DefaultLz77Encoder {
pub fn new() -> Self {
Self::with_window_size(super::MAX_WINDOW_SIZE)
}
pub fn with_window_size(size: u16) -> Self {
DefaultLz77Encoder {
window_size: cmp::min(size, super::MAX_WINDOW_SIZE),
buf: Vec::new(),
}
}
}
impl Lz77Encode for DefaultLz77Encoder {
fn encode<S>(&mut self, buf: &[u8], sink: S)
where
S: Sink,
{
self.buf.extend_from_slice(buf);
if self.buf.len() >= self.window_size as usize * 8 {
self.flush(sink);
}
}
fn flush<S>(&mut self, mut sink: S)
where
S: Sink,
{
let mut prefix_table = HashMap::new();
let mut i = 0;
while i < cmp::max(3, self.buf.len()) - 3 {
let key = prefix(&self.buf[i..]);
let matched = prefix_table.insert(key, i);
if let Some(j) = matched {
let distance = i - j;
if distance <= self.window_size as usize {
let length = 3 + longest_common_prefix(&self.buf, i + 3, j + 3);
sink.consume(Code::Pointer {
length: length,
backward_distance: distance as u16,
});
i += length as usize;
continue;
}
}
sink.consume(Code::Literal(self.buf[i]));
i += 1;
}
for b in &self.buf[i..] {
sink.consume(Code::Literal(*b));
}
self.buf.clear();
}
fn window_size(&self) -> u16 {
self.window_size
}
}
fn prefix(buf: &[u8]) -> [u8; 3] {
unsafe {
[
*buf.get_unchecked(0),
*buf.get_unchecked(1),
*buf.get_unchecked(2),
]
}
}
fn longest_common_prefix(buf: &[u8], i: usize, j: usize) -> u16 {
buf[i..]
.iter()
.take(super::MAX_LENGTH as usize - 3)
.zip(&buf[j..])
.take_while(|&(x, y)| x == y)
.count() as u16
}