
Коды программ
# Алгоритм Хаффмана (Huffman coding)
class Node:
    """A node of the Huffman tree.

    ``char`` holds the byte value for a leaf and ``None`` for an internal
    node; ``freq`` is the weight used to order nodes.  Both children start
    out as ``None``.
    """

    def __init__(self, char, freq):
        self.char, self.freq = char, freq
        self.left = self.right = None

    def __lt__(self, other):
        """Compare by frequency only, so nodes can be stored in a heap."""
        return self.freq < other.freq
def build_huffman_tree(text: bytes):
    """Build a Huffman tree from the byte frequencies of *text*.

    Args:
        text: The data whose bytes are to be encoded.

    Returns:
        The root ``Node`` of the tree, or ``None`` when *text* is empty
        (there is nothing to build a tree from).
    """
    frequency = defaultdict(int)
    for byte in text:
        frequency[byte] += 1

    # Empty input: without this guard the heap would be empty and the final
    # ``queue_p[0]`` would raise IndexError.
    if not frequency:
        return None

    queue_p = [Node(byte, freq) for byte, freq in frequency.items()]
    heapq.heapify(queue_p)

    # Repeatedly merge the two least frequent nodes until one root remains.
    while len(queue_p) > 1:
        left = heapq.heappop(queue_p)
        right = heapq.heappop(queue_p)
        merged = Node(None, left.freq + right.freq)
        merged.left = left
        merged.right = right
        heapq.heappush(queue_p, merged)
    return queue_p[0]
def generate_codes(node, code=0, bit_length=0, codes=None):
    """Walk a Huffman tree and collect the code of every leaf.

    Args:
        node: Root of the (sub)tree; ``None`` is accepted and adds nothing.
        code: Integer holding the bits accumulated on the path so far.
        bit_length: Number of valid bits in *code*.
        codes: Dictionary to fill; a new one is created when omitted.

    Returns:
        The dictionary mapping each leaf's ``char`` to ``(code, bit_length)``.
    """
    # A fresh dict per top-level call: a mutable default would be shared
    # between calls and leak codes from previously processed inputs.
    if codes is None:
        codes = {}
    if node is not None:
        if node.char is not None:
            # A tree that is a single leaf has an empty path; give it a
            # one-bit code so the symbol still produces decodable output.
            codes[node.char] = (code, max(bit_length, 1))
        generate_codes(node.left, code << 1, bit_length + 1, codes)
        generate_codes(node.right, (code << 1) | 1, bit_length + 1, codes)
    return codes
def huffman_encode(text: bytes):
    """Huffman-encode *text* into a list of bits.

    Args:
        text: The bytes to encode.

    Returns:
        A tuple ``(encoded_bits, codes)``: the list of 0/1 integers and the
        code table mapping each byte to ``(code, bit_length)``.
    """
    root = build_huffman_tree(text)
    # Pass a fresh dictionary explicitly so the table never contains codes
    # left over from a previous call (the callee's default dict is shared).
    codes = generate_codes(root, 0, 0, {})

    encoded_bits = []
    for byte in text:
        code, length = codes[byte]
        # Emit the code most-significant bit first.
        encoded_bits.extend(
            (code >> shift) & 1 for shift in range(length - 1, -1, -1)
        )
    return encoded_bits, codes
def huffman_decode(encoded_bits, codes):
    """Decode a bit sequence using a Huffman code table.

    Args:
        encoded_bits: Iterable of 0/1 integers.
        codes: Mapping of byte value to ``(code, bit_length)``.

    Returns:
        The decoded bytes.  Bits left over at the end that do not form a
        complete code are ignored.
    """
    symbol_for = {pair: symbol for symbol, pair in codes.items()}
    output = bytearray()
    acc, width = 0, 0

    for bit in encoded_bits:
        acc = (acc << 1) | bit
        width += 1
        key = (acc, width)
        if key not in symbol_for:
            continue
        output.append(symbol_for[key])
        acc, width = 0, 0
    return bytes(output)
MTF
def mtf_encode(data: bytes) -> list:
    """Apply the move-to-front transform to *data*.

    Returns the list of positions each byte had in the symbol table, which
    starts as 0..255 and has every encoded byte moved to its front.
    """
    alphabet = list(range(256))
    ranks = []
    for value in data:
        rank = alphabet.index(value)
        ranks.append(rank)
        # Move the symbol just seen to the front of the table.
        alphabet.insert(0, alphabet.pop(rank))
    return ranks
def mtf_decode(encoded: list) -> bytes:
    """Invert the move-to-front transform.

    Each entry of *encoded* is a position in the symbol table (initially
    0..255); the symbol found there is emitted and moved to the front.
    """
    alphabet = list(range(256))
    restored = bytearray()
    for rank in encoded:
        value = alphabet.pop(rank)
        restored.append(value)
        alphabet.insert(0, value)
    return bytes(restored)
RLE
def rle_encode(data: bytes, M: int) -> bytes:
    """Run-length encode *data* using symbols of M bits.

    Output format: a control byte followed by payload.  A control byte with
    the high bit clear is a repeat count (2..127) followed by one symbol; a
    control byte with the high bit set carries, in its low 7 bits, the
    number (1..127) of literal symbols that follow.

    Args:
        data: Input bytes; its length must be a multiple of the symbol size.
        M: Symbol width in bits: 8, 16 or 24.

    Returns:
        The encoded bytes.

    Raises:
        ValueError: If M is unsupported or *data* ends with a partial symbol.
    """
    if M not in [8, 16, 24]:
        raise ValueError("M должно быть 8, 16 или 24.")

    symb_size = M // 8
    n = len(data)
    # Reject a partial trailing symbol up front; checking only at some
    # positions would let it slip into a literal block and corrupt output.
    if n % symb_size:
        raise ValueError(f"Недостаточно данных {M}.")

    encoded = bytearray()
    i = 0
    while i < n:
        # Always take the symbol as a slice: an int (data[i]) never compares
        # equal to a slice, which would make 8-bit runs undetectable.
        symb = data[i:i + symb_size]

        count = 1
        nxt = i + symb_size
        while count < 127 and data[nxt:nxt + symb_size] == symb:
            count += 1
            nxt += symb_size

        if count > 1:
            encoded.append(count)
            encoded.extend(symb)
            i = nxt
            continue

        # Literal block: gather symbols until the next one starts a run,
        # the input ends, or the 7-bit length field is full.
        start = i
        length = 0
        while (
            i < n
            and length < 127
            and data[i:i + symb_size] != data[i + symb_size:i + 2 * symb_size]
        ):
            i += symb_size
            length += 1
        encoded.append(0x80 | length)
        encoded.extend(data[start:i])
    return bytes(encoded)
def rle_decode(encoded: bytes, M: int) -> bytes:
    """Decode run-length encoded data made of M-bit symbols.

    A control byte with the high bit set announces that many (low 7 bits)
    literal symbols; otherwise the control byte is a repeat count for the
    single symbol that follows.

    Raises:
        ValueError: If M is not 8, 16 or 24.
    """
    if M not in [8, 16, 24]:
        raise ValueError("M должно быть 8, 16 или 24.")

    width = M // 8
    result = bytearray()
    pos, total = 0, len(encoded)
    while pos < total:
        control = encoded[pos]
        pos += 1

        if control & 0x80:
            # Literal block: copy the symbols through unchanged.
            span = (control & 0x7F) * width
            result.extend(encoded[pos:pos + span])
            pos += span
            continue

        # Repeated symbol.
        if width == 1:
            result.extend([encoded[pos]] * control)
        else:
            result.extend(encoded[pos:pos + width] * control)
        pos += width
    return bytes(result)
def detect_m(data: bytes) -> int:
    """Pick a symbol width (in bits) for RLE based on the length of *data*.

    The choice depends on the length only: 16 when the length is even
    (including empty input), otherwise 24 when it is divisible by three,
    otherwise 8.  The previous per-symbol range checks could never fail
    (every 2-byte value is <= 0xFFFF and every 3-byte value <= 0xFFFFFF),
    so they were removed without changing the result.
    """
    n = len(data)
    if n % 2 == 0:
        return 16
    if n % 3 == 0:
        return 24
    return 8
Алгоритм Барроуза-Уиллера для работы с блоками
def encode_bwt(origin_data, block_size):
    """Apply the Burrows-Wheeler transform to *origin_data* block by block.

    Args:
        origin_data: The bytes to transform.
        block_size: Number of bytes per independently transformed block.

    Returns:
        A tuple ``(data, indices)``: the concatenated last columns of every
        block, and for each block the row of the sorted rotation table that
        equals the original block.
    """
    last_columns = bytearray()
    primary_rows = []

    for offset in range(0, len(origin_data), block_size):
        block = origin_data[offset:offset + block_size]

        # Every cyclic rotation, paired with its shift to break ties.
        table = [
            (block[shift:] + block[:shift], shift)
            for shift in range(len(block))
        ]
        table.sort()

        # First row of the sorted table that reproduces the block itself.
        for row, (rotation, _) in enumerate(table):
            if rotation == block:
                primary_rows.append(row)
                break

        last_columns.extend(rotation[-1] for rotation, _ in table)

    return bytes(last_columns), primary_rows
|
def decode_bwt(encoded_data, indices, block_size):
    """Invert the block-wise Burrows-Wheeler transform.

    Args:
        encoded_data: Concatenated last columns of all blocks.
        indices: For each block, the row of the original string in the
            sorted rotation table.
        block_size: Block size that was used for encoding.

    Returns:
        The restored bytes.
    """
    output = bytearray()
    total = len(encoded_data)

    for block_no, row in enumerate(indices):
        begin = block_no * block_size
        block = encoded_data[begin:min(begin + block_size, total)]

        # Sorting (symbol, position) pairs yields the first column together
        # with a link from each of its rows to the matching last-column row.
        links = sorted(zip(block, range(len(block))))

        for _ in range(len(block)):
            symbol, row = links[row]
            output.append(symbol)

    return bytes(output)
|
LZ77
def lz77_compress(input_data, window_size=2048, buffer_size=64):
    """Compress *input_data* with LZ77.

    The output is a sequence of 5-byte tokens: a 2-byte big-endian offset,
    a 2-byte big-endian match length and one literal byte.  A token with
    offset 0 and length 0 carries only the literal.

    Args:
        input_data: The bytes to compress.
        window_size: How far back matches are searched.  Values above
            65535 are clamped because the offset field is two bytes wide.
        buffer_size: Maximum match length; clamped to 65535 for the same
            reason.

    Returns:
        The compressed bytes.
    """
    field_max = 0xFFFF
    # Anything larger would overflow ``to_bytes(2, ...)`` below.
    window_size = min(window_size, field_max)
    buffer_size = min(buffer_size, field_max)

    data_len = len(input_data)
    res = bytearray()
    ind = 0
    while ind < data_len:
        # Longest match for the data at ``ind`` inside the look-back window;
        # on ties the earliest start wins.
        best_len, best_pos = 0, 0
        for st in range(max(0, ind - window_size), ind):
            length = 0
            while (
                length < buffer_size
                and ind + length < data_len
                and input_data[st + length] == input_data[ind + length]
            ):
                length += 1
            if length > best_len:
                best_len, best_pos = length, ind - st

        # A match is used only if it is at least 3 bytes long and leaves a
        # byte after it to serve as the token's literal.
        if best_len >= 3 and ind + best_len < data_len:
            pos, length = best_pos, best_len
        else:
            pos, length = 0, 0

        res.extend(pos.to_bytes(2, 'big'))
        res.extend(length.to_bytes(2, 'big'))
        res.append(input_data[ind + length])
        ind += length + 1
    return bytes(res)
def decode_LZ77(data):
    """Decompress LZ77 data made of 5-byte tokens.

    Each token is a 2-byte big-endian offset, a 2-byte big-endian length
    and one literal byte: ``length`` bytes are copied starting ``offset``
    bytes back in the output, then the literal is appended.
    """
    out = bytearray()
    for base in range(0, len(data), 5):
        distance = int.from_bytes(data[base:base + 2], 'big')
        run = int.from_bytes(data[base + 2:base + 4], 'big')
        literal = data[base + 4]

        # Copy byte by byte so a match may overlap the bytes it produces.
        source = len(out) - distance
        for step in range(run):
            out.append(out[source + step])
        out.append(literal)
    return bytes(out)
LZ78
def lz78_compress(input_filename: str, output_filename: str):
    """Compress a file with LZ78 and print size statistics.

    The output file is a sequence of 5-byte records: a 4-byte big-endian
    dictionary index of the longest known prefix (0 for "no prefix")
    followed by the next byte.

    Args:
        input_filename: Path of the file to compress.
        output_filename: Path the compressed data is written to.
    """
    with open(input_filename, 'rb') as input_file:
        data = input_file.read()

    dictionary = {}
    output = bytearray()
    phrase = b""
    for byte in data:
        candidate = phrase + bytes((byte,))
        if candidate in dictionary:
            # Keep extending the phrase while it is already known.
            phrase = candidate
            continue
        output.extend(dictionary.get(phrase, 0).to_bytes(4, 'big'))
        output.append(byte)
        dictionary[candidate] = len(dictionary) + 1  # indices start at 1
        phrase = b""

    # Input ended in the middle of a known phrase: emit it as
    # (index of its prefix, its last byte).
    if phrase:
        output.extend(dictionary.get(phrase[:-1], 0).to_bytes(4, 'big'))
        output.append(phrase[-1])

    # One write instead of one tiny write per record.
    with open(output_filename, 'wb') as output_file:
        output_file.write(output)

    print(f"Сжатые данные записаны в {output_filename}")
    original_size = os.path.getsize(input_filename)
    compressed_size = os.path.getsize(output_filename)
    print("original_size", original_size)
    print("compressed_size", compressed_size)
    if compressed_size:
        compression_ratio = original_size / compressed_size
        print(f"Коэффициент сжатия: {compression_ratio:.3f}")
    else:
        # Empty input yields empty output; the ratio would divide by zero.
        print("Коэффициент сжатия: не определён (пустой файл)")
def lz78_decompress(input_filename: str, output_filename: str):
    """Decompress an LZ78 file made of 5-byte records.

    Each record is a 4-byte big-endian dictionary index (0 meaning "no
    prefix") followed by one byte.  Every record adds a new dictionary
    entry; entries are numbered from 1 in the order they are read.

    Args:
        input_filename: Path of the compressed file.
        output_filename: Path the restored data is written to.
    """
    with open(input_filename, 'rb') as source:
        packed = source.read()

    phrases = {}
    restored = bytearray()
    next_index = 1
    for base in range(0, len(packed), 5):
        prefix_index = int.from_bytes(packed[base:base + 4], 'big')
        symbol = packed[base + 4:base + 5][0]

        if prefix_index == 0:
            entry = bytearray([symbol])
        else:
            entry = phrases[prefix_index] + bytearray([symbol])

        restored.extend(entry)
        phrases[next_index] = entry
        next_index += 1

    with open(output_filename, 'wb') as target:
        target.write(restored)
    print(f"Данные записаны в {output_filename}")
Ссылка на GitHub: