-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
95 lines (72 loc) · 2.46 KB
/
main.py
File metadata and controls
95 lines (72 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# -*- coding: utf-8 -*-
"""Untitled3.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1xR_gPCnK5FuTSfRaqT-2-Bg1MYMp4ZJJ
"""
!nvidia-smi
!pip install cupy-cuda12x
import cupy as cp
import numpy as np
from heapq import heappush, heappop, heapify
from collections import defaultdict
# Step 1: Input string
data = "this is a test string for huffman encoding. Let's make it large!" * 1000 # Large data
data
# Step 2: Convert to byte array and move to GPU
byte_data = np.frombuffer(data.encode(), dtype=np.uint8)
gpu_data = cp.asarray(byte_data)
# Step 3: GPU-based frequency count
unique, counts = cp.unique(gpu_data, return_counts=True)
symbols = cp.asnumpy(unique)
frequencies = cp.asnumpy(counts)
freq_table = dict(zip(symbols, frequencies))
print("Frequency Table (from GPU):")
print(freq_table)
# Step 4: Build Huffman Tree (on CPU)
class Node:
def __init__(self, symbol=None, freq=0):
self.symbol = symbol
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other): # Needed for heapq
return self.freq < other.freq
# Create leaf nodes and heapify
heap = [Node(sym, freq) for sym, freq in freq_table.items()]
heapify(heap)
# Build the Huffman Tree
while len(heap) > 1:
node1 = heappop(heap)
node2 = heappop(heap)
merged = Node(freq=node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heappush(heap, merged)
root = heap[0]
# Step 5: Generate Huffman Codes
code_map = {}
def generate_codes(node, code=""):
if node is None:
return
if node.symbol is not None:
code_map[node.symbol] = code
generate_codes(node.left, code + "0")
generate_codes(node.right, code + "1")
generate_codes(root)
print("\nHuffman Codes:")
for k, v in code_map.items():
print(f"{chr(k)}: {v}")
# Step 6: Encode the data
symbol_to_code = np.array([code_map.get(i, '') for i in range(256)], dtype=object) # 256 for all byte values
# Convert original data to GPU again
gpu_data = cp.asarray(byte_data)
# Map byte values to bitstrings using GPU
# Back to CPU array
cpu_bytes = gpu_data.get()
# Map symbols to Huffman codes (CPU side, works with strings)
encoded = [symbol_to_code[byte] for byte in cpu_bytes]
encoded_string = ''.join(encoded)
print(f"\nEncoded bitstring (first 500 bits): {encoded_string[:500]}")
print(f"\nOriginal size: {len(byte_data) * 8} bits")
print(f"Compressed size: {len(encoded_string)} bits")