LZ77 compression scratch

2020-05-06 11:39发布

问题:

I'm building an LZ77 compressor. I read the whole file into a single string and then try to compress it. Is there a better way to do this? My code is attached below; please tell me whether any changes should be made so that the program compresses quickly even when it reads a big file.

import fileinput

class Assign:
    """Boyer-Moore-Horspool searcher for one needle at a time.

    Call pattern() to install a needle, then find() to locate its first
    occurrence in a text.  Needle and text may be any indexable sequences
    of hashable, mutually comparable symbols (e.g. both str or both bytes).
    """

    def pattern(self, data):
        """Install *data* as the needle and precompute its shift table.

        Sets three attributes read by find():
          self.data -- the needle itself
          self.m    -- the needle length
          self.skip -- dict mapping a symbol to how far the search may slide
                       when that symbol is aligned with the needle's last
                       position; symbols missing from the dict shift by the
                       full needle length.
        """
        self.data = data
        self.m = len(data)
        last = self.m - 1
        # The needle's final symbol is left out on purpose: giving it an
        # entry would produce a shift of zero.  Later (right-most)
        # occurrences overwrite earlier ones, which yields the smallest
        # safe shift.  Keying by the symbol itself (rather than ord() into
        # a 256-slot table) keeps this working for code points >= 256.
        self.skip = {data[k]: last - k for k in range(last)}

    def find(self, text):
        """Return the index of the first occurrence of the needle in *text*.

        Returns -1 when the needle does not occur (including when it is
        longer than *text*).  An empty needle matches at index 0.
        pattern() must have been called first.
        """
        n = len(text)
        m = self.m
        if m > n:
            return -1
        data = self.data
        skip = self.skip
        k = m - 1  # text index aligned with the needle's last symbol
        while k < n:
            j = m - 1
            i = k
            # Compare right-to-left from the aligned position.
            while j >= 0 and text[i] == data[j]:
                j -= 1
                i -= 1
            if j < 0:
                return i + 1
            k += skip.get(text[k], m)
        return -1

class LZ77:
    """Greedy LZ77-style encoder over a string held fully in memory.

    The encoder walks the stream left to right.  Everything already
    consumed forms the search window (it is not bounded in size).
    """

    def __init__(self, data):
        """Prepare to encode *data* from its first symbol."""
        self.position = 0                    # index of the next unencoded symbol
        self.window = ""                     # already-encoded prefix of the stream
        self.stream = data
        self.streamSize = len(self.stream)   # number of symbols still to encode
        # Retained so code that reads the .search attribute keeps working;
        # Encode() itself uses the window's built-in find().
        self.search = Assign()

    def Encode(self):
        """Consume one token from the stream and return it as ((B, L), c).

        L is the number of upcoming symbols that are copied from the
        window, B is the distance from the current position back to the
        start of that copy (0 when L is 0), and c is the literal symbol
        following the copied run.  The encoder state (position,
        streamSize, window) advances past the L + 1 consumed symbols.

        When nothing is left to encode, returns ((0, 0), '') and leaves
        the state untouched instead of failing on an unbound loop variable.
        """
        if self.streamSize <= 0:
            return ((0, 0), '')

        stream = self.stream
        window = self.window
        start = self.position
        match_at = 0
        i = 0
        for i in range(self.streamSize):
            # Any occurrence of the longer prefix is also an occurrence of
            # the shorter one, so the first hit can never lie before the
            # previous first hit; resuming there gives the same answer as
            # rescanning the window from index 0.
            hit = window.find(stream[start:start + i + 1], match_at)
            if hit < 0:
                break
            match_at = hit

        # i is now the copied length; the symbol at offset i is emitted as
        # the literal (this also holds when the whole remainder matched).
        literal = stream[start + i]
        distance = start - match_at if i > 0 else 0

        consumed = i + 1
        self.position += consumed
        self.streamSize -= consumed
        self.window = stream[:self.position]
        return ((distance, i), literal)

    def Encoder(self):
        """Encode the rest of the stream and return the result as one string.

        Each token is written as str(B) + str(L) + c with no separators.
        NOTE(review): without delimiters or fixed-width fields the numbers
        cannot be told apart when decoding (B=1, L=12 and B=11, L=2 look
        the same); the format is kept as-is for compatibility.
        """
        pieces = []
        while self.streamSize > 0:
            (B, L), C = self.Encode()
            pieces.append(str(B) + str(L) + C)
        return "".join(pieces)

def aiyoo(filename):
    """Compress the file *filename* into a file whose name is read from stdin.

    Prompts for the output file name, echoes *filename*, reads the input
    file in binary mode, runs it through LZ77(...).Encoder() and writes
    the resulting string to the output file.  Uses raw_input, i.e. this
    targets Python 2 like the rest of the file.
    """
    enter = raw_input("enter the filename to which the original file is to e compressed to")
    print(filename)

    # Read the whole input in one call, and do it before the output file
    # is opened: opening for writing truncates, so a wrong input path (or
    # input == output) must not wipe anything.
    with open(filename, 'rb') as fob:
        original = fob.read()

    stream = LZ77(original).Encoder()

    with open(enter, 'wb') as fob1:
        fob1.write(stream)

Thanks in advance