prefix compressor

Community Forums/Showcase/prefix compressor

flounder22001(Posted 2006) [#1]
this is a prefix compressor i've been working on. It works by switching characters in a file with prefix codes of varying lengths, some shorter than the character and some longer. If you've come across things like "fibinacci encoders/compressors", this one is a little different because it optimizes the prefix codes a bit (or 2, *slaps knee*).

Use it for whatever you want. If it's in a project you don't need to include any credits, but if you want to share the source code somewhere else try to include my name ;)

-edit: forgot to say: This should only use ~1-2mb of memory (never bothered to check) regardless of the file. It is also fast (though i haven't benchmarked that either)

;******************************************************************************************************************************
;************************************************* Prefix compression *********************************************************
;******************************************************************************************************************************
;by Alec Dee
;this compression method swaps bytes with prefix codes of lengths 2 Or greater. The prefix codes are generated by
;fibinacci numbers with some improvement To the original codes, For example:
;Before   After
;11       11
;011      011
;0011     001
;1011     101
;00011    000
;10011    100
;01011    010
;the Last "block" of prefixes can have the Last two 1's taken away And the second To Last block can have the Last
;1 taken away, For shorter prefixes.
;this compressor also uses order-1 modeling. This changes the frequency of characters based on the last character
;read, english text usually has big compression improvements with this method

Dim counts(0,0)
Dim places(0,0)
Dim replaces(0,0)
Dim fibs(0)
Dim prefix_codes(0)
Dim prefix_lengths(0)

Function prefix(file$,newfile$)
	in=ReadFile(file)
	If in=0
		RuntimeError file+" cannot be read"
	EndIf
	out=WriteFile(newfile)
	If out=0
		RuntimeError newfile+" cannot be written"
	EndIf
	size=FileSize(file)
	Dim counts(255,255)
	Dim places(255,255)
	Dim replaces(255,255)
	Dim fibs(13)
	;holds the binary string of prefix codes
	Dim prefix_codes(255)
	;holds the lengths of the prefix codes
	Dim prefix_lengths(255)
	;these fibinacci numbers are going to be used to compute the prefix codes
	fibs(0)=1
	fibs(1)=2
	fibs(2)=3
	fibs(3)=5
	fibs(4)=8
	fibs(5)=13
	fibs(6)=21
	fibs(7)=34
	fibs(8)=55
	fibs(9)=89
	fibs(10)=144
	fibs(11)=233
	fibs(12)=377
	oldpos=0
	prepos=0
	bit=0
	;compute the prefix's
	For i=0 To 255
		bit=i+1
		If fibs(oldpos+1)=bit
			oldpos=oldpos+1
		EndIf
		prefix_lengths(i)=oldpos+1
		prepos=oldpos
		prefix_codes(i)=1
		While prepos>-1
			If fibs(prepos)<=bit
				prefix_codes(i)=prefix_codes(i)+(1 Shl (prefix_lengths(i)-prepos))
				bit=bit-fibs(prepos)
			EndIf
			prepos=prepos-1
		Wend
	Next
	;remove unecessary bits from the end prefix's
	For i=0 To 255
		move=prefix_lengths(i)-oldpos+1
		If move>0
			prefix_lengths(i)=prefix_lengths(i)-move
			prefix_codes(i)=prefix_codes(i) Shr move
		EndIf
	Next
	last_byte=0
	buffer=0
	bits_left=7
	max_frequency=16382 ;when one of the counts(x,x) crosses this it's values get halved, if you change this value make sure to
						;change it for unprefix
	;set the default values for count, and lookup tables
	For i=0 To 255
		For t=0 To 255
			counts(i,t)=0
			places(i,t)=t
			replaces(i,t)=t
		Next
	Next
	;read each bit of the file
	For i=1 To size
		c=ReadByte(in)
		counts(last_byte,c)=counts(last_byte,c)+32
		If counts(last_byte,c)>max_frequency
			For t=0 To 255
				counts(last_byte,t)=counts(last_byte,t) Shr 1
			Next
		EndIf
		;find the character's place (rank according to other counts)
		place=replaces(last_byte,c)
		;output the prefix code for that place, which is hopefully has a length < 8
		For t=prefix_lengths(place) To 0 Step -1
			buffer=buffer Or (((prefix_codes(place) Shr t) And 1) Shl bits_left)
			bits_left=bits_left-1
			;if the output buffer gets filled, output the buffer and reset it
			If bits_left=-1
				WriteByte out,buffer
				bits_left=7
				buffer=0
			EndIf
		Next
		;sort the character based on it's neighbors
		If place>0
			While counts(last_byte,places(last_byte,place-1))<counts(last_byte,places(last_byte,place))
				oldplace=places(last_byte,place-1)
				places(last_byte,place-1)=places(last_byte,place)
				places(last_byte,place)=oldplace
				replaces(last_byte,c)=replaces(last_byte,c)-1
				replaces(last_byte,oldplace)=replaces(last_byte,oldplace)+1
				place=place-1
				If place=0
					Exit
				EndIf
			Wend
		EndIf
		;record this byte, this is what's needed for the order-1 effect
		last_byte=c
	Next
	;the buffer wasn't completely filled, but has imformation anyway, so output it
	If bits_left<>7
		WriteByte out,buffer
	EndIf
	Dim counts(0,0)
	Dim places(0,0)
	Dim replaces(0,0)
	Dim fibs(0)
	Dim prefix_codes(0)
	Dim prefix_lengths(0)
	CloseFile(in)
	CloseFile(out)
End Function

;this decompresses the prefix encoded files
;the prefix codes aren't used in this one, because they aren't needed. Prefix values can be calculated instead just using
;the fibinacci numbers
Function unprefix(file$,newfile$)
	in=ReadFile(file)
	If in=0
		RuntimeError file+" cannot be read"
	EndIf
	out=WriteFile(newfile)
	If out=0
		RuntimeError newfile+" cannot be written"
	EndIf
	size=FileSize(file)
	Dim counts(255,255)
	Dim places(255,255)
	Dim replaces(255,255)
	Dim fibs(12)
	fibs(0)=1
	fibs(1)=2
	fibs(2)=3
	fibs(3)=5
	fibs(4)=8
	fibs(5)=13
	fibs(6)=21
	fibs(7)=34
	fibs(8)=55
	fibs(9)=89
	fibs(10)=144
	fibs(11)=233
	fibs(12)=377
	last_byte=0
	bitval=0
	prepos=0
	thisbit=0
	lastbit=0
	madesymbol=0
	max_frequency=16382
	For i=0 To 255
		For t=0 To 255
			counts(i,t)=0
			places(i,t)=t
			replaces(i,t)=t
		Next
	Next
	For i=1 To size
		c=ReadByte(in)
		For mask=7 To 0 Step -1
			thisbit=(c Shr mask) And 1
			If lastbit And thisbit
				madesymbol=1
			ElseIf prepos=10
				If thisbit
					bitval=bitval+144
				Else
					bitval=bitval+233
				EndIf
				madesymbol=1
			Else
				bitval=bitval+thisbit*fibs(prepos)
				prepos=prepos+1
				lastbit=thisbit
			EndIf
			If madesymbol
				bitval=bitval-1
				symbol=places(last_byte,bitval)
				WriteByte out,symbol
				counts(last_byte,symbol)=counts(last_byte,symbol)+32
				If counts(last_byte,symbol)>max_frequency
					For t=0 To 255
						counts(last_byte,t)=counts(last_byte,t) Shr 1
					Next
				EndIf
				place=bitval
				If place>0
					While counts(last_byte,places(last_byte,place-1))<counts(last_byte,places(last_byte,place))
						oldplace=places(last_byte,place-1)
						places(last_byte,place-1)=places(last_byte,place)
						places(last_byte,place)=oldplace
						replaces(last_byte,symbol)=replaces(last_byte,c)-1
						replaces(last_byte,oldplace)=replaces(last_byte,oldplace)+1
						place=place-1
						If place=0
							Exit
						EndIf
					Wend
				EndIf
				last_byte=symbol
				bitval=0
				prepos=0
				madesymbol=0
				lastbit=0
			EndIf
		Next
	Next
	Dim counts(0,0)
	Dim places(0,0)
	Dim replaces(0,0)
	Dim fibs(0)
	CloseFile(in)
	CloseFile(out)
End Function

;prefix("agog.bmp","alec.txt")
;unprefix("alec.txt","agog2.bmp")

;sometimes this compressor doesn't compress everything completely, try compressing more than once
;prefix("agog.bmp","alec1.txt")
;prefix("alec1.txt","alec2.txt")
;unprefix("alec2.txt","alec3.txt")
;unprefix("alec3.txt","agog2.bmp")