The Compound File Binary File Format is also known as the Object Linking and Embedding (OLE) or Component Object Model (COM) format and was used by early MS software such as MS Office. See [http://msdn.microsoft.com/en-us/library/dd942138.aspx](http://msdn.microsoft.com/en-us/library/dd942138.aspx) for more details.
Install with `go get github.com/richardlehane/mscfb`
A reader for Microsoft's Compound File Binary File Format.
The Compound File Binary File Format is also known as the Object Linking and Embedding (OLE) or Component Object Model (COM) format and was used by early MS software such as MS Office. See [http://msdn.microsoft.com/en-us/library/dd942138.aspx](http://msdn.microsoft.com/en-us/library/dd942138.aspx) for more details.
Install with `go get github.com/richardlehane/mscfb`
// Copyright 2013 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package mscfb implements a reader for Microsoft's Compound File Binary File Format (http://msdn.microsoft.com/en-us/library/dd942138.aspx).
//
// The Compound File Binary File Format is also known as the Object Linking and Embedding (OLE) or Component Object Model (COM) format and was used by many
maxRegSectuint32=0xFFFFFFFA// Maximum regular sector number
difatSectuint32=0xFFFFFFFC//Specifies a DIFAT sector in the FAT
fatSectuint32=0xFFFFFFFD// Specifies a FAT sector in the FAT
endOfChainuint32=0xFFFFFFFE// End of linked chain of sectors
freeSectuint32=0xFFFFFFFF// Speficies unallocated sector in the FAT, Mini FAT or DIFAT
maxRegStreamIDuint32=0xFFFFFFFA// maximum regular stream ID
noStreamuint32=0xFFFFFFFF// empty pointer
)
// lenHeader is the number of bytes occupied by the fields of headerFields:
// signature (8), CLSID (16), the version, byte order and sector size fields
// (10), reserved bytes (6), the directory and FAT counts and the directory
// location (12), the transaction and mini stream cutoff fields (8), the mini
// FAT and DIFAT locations and counts (16), and the 109 initial DIFAT entries
// of 4 bytes each. The total is 512 bytes.
const lenHeader int = 8 + 16 + 10 + 6 + 12 + 8 + 16 + 109*4
typeheaderFieldsstruct{
signatureuint64
_[16]byte//CLSID - ignore, must be null
minorVersionuint16//Version number for non-breaking changes. This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004.
majorVersionuint16//Version number for breaking changes. This field MUST be set to either 0x0003 (version 3) or 0x0004 (version 4).
_[2]byte//byte order - ignore, must be little endian
sectorSizeuint16//This field MUST be set to 0x0009, or 0x000c, depending on the Major Version field. This field specifies the sector size of the compound file as a power of 2. If Major Version is 3, then the Sector Shift MUST be 0x0009, specifying a sector size of 512 bytes. If Major Version is 4, then the Sector Shift MUST be 0x000C, specifying a sector size of 4096 bytes.
_[2]byte// ministream sector size - ignore, must be 64 bytes
_[6]byte// reserved - ignore, not used
numDirectorySectorsuint32//This integer field contains the count of the number of directory sectors in the compound file. If Major Version is 3, then the Number of Directory Sectors MUST be zero. This field is not supported for version 3 compound files.
numFatSectorsuint32//This integer field contains the count of the number of FAT sectors in the compound file.
directorySectorLocuint32//This integer field contains the starting sector number for the directory stream.
_[4]byte// transaction - ignore, not used
_[4]byte// mini stream size cutooff - ignore, must be 4096 bytes
miniFatSectorLocuint32//This integer field contains the starting sector number for the mini FAT.
numMiniFatSectorsuint32//This integer field contains the count of the number of mini FAT sectors in the compound file.
difatSectorLocuint32//This integer field contains the starting sector number for the DIFAT.
numDifatSectorsuint32//This integer field contains the count of the number of DIFAT sectors in the compound file.
initialDifats[109]uint32//The first 109 difat sectors are included in the header
return0,Error{ErrRead,"bad read finding next sector ("+err.Error()+")",offset}
}
returnbinary.LittleEndian.Uint32(buf),nil
}
// Reader provides sequential access to the contents of a MS compound file (MSCFB)
typeReaderstruct{
slicerbool
sectorSizeuint32
buf[]byte
header*header
File[]*File// File is an ordered slice of final directory entries.
direntries[]*File// unordered raw directory entries
entryint
raio.ReaderAt
waio.WriterAt
}
// New returns a MSCFB reader
funcNew(raio.ReaderAt)(*Reader,error){
r:=&Reader{ra:ra}
if_,ok:=ra.(slicer);ok{
r.slicer=true
}else{
r.buf=make([]byte,lenHeader)
}
iferr:=r.setHeader();err!=nil{
returnnil,err
}
// resize the buffer to 4096 if sector size isn't 512
if!r.slicer&&int(r.sectorSize)>len(r.buf){
r.buf=make([]byte,r.sectorSize)
}
iferr:=r.setDifats();err!=nil{
returnnil,err
}
iferr:=r.setDirEntries();err!=nil{
returnnil,err
}
iferr:=r.setMiniStream();err!=nil{
returnnil,err
}
iferr:=r.traverse();err!=nil{
returnnil,err
}
returnr,nil
}
// ID returns the CLSID (class ID) field from the root directory entry.
//
// The root entry is taken to be r.File[0]; the call panics if File is empty.
func (r *Reader) ID() string {
	return r.File[0].ID()
}
// Created returns the created field from the root directory entry.
//
// The root entry is taken to be r.File[0]; the call panics if File is empty.
func (r *Reader) Created() time.Time {
	return r.File[0].Created()
}
// Modified returns the last modified field from the root directory entry.
//
// The root entry is taken to be r.File[0]; the call panics if File is empty.
func (r *Reader) Modified() time.Time {
	return r.File[0].Modified()
}
// Next iterates to the next directory entry.
// This isn't necessarily an adjacent *File within the File slice, but is based on the Left Sibling, Right Sibling and Child information in directory entries.
func(r*Reader)Next()(*File,error){
r.entry++
ifr.entry>=len(r.File){
returnnil,io.EOF
}
returnr.File[r.entry],nil
}
// Read the current directory entry
func(r*Reader)Read(b[]byte)(nint,errerror){
ifr.entry>=len(r.File){
return0,io.EOF
}
returnr.File[r.entry].Read(b)
}
// Debug provides granular information from an mscfb file to assist with debugging
// slicer is an optional interface for the underlying reader: it avoids a copy
// by obtaining a byte slice of the given length at the given offset directly
// from that reader.
type slicer interface {
	Slice(offset int64, length int) ([]byte, error)
}
// Copyright 2013 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package mscfb implements a reader for Microsoft's Compound File Binary File Format (http://msdn.microsoft.com/en-us/library/dd942138.aspx).
//
// The Compound File Binary File Format is also known as the Object Linking and Embedding (OLE) or Component Object Model (COM) format and was used by many
maxRegSectuint32=0xFFFFFFFA// Maximum regular sector number
difatSectuint32=0xFFFFFFFC//Specifies a DIFAT sector in the FAT
fatSectuint32=0xFFFFFFFD// Specifies a FAT sector in the FAT
endOfChainuint32=0xFFFFFFFE// End of linked chain of sectors
freeSectuint32=0xFFFFFFFF// Speficies unallocated sector in the FAT, Mini FAT or DIFAT
maxRegStreamIDuint32=0xFFFFFFFA// maximum regular stream ID
noStreamuint32=0xFFFFFFFF// empty pointer
)
// lenHeader is the number of bytes occupied by the fields of headerFields:
// signature (8), CLSID (16), the version, byte order and sector size fields
// (10), reserved bytes (6), the directory and FAT counts and the directory
// location (12), the transaction and mini stream cutoff fields (8), the mini
// FAT and DIFAT locations and counts (16), and the 109 initial DIFAT entries
// of 4 bytes each. The total is 512 bytes.
const lenHeader int = 8 + 16 + 10 + 6 + 12 + 8 + 16 + 109*4
typeheaderFieldsstruct{
signatureuint64
_[16]byte//CLSID - ignore, must be null
minorVersionuint16//Version number for non-breaking changes. This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004.
majorVersionuint16//Version number for breaking changes. This field MUST be set to either 0x0003 (version 3) or 0x0004 (version 4).
_[2]byte//byte order - ignore, must be little endian
sectorSizeuint16//This field MUST be set to 0x0009, or 0x000c, depending on the Major Version field. This field specifies the sector size of the compound file as a power of 2. If Major Version is 3, then the Sector Shift MUST be 0x0009, specifying a sector size of 512 bytes. If Major Version is 4, then the Sector Shift MUST be 0x000C, specifying a sector size of 4096 bytes.
_[2]byte// ministream sector size - ignore, must be 64 bytes
_[6]byte// reserved - ignore, not used
numDirectorySectorsuint32//This integer field contains the count of the number of directory sectors in the compound file. If Major Version is 3, then the Number of Directory Sectors MUST be zero. This field is not supported for version 3 compound files.
numFatSectorsuint32//This integer field contains the count of the number of FAT sectors in the compound file.
directorySectorLocuint32//This integer field contains the starting sector number for the directory stream.
_[4]byte// transaction - ignore, not used
_[4]byte// mini stream size cutooff - ignore, must be 4096 bytes
miniFatSectorLocuint32//This integer field contains the starting sector number for the mini FAT.
numMiniFatSectorsuint32//This integer field contains the count of the number of mini FAT sectors in the compound file.
difatSectorLocuint32//This integer field contains the starting sector number for the DIFAT.
numDifatSectorsuint32//This integer field contains the count of the number of DIFAT sectors in the compound file.
initialDifats[109]uint32//The first 109 difat sectors are included in the header
return0,Error{ErrRead,"bad read finding next sector ("+err.Error()+")",offset}
}
returnbinary.LittleEndian.Uint32(buf),nil
}
// Reader provides sequential access to the contents of a MS compound file (MSCFB)
typeReaderstruct{
slicerbool
sectorSizeuint32
buf[]byte
header*header
File[]*File// File is an ordered slice of final directory entries.
direntries[]*File// unordered raw directory entries
entryint
raio.ReaderAt
waio.WriterAt
}
// New returns a MSCFB reader
funcNew(raio.ReaderAt)(*Reader,error){
r:=&Reader{ra:ra}
if_,ok:=ra.(slicer);ok{
r.slicer=true
}else{
r.buf=make([]byte,lenHeader)
}
iferr:=r.setHeader();err!=nil{
returnnil,err
}
// resize the buffer to 4096 if sector size isn't 512
if!r.slicer&&int(r.sectorSize)>len(r.buf){
r.buf=make([]byte,r.sectorSize)
}
iferr:=r.setDifats();err!=nil{
returnnil,err
}
iferr:=r.setDirEntries();err!=nil{
returnnil,err
}
iferr:=r.setMiniStream();err!=nil{
returnnil,err
}
iferr:=r.traverse();err!=nil{
returnnil,err
}
returnr,nil
}
// ID returns the CLSID (class ID) field from the root directory entry.
//
// The root entry is taken to be r.File[0]; the call panics if File is empty.
func (r *Reader) ID() string {
	return r.File[0].ID()
}
// Created returns the created field from the root directory entry.
//
// The root entry is taken to be r.File[0]; the call panics if File is empty.
func (r *Reader) Created() time.Time {
	return r.File[0].Created()
}
// Modified returns the last modified field from the root directory entry.
//
// The root entry is taken to be r.File[0]; the call panics if File is empty.
func (r *Reader) Modified() time.Time {
	return r.File[0].Modified()
}
// Next iterates to the next directory entry.
// This isn't necessarily an adjacent *File within the File slice, but is based on the Left Sibling, Right Sibling and Child information in directory entries.
func(r*Reader)Next()(*File,error){
r.entry++
ifr.entry>=len(r.File){
returnnil,io.EOF
}
returnr.File[r.entry],nil
}
// Read the current directory entry
func(r*Reader)Read(b[]byte)(nint,errerror){
ifr.entry>=len(r.File){
return0,io.EOF
}
returnr.File[r.entry].Read(b)
}
// Debug provides granular information from an mscfb file to assist with debugging
// Copyright 2014 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
packagetypes
import(
"encoding/binary"
"strconv"
)
// The CURRENCY type specifies currency information. It is represented as an 8-byte integer, scaled by 10,000, to give a fixed-point number with 15 digits to the left of the decimal point, and four digits to the right. This representation provides a range of –922337203685477.5808 to 922337203685477.5807. For example, $5.25 is stored as the value 52500.
// Copyright 2014 Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
packagetypes
import(
"encoding/binary"
"strconv"
)
// The CURRENCY type specifies currency information. It is represented as an 8-byte integer, scaled by 10,000, to give a fixed-point number with 15 digits to the left of the decimal point, and four digits to the right. This representation provides a range of –922337203685477.5808 to 922337203685477.5807. For example, $5.25 is stored as the value 52500.