Files
decypharr/pkg/rar/rarar.go
Mukhtar Akere 700d00b802 - Fix issues with new setup
- Fix arr setup getting thr wrong crendentials
- Add file link invalidator
- Other minor bug fixes
2025-10-08 08:13:13 +01:00

686 lines
16 KiB
Go

// Source: https://github.com/eliasbenb/RARAR.py
// Note that this code only translates the original Python for RAR3 (not RAR5) support.
package rar
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"math/rand"
"net/http"
"strings"
"time"
"unicode/utf8"
)
// Constants from the Python code
var (
// Chunk sizes
DefaultChunkSize = 4096
HttpChunkSize = 32768
MaxSearchSize = 1 << 20 // 1MB
// RAR marker and block types
Rar3Marker = []byte{0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00}
BlockFile = byte(0x74)
BlockHeader = byte(0x73)
BlockMarker = byte(0x72)
BlockEnd = byte(0x7B)
// Header flags
FlagDirectory = 0xE0
FlagHasHighSize = 0x100
FlagHasUnicodeName = 0x200
FlagHasData = 0x8000
)
// Compression methods
var CompressionMethods = map[byte]string{
0x30: "Store",
0x31: "Fastest",
0x32: "Fast",
0x33: "Normal",
0x34: "Good",
0x35: "Best",
}
// Error definitions
var (
ErrMarkerNotFound = errors.New("RAR marker not found within search limit")
ErrInvalidFormat = errors.New("invalid RAR format")
ErrNetworkError = errors.New("network error")
ErrRangeRequestsNotSupported = errors.New("server does not support range requests")
ErrCompressionNotSupported = errors.New("compression method not supported")
ErrDirectoryExtractNotSupported = errors.New("directory extract not supported")
)
// Name returns the base filename of the file
func (f *File) Name() string {
if i := strings.LastIndexAny(f.Path, "\\/"); i >= 0 {
return f.Path[i+1:]
}
return f.Path
}
func (f *File) ByteRange() *[2]int64 {
return &[2]int64{f.DataOffset, f.DataOffset + f.CompressedSize - 1}
}
func NewHttpFile(url string) (*HttpFile, error) {
client := &http.Client{}
file := &HttpFile{
URL: url,
Position: 0,
Client: client,
MaxRetries: 3,
RetryDelay: time.Second,
}
// Get file size
size, err := file.getFileSize()
if err != nil {
return nil, fmt.Errorf("failed to get file size: %w", err)
}
file.FileSize = size
return file, nil
}
func (f *HttpFile) doWithRetry(operation func() (interface{}, error)) (interface{}, error) {
var lastErr error
for attempt := 0; attempt <= f.MaxRetries; attempt++ {
if attempt > 0 {
// Jitter + exponential backoff delay
delay := f.RetryDelay * time.Duration(1<<uint(attempt-1))
jitter := time.Duration(rand.Int63n(int64(delay / 4)))
time.Sleep(delay + jitter)
}
result, err := operation()
if err == nil {
return result, nil
}
lastErr = err
// Only retry on network errors
if !errors.Is(err, ErrNetworkError) {
return nil, err
}
}
return nil, fmt.Errorf("after %d retries: %w", f.MaxRetries, lastErr)
}
// getFileSize gets the total file size from the server
func (f *HttpFile) getFileSize() (int64, error) {
result, err := f.doWithRetry(func() (interface{}, error) {
resp, err := f.Client.Head(f.URL)
if err != nil {
return int64(0), fmt.Errorf("%w: %v", ErrNetworkError, err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return int64(0), fmt.Errorf("%w: unexpected status code: %d", ErrNetworkError, resp.StatusCode)
}
contentLength := resp.Header.Get("Content-Length")
if contentLength == "" {
return int64(0), fmt.Errorf("%w: content length not provided", ErrNetworkError)
}
var size int64
_, err = fmt.Sscanf(contentLength, "%d", &size)
if err != nil {
return int64(0), fmt.Errorf("%w: %v", ErrNetworkError, err)
}
return size, nil
})
if err != nil {
return 0, err
}
return result.(int64), nil
}
// ReadAt implements the io.ReaderAt interface
func (f *HttpFile) ReadAt(p []byte, off int64) (n int, err error) {
if len(p) == 0 {
return 0, nil
}
// Ensure we don't read past the end of the file
size := int64(len(p))
if f.FileSize > 0 {
remaining := f.FileSize - off
if remaining <= 0 {
return 0, io.EOF
}
if size > remaining {
size = remaining
p = p[:size]
}
}
result, err := f.doWithRetry(func() (interface{}, error) {
// Create HTTP request with Range header
req, err := http.NewRequest("GET", f.URL, nil)
if err != nil {
return 0, fmt.Errorf("%w: %v", ErrNetworkError, err)
}
end := off + size - 1
req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", off, end))
// Make the request
resp, err := f.Client.Do(req)
if err != nil {
return 0, fmt.Errorf("%w: %v", ErrNetworkError, err)
}
defer resp.Body.Close()
// Handle response
switch resp.StatusCode {
case http.StatusPartialContent:
// Read the content
bytesRead, err := io.ReadFull(resp.Body, p)
return bytesRead, err
case http.StatusOK:
fullData, err := io.ReadAll(resp.Body)
if err != nil {
return 0, fmt.Errorf("%w: %v", ErrNetworkError, err)
}
if int64(len(fullData)) <= off {
return 0, io.EOF
}
end = off + size
if int64(len(fullData)) < end {
end = int64(len(fullData))
}
copy(p, fullData[off:end])
return int(end - off), nil
case http.StatusRequestedRangeNotSatisfiable:
// We're at EOF
return 0, io.EOF
default:
return 0, fmt.Errorf("%w: unexpected status code: %d", ErrNetworkError, resp.StatusCode)
}
})
if err != nil {
return 0, err
}
return result.(int), nil
}
// NewReader creates a new RAR3 reader
func NewReader(url string) (*Reader, error) {
file, err := NewHttpFile(url)
if err != nil {
return nil, err
}
reader := &Reader{
File: file,
ChunkSize: HttpChunkSize,
Files: make([]*File, 0),
}
// Find RAR marker
marker, err := reader.findMarker()
if err != nil {
return nil, err
}
reader.Marker = marker
pos := reader.Marker + int64(len(Rar3Marker)) // Skip marker block
headerData, err := reader.readBytes(pos, 7)
if err != nil {
return nil, err
}
if len(headerData) < 7 {
return nil, ErrInvalidFormat
}
headType := headerData[2]
headSize := int(binary.LittleEndian.Uint16(headerData[5:7]))
if headType != BlockHeader {
return nil, ErrInvalidFormat
}
// Store the position after the archive header
reader.HeaderEndPos = pos + int64(headSize)
return reader, nil
}
// readBytes reads a range of bytes from the file
func (r *Reader) readBytes(start int64, length int) ([]byte, error) {
if length <= 0 {
return []byte{}, nil
}
data := make([]byte, length)
n, err := r.File.ReadAt(data, start)
if err != nil && err != io.EOF {
return nil, err
}
if n < length {
// Partial read, return what we got
return data[:n], nil
}
return data, nil
}
// findMarker finds the RAR marker in the file
func (r *Reader) findMarker() (int64, error) {
// First try to find marker in the first chunk
firstChunkSize := 8192 // 8KB
chunk, err := r.readBytes(0, firstChunkSize)
if err != nil {
return 0, err
}
markerPos := bytes.Index(chunk, Rar3Marker)
if markerPos != -1 {
return int64(markerPos), nil
}
// If not found, continue searching
position := int64(firstChunkSize - len(Rar3Marker) + 1)
maxSearch := int64(MaxSearchSize)
for position < maxSearch {
chunkSize := min(r.ChunkSize, int(maxSearch-position))
chunk, err := r.readBytes(position, chunkSize)
if err != nil || len(chunk) == 0 {
break
}
markerPos := bytes.Index(chunk, Rar3Marker)
if markerPos != -1 {
return position + int64(markerPos), nil
}
// Move forward by chunk size minus the marker length
position += int64(max(1, len(chunk)-len(Rar3Marker)+1))
}
return 0, ErrMarkerNotFound
}
// decodeUnicode decodes RAR3 Unicode encoding
func decodeUnicode(asciiStr string, unicodeData []byte) string {
if len(unicodeData) == 0 {
return asciiStr
}
result := []rune{}
asciiPos := 0
dataPos := 0
highByte := byte(0)
for dataPos < len(unicodeData) {
flags := unicodeData[dataPos]
dataPos++
// Determine the number of character positions this flag byte controls
var flagBits uint
var flagCount int
var bitCount int
if flags&0x80 != 0 {
// Extended flag - controls up to 32 characters (16 bit pairs)
flagBits = uint(flags)
bitCount = 1
for (flagBits&(0x80>>bitCount) != 0) && dataPos < len(unicodeData) {
flagBits = ((flagBits & ((0x80 >> bitCount) - 1)) << 8) | uint(unicodeData[dataPos])
dataPos++
bitCount++
}
flagCount = bitCount * 4
} else {
// Simple flag - controls 4 characters (4 bit pairs)
flagBits = uint(flags)
flagCount = 4
}
// Process each 2-bit flag
for i := 0; i < flagCount; i++ {
if asciiPos >= len(asciiStr) && dataPos >= len(unicodeData) {
break
}
flagValue := (flagBits >> (i * 2)) & 0x03
switch flagValue {
case 0:
// Use ASCII character
if asciiPos < len(asciiStr) {
result = append(result, rune(asciiStr[asciiPos]))
asciiPos++
}
case 1:
// Unicode character with high byte 0
if dataPos < len(unicodeData) {
result = append(result, rune(unicodeData[dataPos]))
dataPos++
}
case 2:
// Unicode character with current high byte
if dataPos < len(unicodeData) {
lowByte := uint(unicodeData[dataPos])
dataPos++
result = append(result, rune(lowByte|(uint(highByte)<<8)))
}
case 3:
// Set new high byte
if dataPos < len(unicodeData) {
highByte = unicodeData[dataPos]
dataPos++
}
}
}
}
// Append any remaining ASCII characters
for asciiPos < len(asciiStr) {
result = append(result, rune(asciiStr[asciiPos]))
asciiPos++
}
return string(result)
}
// readFiles reads all file entries in the archive
func (r *Reader) readFiles() error {
pos := r.Marker
pos += int64(len(Rar3Marker)) // Skip marker block
// Read archive header
headerData, err := r.readBytes(pos, 7)
if err != nil {
return err
}
if len(headerData) < 7 {
return ErrInvalidFormat
}
headType := headerData[2]
headSize := int(binary.LittleEndian.Uint16(headerData[5:7]))
if headType != BlockHeader {
return ErrInvalidFormat
}
pos += int64(headSize) // Skip archive header
// Track whether we've found the end marker
foundEndMarker := false
// Process file entries
for !foundEndMarker {
headerData, err := r.readBytes(pos, 7)
if err != nil {
// Don't stop on EOF, might be temporary network error
// For definitive errors, return the error
if !errors.Is(err, io.EOF) && !errors.Is(err, ErrNetworkError) {
return fmt.Errorf("error reading block header: %w", err)
}
// If we get EOF or network error, retry a few times
retryCount := 0
maxRetries := 3
retryDelay := time.Second
for retryCount < maxRetries {
time.Sleep(retryDelay * time.Duration(1<<uint(retryCount)))
retryCount++
headerData, err = r.readBytes(pos, 7)
if err == nil && len(headerData) >= 7 {
break // Successfully got data
}
}
if len(headerData) < 7 {
return fmt.Errorf("failed to read block header after retries: %w", err)
}
}
if len(headerData) < 7 {
return fmt.Errorf("incomplete block header at position %d", pos)
}
headType := headerData[2]
headFlags := int(binary.LittleEndian.Uint16(headerData[3:5]))
headSize := int(binary.LittleEndian.Uint16(headerData[5:7]))
if headType == BlockEnd {
// End of archive
foundEndMarker = true
break
}
if headType == BlockFile {
// Get complete header data
completeHeader, err := r.readBytes(pos, headSize)
if err != nil || len(completeHeader) < headSize {
// Retry logic for incomplete headers
retryCount := 0
maxRetries := 3
retryDelay := time.Second
for retryCount < maxRetries && (err != nil || len(completeHeader) < headSize) {
time.Sleep(retryDelay * time.Duration(1<<uint(retryCount)))
retryCount++
completeHeader, err = r.readBytes(pos, headSize)
if err == nil && len(completeHeader) >= headSize {
break // Successfully got data
}
}
if len(completeHeader) < headSize {
return fmt.Errorf("failed to read complete file header after retries: %w", err)
}
}
fileInfo, err := r.parseFileHeader(completeHeader, pos)
if err == nil && fileInfo != nil {
r.Files = append(r.Files, fileInfo)
pos = fileInfo.NextOffset
} else {
pos += int64(headSize)
}
} else {
// Skip non-file block
pos += int64(headSize)
// Skip data if present
if headFlags&FlagHasData != 0 {
// Read data size
sizeData, err := r.readBytes(pos-4, 4)
if err != nil || len(sizeData) < 4 {
// Retry logic for data size read errors
retryCount := 0
maxRetries := 3
retryDelay := time.Second
for retryCount < maxRetries && (err != nil || len(sizeData) < 4) {
time.Sleep(retryDelay * time.Duration(1<<uint(retryCount)))
retryCount++
sizeData, err = r.readBytes(pos-4, 4)
if err == nil && len(sizeData) >= 4 {
break // Successfully got data
}
}
if len(sizeData) < 4 {
return fmt.Errorf("failed to read data size after retries: %w", err)
}
}
dataSize := int64(binary.LittleEndian.Uint32(sizeData))
pos += dataSize
}
}
}
if !foundEndMarker {
return fmt.Errorf("end marker not found in archive")
}
return nil
}
// parseFileHeader parses a file header and returns file info
func (r *Reader) parseFileHeader(headerData []byte, position int64) (*File, error) {
if len(headerData) < 7 {
return nil, fmt.Errorf("header data too short")
}
headType := headerData[2]
headFlags := int(binary.LittleEndian.Uint16(headerData[3:5]))
headSize := int(binary.LittleEndian.Uint16(headerData[5:7]))
if headType != BlockFile {
return nil, fmt.Errorf("not a file block")
}
// Check if we have enough data
if len(headerData) < 32 {
return nil, fmt.Errorf("file header too short")
}
// Parse basic file header fields
packSize := binary.LittleEndian.Uint32(headerData[7:11])
unpackSize := binary.LittleEndian.Uint32(headerData[11:15])
// fileOS := headerData[15]
fileCRC := binary.LittleEndian.Uint32(headerData[16:20])
// fileTime := binary.LittleEndian.Uint32(headerData[20:24])
// unpVer := headerData[24]
method := headerData[25]
nameSize := binary.LittleEndian.Uint16(headerData[26:28])
// fileAttr := binary.LittleEndian.Uint32(headerData[28:32])
// Handle high pack/unp sizes
highPackSize := uint32(0)
highUnpSize := uint32(0)
offset := 32 // Start after basic header fields
if headFlags&FlagHasHighSize != 0 {
if offset+8 <= len(headerData) {
highPackSize = binary.LittleEndian.Uint32(headerData[offset : offset+4])
highUnpSize = binary.LittleEndian.Uint32(headerData[offset+4 : offset+8])
}
offset += 8
}
// Calculate actual sizes
fullPackSize := int64(packSize) + (int64(highPackSize) << 32)
fullUnpSize := int64(unpackSize) + (int64(highUnpSize) << 32)
// Read filename
var fileName string
if offset+int(nameSize) <= len(headerData) {
fileNameBytes := headerData[offset : offset+int(nameSize)]
if headFlags&FlagHasUnicodeName != 0 {
zeroPos := bytes.IndexByte(fileNameBytes, 0)
if zeroPos != -1 {
// Try UTF-8 first
asciiPart := fileNameBytes[:zeroPos]
if utf8.Valid(asciiPart) {
fileName = string(asciiPart)
} else {
// Fall back to custom decoder
asciiStr := string(asciiPart)
unicodePart := fileNameBytes[zeroPos+1:]
fileName = decodeUnicode(asciiStr, unicodePart)
}
} else {
// No null byte
if utf8.Valid(fileNameBytes) {
fileName = string(fileNameBytes)
} else {
fileName = string(fileNameBytes) // Last resort
}
}
} else {
// Non-Unicode filename
if utf8.Valid(fileNameBytes) {
fileName = string(fileNameBytes)
} else {
fileName = string(fileNameBytes) // Fallback
}
}
} else {
fileName = fmt.Sprintf("UnknownFile%d", len(r.Files))
}
isDirectory := (headFlags & FlagDirectory) == FlagDirectory
// Calculate data offsets
dataOffset := position + int64(headSize)
nextOffset := dataOffset
// Only add data size if it's not a directory and has data
if !isDirectory && headFlags&FlagHasData != 0 {
nextOffset += fullPackSize
}
return &File{
Path: fileName,
Size: fullUnpSize,
CompressedSize: fullPackSize,
Method: method,
CRC: fileCRC,
IsDirectory: isDirectory,
DataOffset: dataOffset,
NextOffset: nextOffset,
}, nil
}
// GetFiles returns all files in the archive
func (r *Reader) GetFiles() ([]*File, error) {
if len(r.Files) == 0 {
err := r.readFiles()
if err != nil {
return nil, err
}
}
return r.Files, nil
}
// ExtractFile extracts a file from the archive
func (r *Reader) ExtractFile(file *File) ([]byte, error) {
if file.IsDirectory {
return nil, ErrDirectoryExtractNotSupported
}
// Only support "Store" method
if file.Method != 0x30 { // 0x30 = "Store"
return nil, ErrCompressionNotSupported
}
return r.readBytes(file.DataOffset, int(file.CompressedSize))
}