Implementing a streaming setup with Usenet
This commit is contained in:
863
pkg/usenet/parser.go
Normal file
863
pkg/usenet/parser.go
Normal file
@@ -0,0 +1,863 @@
|
||||
package usenet
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"github.com/Tensai75/nzbparser"
|
||||
"github.com/chrisfarms/yenc"
|
||||
"github.com/nwaples/rardecode/v2"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/sirrobot01/decypharr/internal/nntp"
|
||||
"github.com/sirrobot01/decypharr/internal/utils"
|
||||
"github.com/sourcegraph/conc/pool"
|
||||
"io"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// NZBParser provides a simplified, robust NZB parser
|
||||
type NZBParser struct {
|
||||
logger zerolog.Logger
|
||||
client *nntp.Client
|
||||
cache *SegmentCache
|
||||
}
|
||||
|
||||
type FileGroup struct {
|
||||
BaseName string
|
||||
ActualFilename string
|
||||
Type FileType
|
||||
Files []nzbparser.NzbFile
|
||||
Groups map[string]struct{}
|
||||
}
|
||||
|
||||
type FileInfo struct {
|
||||
Size int64
|
||||
ChunkSize int64
|
||||
Name string
|
||||
}
|
||||
|
||||
// NewNZBParser creates a new simplified NZB parser
|
||||
func NewNZBParser(client *nntp.Client, cache *SegmentCache, logger zerolog.Logger) *NZBParser {
|
||||
return &NZBParser{
|
||||
logger: logger.With().Str("component", "nzb_parser").Logger(),
|
||||
client: client,
|
||||
cache: cache,
|
||||
}
|
||||
}
|
||||
|
||||
type FileType int
|
||||
|
||||
const (
|
||||
FileTypeMedia FileType = iota // Direct media files (.mkv, .mp4, etc.) // Check internal/utils.IsMediaFile
|
||||
FileTypeRar // RAR archives (.rar, .r00, .r01, etc.)
|
||||
FileTypeArchive // Other archives (.7z, .zip, etc.)
|
||||
FileTypeIgnore // Files to ignore (.nfo, .txt, par2 etc.)
|
||||
FileTypeUnknown
|
||||
)
|
||||
|
||||
var (
|
||||
// RAR file patterns - simplified and more accurate
|
||||
rarMainPattern = regexp.MustCompile(`\.rar$`)
|
||||
rarPartPattern = regexp.MustCompile(`\.r\d{2}$`) // .r00, .r01, etc.
|
||||
rarVolumePattern = regexp.MustCompile(`\.part\d+\.rar$`)
|
||||
ignoreExtensions = []string{".par2", ".sfv", ".nfo", ".jpg", ".png", ".txt", ".srt", ".idx", ".sub"}
|
||||
sevenZMainPattern = regexp.MustCompile(`\.7z$`)
|
||||
sevenZPartPattern = regexp.MustCompile(`\.7z\.\d{3}$`)
|
||||
extWithNumberPattern = regexp.MustCompile(`\.[^ "\.]*\.\d+$`)
|
||||
volPar2Pattern = regexp.MustCompile(`(?i)\.vol\d+\+\d+\.par2?$`)
|
||||
partPattern = regexp.MustCompile(`(?i)\.part\d+\.[^ "\.]*$`)
|
||||
regularExtPattern = regexp.MustCompile(`\.[^ "\.]*$`)
|
||||
)
|
||||
|
||||
type PositionTracker struct {
|
||||
reader io.Reader
|
||||
position int64
|
||||
}
|
||||
|
||||
func (pt *PositionTracker) Read(p []byte) (n int, err error) {
|
||||
n, err = pt.reader.Read(p)
|
||||
pt.position += int64(n)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (pt *PositionTracker) Position() int64 {
|
||||
return pt.position
|
||||
}
|
||||
|
||||
func (p *NZBParser) Parse(ctx context.Context, filename string, category string, content []byte) (*NZB, error) {
|
||||
// Parse raw XML
|
||||
raw, err := nzbparser.Parse(bytes.NewReader(content))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse NZB content: %w", err)
|
||||
}
|
||||
|
||||
// Create base NZB structure
|
||||
nzb := &NZB{
|
||||
Files: []NZBFile{},
|
||||
Status: "parsed",
|
||||
Category: category,
|
||||
Name: determineNZBName(filename, raw.Meta),
|
||||
Title: raw.Meta["title"],
|
||||
Password: raw.Meta["password"],
|
||||
}
|
||||
// Group files by base name and type
|
||||
fileGroups := p.groupFiles(ctx, raw.Files)
|
||||
|
||||
// Process each group
|
||||
files := p.processFileGroups(ctx, fileGroups, nzb.Password)
|
||||
|
||||
nzb.ID = generateID(nzb)
|
||||
|
||||
if len(files) == 0 {
|
||||
return nil, fmt.Errorf("no valid files found in NZB")
|
||||
}
|
||||
|
||||
// Calculate total size
|
||||
for _, file := range files {
|
||||
nzb.TotalSize += file.Size
|
||||
file.NzbID = nzb.ID
|
||||
nzb.Files = append(nzb.Files, file)
|
||||
}
|
||||
return nzb, nil
|
||||
}
|
||||
|
||||
func (p *NZBParser) groupFiles(ctx context.Context, files nzbparser.NzbFiles) map[string]*FileGroup {
|
||||
|
||||
var unknownFiles []nzbparser.NzbFile
|
||||
var knownFiles []struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
if len(file.Segments) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
fileType := p.detectFileType(file.Filename)
|
||||
|
||||
if fileType == FileTypeUnknown {
|
||||
unknownFiles = append(unknownFiles, file)
|
||||
} else {
|
||||
knownFiles = append(knownFiles, struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
}{file, fileType})
|
||||
}
|
||||
}
|
||||
|
||||
p.logger.Info().
|
||||
Int("known_files", len(knownFiles)).
|
||||
Int("unknown_files", len(unknownFiles)).
|
||||
Msg("File type detection")
|
||||
|
||||
unknownResults := p.batchDetectContentTypes(ctx, unknownFiles)
|
||||
|
||||
allFiles := make([]struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
actualFilename string
|
||||
}, 0, len(knownFiles)+len(unknownResults))
|
||||
|
||||
// Add known files
|
||||
for _, known := range knownFiles {
|
||||
allFiles = append(allFiles, struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
actualFilename string
|
||||
}{known.file, known.fileType, known.file.Filename})
|
||||
}
|
||||
|
||||
// Add unknown results
|
||||
allFiles = append(allFiles, unknownResults...)
|
||||
|
||||
return p.groupProcessedFiles(allFiles)
|
||||
}
|
||||
|
||||
// Batch process unknown files in parallel
|
||||
func (p *NZBParser) batchDetectContentTypes(ctx context.Context, unknownFiles []nzbparser.NzbFile) []struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
actualFilename string
|
||||
} {
|
||||
if len(unknownFiles) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Use worker pool for parallel processing
|
||||
workers := min(len(unknownFiles), 10) // Max 10 concurrent downloads
|
||||
workerPool := pool.New().WithMaxGoroutines(workers).WithContext(ctx)
|
||||
|
||||
type result struct {
|
||||
index int
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
actualFilename string
|
||||
}
|
||||
|
||||
results := make([]result, len(unknownFiles))
|
||||
var mu sync.Mutex
|
||||
|
||||
// Process each unknown file
|
||||
for i, file := range unknownFiles {
|
||||
i, file := i, file // Capture loop variables
|
||||
|
||||
workerPool.Go(func(ctx context.Context) error {
|
||||
detectedType, actualFilename := p.detectFileTypeByContent(ctx, file)
|
||||
|
||||
mu.Lock()
|
||||
results[i] = result{
|
||||
index: i,
|
||||
file: file,
|
||||
fileType: detectedType,
|
||||
actualFilename: actualFilename,
|
||||
}
|
||||
mu.Unlock()
|
||||
|
||||
return nil // Don't fail the entire batch for one file
|
||||
})
|
||||
}
|
||||
|
||||
// Wait for all to complete
|
||||
|
||||
if err := workerPool.Wait(); err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert results
|
||||
processedFiles := make([]struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
actualFilename string
|
||||
}, 0, len(results))
|
||||
|
||||
for _, result := range results {
|
||||
if result.fileType != FileTypeUnknown {
|
||||
processedFiles = append(processedFiles, struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
actualFilename string
|
||||
}{result.file, result.fileType, result.actualFilename})
|
||||
}
|
||||
}
|
||||
|
||||
return processedFiles
|
||||
}
|
||||
|
||||
// Group already processed files (fast)
|
||||
func (p *NZBParser) groupProcessedFiles(allFiles []struct {
|
||||
file nzbparser.NzbFile
|
||||
fileType FileType
|
||||
actualFilename string
|
||||
}) map[string]*FileGroup {
|
||||
groups := make(map[string]*FileGroup)
|
||||
|
||||
for _, item := range allFiles {
|
||||
// Skip unwanted files
|
||||
if item.fileType == FileTypeIgnore || item.fileType == FileTypeArchive {
|
||||
continue
|
||||
}
|
||||
|
||||
var groupKey string
|
||||
if item.actualFilename != "" && item.actualFilename != item.file.Filename {
|
||||
groupKey = p.getBaseFilename(item.actualFilename)
|
||||
} else {
|
||||
groupKey = item.file.Basefilename
|
||||
}
|
||||
|
||||
group, exists := groups[groupKey]
|
||||
if !exists {
|
||||
group = &FileGroup{
|
||||
ActualFilename: item.actualFilename,
|
||||
BaseName: groupKey,
|
||||
Type: item.fileType,
|
||||
Files: []nzbparser.NzbFile{},
|
||||
Groups: make(map[string]struct{}),
|
||||
}
|
||||
groups[groupKey] = group
|
||||
}
|
||||
|
||||
// Update filename
|
||||
item.file.Filename = item.actualFilename
|
||||
|
||||
group.Files = append(group.Files, item.file)
|
||||
for _, g := range item.file.Groups {
|
||||
group.Groups[g] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
return groups
|
||||
}
|
||||
|
||||
func (p *NZBParser) getBaseFilename(filename string) string {
|
||||
if filename == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// First remove any quotes and trim spaces
|
||||
cleaned := strings.Trim(filename, `" -`)
|
||||
|
||||
// Check for vol\d+\+\d+\.par2? (PAR2 volume files)
|
||||
if volPar2Pattern.MatchString(cleaned) {
|
||||
return volPar2Pattern.ReplaceAllString(cleaned, "")
|
||||
}
|
||||
|
||||
// Check for part\d+\.[^ "\.]* (part files like .part01.rar)
|
||||
|
||||
if partPattern.MatchString(cleaned) {
|
||||
return partPattern.ReplaceAllString(cleaned, "")
|
||||
}
|
||||
|
||||
// Check for [^ "\.]*\.\d+ (extensions with numbers like .7z.001, .r01, etc.)
|
||||
if extWithNumberPattern.MatchString(cleaned) {
|
||||
return extWithNumberPattern.ReplaceAllString(cleaned, "")
|
||||
}
|
||||
|
||||
// Check for regular extensions [^ "\.]*
|
||||
|
||||
if regularExtPattern.MatchString(cleaned) {
|
||||
return regularExtPattern.ReplaceAllString(cleaned, "")
|
||||
}
|
||||
|
||||
return cleaned
|
||||
}
|
||||
|
||||
// Simplified file type detection
|
||||
func (p *NZBParser) detectFileType(filename string) FileType {
|
||||
lower := strings.ToLower(filename)
|
||||
|
||||
// Check for media first
|
||||
if p.isMediaFile(lower) {
|
||||
return FileTypeMedia
|
||||
}
|
||||
|
||||
// Check rar next
|
||||
if p.isRarFile(lower) {
|
||||
return FileTypeRar
|
||||
}
|
||||
|
||||
// Check for 7z files
|
||||
if sevenZMainPattern.MatchString(lower) || sevenZPartPattern.MatchString(lower) {
|
||||
return FileTypeArchive
|
||||
}
|
||||
|
||||
if strings.HasSuffix(lower, ".zip") || strings.HasSuffix(lower, ".tar") ||
|
||||
strings.HasSuffix(lower, ".gz") || strings.HasSuffix(lower, ".bz2") {
|
||||
return FileTypeArchive
|
||||
}
|
||||
|
||||
// Check for ignored file types
|
||||
for _, ext := range ignoreExtensions {
|
||||
if strings.HasSuffix(lower, ext) {
|
||||
return FileTypeIgnore
|
||||
}
|
||||
}
|
||||
// Default to unknown type
|
||||
return FileTypeUnknown
|
||||
}
|
||||
|
||||
// Simplified RAR detection
|
||||
func (p *NZBParser) isRarFile(filename string) bool {
|
||||
return rarMainPattern.MatchString(filename) ||
|
||||
rarPartPattern.MatchString(filename) ||
|
||||
rarVolumePattern.MatchString(filename)
|
||||
}
|
||||
|
||||
func (p *NZBParser) isMediaFile(filename string) bool {
|
||||
return utils.IsMediaFile(filename)
|
||||
}
|
||||
|
||||
func (p *NZBParser) processFileGroups(ctx context.Context, groups map[string]*FileGroup, password string) []NZBFile {
|
||||
if len(groups) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Channel to collect results
|
||||
results := make(chan *NZBFile, len(groups))
|
||||
var wg sync.WaitGroup
|
||||
|
||||
// Process each group concurrently
|
||||
for _, group := range groups {
|
||||
wg.Add(1)
|
||||
go func(g *FileGroup) {
|
||||
defer wg.Done()
|
||||
file := p.processFileGroup(ctx, g, password)
|
||||
results <- file // nil values are fine, we'll filter later
|
||||
}(group)
|
||||
}
|
||||
|
||||
// Close results channel when all goroutines complete
|
||||
go func() {
|
||||
wg.Wait()
|
||||
close(results)
|
||||
}()
|
||||
|
||||
// Collect results
|
||||
var files []NZBFile
|
||||
for file := range results {
|
||||
if file != nil {
|
||||
files = append(files, *file)
|
||||
}
|
||||
}
|
||||
|
||||
return files
|
||||
}
|
||||
|
||||
// Simplified individual group processing
|
||||
func (p *NZBParser) processFileGroup(ctx context.Context, group *FileGroup, password string) *NZBFile {
|
||||
switch group.Type {
|
||||
case FileTypeMedia:
|
||||
return p.processMediaFile(group, password)
|
||||
case FileTypeRar:
|
||||
return p.processRarArchive(ctx, group, password)
|
||||
case FileTypeArchive:
|
||||
return nil
|
||||
default:
|
||||
// Treat unknown files as media files with conservative estimation
|
||||
return p.processMediaFile(group, password)
|
||||
}
|
||||
}
|
||||
|
||||
// Process regular media files
|
||||
func (p *NZBParser) processMediaFile(group *FileGroup, password string) *NZBFile {
|
||||
if len(group.Files) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort files for consistent ordering
|
||||
sort.Slice(group.Files, func(i, j int) bool {
|
||||
return group.Files[i].Number < group.Files[j].Number
|
||||
})
|
||||
|
||||
// Determine extension
|
||||
ext := p.determineExtension(group)
|
||||
|
||||
file := &NZBFile{
|
||||
Name: group.BaseName + ext,
|
||||
Groups: p.getGroupsList(group.Groups),
|
||||
Segments: []NZBSegment{},
|
||||
Password: password,
|
||||
IsRarArchive: false,
|
||||
}
|
||||
|
||||
currentOffset := int64(0)
|
||||
ratio := 0.968
|
||||
for _, nzbFile := range group.Files {
|
||||
sort.Slice(nzbFile.Segments, func(i, j int) bool {
|
||||
return nzbFile.Segments[i].Number < nzbFile.Segments[j].Number
|
||||
})
|
||||
|
||||
for _, segment := range nzbFile.Segments {
|
||||
|
||||
decodedSize := int64(float64(segment.Bytes) * ratio)
|
||||
|
||||
seg := NZBSegment{
|
||||
Number: segment.Number,
|
||||
MessageID: segment.Id,
|
||||
Bytes: int64(segment.Bytes),
|
||||
StartOffset: currentOffset,
|
||||
EndOffset: currentOffset + decodedSize,
|
||||
Group: file.Groups[0],
|
||||
}
|
||||
|
||||
file.Segments = append(file.Segments, seg)
|
||||
currentOffset += decodedSize
|
||||
}
|
||||
}
|
||||
|
||||
fileInfo, err := p.getFileInfo(context.Background(), group)
|
||||
if err != nil {
|
||||
p.logger.Warn().Err(err).Msg("Failed to get file info, using fallback")
|
||||
file.Size = currentOffset
|
||||
file.SegmentSize = currentOffset / int64(len(file.Segments)) // Average segment size
|
||||
} else {
|
||||
file.Size = fileInfo.Size
|
||||
file.SegmentSize = fileInfo.ChunkSize
|
||||
}
|
||||
return file
|
||||
}
|
||||
|
||||
func (p *NZBParser) processRarArchive(ctx context.Context, group *FileGroup, password string) *NZBFile {
|
||||
if len(group.Files) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sort RAR files by part number
|
||||
sort.Slice(group.Files, func(i, j int) bool {
|
||||
return group.Files[i].Filename < group.Files[j].Filename
|
||||
})
|
||||
|
||||
// Try to extract RAR info during parsing for better accuracy
|
||||
extractedInfo := p.extractRarInfo(ctx, group, password)
|
||||
|
||||
filename := group.BaseName + ".mkv" // Default extension
|
||||
if extractedInfo != nil && extractedInfo.FileName != "" {
|
||||
filename = extractedInfo.FileName
|
||||
}
|
||||
|
||||
filename = utils.RemoveInvalidChars(path.Base(filename))
|
||||
|
||||
file := &NZBFile{
|
||||
Name: filename,
|
||||
Groups: p.getGroupsList(group.Groups),
|
||||
Segments: []NZBSegment{},
|
||||
Password: password,
|
||||
IsRarArchive: true,
|
||||
}
|
||||
|
||||
// Build segments
|
||||
ratio := 0.968
|
||||
currentOffset := int64(0)
|
||||
|
||||
for _, nzbFile := range group.Files {
|
||||
sort.Slice(nzbFile.Segments, func(i, j int) bool {
|
||||
return nzbFile.Segments[i].Number < nzbFile.Segments[j].Number
|
||||
})
|
||||
|
||||
for _, segment := range nzbFile.Segments {
|
||||
decodedSize := int64(float64(segment.Bytes) * ratio)
|
||||
|
||||
seg := NZBSegment{
|
||||
Number: segment.Number,
|
||||
MessageID: segment.Id,
|
||||
Bytes: int64(segment.Bytes),
|
||||
StartOffset: currentOffset,
|
||||
EndOffset: currentOffset + decodedSize,
|
||||
Group: file.Groups[0],
|
||||
}
|
||||
|
||||
file.Segments = append(file.Segments, seg)
|
||||
currentOffset += decodedSize
|
||||
}
|
||||
}
|
||||
|
||||
if extractedInfo != nil {
|
||||
file.Size = extractedInfo.FileSize
|
||||
file.SegmentSize = extractedInfo.SegmentSize
|
||||
file.StartOffset = extractedInfo.EstimatedStartOffset
|
||||
} else {
|
||||
file.Size = currentOffset
|
||||
file.SegmentSize = currentOffset / int64(len(file.Segments)) // Average segment size
|
||||
file.StartOffset = 0 // No accurate start offset available
|
||||
}
|
||||
return file
|
||||
}
|
||||
|
||||
func (p *NZBParser) getFileInfo(ctx context.Context, group *FileGroup) (*FileInfo, error) {
|
||||
if len(group.Files) == 0 {
|
||||
return nil, fmt.Errorf("no files in group %s", group.BaseName)
|
||||
}
|
||||
|
||||
// Sort files
|
||||
sort.Slice(group.Files, func(i, j int) bool {
|
||||
return group.Files[i].Filename < group.Files[j].Filename
|
||||
})
|
||||
firstFile := group.Files[0]
|
||||
lastFile := group.Files[len(group.Files)-1]
|
||||
firstInfo, err := p.client.DownloadHeader(ctx, firstFile.Segments[0].Id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
lastInfo, err := p.client.DownloadHeader(ctx, lastFile.Segments[len(lastFile.Segments)-1].Id)
|
||||
if err != nil {
|
||||
p.logger.Warn().Err(err).Msg("Failed to download last segment header")
|
||||
return nil, err
|
||||
}
|
||||
|
||||
chunkSize := firstInfo.End - (firstInfo.Begin - 1)
|
||||
totalFileSize := (int64(len(group.Files)-1) * firstInfo.Size) + lastInfo.Size
|
||||
return &FileInfo{
|
||||
Size: totalFileSize,
|
||||
ChunkSize: chunkSize,
|
||||
Name: firstInfo.Name,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *NZBParser) extractRarInfo(ctx context.Context, group *FileGroup, password string) *ExtractedFileInfo {
|
||||
if len(group.Files) == 0 || len(group.Files[0].Segments) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
firstRarFile := group.Files[0]
|
||||
segmentsToDownload := min(5, len(firstRarFile.Segments))
|
||||
headerBuffer, err := p.downloadRarHeaders(ctx, firstRarFile.Segments[:segmentsToDownload])
|
||||
if err != nil {
|
||||
p.logger.Warn().Err(err).Msg("Failed to download RAR headers")
|
||||
return nil
|
||||
}
|
||||
|
||||
fileInfo, err := p.getFileInfo(ctx, group)
|
||||
if err != nil {
|
||||
p.logger.Warn().Err(err).Msg("Failed to get file info for RAR group")
|
||||
return nil
|
||||
}
|
||||
// Pass the actual RAR size to the analysis function
|
||||
return p.analyzeRarStructure(headerBuffer, password, fileInfo)
|
||||
}
|
||||
|
||||
func (p *NZBParser) analyzeRarStructure(headerData []byte, password string, fileInfo *FileInfo) *ExtractedFileInfo {
|
||||
reader := bytes.NewReader(headerData)
|
||||
tracker := &PositionTracker{reader: reader, position: 0}
|
||||
|
||||
rarReader, err := rardecode.NewReader(tracker, rardecode.Password(password))
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
for {
|
||||
header, err := rarReader.Next()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
if !header.IsDir && p.isMediaFile(header.Name) {
|
||||
compressionRatio := float64(fileInfo.Size) / float64(header.UnPackedSize)
|
||||
|
||||
if compressionRatio > 0.95 {
|
||||
fileDataOffset := tracker.Position()
|
||||
|
||||
p.logger.Info().
|
||||
Str("file", header.Name).
|
||||
Int64("accurate_offset", fileDataOffset).
|
||||
Float64("compression_ratio", compressionRatio).
|
||||
Msg("Found accurate store RAR offset using position tracking")
|
||||
|
||||
return &ExtractedFileInfo{
|
||||
FileName: header.Name,
|
||||
FileSize: header.UnPackedSize,
|
||||
SegmentSize: fileInfo.ChunkSize,
|
||||
EstimatedStartOffset: fileDataOffset,
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// Skip file content - this advances the tracker position
|
||||
io.Copy(io.Discard, rarReader)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *NZBParser) determineExtension(group *FileGroup) string {
|
||||
// Try to determine extension from filenames
|
||||
for _, file := range group.Files {
|
||||
ext := filepath.Ext(file.Filename)
|
||||
if ext != "" {
|
||||
return ext
|
||||
}
|
||||
}
|
||||
return ".mkv" // Default
|
||||
}
|
||||
|
||||
func (p *NZBParser) getGroupsList(groups map[string]struct{}) []string {
|
||||
result := make([]string, 0, len(groups))
|
||||
for g := range groups {
|
||||
result = append(result, g)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Download RAR headers from segments
|
||||
func (p *NZBParser) downloadRarHeaders(ctx context.Context, segments []nzbparser.NzbSegment) ([]byte, error) {
|
||||
var headerBuffer bytes.Buffer
|
||||
|
||||
for _, segment := range segments {
|
||||
conn, cleanup, err := p.client.GetConnection(ctx)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
data, err := conn.GetBody(segment.Id)
|
||||
cleanup()
|
||||
|
||||
if err != nil {
|
||||
if !nntp.IsRetryableError(err) {
|
||||
return nil, err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if len(data) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// yEnc decode
|
||||
part, err := nntp.DecodeYenc(bytes.NewReader(data))
|
||||
if err != nil || part == nil || len(part.Body) == 0 {
|
||||
p.logger.Warn().Err(err).Str("segment_id", segment.Id).Msg("Failed to decode RAR header segment")
|
||||
continue
|
||||
}
|
||||
|
||||
headerBuffer.Write(part.Body)
|
||||
|
||||
// Stop if we have enough data (typically first segment is enough for headers)
|
||||
if headerBuffer.Len() > 32768 { // 32KB should be plenty for RAR headers
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if headerBuffer.Len() == 0 {
|
||||
return nil, fmt.Errorf("no valid header data downloaded")
|
||||
}
|
||||
|
||||
return headerBuffer.Bytes(), nil
|
||||
}
|
||||
|
||||
func (p *NZBParser) detectFileTypeByContent(ctx context.Context, file nzbparser.NzbFile) (FileType, string) {
|
||||
if len(file.Segments) == 0 {
|
||||
return FileTypeUnknown, ""
|
||||
}
|
||||
|
||||
// Download first segment to check file signature
|
||||
firstSegment := file.Segments[0]
|
||||
data, err := p.downloadFirstSegment(ctx, firstSegment)
|
||||
if err != nil {
|
||||
p.logger.Warn().Err(err).Msg("Failed to download first segment for content detection")
|
||||
return FileTypeUnknown, ""
|
||||
}
|
||||
|
||||
if data.Name != "" {
|
||||
fileType := p.detectFileType(data.Name)
|
||||
if fileType != FileTypeUnknown {
|
||||
return fileType, data.Name
|
||||
}
|
||||
}
|
||||
|
||||
return p.detectFileTypeFromContent(data.Body), data.Name
|
||||
}
|
||||
|
||||
func (p *NZBParser) detectFileTypeFromContent(data []byte) FileType {
|
||||
if len(data) == 0 {
|
||||
return FileTypeUnknown
|
||||
}
|
||||
|
||||
// Check for RAR signatures (both RAR 4.x and 5.x)
|
||||
if len(data) >= 7 {
|
||||
// RAR 4.x signature
|
||||
if bytes.Equal(data[:7], []byte("Rar!\x1A\x07\x00")) {
|
||||
return FileTypeRar
|
||||
}
|
||||
}
|
||||
if len(data) >= 8 {
|
||||
// RAR 5.x signature
|
||||
if bytes.Equal(data[:8], []byte("Rar!\x1A\x07\x01\x00")) {
|
||||
return FileTypeRar
|
||||
}
|
||||
}
|
||||
|
||||
// Check for ZIP signature
|
||||
if len(data) >= 4 && bytes.Equal(data[:4], []byte{0x50, 0x4B, 0x03, 0x04}) {
|
||||
return FileTypeArchive
|
||||
}
|
||||
|
||||
// Check for 7z signature
|
||||
if len(data) >= 6 && bytes.Equal(data[:6], []byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C}) {
|
||||
return FileTypeArchive
|
||||
}
|
||||
|
||||
// Check for common media file signatures
|
||||
if len(data) >= 4 {
|
||||
// Matroska (MKV/WebM)
|
||||
if bytes.Equal(data[:4], []byte{0x1A, 0x45, 0xDF, 0xA3}) {
|
||||
return FileTypeMedia
|
||||
}
|
||||
|
||||
// MP4/MOV (check for 'ftyp' at offset 4)
|
||||
if len(data) >= 8 && bytes.Equal(data[4:8], []byte("ftyp")) {
|
||||
return FileTypeMedia
|
||||
}
|
||||
|
||||
// AVI
|
||||
if len(data) >= 12 && bytes.Equal(data[:4], []byte("RIFF")) &&
|
||||
bytes.Equal(data[8:12], []byte("AVI ")) {
|
||||
return FileTypeMedia
|
||||
}
|
||||
}
|
||||
|
||||
// MPEG checks need more specific patterns
|
||||
if len(data) >= 4 {
|
||||
// MPEG-1/2 Program Stream
|
||||
if bytes.Equal(data[:4], []byte{0x00, 0x00, 0x01, 0xBA}) {
|
||||
return FileTypeMedia
|
||||
}
|
||||
|
||||
// MPEG-1/2 Video Stream
|
||||
if bytes.Equal(data[:4], []byte{0x00, 0x00, 0x01, 0xB3}) {
|
||||
return FileTypeMedia
|
||||
}
|
||||
}
|
||||
|
||||
// Check for Transport Stream (TS files)
|
||||
if len(data) >= 1 && data[0] == 0x47 {
|
||||
// Additional validation for TS files
|
||||
if len(data) >= 188 && data[188] == 0x47 {
|
||||
return FileTypeMedia
|
||||
}
|
||||
}
|
||||
|
||||
return FileTypeUnknown
|
||||
}
|
||||
|
||||
func (p *NZBParser) downloadFirstSegment(ctx context.Context, segment nzbparser.NzbSegment) (*yenc.Part, error) {
|
||||
conn, cleanup, err := p.client.GetConnection(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer cleanup()
|
||||
|
||||
data, err := conn.GetBody(segment.Id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// yEnc decode
|
||||
part, err := nntp.DecodeYenc(bytes.NewReader(data))
|
||||
if err != nil || part == nil {
|
||||
return nil, fmt.Errorf("failed to decode segment")
|
||||
}
|
||||
|
||||
// Return both the filename and decoded data
|
||||
return part, nil
|
||||
}
|
||||
|
||||
// Calculate total archive size from all RAR parts in the group
|
||||
func (p *NZBParser) calculateTotalArchiveSize(group *FileGroup) int64 {
|
||||
var total int64
|
||||
for _, file := range group.Files {
|
||||
for _, segment := range file.Segments {
|
||||
total += int64(segment.Bytes)
|
||||
}
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
func determineNZBName(filename string, meta map[string]string) string {
|
||||
// Prefer filename if it exists
|
||||
if filename != "" {
|
||||
filename = strings.Replace(filename, filepath.Ext(filename), "", 1)
|
||||
} else {
|
||||
if name := meta["name"]; name != "" {
|
||||
filename = name
|
||||
} else if title := meta["title"]; title != "" {
|
||||
filename = title
|
||||
}
|
||||
}
|
||||
return utils.RemoveInvalidChars(filename)
|
||||
}
|
||||
|
||||
func generateID(nzb *NZB) string {
|
||||
h := sha256.New()
|
||||
h.Write([]byte(nzb.Name))
|
||||
h.Write([]byte(fmt.Sprintf("%d", nzb.TotalSize)))
|
||||
h.Write([]byte(nzb.Category))
|
||||
h.Write([]byte(nzb.Password))
|
||||
return hex.EncodeToString(h.Sum(nil))[:16]
|
||||
}
|
||||
Reference in New Issue
Block a user