864 lines
22 KiB
Go
864 lines
22 KiB
Go
package usenet
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"github.com/Tensai75/nzbparser"
|
|
"github.com/chrisfarms/yenc"
|
|
"github.com/nwaples/rardecode/v2"
|
|
"github.com/rs/zerolog"
|
|
"github.com/sirrobot01/decypharr/internal/nntp"
|
|
"github.com/sirrobot01/decypharr/internal/utils"
|
|
"github.com/sourcegraph/conc/pool"
|
|
"io"
|
|
"path"
|
|
"path/filepath"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// NZBParser provides a simplified, robust NZB parser
|
|
type NZBParser struct {
|
|
logger zerolog.Logger
|
|
client *nntp.Client
|
|
cache *SegmentCache
|
|
}
|
|
|
|
type FileGroup struct {
|
|
BaseName string
|
|
ActualFilename string
|
|
Type FileType
|
|
Files []nzbparser.NzbFile
|
|
Groups map[string]struct{}
|
|
}
|
|
|
|
// FileInfo is the size summary returned by getFileInfo.
type FileInfo struct {
	// Size is the computed total size; ChunkSize is the byte span covered by
	// the first segment.
	Size, ChunkSize int64
	// Name is the name reported by the first segment header.
	Name string
}
|
|
|
|
// NewNZBParser creates a new simplified NZB parser
|
|
func NewNZBParser(client *nntp.Client, cache *SegmentCache, logger zerolog.Logger) *NZBParser {
|
|
return &NZBParser{
|
|
logger: logger.With().Str("component", "nzb_parser").Logger(),
|
|
client: client,
|
|
cache: cache,
|
|
}
|
|
}
|
|
|
|
// FileType classifies an NZB file entry so the parser can decide how to
// process it.
type FileType int

// The values are assigned by iota, so their order is significant and the zero
// value of FileType is FileTypeMedia.
const (
	// FileTypeMedia marks direct media files (.mkv, .mp4, etc.); see
	// internal/utils.IsMediaFile.
	FileTypeMedia FileType = iota
	// FileTypeRar marks RAR archives (.rar, .r00, .r01, etc.).
	FileTypeRar
	// FileTypeArchive marks other archives (.7z, .zip, etc.).
	FileTypeArchive
	// FileTypeIgnore marks files to ignore (.nfo, .txt, par2 etc.).
	FileTypeIgnore
	// FileTypeUnknown marks files whose type could not be determined.
	FileTypeUnknown
)
|
|
|
|
// Filename patterns used for type detection and for deriving group keys.
// The RAR and 7z patterns carry no case-insensitive flag; detectFileType
// lower-cases the name before applying them.
var (
	// RAR volumes: "name.rar", "name.r00".."name.r99" and "name.partN.rar".
	rarMainPattern   = regexp.MustCompile(`\.rar$`)
	rarPartPattern   = regexp.MustCompile(`\.r\d{2}$`)
	rarVolumePattern = regexp.MustCompile(`\.part\d+\.rar$`)

	// Suffixes classified as FileTypeIgnore.
	ignoreExtensions = []string{".par2", ".sfv", ".nfo", ".jpg", ".png", ".txt", ".srt", ".idx", ".sub"}

	// 7z archives: "name.7z" and split volumes such as "name.7z.001".
	sevenZMainPattern = regexp.MustCompile(`\.7z$`)
	sevenZPartPattern = regexp.MustCompile(`\.7z\.\d{3}$`)

	// Suffix strippers used by getBaseFilename.
	extWithNumberPattern = regexp.MustCompile(`\.[^ "\.]*\.\d+$`)
	volPar2Pattern       = regexp.MustCompile(`(?i)\.vol\d+\+\d+\.par2?$`)
	partPattern          = regexp.MustCompile(`(?i)\.part\d+\.[^ "\.]*$`)
	regularExtPattern    = regexp.MustCompile(`\.[^ "\.]*$`)
)
|
|
|
|
// PositionTracker wraps an io.Reader and counts the bytes read through it.
type PositionTracker struct {
	reader   io.Reader
	position int64
}

// Read forwards to the wrapped reader and adds the number of bytes it
// returned to the running position, whether or not an error was reported.
func (pt *PositionTracker) Read(p []byte) (int, error) {
	count, readErr := pt.reader.Read(p)
	pt.position = pt.position + int64(count)
	return count, readErr
}

// Position reports the total number of bytes read so far.
func (pt *PositionTracker) Position() int64 {
	return pt.position
}
|
|
|
|
func (p *NZBParser) Parse(ctx context.Context, filename string, category string, content []byte) (*NZB, error) {
|
|
// Parse raw XML
|
|
raw, err := nzbparser.Parse(bytes.NewReader(content))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse NZB content: %w", err)
|
|
}
|
|
|
|
// Create base NZB structure
|
|
nzb := &NZB{
|
|
Files: []NZBFile{},
|
|
Status: "parsed",
|
|
Category: category,
|
|
Name: determineNZBName(filename, raw.Meta),
|
|
Title: raw.Meta["title"],
|
|
Password: raw.Meta["password"],
|
|
}
|
|
// Group files by base name and type
|
|
fileGroups := p.groupFiles(ctx, raw.Files)
|
|
|
|
// Process each group
|
|
files := p.processFileGroups(ctx, fileGroups, nzb.Password)
|
|
|
|
nzb.ID = generateID(nzb)
|
|
|
|
if len(files) == 0 {
|
|
return nil, fmt.Errorf("no valid files found in NZB")
|
|
}
|
|
|
|
// Calculate total size
|
|
for _, file := range files {
|
|
nzb.TotalSize += file.Size
|
|
file.NzbID = nzb.ID
|
|
nzb.Files = append(nzb.Files, file)
|
|
}
|
|
return nzb, nil
|
|
}
|
|
|
|
func (p *NZBParser) groupFiles(ctx context.Context, files nzbparser.NzbFiles) map[string]*FileGroup {
|
|
|
|
var unknownFiles []nzbparser.NzbFile
|
|
var knownFiles []struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
}
|
|
|
|
for _, file := range files {
|
|
if len(file.Segments) == 0 {
|
|
continue
|
|
}
|
|
|
|
fileType := p.detectFileType(file.Filename)
|
|
|
|
if fileType == FileTypeUnknown {
|
|
unknownFiles = append(unknownFiles, file)
|
|
} else {
|
|
knownFiles = append(knownFiles, struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
}{file, fileType})
|
|
}
|
|
}
|
|
|
|
p.logger.Info().
|
|
Int("known_files", len(knownFiles)).
|
|
Int("unknown_files", len(unknownFiles)).
|
|
Msg("File type detection")
|
|
|
|
unknownResults := p.batchDetectContentTypes(ctx, unknownFiles)
|
|
|
|
allFiles := make([]struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
actualFilename string
|
|
}, 0, len(knownFiles)+len(unknownResults))
|
|
|
|
// Add known files
|
|
for _, known := range knownFiles {
|
|
allFiles = append(allFiles, struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
actualFilename string
|
|
}{known.file, known.fileType, known.file.Filename})
|
|
}
|
|
|
|
// Add unknown results
|
|
allFiles = append(allFiles, unknownResults...)
|
|
|
|
return p.groupProcessedFiles(allFiles)
|
|
}
|
|
|
|
// Batch process unknown files in parallel
|
|
func (p *NZBParser) batchDetectContentTypes(ctx context.Context, unknownFiles []nzbparser.NzbFile) []struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
actualFilename string
|
|
} {
|
|
if len(unknownFiles) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Use worker pool for parallel processing
|
|
workers := min(len(unknownFiles), 10) // Max 10 concurrent downloads
|
|
workerPool := pool.New().WithMaxGoroutines(workers).WithContext(ctx)
|
|
|
|
type result struct {
|
|
index int
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
actualFilename string
|
|
}
|
|
|
|
results := make([]result, len(unknownFiles))
|
|
var mu sync.Mutex
|
|
|
|
// Process each unknown file
|
|
for i, file := range unknownFiles {
|
|
i, file := i, file // Capture loop variables
|
|
|
|
workerPool.Go(func(ctx context.Context) error {
|
|
detectedType, actualFilename := p.detectFileTypeByContent(ctx, file)
|
|
|
|
mu.Lock()
|
|
results[i] = result{
|
|
index: i,
|
|
file: file,
|
|
fileType: detectedType,
|
|
actualFilename: actualFilename,
|
|
}
|
|
mu.Unlock()
|
|
|
|
return nil // Don't fail the entire batch for one file
|
|
})
|
|
}
|
|
|
|
// Wait for all to complete
|
|
|
|
if err := workerPool.Wait(); err != nil {
|
|
return nil
|
|
}
|
|
|
|
// Convert results
|
|
processedFiles := make([]struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
actualFilename string
|
|
}, 0, len(results))
|
|
|
|
for _, result := range results {
|
|
if result.fileType != FileTypeUnknown {
|
|
processedFiles = append(processedFiles, struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
actualFilename string
|
|
}{result.file, result.fileType, result.actualFilename})
|
|
}
|
|
}
|
|
|
|
return processedFiles
|
|
}
|
|
|
|
// Group already processed files (fast)
|
|
func (p *NZBParser) groupProcessedFiles(allFiles []struct {
|
|
file nzbparser.NzbFile
|
|
fileType FileType
|
|
actualFilename string
|
|
}) map[string]*FileGroup {
|
|
groups := make(map[string]*FileGroup)
|
|
|
|
for _, item := range allFiles {
|
|
// Skip unwanted files
|
|
if item.fileType == FileTypeIgnore || item.fileType == FileTypeArchive {
|
|
continue
|
|
}
|
|
|
|
var groupKey string
|
|
if item.actualFilename != "" && item.actualFilename != item.file.Filename {
|
|
groupKey = p.getBaseFilename(item.actualFilename)
|
|
} else {
|
|
groupKey = item.file.Basefilename
|
|
}
|
|
|
|
group, exists := groups[groupKey]
|
|
if !exists {
|
|
group = &FileGroup{
|
|
ActualFilename: item.actualFilename,
|
|
BaseName: groupKey,
|
|
Type: item.fileType,
|
|
Files: []nzbparser.NzbFile{},
|
|
Groups: make(map[string]struct{}),
|
|
}
|
|
groups[groupKey] = group
|
|
}
|
|
|
|
// Update filename
|
|
item.file.Filename = item.actualFilename
|
|
|
|
group.Files = append(group.Files, item.file)
|
|
for _, g := range item.file.Groups {
|
|
group.Groups[g] = struct{}{}
|
|
}
|
|
}
|
|
|
|
return groups
|
|
}
|
|
|
|
func (p *NZBParser) getBaseFilename(filename string) string {
|
|
if filename == "" {
|
|
return ""
|
|
}
|
|
|
|
// First remove any quotes and trim spaces
|
|
cleaned := strings.Trim(filename, `" -`)
|
|
|
|
// Check for vol\d+\+\d+\.par2? (PAR2 volume files)
|
|
if volPar2Pattern.MatchString(cleaned) {
|
|
return volPar2Pattern.ReplaceAllString(cleaned, "")
|
|
}
|
|
|
|
// Check for part\d+\.[^ "\.]* (part files like .part01.rar)
|
|
|
|
if partPattern.MatchString(cleaned) {
|
|
return partPattern.ReplaceAllString(cleaned, "")
|
|
}
|
|
|
|
// Check for [^ "\.]*\.\d+ (extensions with numbers like .7z.001, .r01, etc.)
|
|
if extWithNumberPattern.MatchString(cleaned) {
|
|
return extWithNumberPattern.ReplaceAllString(cleaned, "")
|
|
}
|
|
|
|
// Check for regular extensions [^ "\.]*
|
|
|
|
if regularExtPattern.MatchString(cleaned) {
|
|
return regularExtPattern.ReplaceAllString(cleaned, "")
|
|
}
|
|
|
|
return cleaned
|
|
}
|
|
|
|
// Simplified file type detection
|
|
func (p *NZBParser) detectFileType(filename string) FileType {
|
|
lower := strings.ToLower(filename)
|
|
|
|
// Check for media first
|
|
if p.isMediaFile(lower) {
|
|
return FileTypeMedia
|
|
}
|
|
|
|
// Check rar next
|
|
if p.isRarFile(lower) {
|
|
return FileTypeRar
|
|
}
|
|
|
|
// Check for 7z files
|
|
if sevenZMainPattern.MatchString(lower) || sevenZPartPattern.MatchString(lower) {
|
|
return FileTypeArchive
|
|
}
|
|
|
|
if strings.HasSuffix(lower, ".zip") || strings.HasSuffix(lower, ".tar") ||
|
|
strings.HasSuffix(lower, ".gz") || strings.HasSuffix(lower, ".bz2") {
|
|
return FileTypeArchive
|
|
}
|
|
|
|
// Check for ignored file types
|
|
for _, ext := range ignoreExtensions {
|
|
if strings.HasSuffix(lower, ext) {
|
|
return FileTypeIgnore
|
|
}
|
|
}
|
|
// Default to unknown type
|
|
return FileTypeUnknown
|
|
}
|
|
|
|
// Simplified RAR detection
|
|
func (p *NZBParser) isRarFile(filename string) bool {
|
|
return rarMainPattern.MatchString(filename) ||
|
|
rarPartPattern.MatchString(filename) ||
|
|
rarVolumePattern.MatchString(filename)
|
|
}
|
|
|
|
func (p *NZBParser) isMediaFile(filename string) bool {
|
|
return utils.IsMediaFile(filename)
|
|
}
|
|
|
|
func (p *NZBParser) processFileGroups(ctx context.Context, groups map[string]*FileGroup, password string) []NZBFile {
|
|
if len(groups) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Channel to collect results
|
|
results := make(chan *NZBFile, len(groups))
|
|
var wg sync.WaitGroup
|
|
|
|
// Process each group concurrently
|
|
for _, group := range groups {
|
|
wg.Add(1)
|
|
go func(g *FileGroup) {
|
|
defer wg.Done()
|
|
file := p.processFileGroup(ctx, g, password)
|
|
results <- file // nil values are fine, we'll filter later
|
|
}(group)
|
|
}
|
|
|
|
// Close results channel when all goroutines complete
|
|
go func() {
|
|
wg.Wait()
|
|
close(results)
|
|
}()
|
|
|
|
// Collect results
|
|
var files []NZBFile
|
|
for file := range results {
|
|
if file != nil {
|
|
files = append(files, *file)
|
|
}
|
|
}
|
|
|
|
return files
|
|
}
|
|
|
|
// Simplified individual group processing
|
|
func (p *NZBParser) processFileGroup(ctx context.Context, group *FileGroup, password string) *NZBFile {
|
|
switch group.Type {
|
|
case FileTypeMedia:
|
|
return p.processMediaFile(group, password)
|
|
case FileTypeRar:
|
|
return p.processRarArchive(ctx, group, password)
|
|
case FileTypeArchive:
|
|
return nil
|
|
default:
|
|
// Treat unknown files as media files with conservative estimation
|
|
return p.processMediaFile(group, password)
|
|
}
|
|
}
|
|
|
|
// Process regular media files
|
|
func (p *NZBParser) processMediaFile(group *FileGroup, password string) *NZBFile {
|
|
if len(group.Files) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Sort files for consistent ordering
|
|
sort.Slice(group.Files, func(i, j int) bool {
|
|
return group.Files[i].Number < group.Files[j].Number
|
|
})
|
|
|
|
// Determine extension
|
|
ext := p.determineExtension(group)
|
|
|
|
file := &NZBFile{
|
|
Name: group.BaseName + ext,
|
|
Groups: p.getGroupsList(group.Groups),
|
|
Segments: []NZBSegment{},
|
|
Password: password,
|
|
IsRarArchive: false,
|
|
}
|
|
|
|
currentOffset := int64(0)
|
|
ratio := 0.968
|
|
for _, nzbFile := range group.Files {
|
|
sort.Slice(nzbFile.Segments, func(i, j int) bool {
|
|
return nzbFile.Segments[i].Number < nzbFile.Segments[j].Number
|
|
})
|
|
|
|
for _, segment := range nzbFile.Segments {
|
|
|
|
decodedSize := int64(float64(segment.Bytes) * ratio)
|
|
|
|
seg := NZBSegment{
|
|
Number: segment.Number,
|
|
MessageID: segment.Id,
|
|
Bytes: int64(segment.Bytes),
|
|
StartOffset: currentOffset,
|
|
EndOffset: currentOffset + decodedSize,
|
|
Group: file.Groups[0],
|
|
}
|
|
|
|
file.Segments = append(file.Segments, seg)
|
|
currentOffset += decodedSize
|
|
}
|
|
}
|
|
|
|
fileInfo, err := p.getFileInfo(context.Background(), group)
|
|
if err != nil {
|
|
p.logger.Warn().Err(err).Msg("Failed to get file info, using fallback")
|
|
file.Size = currentOffset
|
|
file.SegmentSize = currentOffset / int64(len(file.Segments)) // Average segment size
|
|
} else {
|
|
file.Size = fileInfo.Size
|
|
file.SegmentSize = fileInfo.ChunkSize
|
|
}
|
|
return file
|
|
}
|
|
|
|
func (p *NZBParser) processRarArchive(ctx context.Context, group *FileGroup, password string) *NZBFile {
|
|
if len(group.Files) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// Sort RAR files by part number
|
|
sort.Slice(group.Files, func(i, j int) bool {
|
|
return group.Files[i].Filename < group.Files[j].Filename
|
|
})
|
|
|
|
// Try to extract RAR info during parsing for better accuracy
|
|
extractedInfo := p.extractRarInfo(ctx, group, password)
|
|
|
|
filename := group.BaseName + ".mkv" // Default extension
|
|
if extractedInfo != nil && extractedInfo.FileName != "" {
|
|
filename = extractedInfo.FileName
|
|
}
|
|
|
|
filename = utils.RemoveInvalidChars(path.Base(filename))
|
|
|
|
file := &NZBFile{
|
|
Name: filename,
|
|
Groups: p.getGroupsList(group.Groups),
|
|
Segments: []NZBSegment{},
|
|
Password: password,
|
|
IsRarArchive: true,
|
|
}
|
|
|
|
// Build segments
|
|
ratio := 0.968
|
|
currentOffset := int64(0)
|
|
|
|
for _, nzbFile := range group.Files {
|
|
sort.Slice(nzbFile.Segments, func(i, j int) bool {
|
|
return nzbFile.Segments[i].Number < nzbFile.Segments[j].Number
|
|
})
|
|
|
|
for _, segment := range nzbFile.Segments {
|
|
decodedSize := int64(float64(segment.Bytes) * ratio)
|
|
|
|
seg := NZBSegment{
|
|
Number: segment.Number,
|
|
MessageID: segment.Id,
|
|
Bytes: int64(segment.Bytes),
|
|
StartOffset: currentOffset,
|
|
EndOffset: currentOffset + decodedSize,
|
|
Group: file.Groups[0],
|
|
}
|
|
|
|
file.Segments = append(file.Segments, seg)
|
|
currentOffset += decodedSize
|
|
}
|
|
}
|
|
|
|
if extractedInfo != nil {
|
|
file.Size = extractedInfo.FileSize
|
|
file.SegmentSize = extractedInfo.SegmentSize
|
|
file.StartOffset = extractedInfo.EstimatedStartOffset
|
|
} else {
|
|
file.Size = currentOffset
|
|
file.SegmentSize = currentOffset / int64(len(file.Segments)) // Average segment size
|
|
file.StartOffset = 0 // No accurate start offset available
|
|
}
|
|
return file
|
|
}
|
|
|
|
func (p *NZBParser) getFileInfo(ctx context.Context, group *FileGroup) (*FileInfo, error) {
|
|
if len(group.Files) == 0 {
|
|
return nil, fmt.Errorf("no files in group %s", group.BaseName)
|
|
}
|
|
|
|
// Sort files
|
|
sort.Slice(group.Files, func(i, j int) bool {
|
|
return group.Files[i].Filename < group.Files[j].Filename
|
|
})
|
|
firstFile := group.Files[0]
|
|
lastFile := group.Files[len(group.Files)-1]
|
|
firstInfo, err := p.client.DownloadHeader(ctx, firstFile.Segments[0].Id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
lastInfo, err := p.client.DownloadHeader(ctx, lastFile.Segments[len(lastFile.Segments)-1].Id)
|
|
if err != nil {
|
|
p.logger.Warn().Err(err).Msg("Failed to download last segment header")
|
|
return nil, err
|
|
}
|
|
|
|
chunkSize := firstInfo.End - (firstInfo.Begin - 1)
|
|
totalFileSize := (int64(len(group.Files)-1) * firstInfo.Size) + lastInfo.Size
|
|
return &FileInfo{
|
|
Size: totalFileSize,
|
|
ChunkSize: chunkSize,
|
|
Name: firstInfo.Name,
|
|
}, nil
|
|
}
|
|
|
|
func (p *NZBParser) extractRarInfo(ctx context.Context, group *FileGroup, password string) *ExtractedFileInfo {
|
|
if len(group.Files) == 0 || len(group.Files[0].Segments) == 0 {
|
|
return nil
|
|
}
|
|
|
|
firstRarFile := group.Files[0]
|
|
segmentsToDownload := min(5, len(firstRarFile.Segments))
|
|
headerBuffer, err := p.downloadRarHeaders(ctx, firstRarFile.Segments[:segmentsToDownload])
|
|
if err != nil {
|
|
p.logger.Warn().Err(err).Msg("Failed to download RAR headers")
|
|
return nil
|
|
}
|
|
|
|
fileInfo, err := p.getFileInfo(ctx, group)
|
|
if err != nil {
|
|
p.logger.Warn().Err(err).Msg("Failed to get file info for RAR group")
|
|
return nil
|
|
}
|
|
// Pass the actual RAR size to the analysis function
|
|
return p.analyzeRarStructure(headerBuffer, password, fileInfo)
|
|
}
|
|
|
|
func (p *NZBParser) analyzeRarStructure(headerData []byte, password string, fileInfo *FileInfo) *ExtractedFileInfo {
|
|
reader := bytes.NewReader(headerData)
|
|
tracker := &PositionTracker{reader: reader, position: 0}
|
|
|
|
rarReader, err := rardecode.NewReader(tracker, rardecode.Password(password))
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
for {
|
|
header, err := rarReader.Next()
|
|
if err != nil {
|
|
break
|
|
}
|
|
|
|
if !header.IsDir && p.isMediaFile(header.Name) {
|
|
compressionRatio := float64(fileInfo.Size) / float64(header.UnPackedSize)
|
|
|
|
if compressionRatio > 0.95 {
|
|
fileDataOffset := tracker.Position()
|
|
|
|
p.logger.Info().
|
|
Str("file", header.Name).
|
|
Int64("accurate_offset", fileDataOffset).
|
|
Float64("compression_ratio", compressionRatio).
|
|
Msg("Found accurate store RAR offset using position tracking")
|
|
|
|
return &ExtractedFileInfo{
|
|
FileName: header.Name,
|
|
FileSize: header.UnPackedSize,
|
|
SegmentSize: fileInfo.ChunkSize,
|
|
EstimatedStartOffset: fileDataOffset,
|
|
}
|
|
}
|
|
break
|
|
}
|
|
|
|
// Skip file content - this advances the tracker position
|
|
io.Copy(io.Discard, rarReader)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *NZBParser) determineExtension(group *FileGroup) string {
|
|
// Try to determine extension from filenames
|
|
for _, file := range group.Files {
|
|
ext := filepath.Ext(file.Filename)
|
|
if ext != "" {
|
|
return ext
|
|
}
|
|
}
|
|
return ".mkv" // Default
|
|
}
|
|
|
|
func (p *NZBParser) getGroupsList(groups map[string]struct{}) []string {
|
|
result := make([]string, 0, len(groups))
|
|
for g := range groups {
|
|
result = append(result, g)
|
|
}
|
|
return result
|
|
}
|
|
|
|
// Download RAR headers from segments
|
|
func (p *NZBParser) downloadRarHeaders(ctx context.Context, segments []nzbparser.NzbSegment) ([]byte, error) {
|
|
var headerBuffer bytes.Buffer
|
|
|
|
for _, segment := range segments {
|
|
conn, cleanup, err := p.client.GetConnection(ctx)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
data, err := conn.GetBody(segment.Id)
|
|
cleanup()
|
|
|
|
if err != nil {
|
|
if !nntp.IsRetryableError(err) {
|
|
return nil, err
|
|
}
|
|
continue
|
|
}
|
|
|
|
if len(data) == 0 {
|
|
continue
|
|
}
|
|
|
|
// yEnc decode
|
|
part, err := nntp.DecodeYenc(bytes.NewReader(data))
|
|
if err != nil || part == nil || len(part.Body) == 0 {
|
|
p.logger.Warn().Err(err).Str("segment_id", segment.Id).Msg("Failed to decode RAR header segment")
|
|
continue
|
|
}
|
|
|
|
headerBuffer.Write(part.Body)
|
|
|
|
// Stop if we have enough data (typically first segment is enough for headers)
|
|
if headerBuffer.Len() > 32768 { // 32KB should be plenty for RAR headers
|
|
break
|
|
}
|
|
}
|
|
|
|
if headerBuffer.Len() == 0 {
|
|
return nil, fmt.Errorf("no valid header data downloaded")
|
|
}
|
|
|
|
return headerBuffer.Bytes(), nil
|
|
}
|
|
|
|
func (p *NZBParser) detectFileTypeByContent(ctx context.Context, file nzbparser.NzbFile) (FileType, string) {
|
|
if len(file.Segments) == 0 {
|
|
return FileTypeUnknown, ""
|
|
}
|
|
|
|
// Download first segment to check file signature
|
|
firstSegment := file.Segments[0]
|
|
data, err := p.downloadFirstSegment(ctx, firstSegment)
|
|
if err != nil {
|
|
p.logger.Warn().Err(err).Msg("Failed to download first segment for content detection")
|
|
return FileTypeUnknown, ""
|
|
}
|
|
|
|
if data.Name != "" {
|
|
fileType := p.detectFileType(data.Name)
|
|
if fileType != FileTypeUnknown {
|
|
return fileType, data.Name
|
|
}
|
|
}
|
|
|
|
return p.detectFileTypeFromContent(data.Body), data.Name
|
|
}
|
|
|
|
func (p *NZBParser) detectFileTypeFromContent(data []byte) FileType {
|
|
if len(data) == 0 {
|
|
return FileTypeUnknown
|
|
}
|
|
|
|
// Check for RAR signatures (both RAR 4.x and 5.x)
|
|
if len(data) >= 7 {
|
|
// RAR 4.x signature
|
|
if bytes.Equal(data[:7], []byte("Rar!\x1A\x07\x00")) {
|
|
return FileTypeRar
|
|
}
|
|
}
|
|
if len(data) >= 8 {
|
|
// RAR 5.x signature
|
|
if bytes.Equal(data[:8], []byte("Rar!\x1A\x07\x01\x00")) {
|
|
return FileTypeRar
|
|
}
|
|
}
|
|
|
|
// Check for ZIP signature
|
|
if len(data) >= 4 && bytes.Equal(data[:4], []byte{0x50, 0x4B, 0x03, 0x04}) {
|
|
return FileTypeArchive
|
|
}
|
|
|
|
// Check for 7z signature
|
|
if len(data) >= 6 && bytes.Equal(data[:6], []byte{0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C}) {
|
|
return FileTypeArchive
|
|
}
|
|
|
|
// Check for common media file signatures
|
|
if len(data) >= 4 {
|
|
// Matroska (MKV/WebM)
|
|
if bytes.Equal(data[:4], []byte{0x1A, 0x45, 0xDF, 0xA3}) {
|
|
return FileTypeMedia
|
|
}
|
|
|
|
// MP4/MOV (check for 'ftyp' at offset 4)
|
|
if len(data) >= 8 && bytes.Equal(data[4:8], []byte("ftyp")) {
|
|
return FileTypeMedia
|
|
}
|
|
|
|
// AVI
|
|
if len(data) >= 12 && bytes.Equal(data[:4], []byte("RIFF")) &&
|
|
bytes.Equal(data[8:12], []byte("AVI ")) {
|
|
return FileTypeMedia
|
|
}
|
|
}
|
|
|
|
// MPEG checks need more specific patterns
|
|
if len(data) >= 4 {
|
|
// MPEG-1/2 Program Stream
|
|
if bytes.Equal(data[:4], []byte{0x00, 0x00, 0x01, 0xBA}) {
|
|
return FileTypeMedia
|
|
}
|
|
|
|
// MPEG-1/2 Video Stream
|
|
if bytes.Equal(data[:4], []byte{0x00, 0x00, 0x01, 0xB3}) {
|
|
return FileTypeMedia
|
|
}
|
|
}
|
|
|
|
// Check for Transport Stream (TS files)
|
|
if len(data) >= 1 && data[0] == 0x47 {
|
|
// Additional validation for TS files
|
|
if len(data) >= 188 && data[188] == 0x47 {
|
|
return FileTypeMedia
|
|
}
|
|
}
|
|
|
|
return FileTypeUnknown
|
|
}
|
|
|
|
func (p *NZBParser) downloadFirstSegment(ctx context.Context, segment nzbparser.NzbSegment) (*yenc.Part, error) {
|
|
conn, cleanup, err := p.client.GetConnection(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer cleanup()
|
|
|
|
data, err := conn.GetBody(segment.Id)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// yEnc decode
|
|
part, err := nntp.DecodeYenc(bytes.NewReader(data))
|
|
if err != nil || part == nil {
|
|
return nil, fmt.Errorf("failed to decode segment")
|
|
}
|
|
|
|
// Return both the filename and decoded data
|
|
return part, nil
|
|
}
|
|
|
|
// Calculate total archive size from all RAR parts in the group
|
|
func (p *NZBParser) calculateTotalArchiveSize(group *FileGroup) int64 {
|
|
var total int64
|
|
for _, file := range group.Files {
|
|
for _, segment := range file.Segments {
|
|
total += int64(segment.Bytes)
|
|
}
|
|
}
|
|
return total
|
|
}
|
|
|
|
func determineNZBName(filename string, meta map[string]string) string {
|
|
// Prefer filename if it exists
|
|
if filename != "" {
|
|
filename = strings.Replace(filename, filepath.Ext(filename), "", 1)
|
|
} else {
|
|
if name := meta["name"]; name != "" {
|
|
filename = name
|
|
} else if title := meta["title"]; title != "" {
|
|
filename = title
|
|
}
|
|
}
|
|
return utils.RemoveInvalidChars(filename)
|
|
}
|
|
|
|
func generateID(nzb *NZB) string {
|
|
h := sha256.New()
|
|
h.Write([]byte(nzb.Name))
|
|
h.Write([]byte(fmt.Sprintf("%d", nzb.TotalSize)))
|
|
h.Write([]byte(nzb.Category))
|
|
h.Write([]byte(nzb.Password))
|
|
return hex.EncodeToString(h.Sum(nil))[:16]
|
|
}
|