Skip to content

Commit c4eff2a

Browse files
Suncussclaude
andcommitted
fix: storage cleanup failing to delete multi-source detection files
Cleanup query was missing the `extra` column, so filenames for multi-source detections were reconstructed without the source suffix, causing files to be silently skipped. Also consolidates double candidate fetch into a single query and fixes target_reached not being set when the final deletion crosses the threshold. Co-Authored-By: Claude Opus 4.6 <[email protected]>
1 parent 2f4c63a commit c4eff2a

4 files changed

Lines changed: 233 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
## [Unreleased]
44

5+
- Fixed storage cleanup silently skipping multi-source detection files — cleanup query was missing the `extra` column needed to reconstruct filenames with source suffixes, so those files were never deleted
6+
- Fixed storage cleanup reporting "candidates exhausted" instead of "target reached" when the final deletion crossed the threshold
7+
- Optimized storage cleanup to fetch cleanup candidates once instead of twice
58
- Redesigned audio source selection UX — pills now open the edit modal on click instead of toggling state, preventing accidental source deactivation; enable/disable toggle moved inside the modal; opacity distinguishes active vs inactive sources
69
- Auto-test RTSP streams on save/add with inline progress spinner, skipping test when URL unchanged (label-only edits save instantly); same pattern applied to SetupWizard
710
- Restored keyboard accessibility for source and notification pills

backend/core/db.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -854,7 +854,8 @@ def get_cleanup_candidates(self, keep_per_species=60, limit=None):
854854
limit: Optional max number of records to return
855855
856856
Returns:
857-
List of dicts with: id, common_name, confidence, timestamp
857+
List of dicts with: id, common_name, confidence, timestamp,
858+
audio_source, extra (raw JSON string)
858859
Ordered by timestamp ASC (oldest first)
859860
"""
860861
# Use window function to rank recordings within each species by confidence
@@ -868,13 +869,14 @@ def get_cleanup_candidates(self, keep_per_species=60, limit=None):
868869
confidence,
869870
timestamp,
870871
audio_source,
872+
extra,
871873
ROW_NUMBER() OVER (
872874
PARTITION BY common_name
873875
ORDER BY confidence DESC
874876
) as confidence_rank
875877
FROM detections
876878
)
877-
SELECT id, common_name, confidence, timestamp, audio_source
879+
SELECT id, common_name, confidence, timestamp, audio_source, extra
878880
FROM RankedDetections
879881
WHERE confidence_rank > ?
880882
ORDER BY timestamp ASC

backend/core/storage_manager.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828

2929
logger = get_logger(__name__)
3030

31+
# Average: ~270KB audio + ~30KB spectrogram = ~300KB per detection
32+
ESTIMATED_SIZE_PER_DETECTION = 300 * 1024 # 300 KB
33+
3134
def _get_storage_config() -> dict:
3235
"""Load current storage settings with defaults."""
3336
storage = get_runtime_settings().get('storage', {})
@@ -202,10 +205,6 @@ def estimate_deletable_size(db_manager, keep_per_species=None):
202205
# Get candidates (without limit to count all)
203206
candidates = db_manager.get_cleanup_candidates(keep_per_species=keep_per_species)
204207

205-
# Estimate size (use average file size if we can't check all)
206-
# Average: ~270KB audio + ~30KB spectrogram = ~300KB per detection
207-
ESTIMATED_SIZE_PER_DETECTION = 300 * 1024 # 300 KB
208-
209208
estimated_bytes = len(candidates) * ESTIMATED_SIZE_PER_DETECTION
210209
return estimated_bytes, len(candidates)
211210

@@ -258,8 +257,12 @@ def cleanup_storage(db_manager, target_percent=None, keep_per_species=None):
258257
# Calculate how much we need to free
259258
bytes_to_free = usage['used_bytes'] - (usage['total_bytes'] * target_percent / 100)
260259

260+
# Fetch cleanup candidates once (oldest first, beyond top N per species)
261+
candidates = db_manager.get_cleanup_candidates(keep_per_species=keep_per_species)
262+
candidate_count = len(candidates)
263+
261264
# SAFETY CHECK: Estimate if we can actually reach the target
262-
estimated_deletable, candidate_count = estimate_deletable_size(db_manager, keep_per_species)
265+
estimated_deletable = candidate_count * ESTIMATED_SIZE_PER_DETECTION
263266

264267
if estimated_deletable < bytes_to_free:
265268
logger.warning("Target unachievable - BirdNET data insufficient", extra={
@@ -281,9 +284,6 @@ def cleanup_storage(db_manager, target_percent=None, keep_per_species=None):
281284
'keep_per_species': keep_per_species
282285
})
283286

284-
# Get cleanup candidates (oldest first, beyond top N per species)
285-
candidates = db_manager.get_cleanup_candidates(keep_per_species=keep_per_species)
286-
287287
if not candidates:
288288
logger.info("No cleanup candidates found - all recordings within keep limit", extra={
289289
'keep_per_species': keep_per_species
@@ -311,6 +311,10 @@ def cleanup_storage(db_manager, target_percent=None, keep_per_species=None):
311311
result['files_deleted'] += 1
312312
bytes_freed += delete_result['bytes_freed']
313313

314+
# Check if target was reached by final deletion
315+
if not result['target_reached'] and bytes_freed >= bytes_to_free:
316+
result['target_reached'] = True
317+
314318
result['bytes_freed'] = bytes_freed
315319

316320
# Log summary

backend/tests/test_storage_manager.py

Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,38 @@ def test_no_candidates_when_all_within_limit(self, test_db_manager):
146146
candidates = test_db_manager.get_cleanup_candidates(keep_per_species=60)
147147
assert len(candidates) == 0
148148

149+
def test_returns_extra_column(self, test_db_manager):
150+
"""Cleanup candidates should include extra column for filename reconstruction."""
151+
import json
152+
153+
base_time = datetime(2024, 1, 15, 10, 0, 0)
154+
for i in range(70):
155+
detection = {
156+
'timestamp': (base_time - timedelta(hours=i)).isoformat(),
157+
'group_timestamp': (base_time - timedelta(hours=i)).isoformat(),
158+
'scientific_name': 'Testus birdus',
159+
'common_name': 'Test Bird',
160+
'confidence': 0.75 + (i % 20) * 0.01,
161+
'latitude': 40.7128,
162+
'longitude': -74.0060,
163+
'cutoff': 0.5,
164+
'sensitivity': 0.75,
165+
'overlap': 0.25,
166+
'extra': {'source_label': 'Backyard_Mic'},
167+
'audio_source': 'alsa_input.usb-test',
168+
}
169+
test_db_manager.insert_detection(detection)
170+
171+
candidates = test_db_manager.get_cleanup_candidates(keep_per_species=60)
172+
assert len(candidates) == 10
173+
174+
for candidate in candidates:
175+
assert 'extra' in candidate, "extra column must be present in cleanup candidates"
176+
extra = candidate['extra']
177+
if isinstance(extra, str):
178+
extra = json.loads(extra)
179+
assert extra.get('source_label') == 'Backyard_Mic'
180+
149181

150182
class TestGetDiskUsage:
151183
"""Tests for storage_manager.get_disk_usage()"""
@@ -193,6 +225,24 @@ def test_constructs_correct_paths(self):
193225
assert 'American_Robin' in paths['spectrogram_path']
194226
assert '85' in paths['audio_path'] # Confidence as percentage
195227

228+
def test_constructs_paths_with_source_label(self):
229+
"""Should include source_label suffix in filenames when present in extra."""
230+
with patch('config.settings.EXTRACTED_AUDIO_DIR', '/app/data/audio/extracted_songs'):
231+
with patch('config.settings.SPECTROGRAM_DIR', '/app/data/spectrograms'):
232+
from core.storage_manager import get_detection_files
233+
234+
detection = {
235+
'common_name': 'American Robin',
236+
'confidence': 0.85,
237+
'timestamp': '2024-01-15T10:30:00',
238+
'extra': '{"source_label": "Backyard_Mic"}',
239+
}
240+
241+
paths = get_detection_files(detection)
242+
243+
assert paths['audio_path'].endswith('_Backyard_Mic.mp3')
244+
assert paths['spectrogram_path'].endswith('_Backyard_Mic.webp')
245+
196246
def test_fallback_to_legacy_colon_pattern(self):
197247
"""Should fall back to legacy colon-pattern files if dash-pattern not found."""
198248
with tempfile.TemporaryDirectory() as tmpdir:
@@ -461,3 +511,167 @@ def test_warns_when_target_unachievable(self, populated_db_for_cleanup):
461511
# Should flag that target is not achievable
462512
assert not result['target_achievable']
463513
assert not result['target_reached']
514+
515+
def test_cleanup_resolves_multi_source_filenames(self, test_db_manager):
516+
"""Cleanup should correctly resolve filenames for multi-source detections."""
517+
base_time = datetime(2024, 1, 15, 10, 0, 0)
518+
519+
for i in range(70):
520+
detection = {
521+
'timestamp': (base_time - timedelta(hours=i)).isoformat(),
522+
'group_timestamp': (base_time - timedelta(hours=i)).isoformat(),
523+
'scientific_name': 'Testus birdus',
524+
'common_name': 'Test Bird',
525+
'confidence': 0.75 + (i % 20) * 0.01,
526+
'latitude': 40.7128,
527+
'longitude': -74.0060,
528+
'cutoff': 0.5,
529+
'sensitivity': 0.75,
530+
'overlap': 0.25,
531+
'extra': {'source_label': 'Backyard_Mic'},
532+
'audio_source': 'alsa_input.usb-test',
533+
}
534+
test_db_manager.insert_detection(detection)
535+
536+
with patch('config.settings.BASE_DIR', '/tmp'):
537+
with patch('config.settings.EXTRACTED_AUDIO_DIR', '/tmp/audio'):
538+
with patch('config.settings.SPECTROGRAM_DIR', '/tmp/spectrograms'):
539+
with patch('config.settings.user_settings', {'storage': {}}):
540+
with patch('core.storage_manager.get_disk_usage') as mock_usage:
541+
mock_usage.return_value = {
542+
'total_bytes': 100 * 1024**3,
543+
'used_bytes': 90 * 1024**3,
544+
'free_bytes': 10 * 1024**3,
545+
'percent_used': 90.0
546+
}
547+
548+
with patch('core.storage_manager.get_file_size') as mock_size:
549+
mock_size.return_value = 300 * 1024
550+
551+
with patch('core.storage_manager.delete_detection_files') as mock_delete:
552+
mock_delete.return_value = {
553+
'deleted_audio': True,
554+
'deleted_spectrogram': True,
555+
'bytes_freed': 300 * 1024
556+
}
557+
558+
from core.storage_manager import cleanup_storage
559+
560+
result = cleanup_storage(
561+
test_db_manager,
562+
target_percent=80,
563+
keep_per_species=60
564+
)
565+
566+
# Pre-fix: all would be skipped_missing because
567+
# filenames lacked the _Backyard_Mic suffix
568+
assert result['skipped_missing'] == 0
569+
assert result['files_deleted'] > 0
570+
571+
# Verify delete received detection with extra
572+
for call_args in mock_delete.call_args_list:
573+
det = call_args[0][0]
574+
assert 'extra' in det
575+
576+
def test_cleanup_fetches_candidates_once(self, test_db_manager):
577+
"""cleanup_storage should fetch candidates only once, not twice."""
578+
base_time = datetime(2024, 1, 15, 10, 0, 0)
579+
for i in range(70):
580+
detection = {
581+
'timestamp': (base_time - timedelta(hours=i)).isoformat(),
582+
'group_timestamp': (base_time - timedelta(hours=i)).isoformat(),
583+
'scientific_name': 'Testus birdus',
584+
'common_name': 'Test Bird',
585+
'confidence': 0.75 + (i % 20) * 0.01,
586+
'latitude': 40.7128,
587+
'longitude': -74.0060,
588+
'cutoff': 0.5,
589+
'sensitivity': 0.75,
590+
'overlap': 0.25,
591+
}
592+
test_db_manager.insert_detection(detection)
593+
594+
with patch('config.settings.BASE_DIR', '/tmp'):
595+
with patch('config.settings.user_settings', {'storage': {}}):
596+
with patch('core.storage_manager.get_disk_usage') as mock_usage:
597+
mock_usage.return_value = {
598+
'total_bytes': 100 * 1024**3,
599+
'used_bytes': 90 * 1024**3,
600+
'free_bytes': 10 * 1024**3,
601+
'percent_used': 90.0
602+
}
603+
604+
with patch('core.storage_manager.get_file_size', return_value=0):
605+
with patch.object(
606+
test_db_manager, 'get_cleanup_candidates',
607+
wraps=test_db_manager.get_cleanup_candidates
608+
) as mock_gc:
609+
from core.storage_manager import cleanup_storage
610+
611+
cleanup_storage(
612+
test_db_manager,
613+
target_percent=80,
614+
keep_per_species=60
615+
)
616+
617+
assert mock_gc.call_count == 1, (
618+
f"get_cleanup_candidates called {mock_gc.call_count} times, expected 1"
619+
)
620+
621+
def test_target_reached_on_final_deletion(self, test_db_manager):
622+
"""target_reached should be True when final deletion crosses the threshold."""
623+
base_time = datetime(2024, 1, 15, 10, 0, 0)
624+
# Insert 62 detections so we get exactly 2 candidates with keep=60
625+
for i in range(62):
626+
detection = {
627+
'timestamp': (base_time - timedelta(hours=i)).isoformat(),
628+
'group_timestamp': (base_time - timedelta(hours=i)).isoformat(),
629+
'scientific_name': 'Testus birdus',
630+
'common_name': 'Test Bird',
631+
'confidence': 0.75 + (i % 20) * 0.01,
632+
'latitude': 40.7128,
633+
'longitude': -74.0060,
634+
'cutoff': 0.5,
635+
'sensitivity': 0.75,
636+
'overlap': 0.25,
637+
}
638+
test_db_manager.insert_detection(detection)
639+
640+
with patch('config.settings.BASE_DIR', '/tmp'):
641+
with patch('config.settings.EXTRACTED_AUDIO_DIR', '/tmp/audio'):
642+
with patch('config.settings.SPECTROGRAM_DIR', '/tmp/spectrograms'):
643+
with patch('config.settings.user_settings', {'storage': {}}):
644+
# Disk: 81% used on 100MB total → need to free 1MB to reach 80%
645+
with patch('core.storage_manager.get_disk_usage') as mock_usage:
646+
mock_usage.return_value = {
647+
'total_bytes': 100 * 1024**2,
648+
'used_bytes': 81 * 1024**2,
649+
'free_bytes': 19 * 1024**2,
650+
'percent_used': 81.0
651+
}
652+
653+
with patch('core.storage_manager.get_file_size') as mock_size:
654+
mock_size.return_value = 600 * 1024 # 600KB per file
655+
656+
with patch('core.storage_manager.delete_detection_files') as mock_delete:
657+
mock_delete.return_value = {
658+
'deleted_audio': True,
659+
'deleted_spectrogram': True,
660+
'bytes_freed': 600 * 1024
661+
}
662+
663+
from core.storage_manager import cleanup_storage
664+
665+
result = cleanup_storage(
666+
test_db_manager,
667+
target_percent=80,
668+
keep_per_species=60
669+
)
670+
671+
# 2 candidates, each frees 600KB = 1.2MB
672+
# bytes_to_free = 81MB - 80MB = 1MB
673+
# First deletion: 600KB < 1MB, loop continues
674+
# Second deletion: 1.2MB >= 1MB, but loop ends
675+
# Post-loop check should catch this
676+
assert result['target_reached'] is True
677+
assert result['files_deleted'] == 2

0 commit comments

Comments
 (0)