shaun3141 committed on
Commit
9e0ae37
·
1 Parent(s): f166d57

Fix: Wrap feature access in try/except to prevent torchcodec ImportError

Browse files
Files changed (1) hide show
  1. utils/status.py +48 -13
utils/status.py CHANGED
@@ -34,27 +34,62 @@ def check_setup_status():
34
 
35
  if _hf_dataset is not None:
36
  # Check if audio column exists without iterating (which would trigger decoding)
 
37
  audio_count = 0
38
  try:
39
- # Check if audio feature exists without decoding
40
- if 'train' in _hf_dataset and 'audio' in _hf_dataset['train'].features:
41
- # Use dataset info to check if audio exists without iterating
42
- # If we can't check without iterating, assume audio is available if feature exists
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  status["audio_from_hf"] = True
44
- # Estimate count from dataset size (safer than iterating)
45
- audio_count = len(_hf_dataset['train']) if 'train' in _hf_dataset else 0
46
- if 'test' in _hf_dataset and 'audio' in _hf_dataset['test'].features:
47
- audio_count += len(_hf_dataset['test']) if 'test' in _hf_dataset else 0
 
48
  status["audio_from_hf"] = True
 
 
 
 
49
 
50
  if audio_count > 0:
51
  status["audio_count_hf"] = audio_count
 
 
 
 
 
 
 
 
52
  except Exception as e:
53
- # If we can't check audio (e.g., torchcodec not installed),
54
- # just mark that audio feature exists
55
- if 'train' in _hf_dataset and 'audio' in _hf_dataset['train'].features:
56
- status["audio_from_hf"] = True
57
- # Don't set count if we can't safely check
58
 
59
  # Check ESPnet
60
  try:
 
34
 
35
  if _hf_dataset is not None:
36
  # Check if audio column exists without iterating (which would trigger decoding)
37
+ # Even accessing .features might trigger torchcodec requirement, so wrap carefully
38
  audio_count = 0
39
  try:
40
+ # Try to check features - this might trigger torchcodec ImportError
41
+ has_train_audio = False
42
+ has_test_audio = False
43
+
44
+ try:
45
+ if 'train' in _hf_dataset:
46
+ features = _hf_dataset['train'].features
47
+ has_train_audio = 'audio' in features if features else False
48
+ except ImportError as e:
49
+ if 'torchcodec' in str(e).lower():
50
+ # Can't check - torchcodec not available
51
+ has_train_audio = False
52
+ else:
53
+ raise
54
+
55
+ try:
56
+ if 'test' in _hf_dataset:
57
+ features = _hf_dataset['test'].features
58
+ has_test_audio = 'audio' in features if features else False
59
+ except ImportError as e:
60
+ if 'torchcodec' in str(e).lower():
61
+ # Can't check - torchcodec not available
62
+ has_test_audio = False
63
+ else:
64
+ raise
65
+
66
+ if has_train_audio:
67
  status["audio_from_hf"] = True
68
+ try:
69
+ audio_count = len(_hf_dataset['train'])
70
+ except:
71
+ pass
72
+ if has_test_audio:
73
  status["audio_from_hf"] = True
74
+ try:
75
+ audio_count += len(_hf_dataset['test'])
76
+ except:
77
+ pass
78
 
79
  if audio_count > 0:
80
  status["audio_count_hf"] = audio_count
81
+
82
+ except ImportError as e:
83
+ if 'torchcodec' in str(e).lower():
84
+ # torchcodec not installed - can't check audio features
85
+ # Don't set audio_from_hf to avoid false positives
86
+ pass
87
+ else:
88
+ raise
89
  except Exception as e:
90
+ # Other errors - don't fail, just skip audio check
91
+ if 'torchcodec' not in str(e).lower():
92
+ print(f"⚠ Warning checking audio features: {e}")
 
 
93
 
94
  # Check ESPnet
95
  try: