File size: 1,276 Bytes
eca55dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import h5py
import pandas as pd
import os

data_dir = "data/AudioSet"
h5_path = os.path.join(data_dir, "balanced_train_soxrhq.h5")
csv_path = os.path.join(data_dir, "silent_files_balanced_train_soxrhq.csv")

print(f"Inspecting {h5_path}...")
try:
    with h5py.File(h5_path, "r") as f:
        print("Keys:", list(f.keys()))
        # Inspect the first few items if it's a group or dataset
        for key in list(f.keys())[:5]:
            item = f[key]
            if isinstance(item, h5py.Dataset):
                print(f"Dataset {key}: shape={item.shape}, dtype={item.dtype}")
            elif isinstance(item, h5py.Group):
                print(f"Group {key}: keys={list(item.keys())}")
                # Go one level deeper
                for subkey in list(item.keys())[:3]:
                    subitem = item[subkey]
                    if isinstance(subitem, h5py.Dataset):
                        print(
                            f"  Dataset {subkey}: shape={subitem.shape}, dtype={subitem.dtype}"
                        )
except Exception as e:
    print(f"Error reading HDF5: {e}")

print(f"\nInspecting {csv_path}...")
try:
    df = pd.read_csv(csv_path)
    print(df.head())
    print(df.columns)
except Exception as e:
    print(f"Error reading CSV: {e}")