% Journal article record (Archives of Acoustics, volume 50, issue 2, 2025).
% volume/number hold bare integers so the bibliography style supplies its own
% "vol."/"no." labels; the acronym in the title is brace-protected against
% sentence-casing styles; percent signs in the abstract are escaped so the
% field is safe to typeset. The surname apostrophe is plain ASCII for 8-bit
% BibTeX compatibility -- NOTE(review): confirm the preferred spelling of the
% third author's surname against the published paper.
@article{Najamuddin_CAPSE-ViT:_2025,
  author    = {Najamuddin and Sheikh, Usman Ullah and Sha' Ameri, Ahmad Zuri},
  title     = {{CAPSE-ViT}: A Lightweight Framework for Underwater Acoustic Vessel Classification Using Coherent Spectral Estimation and Modified Vision Transformer},
  journal   = {Archives of Acoustics},
  volume    = {50},
  number    = {2},
  year      = {2025},
  publisher = {Polish Academy of Sciences, Institute of Fundamental Technological Research, Committee on Acoustics},
  doi       = {10.24425/aoa.2025.153662},
  url       = {http://ochroma.man.poznan.pl/Content/135220/aoa.2025.153662.pdf},
  keywords  = {underwater acoustic targets, CAPSE, vision transformer, CNN, LOFAR gram},
  abstract  = {Underwater acoustic target classification has become a key area of research for marine vessel classification, where machine learning (ML) models are leveraged to identify targets automatically. The major challenge is inserting area-specific understanding into ML frameworks to extract features that effectively distinguish between different vessel types. In this study, we propose a model that uses the coherently averaged power spectral estimation (CAPSE) algorithm. Vessel frequency spectra is first computed through the CAPSE analysis, capturing key machinery characteristics. Further, the features are processed via a vision transformer (ViT) network. This method enables the model to learn more complex relationships and patterns within the data, thereby improving the classification performance. This is accomplished by using self-attention mechanisms to capture global dependencies between features, enabling the model to focus on relationships throughout the entire input. The results, evaluated on standard DeepShip and ShipsEar datasets, show that the proposed model achieved a classification accuracy of 97.98\% and 99.19\% while utilizing just 1.90 million parameters, outperforming other models such as ResNet18 and UATR-Transformer in terms of both accuracy and computational efficiency. This work offers an improvement to the development of efficient marine vessel classification systems for underwater acoustics applications, demonstrating that high performance can be achieved with reduced computational complexity.},
}