% Journal article record: Journal of Computer Science, vol. 22, no. 2 (2026).
% The citation key is identical to the article's DOI; it is kept unchanged so
% existing citations of this key continue to resolve.
% Percent signs in the abstract are escaped because LaTeX would otherwise
% treat them as comment starts if the abstract is ever typeset.
@article{10.3844/jcssp.2026.540.551,
  article_type = {journal},
  author       = {Rochy, Esmay Azam and Ferdaus, Jannatul and Biswas, Uzzal and Tiang, Jun-Jiat and Nahid, Abdullah-Al},
  title        = {A Metaheuristic-Optimized Feature Selection for Early-Stage Diabetes Prediction With {SHAP}-Guided Insight into Influential Attributes},
  journal      = {Journal of Computer Science},
  publisher    = {Science Publications},
  volume       = {22},
  number       = {2},
  pages        = {540--551},
  year         = {2026},
  month        = feb,
  doi          = {10.3844/jcssp.2026.540.551},
  url          = {https://thescipub.com/abstract/jcssp.2026.540.551},
  abstract     = {Diabetes is a metabolic disorder that causes elevated blood glucose. This long-term health condition can lead to cardiovascular diseases, stroke, kidney failure, visual impairment, neuropathy, and even death in critical cases. So, a Computer-Aided Diagnostic (CAD) system is necessary to diagnose diabetes automatically. A clinician can utilize a machine learning-based CAD system that automatically diagnoses many people. This paper will use a Random Forest (RF) classifier for Machine Learning (ML) classification to identify if any individual is diabetic or non-diabetic. In order to increase the accuracy and robustness of the model, the Zebra Optimization Algorithm (ZOA) and the proposed Nomad Zebra Optimization Algorithm (NZOA) are used to identify the most optimal feature sets based on RF subset selection and RFE (Recursive Feature Elimination) technique. Smoking and Age have been identified as the most influential features with a prediction accuracy of 79.86\% with a precision of 75.51\%, recall of 88.33\%, and F1-score of 81.42\% using the proposed NZOA. Finally, to further increase the model interpretability and assist physicians in making decisions without any irrationality, SHAP (Shapley Additive Explanations) is used to explain the outputs of the models based on game theory and optimal credit allocation techniques. It also identifies that smoking has the highest impact on our model.},
}