<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3-mathml3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.3" xml:lang="EN">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">Int. J. Public Health</journal-id>
<journal-title-group>
<journal-title>International Journal of Public Health</journal-title>
<abbrev-journal-title abbrev-type="pubmed">Int. J. Public Health</abbrev-journal-title>
</journal-title-group>
<issn pub-type="epub">1661-8564</issn>
<publisher>
<publisher-name>Frontiers Media S.A.</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">1609153</article-id>
<article-id pub-id-type="doi">10.3389/ijph.2026.1609153</article-id>
<article-version article-version-type="Version of Record" vocab="NISO-RP-8-2008"/>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Original Article</subject>
</subj-group>
</article-categories>
<title-group>
<article-title>Development and external validation of an interpretable machine learning model for obesity-depression comorbidity in Korean and US adults</article-title>
<alt-title alt-title-type="left-running-head">Shangguan et al.</alt-title>
<alt-title alt-title-type="right-running-head">
<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/ijph.2026.1609153">10.3389/ijph.2026.1609153</ext-link>
</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Shangguan</surname>
<given-names>Yuwen</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn002">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2859798"/>
</contrib>
<contrib contrib-type="author" equal-contrib="yes">
<name>
<surname>Lin</surname>
<given-names>Zhenhao</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
<xref ref-type="author-notes" rid="fn002">
<sup>&#x2020;</sup>
</xref>
<uri xlink:href="https://loop.frontiersin.org/people/3235266"/>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Sim</surname>
<given-names>Young-Je</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Wu</surname>
<given-names>Kunpeng</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chu</surname>
<given-names>Yu</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Huang</surname>
<given-names>Kunyi</given-names>
</name>
<xref ref-type="aff" rid="aff3">
<sup>3</sup>
</xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Chen</surname>
<given-names>Fangxi</given-names>
</name>
<xref ref-type="aff" rid="aff1">
<sup>1</sup>
</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Ji</surname>
<given-names>Kangkang</given-names>
</name>
<xref ref-type="aff" rid="aff4">
<sup>4</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
<uri xlink:href="https://loop.frontiersin.org/people/2337254"/>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Chen</surname>
<given-names>Fang</given-names>
</name>
<xref ref-type="aff" rid="aff2">
<sup>2</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
<contrib contrib-type="author" corresp="yes">
<name>
<surname>Liu</surname>
<given-names>Shangrui</given-names>
</name>
<xref ref-type="aff" rid="aff5">
<sup>5</sup>
</xref>
<xref ref-type="corresp" rid="c001">&#x2a;</xref>
</contrib>
</contrib-group>
<aff id="aff1">
<label>1</label>
<institution>Department of Exercise Physiology, Kunsan National University</institution>, <city>Gunsan</city>, <country country="KR">Republic of Korea</country>
</aff>
<aff id="aff2">
<label>2</label>
<institution>Yancheng Key Laboratory of Molecular Epigenetics, Yancheng Medical Research Center of Nanjing University Medical School, The First People&#x2019;s Hospital of Yancheng</institution>, <city>Yancheng</city>, <country country="CN">China</country>
</aff>
<aff id="aff3">
<label>3</label>
<institution>Department of Health and Physical Education, The Education University of Hong Kong</institution>, <city>Tai Po</city>, <country country="HK">Hong Kong SAR, China</country>
</aff>
<aff id="aff4">
<label>4</label>
<institution>Department of Clinical Medical Research, Binhai County People&#x2019;s Hospital, Binhai Clinical College, Yangzhou University Medical College</institution>, <city>Yancheng</city>, <state>Jiangsu</state>, <country country="CN">China</country>
</aff>
<aff id="aff5">
<label>5</label>
<institution>Department of Physical Education, Kyungpook National University</institution>, <city>Daegu</city>, <country country="KR">Republic of Korea</country>
</aff>
<author-notes>
<corresp id="c001">
<label>&#x2a;</label>Correspondence: Kangkang Ji, <email xlink:href="mailto:kyrie@mail.ustc.edu.cn">kyrie@mail.ustc.edu.cn</email>; Fang Chen, <email xlink:href="mailto:jsdxchenfang@126.com">jsdxchenfang@126.com</email>; Shangrui Liu, <email xlink:href="mailto:lsr980324@knu.ac.kr">lsr980324@knu.ac.kr</email>
</corresp>
<fn id="fn001" fn-type="other">
<p>This Original Article is part of the IJPH Special Issue &#x201c;Artificial Intelligence (AI) and Public Health&#x201d;</p>
</fn>
<fn fn-type="equal" id="fn002">
<label>&#x2020;</label>
<p>These authors have contributed equally to this work</p>
</fn>
</author-notes>
<pub-date publication-format="electronic" date-type="pub" iso-8601-date="2026-05-28">
<day>28</day>
<month>05</month>
<year>2026</year>
</pub-date>
<pub-date publication-format="electronic" date-type="collection">
<year>2026</year>
</pub-date>
<volume>71</volume>
<elocation-id>1609153</elocation-id>
<history>
<date date-type="received">
<day>02</day>
<month>10</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>20</day>
<month>04</month>
<year>2026</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>05</month>
<year>2026</year>
</date>
</history>
<permissions>
<copyright-statement>Copyright &#xa9; 2026 Shangguan, Lin, Sim, Wu, Chu, Huang, Chen, Ji, Chen and Liu.</copyright-statement>
<copyright-year>2026</copyright-year>
<copyright-holder>Shangguan, Lin, Sim, Wu, Chu, Huang, Chen, Ji, Chen and Liu</copyright-holder>
<license>
<ali:license_ref start_date="2026-05-28">https://creativecommons.org/licenses/by/4.0/</ali:license_ref>
<license-p>This is an open-access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution License (CC BY)</ext-link>. The use, distribution or reproduction in other forums is permitted, provided the original author(s) and the copyright owner(s) are credited and that the original publication in this journal is cited, in accordance with accepted academic practice. No use, distribution or reproduction is permitted which does not comply with these terms.</license-p>
</license>
</permissions>
<abstract>
<sec>
<title>Objective</title>
<p>To investigate the association between physical inactivity and obesity&#x2013;depression comorbidity (ODC), defined as the co-occurrence of obesity and depression, and to develop an effective screening tool for identifying high-risk individuals to facilitate early intervention.</p>
</sec>
<sec>
<title>Methods</title>
<p>Data were obtained from 3,357 physically inactive adults enrolled in the Korea National Health and Nutrition Examination Survey (KNHANES, 2007&#x2013;2012). An XGBoost machine learning framework was applied to develop predictive models. Feature selection was conducted using random forest, and the prediction mechanism was interpreted with SHAP values. The model was validated internally using KNHANES 2011&#x2013;2012 data and externally with the U.S. NHANES dataset.</p>
</sec>
<sec>
<title>Results</title>
<p>The XGBoost model demonstrated good discriminative performance in internal validation (AUC &#x3d; 0.783 and 0.744) and achieved an external validation AUC of 0.886. Feature importance analysis revealed that insulin concentration, white blood cell count, and height were the primary predictors of ODC, with insulin exerting the strongest influence.</p>
</sec>
<sec>
<title>Conclusion</title>
<p>This study developed a high-performing and interpretable prediction model for ODC risk. SHAP-based interpretation identified insulin as the most influential predictor within the model, suggesting that metabolic factors may be important for ODC risk stratification.</p>
</sec>
</abstract>
<kwd-group>
<kwd>KNHANES</kwd>
<kwd>machine learning prediction</kwd>
<kwd>obesity-depression comorbidity</kwd>
<kwd>physical inactivity</kwd>
<kwd>SHAP interpretability</kwd>
</kwd-group>
<funding-group>
<funding-statement>The author(s) declared that financial support was received for this work and/or its publication. This work was supported by the National Natural Science Foundation of China (No. 82300780), the Natural Science Foundation of Jiangsu Province (No. BK20220306), and the Yancheng Key Research and Development Plan (Social Development) Project (No. YCBE202214).</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="1"/>
<equation-count count="0"/>
<ref-count count="43"/>
<page-count count="11"/>
</counts>
</article-meta>
</front>
<body>
<sec sec-type="intro" id="s1">
<title>Introduction</title>
<p>The co-occurrence of obesity and depression, termed obesity-depression comorbidity (ODC), has emerged as a significant global public health challenge receiving heightened attention within healthcare systems and society [<xref ref-type="bibr" rid="B1">1</xref>&#x2013;<xref ref-type="bibr" rid="B3">3</xref>]. Obesity represents a chronic, multifactorial disease state intricately associated with metabolic dysregulation, cardiovascular pathology, and mental health disturbances [<xref ref-type="bibr" rid="B4">4</xref>, <xref ref-type="bibr" rid="B5">5</xref>]. Concurrently, depression&#x2014;among the most prevalent affective disorders&#x2014;exhibits strong bidirectional relationships with chronic somatic conditions, particularly obesity [<xref ref-type="bibr" rid="B6">6</xref>, <xref ref-type="bibr" rid="B7">7</xref>]. Against the backdrop of evolving socioeconomic structures and lifestyle patterns, obesity and depression prevalence continues to escalate within adult populations globally. Their comorbid presentation has demonstrated increasing frequency, substantially impairing patients&#x2019; quality of life while imposing growing economic burdens on healthcare infrastructures and society [<xref ref-type="bibr" rid="B8">8</xref>, <xref ref-type="bibr" rid="B9">9</xref>]. Although epidemiological associations between depression and obesity are well-documented, the underlying mechanistic pathways and causal sequences remain inadequately characterized [<xref ref-type="bibr" rid="B10">10</xref>]. The parallel increase in both conditions necessitates urgent development of interventions targeting their complex interplay. In the present study, ODC was treated as a concurrent comorbidity outcome rather than a directional transition from obesity to depression or from depression to obesity.</p>
<p>Current therapeutic modalities primarily encompass pharmacological and psychological approaches, yet these strategies frequently encounter limitations regarding sustained efficacy and treatment adherence [<xref ref-type="bibr" rid="B11">11</xref>, <xref ref-type="bibr" rid="B12">12</xref>]. Given the multifactorial etiology of depression and obesity&#x2014;involving genetic, environmental, and psychosocial determinants&#x2014;innovative therapeutic paradigms are required to enhance outcomes and establish personalized intervention pathways [<xref ref-type="bibr" rid="B13">13</xref>, <xref ref-type="bibr" rid="B14">14</xref>]. The identification and mechanistic dissection of pivotal factors in ODC development are consequently imperative for optimizing interventional efficacy. Previous research has established physical inactivity as a shared risk factor for both obesity and depression [<xref ref-type="bibr" rid="B15">15</xref>, <xref ref-type="bibr" rid="B16">16</xref>], suggesting that augmented physical activity may complement conventional treatment strategies. Nevertheless, despite well-characterized associations between exercise engagement and these disease states, substantial knowledge gaps persist concerning their complex interactions, particularly within large-scale multi-cycle population-based survey data. Prior cross-sectional analyses have inadequately explored synergistic effects among physical activity, socioeconomic status, and dietary patterns, lacking systematic examination of comorbidity mechanisms. Contemporary machine learning methodologies and explainable artificial intelligence algorithms such as SHAP (SHapley Additive exPlanations) have demonstrated considerable potential in health risk assessment and disease mechanism research through their capacity to model intricate relationships while enhancing interpretability [<xref ref-type="bibr" rid="B17">17</xref>, <xref ref-type="bibr" rid="B18">18</xref>].</p>
<p>Given these research gaps&#x2014;especially regarding multifactorial interactions and cross-sectional characteristics collected across multiple survey cycles&#x2014;this study focuses on physically inactive adults. We employ an integrated machine learning and SHAP analytical framework to develop an interpretable risk prediction model for ODC and to quantify the relative contributions of key predictors at the model level. The model was specifically developed for risk stratification among physically inactive adults, a subgroup considered to be at elevated risk for obesity-depression comorbidity. Leveraging data from KNHANES (2007&#x2013;2012), we examined how physical inactivity is associated with the co-occurrence of obesity and depression. Through integration of multidimensional data (demographic, behavioral, socioeconomic, clinical, and nutritional domains), this study identifies major predictors of ODC within the model and quantifies their relative contributions, thereby providing a basis for future risk stratification and hypothesis generation. Our principal objective was to address knowledge gaps in nonlinear pattern analysis and quantitative feature attribution via integration of machine learning and SHAP, thereby improving understanding of ODC-related risk patterns.</p>
</sec>
<sec sec-type="methods" id="s2">
<title>Methods</title>
<sec id="s2-1">
<title>Data source and study population</title>
<p>Analytical data originated from the 2007&#x2013;2012 Korea National Health and Nutrition Examination Survey (KNHANES) database administered by the Korea Centers for Disease Control and Prevention (KCDC) [<xref ref-type="bibr" rid="B19">19</xref>, <xref ref-type="bibr" rid="B20">20</xref>]. KNHANES constitutes a nationally representative continuous cross-sectional survey employing multistage stratified cluster sampling methodology. The survey comprises three distinct modules: health examinations, nutrition assessments, and health interviews. The institutional review board of the KCDC granted ethical approval for the study protocol, with all participants providing written informed consent.</p>
<p>The initial analytical cohort included 50,405 KNHANES participants (2007&#x2013;2012). Screening procedures first excluded individuals with incomplete physical activity documentation, yielding 8,263 eligible subjects. Subsequent exclusions comprised participants meeting adequate physical activity thresholds (defined in Section <italic>Definition of physically inactive population</italic>) and those aged &#x3c;19&#xa0;years, leaving 3,750 individuals. Following exclusion of 393 subjects with missing depression-related metrics, the final analytical sample encompassed 3,357 adults. To assess temporal robustness within the same survey framework, the 2011 (n &#x3d; 477) and 2012 (n &#x3d; 502) KNHANES subsets were reserved as temporally separated internal validation cohorts. The remaining participants from 2007 to 2010 (n &#x3d; 2,378) were randomly partitioned into training (n &#x3d; 1,665) and testing (n &#x3d; 713) subsets at a 7:3 ratio. <xref ref-type="fig" rid="F1">Figure 1</xref> presents the comprehensive participant selection workflow.</p>
<fig id="F1" position="float">
<label>FIGURE 1</label>
<caption>
<p>Population screening process (South Korea, 2007&#x2013;2012).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="ijph-71-1609153-g001.tif">
<alt-text content-type="machine-generated">Flowchart showing participant selection from KNHANES 2007-2012. Starting with 50,405 individuals, exclusions for missing data and age under 19 years led to 8,263. Further exclusion for missing depression data or sufficient physical activity resulted in 3,357 for analysis. These were split into a training set of 1,665, test set of 713, and validation set of 979. The validation set included 477 in 2011 and 502 in 2012.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s2-2">
<title>Definition of physically inactive population</title>
<p>Consistent with World Health Organization (WHO) physical activity guidelines, this study evaluated individual activity levels using metabolic equivalent minutes per week (MET-min/week). Exclusion criteria incorporated missing essential data elements including weekly frequency and average daily duration of walking, moderate-intensity exercise, and vigorous-intensity exercise. Referencing WHO recommendations, minimum effective session durations were established: &#x2265;15&#xa0;min for vigorous exercise and &#x2265;10&#xa0;min for moderate exercise or walking. To minimize extreme value influence on cumulative activity calculations, session durations underwent Winsorization at the 99th percentile. Following KNHANES methodology, total weekly physical activity (PA) was computed as PA (MET-min/week) &#x3d; MET coefficient &#xd7; session duration &#xd7; weekly frequency [<xref ref-type="bibr" rid="B21">21</xref>]. Based on WHO standards, participants achieving PA &#x3c; 600 MET-min/week were classified as &#x201c;physically inactive,&#x201d; whereas those attaining PA &#x2265; 600 MET-min/week were designated &#x201c;physically active&#x201d; [<xref ref-type="bibr" rid="B21">21</xref>&#x2013;<xref ref-type="bibr" rid="B23">23</xref>].</p>
</sec>
<sec id="s2-3">
<title>Definition of obesity-depression comorbidity</title>
<p>This investigation employed Asian-specific diagnostic criteria for obesity classification and the Patient Health Questionnaire-9 (PHQ-9) for depression assessment to define obesity-depression comorbidity (ODC). Obesity categorization integrated body mass index (BMI) and waist circumference (WC) measurements: generalized obesity was defined as BMI &#x2265;25&#xa0;kg/m<sup>2</sup>; abdominal obesity was defined as WC &#x2265; 90&#xa0;cm for males or &#x2265;85&#xa0;cm for females. Participants were consequently stratified into four mutually exclusive categories: 1) Non-obese (below threshold values for both BMI and WC); 2) Isolated abdominal obesity (WC exceeding threshold with subthreshold BMI); 3) Isolated generalized obesity (BMI exceeding threshold with subthreshold WC); and 4) Compound obesity (exceeding thresholds for both indices). For analytical purposes, categories 2&#x2013;4 were collectively classified as obese. Depression status was assessed using the Patient Health Questionnaire-9 (PHQ-9; total score range: 0&#x2013;27) [<xref ref-type="bibr" rid="B24">24</xref>]. Consistent with prior validation studies, a PHQ-9 score of &#x2265;10 was used to indicate clinically significant depressive symptoms [<xref ref-type="bibr" rid="B25">25</xref>]. Thus, in this study, depression was operationally defined on the basis of symptom screening rather than physician-diagnosed depression.</p>
</sec>
<sec id="s2-4">
<title>Candidate predictor variables</title>
<p>Based on existing literature and clinical expertise, this study incorporated multiple classes of potential predictor variables relevant to obesity-depression comorbidity, comprising: demographic and sociological characteristics (sex, age, household income, educational attainment, marital status); health status indicators and disease history (hypertension, dyslipidemia, stroke, myocardial infarction, arthritis, diabetes, smoking status, alcohol consumption); clinical signs and laboratory parameters (systolic blood pressure, diastolic blood pressure, height, fasting glucose, insulin, total cholesterol, high-density lipoprotein cholesterol (HDL-C), triglycerides, hematocrit, ferritin, serum creatinine, vitamin D, white blood cell count, red blood cell count, platelet count); dietary intake metrics derived from 24-h recall (total food mass, total energy intake, water consumption, protein, fat, carbohydrates, calcium, phosphorus, iron, sodium, potassium, vitamin A, &#x3b2;-carotene, retinol, thiamine, riboflavin, niacin, vitamin C).</p>
</sec>
<sec id="s2-5">
<title>Data preprocessing and machine learning modeling</title>
<p>The initial dataset contained 46 predictor variables (12 categorical, 34 continuous). To develop robust, generalizable prediction models, systematic data preprocessing and modeling procedures were implemented. Samples with missing values were excluded in a complete-case analysis, rather than being imputed, to avoid introducing additional model-based uncertainty across heterogeneous demographic, clinical, and nutritional variables during preprocessing. Subsequently, three feature selection methods&#x2014;logistic regression, LASSO regression, and random forest&#x2014;were applied to identify optimal predictive feature subsets. The random forest method was selected for final feature subset construction based on five-fold cross-validated area under the receiver operating characteristic curve (AUC-ROC) performance in the training set. SHAP values were used only after final model development to interpret variable contributions within the XGBoost model; therefore, SHAP-based importance rankings were not intended to replicate the feature selection results obtained in the preprocessing stage. Pearson correlation coefficients were computed to address multicollinearity, retaining variables with stronger outcome associations when pairwise correlations exceeded 0.8. To address class imbalance, the Synthetic Minority Over-sampling Technique (SMOTE) was applied exclusively to the training set to improve recognition of the minority ODC-positive class [<xref ref-type="bibr" rid="B26">26</xref>]. No oversampling was performed in the internal test or external validation datasets, thereby reducing the risk of information leakage and overly optimistic performance estimates.</p>
<p>Multiple candidate models were trained and compared using SMOTE-processed data, including logistic regression, random forest, XGBoost, decision tree, na&#xef;ve Bayes, K-nearest neighbors, and radial basis function (RBF) kernel support vector machines (SVM). Hyperparameters were systematically optimized via grid search with five-fold cross-validation on the internal test set. Following comprehensive performance comparisons, the optimally parameterized XGBoost model was selected for final evaluation on temporally independent internal validation sets (2011 and 2012 data) to assess clinical generalizability and stability. Crucially, SMOTE application was restricted to training data construction, while the test and all validation sets retained their original class distributions to prevent information leakage and ensure objective evaluation.</p>
</sec>
<sec id="s2-6">
<title>External validation strategy</title>
<p>To objectively evaluate model generalizability, the 2005&#x2013;2020 U.S. National Health and Nutrition Examination Survey (NHANES) cohort served as an independent external validation dataset. Applying identical inclusion/exclusion criteria as KNHANES produced a validation cohort of 2,070 participants (demographic characteristics in <xref ref-type="sec" rid="s11">Supplementary Table S1</xref>). NHANES was selected as an accessible and well-characterized independent population-based dataset for external testing; however, it was not intended to represent the most culturally or clinically comparable population to South Korea. We did not perform U.S.-specific recalibration. Instead, the NHANES analysis was intended to assess the external discrimination and transportability of the KNHANES-derived model in an independent population setting. In the external validation cohort, obesity was defined according to U.S.-appropriate criteria to preserve the clinical relevance of outcome ascertainment in that population. Within this cohort, XGBoost performance was benchmarked against conventional algorithms including logistic regression, SVM, and random forest, primarily using area under the receiver operating characteristic curve (AUC) for discriminative ability assessment. To interpret the optimal model&#x2019;s (XGBoost) prediction patterns and identify influential predictors, mean absolute SHAP values were computed across the validation cohort, ensuring interpretative consistency and local explanation accuracy.</p>
</sec>
<sec id="s2-7">
<title>Statistical analysis</title>
<p>Binary prediction performance for ODC was comprehensively evaluated through systematic comparison of multiple machine learning algorithms: XGBoost, decision tree, logistic regression, na&#xef;ve Bayes, K-nearest neighbors, random forest, and RBF-kernel SVM. Performance was assessed multidimensionally: Fundamental metrics included error rate and accuracy; class imbalance was addressed via F&#x3b2;-score (integrating precision and recall); discriminatory capacity was measured by AUC, sensitivity, and specificity; precision-recall balance was evaluated via precision-recall AUC (PR AUC). Calibration curves assessed agreement between predicted probabilities and observed event rates. Decision curve analysis (DCA) quantified clinical utility by comparing net benefits across decision thresholds. Following comparative evaluation using these metrics, the best-performing XGBoost model underwent final validation and interpretability analysis. SHAP (SHapley Additive exPlanations) values enabled quantification of individualized feature contributions to ODC predictions, enhancing model interpretability. Finally, an interactive online risk prediction tool was developed using the R Shiny framework based on the validated XGBoost architecture. All analyses were conducted in R (version 4.4.1) employing critical packages: DMwR, ggcor, mlr3, mlr3benchmark, mlr3extralearners, kernelshap, and shapviz. Two-sided statistical tests were applied with significance defined as p &#x3c; 0.05.</p>
</sec>
</sec>
<sec sec-type="results" id="s3">
<title>Results</title>
<sec id="s3-1">
<title>Baseline characteristics of the study population</title>
<p>The final analytical cohort comprised 3,357 physically inactive adult participants, stratified into training (n &#x3d; 1,665), testing (n &#x3d; 713), and two temporally independent internal validation cohorts (2011: n &#x3d; 477; 2012: n &#x3d; 502). Baseline characteristic analysis revealed no statistically significant differences in core demographic and clinical variables&#x2014;including gender distribution, household income, educational attainment, marital status, and chronic disease history&#x2014;across datasets (p &#x3e; 0.05), with all standardized mean differences (SMD) below the 0.1 threshold. Though statistically significant variations existed for select metabolic and nutritional indicators (p &#x3c; 0.05), their SMD values remained below the 0.3 benchmark for clinical relevance. Compared to the training set, validation cohorts exhibited elevated high-density lipoprotein cholesterol (HDL-C) and hematocrit concentrations, while demonstrating significantly reduced vitamin D levels and diminished dietary intakes of carbohydrates, phosphorus, sodium, and potassium (all p &#x3c; 0.01; SMDs &#x3c; 0.227). Statistically significant but clinically marginal differences were also observed for diastolic blood pressure, height, insulin concentrations, and alcohol consumption patterns (p &#x3c; 0.05; SMDs &#x3c; 0.106). With all variables exhibiting SMDs below 0.3, the datasets demonstrated satisfactory clinical comparability for subsequent machine learning modeling and validation procedures (complete data in <xref ref-type="sec" rid="s11">Supplementary Table S2</xref>).</p>
<p>To elucidate geographical heterogeneity in obesity-depression comorbidity (ODC) burden, we generated a spatial heatmap depicting ODC prevalence probabilities across Korean administrative regions (<xref ref-type="fig" rid="F2">Figure 2</xref>). Results revealed marked epidemiological disparities: South Gyeongsang Province exhibited the highest prevalence (10.7%), followed sequentially by South Chungcheong, North Jeolla, Jeju Island, and North Gyeongsang (all &#x3e;6.0%). Remaining regions demonstrated relatively uniform distribution patterns. This geographical stratification establishes critical context for subsequent subgroup analyses and informs assessments of model generalizability across diverse populations.</p>
<fig id="F2" position="float">
<label>FIGURE 2</label>
<caption>
<p>Geographical distribution heatmap of obesity-depression comorbidity prevalence among Korean adults (South Korea, 2007&#x2013;2012).</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="ijph-71-1609153-g002.tif">
<alt-text content-type="machine-generated">Choropleth map of South Korea displaying ODC prevalence rates by region, with a gradient color scale from light yellow at 0 percent to deep purple at 10 percent prevalence, and each province&#x27;s name labeled.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-2">
<title>Feature selection</title>
<p>To construct an optimal predictive feature subset, we systematically evaluated three feature selection methodologies&#x2014;logistic regression, LASSO regression, and random forest&#x2014;on the training cohort (<xref ref-type="fig" rid="F3">Figure 3A</xref>). Comparative assessment using five-fold cross-validated area under the receiver operating characteristic curve (AUC-ROC) demonstrated the random forest approach achieving superior discriminatory capacity (AUC &#x3d; 0.721), significantly outperforming LASSO regression (AUC &#x3d; 0.698) and logistic regression (AUC &#x3d; 0.674) (<xref ref-type="fig" rid="F3">Figure 3B</xref>). Consequently, random forest was selected as the definitive feature selection technique. Variable importance ranking yielded the top 30 predictors (<xref ref-type="fig" rid="F3">Figure 3D</xref>), encompassing metabolic biomarkers, hematological indices, nutritional parameters, and demographic characteristics. To mitigate multicollinearity effects, Pearson correlation matrices were computed (<xref ref-type="sec" rid="s11">Supplementary Figure S1</xref>). For feature pairs exhibiting correlation coefficients &#x3e;0.8, we retained variables demonstrating stronger associations with the outcome (ODC), excluding six redundant parameters: serum creatinine, vitamin A, water intake, carbohydrate intake, total energy intake, and hematocrit. This refinement process yielded a final feature set comprising 24 core variables for model construction (<xref ref-type="fig" rid="F3">Figure 3C</xref>). These results reflect the performance of alternative feature selection strategies when all candidate variables were initially entered for screening, with the aim of identifying the most suitable method for constructing an optimal predictor subset. 
Thus, the logistic regression result shown in <xref ref-type="fig" rid="F3">Figure 3</xref> represents a preliminary feature selection performance rather than the final performance of a logistic regression classifier.</p>
<fig id="F3" position="float">
<label>FIGURE 3</label>
<caption>
<p>Feature selection and preliminary comparison of model performance (South Korea, 2007&#x2013;2012). <bold>(A)</bold> Comparison of feature importance rankings generated by logistic regression, least absolute shrinkage and selection operator regression, and random forest. <bold>(B)</bold> Receiver operating characteristic curves of the three models in the training dataset. <bold>(C)</bold> Correlation matrix after removal of collinear features. <bold>(D)</bold> Contribution scores of the 30 most important predictors identified by random forest.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="ijph-71-1609153-g003.tif">
<alt-text content-type="machine-generated">Panel A contains three bar charts comparing variable importance across Random Forest, LASSO Regression, and Logistic Regression models, with differing key predictors. Panel B displays ROC curves comparing model performance with AUC values. Panel C shows a triangular correlation heatmap for variables, where color intensity indicates correlation strength. Panel D presents a radial bar chart visualizing variable importance, highlighting insulin, HDL cholesterol, and food as top factors.</alt-text>
</graphic>
</fig>
</sec>
<sec id="s3-3">
<title>Model performance comparison and optimization</title>
<p>After the optimal feature selection method was determined and the retained variables were used to construct the final dataset, seven machine learning classifiers were compared to identify the best-performing predictive model. Comprehensive algorithm comparison (<xref ref-type="sec" rid="s11">Supplementary Figure S2</xref>; <xref ref-type="table" rid="T1">Table 1</xref>) revealed that XGBoost and random forest models substantially outperformed alternatives. Both attained accuracy exceeding 98%, ROC AUC surpassing 0.98, sensitivity approaching 100%, specificity exceeding 92%, and precision-recall AUC (PR AUC) above 0.97 when evaluated on the SMOTE-processed training data (<xref ref-type="fig" rid="F4">Figure 4</xref>). Their respective Brier scores&#x2014;0.0240 (ranked first) and 0.0244 (ranked second)&#x2014;indicated optimal probability calibration. Moderate performance was observed for K-nearest neighbors and support vector machines, while decision trees, naive Bayes, and logistic regression showed markedly inferior metrics (notably Brier scores &#x3e;0.14, specificity &#x3c;75%, and PR AUC &#x3c;0.6), suggesting inadequate predictive stability (detailed metrics in <xref ref-type="sec" rid="s11">Supplementary Table S3</xref>). On the independent test set preserving original class distribution, the XGBoost model maintained strong generalization capability (<xref ref-type="sec" rid="s11">Supplementary Table S4</xref>), achieving an AUC of 0.750&#x2014;confirming robust discriminatory power. Following clinical sensitivity optimization using a 0.1 decision threshold, the model attained a recall rate of 65.38% (95% CI: 54.2%&#x2013;75.4%), though precision remained constrained at 8.46%, reflecting inherent sensitivity-precision trade-offs in severely imbalanced data (F1 score &#x3d; 0.150; accuracy &#x3d; 72.93%). 
However, this sensitivity-oriented threshold was associated with a low positive predictive value, indicating a substantial false-positive burden and limiting the model&#x2019;s suitability as a standalone clinical screening tool. The test set confusion matrix further delineated classification performance of the optimized XGBoost model (<xref ref-type="sec" rid="s11">Supplementary Table S5</xref>).</p>
<table-wrap id="T1" position="float">
<label>TABLE 1</label>
<caption>
<p>Performance evaluation results of seven machine learning models (South Korea, 2007&#x2013;2012).</p>
</caption>
<table>
<thead valign="top">
<tr>
<th align="left">Model</th>
<th align="left">Error Rate</th>
<th align="left">Accuracy</th>
<th align="left">F-beta</th>
<th align="left">ROC AUC</th>
<th align="left">Sensitivity</th>
<th align="left">Specificity</th>
<th align="left">PR AUC</th>
</tr>
</thead>
<tbody valign="top">
<tr>
<td align="left">Random forest</td>
<td align="left">0.0122</td>
<td align="left">0.9878</td>
<td align="left">0.9928</td>
<td align="left">0.9895</td>
<td align="left">1.0000</td>
<td align="left">0.9212</td>
<td align="left">0.9783</td>
</tr>
<tr>
<td align="left">XGBoost</td>
<td align="left">0.0186</td>
<td align="left">0.9814</td>
<td align="left">0.9890</td>
<td align="left">0.9883</td>
<td align="left">0.9887</td>
<td align="left">0.9418</td>
<td align="left">0.9715</td>
</tr>
<tr>
<td align="left">K-nearest neighbors</td>
<td align="left">0.0441</td>
<td align="left">0.9559</td>
<td align="left">0.9734</td>
<td align="left">0.9507</td>
<td align="left">0.9523</td>
<td align="left">0.9760</td>
<td align="left">0.7624</td>
</tr>
<tr>
<td align="left">SVM (RBF)</td>
<td align="left">0.0939</td>
<td align="left">0.9061</td>
<td align="left">0.9459</td>
<td align="left">0.9184</td>
<td align="left">0.9711</td>
<td align="left">0.5514</td>
<td align="left">0.7617</td>
</tr>
<tr>
<td align="left">Decision tree</td>
<td align="left">0.1019</td>
<td align="left">0.8981</td>
<td align="left">0.9400</td>
<td align="left">0.8640</td>
<td align="left">0.9447</td>
<td align="left">0.6438</td>
<td align="left">0.5958</td>
</tr>
<tr>
<td align="left">Naive Bayes</td>
<td align="left">0.1539</td>
<td align="left">0.8461</td>
<td align="left">0.9063</td>
<td align="left">0.8864</td>
<td align="left">0.8807</td>
<td align="left">0.6575</td>
<td align="left">0.5950</td>
</tr>
<tr>
<td align="left">Logistic regression</td>
<td align="left">0.1576</td>
<td align="left">0.8424</td>
<td align="left">0.9126</td>
<td align="left">0.7661</td>
<td align="left">0.9742</td>
<td align="left">0.1233</td>
<td align="left">0.3469</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn>
<p>Abbreviations: XGBoost, eXtreme Gradient Boosting; SVM (RBF), Support Vector Machine (Radial Basis Function); F-beta, harmonic mean of precision and recall with adjustable weighting toward recall; ROC AUC, area under the receiver operating characteristic curve; PR AUC, area under the precision-recall curve.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<fig id="F4" position="float">
<label>FIGURE 4</label>
<caption>
<p>Model validation results after synthetic minority oversampling technique resampling (South Korea, 2007&#x2013;2012). <bold>(A)</bold> Receiver operating characteristic curves. <bold>(B)</bold> Decision curve analysis. <bold>(C)</bold> Calibration curves.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="ijph-71-1609153-g004.tif">
<alt-text content-type="machine-generated">Panel A presents a receiver operating characteristic (ROC) curve comparing sensitivity versus one minus specificity for seven machine learning models. Panel B is a calibration plot showing observed versus predicted probabilities by model, with point sizes indicating sample size. Panel C is a decision curve analysis displaying net benefit versus threshold probability for the same models. Each panel&#x2019;s legend lists XGBoost, Decision Tree, Logistic Regression, Naive Bayes, K-Nearest Neighbors, Random Forest, and SVM (RBF) in different colors for comparison.</alt-text>
</graphic>
</fig>
<p>Systematic evaluation of seven machine learning algorithms (<xref ref-type="sec" rid="s11">Supplementary Figure S2</xref>) employed SMOTE-oversampled training data (ODC positive:negative ratio &#x3d; 1:5). Through grid search coupled with five-fold cross-validation for hyperparameter optimization, the optimal configuration was determined (learning rate eta &#x3d; 0.1, max_depth &#x3d; 6, subsample &#x3d; 0.8, lambda &#x3d; 1.0), with early stopping regularization controlling overfitting. As documented in <xref ref-type="sec" rid="s11">Supplementary Table S6</xref>, peak performance occurred at the 44th iteration (test set AUC &#x3d; 0.750), establishing XGBoost as the definitive predictive framework.</p>
</sec>
<sec id="s3-4">
<title>Internal and external model validation</title>
<p>To rigorously assess generalizability and clinical utility, the XGBoost model underwent comprehensive validation using two temporally distinct internal cohorts (2011: n &#x3d; 477; 2012: n &#x3d; 502). As presented in <xref ref-type="sec" rid="s11">Supplementary Table S7</xref>, the model demonstrated excellent temporal discriminative capability (<xref ref-type="sec" rid="s11">Supplementary Figure S3</xref>): ROC curve analysis yielded AUC values of 0.783 (95% CI: 0.702&#x2013;0.864) for the 2011 cohort and 0.744 (95% CI: 0.652&#x2013;0.835) for the 2012 cohort. Implementation of a low decision threshold (0.1) optimized for screening sensitivity achieved detection rates of 84.2% (95% CI: 73.1%&#x2013;91.4%) and 70.0% (95% CI: 55.9%&#x2013;81.2%) in the respective cohorts, successfully capturing over two-thirds of true positive cases&#x2014;meeting fundamental requirements for early screening instruments.</p>
<p>In external validation using the U.S. NHANES cohort, the XGBoost model achieved an AUC of 0.886, allowing direct comparison with the internal validation performance reported above. The random forest classifier ranked second (AUC &#x3d; 0.858), followed by the radial basis function (RBF) kernel support vector machine (AUC &#x3d; 0.831). Remaining models demonstrated comparatively limited predictive capacity: K-nearest neighbors (AUC &#x3d; 0.795), logistic regression (AUC &#x3d; 0.778), naive Bayes (AUC &#x3d; 0.759), and decision tree (AUC &#x3d; 0.667); see <xref ref-type="sec" rid="s11">Supplementary Table S8</xref> for details. Comparative ROC curves are depicted in <xref ref-type="sec" rid="s11">Supplementary Figure S4</xref>.</p>
</sec>
<sec id="s3-5">
<title>SHAP interpretability analysis</title>
<p>SHAP (SHapley Additive exPlanations) was used to interpret the XGBoost model&#x2019;s prediction patterns. Feature importance analysis (<xref ref-type="fig" rid="F5">Figure 5</xref>) identified insulin concentration as the predominant predictor of obesity-depression comorbidity (mean &#x7c;SHAP&#x7c; &#x3d; 0.052), exerting substantially greater influence than secondary contributors: white blood cell count (0.036), height (0.027), ferritin (0.023), HDL-C (0.021), and age (0.012). SHAP summary plots indicated that elevated insulin, advanced age, increased systolic blood pressure, and higher white blood cell counts were associated with higher predicted ODC probability, whereas greater height and elevated HDL-C concentrations were associated with lower predicted ODC probability. <xref ref-type="sec" rid="s11">Supplementary Figure S5</xref> provides additional SHAP dependence visualizations.</p>
<fig id="F5" position="float">
<label>FIGURE 5</label>
<caption>
<p>Model interpretability visualizations for the extreme gradient boosting model (South Korea, 2007&#x2013;2012). <bold>(A)</bold> Shapley additive explanations beeswarm plot. <bold>(B)</bold> Shapley additive explanations global importance bar plot. <bold>(C)</bold> Shapley additive explanations waterfall plot.</p>
</caption>
<graphic mimetype="image" mime-subtype="tiff" xlink:href="ijph-71-1609153-g005.tif">
<alt-text content-type="machine-generated">Panel A presents a SHAP summary plot for multiple biomedical variables such as insulin, white blood cell count, height, and ferritin, with feature values represented by color gradients. Panel B displays a horizontal bar chart ranking variables by mean SHAP value, highlighting insulin as the most influential feature. Panel C provides a SHAP force plot visualizing individual feature contributions to a prediction, with insulin, height, and total cholesterol among the shown variables.</alt-text>
</graphic>
</fig>
<p>External validation SHAP analysis using the NHANES cohort further delineated the optimal model&#x2019;s decision architecture and feature contribution patterns (<xref ref-type="sec" rid="s11">Supplementary Figure S6</xref>). Insulin was reconfirmed as the most influential predictive variable, with its mean absolute SHAP value substantially exceeding those of other features&#x2014;underscoring its centrality in model discrimination. Key predictors including age, retinol, height, and fasting glucose (fglu) followed in descending order of importance, exhibiting remarkable concordance with KNHANES-derived SHAP results. This cross-cohort reproducibility enhances model credibility and suggests that integrated metabolic, nutritional, and developmental features may provide a robust predictive foundation across populations.</p>
</sec>
<sec id="s3-6">
<title>Online prediction tool demonstration</title>
<p>Based on the rigorously validated XGBoost framework, we developed a clinically oriented online prediction instrument (accessible at <ext-link ext-link-type="uri" xlink:href="https://zhlapp.shinyapps.io/Korea_ODC-shap-model/">https://zhlapp.shinyapps.io/Korea_ODC-shap-model/</ext-link>). Implemented via the R Shiny platform, this tool provides interactive risk assessment functionality, enabling healthcare practitioners to input 24 core indicators&#x2014;including insulin concentration, white blood cell count, height, and age&#x2014;for real-time ODC risk stratification, as visually demonstrated in <xref ref-type="sec" rid="s11">Supplementary Figure S7</xref>.</p>
</sec>
</sec>
<sec sec-type="discussion" id="s4">
<title>Discussion</title>
<p>This investigation established an XGBoost-based machine learning framework for predicting obesity-depression comorbidity (ODC) risk among physically inactive adults, leveraging KNHANES data. SHAP methodology provided interpretable information on key predictors and their interactions within the model. Principal findings include: (1) The developed XGBoost model demonstrated robust discriminatory capacity in internal validation (2011 and 2012 KNHANES cohorts; AUCs &#x3d; 0.783 and 0.744 respectively) and exceptional generalizability in an independent NHANES validation cohort (AUC &#x3d; 0.886), significantly outperforming comparator models and confirming clinical utility for early ODC detection; (2) SHAP interpretability analysis identified insulin concentration as the predominant ODC predictor (highest mean absolute SHAP value), followed sequentially by white blood cell count, age, retinol, height, and fasting glucose&#x2014;highlighting central roles of metabolic regulation, nutritional status, and developmental indicators [<xref ref-type="bibr" rid="B27">27</xref>, <xref ref-type="bibr" rid="B28">28</xref>] (however, given the cross-sectional design of the present study, these findings should be interpreted as associations with model prediction rather than evidence of temporal or causal pathways underlying ODC); (3) Marked geographical heterogeneity in ODC prevalence across South Korean regions (e.g., peak prevalence of 10.7% in South Gyeongsang) provides epidemiological foundations for targeted public health initiatives; (4) Integration of machine learning with SHAP methodology effectively quantified individualized contributions of multidimensional features (demographic, clinical, nutritional) to ODC risk and delineated their complex nonlinear association patterns.</p>
<p>SHAP analysis consistently identified insulin concentration as the most influential ODC predictor across both internal (KNHANES) and external (NHANES) validation cohorts. Insulin resistance&#x2014;a core pathophysiological feature of obesity&#x2014;has been mechanistically linked to depressive symptomatology in prior research [<xref ref-type="bibr" rid="B27">27</xref>&#x2013;<xref ref-type="bibr" rid="B29">29</xref>]. Hyperinsulinemia and impaired insulin signaling may promote emotional dysregulation through disruptions in central neurotransmitter metabolism (e.g., dopamine), neuroplasticity, and hypothalamic-pituitary-adrenal (HPA) axis function [<xref ref-type="bibr" rid="B30">30</xref>&#x2013;<xref ref-type="bibr" rid="B33">33</xref>]. By quantitatively establishing insulin&#x2019;s centrality in ODC risk prediction through machine learning, this study highlights a potentially important association between metabolic dysregulation and metabolic-mental health comorbidity. The substantial contribution of white blood cell count (second-highest SHAP value) suggests that systemic low-grade inflammation may be associated with the co-occurrence of obesity and depression [<xref ref-type="bibr" rid="B34">34</xref>&#x2013;<xref ref-type="bibr" rid="B36">36</xref>], though specific inflammatory biomarkers were not directly assayed. The positive association with advancing age may reflect cumulative effects of chronic disease burden, psychosocial stressors, or physiological decline [<xref ref-type="bibr" rid="B37">37</xref>, <xref ref-type="bibr" rid="B38">38</xref>]. Conversely, the inverse associations observed for greater height and elevated HDL-C concentrations may reflect differences in growth-related exposures and cardiometabolic health status in relation to ODC. These predictors suggest that ODC is associated with complex multisystem patterns spanning metabolic, inflammatory, and developmental domains.</p>
<p>This study substantiates the superiority of machine learning algorithms, particularly XGBoost, in predicting complex outcomes like ODC that involve nonlinear interactions among demographic, behavioral, metabolic, and nutritional determinants [<xref ref-type="bibr" rid="B39">39</xref>&#x2013;<xref ref-type="bibr" rid="B41">41</xref>]. Compared to conventional regression approaches, XGBoost more effectively captures intricate patterns and interaction effects within high-dimensional data, achieving superior discriminatory performance in both internal and external validations (AUC &#x3e;0.74). Crucially, through integration of SHAP (SHapley Additive exPlanations)&#x2014;an explainable artificial intelligence (XAI) technique&#x2014;we successfully demystified the decision logic of this sophisticated model [<xref ref-type="bibr" rid="B42">42</xref>, <xref ref-type="bibr" rid="B43">43</xref>]. SHAP values not only objectively quantified individualized predictor contributions to ODC risk (e.g., insulin&#x2019;s dominant role) but dependence plots also visually revealed nonlinear relationships between key variables (e.g., insulin, age, systolic blood pressure, white blood cell count) and disease probability. This methodological synthesis enhances model transparency and clinical interpretability by helping translate risk scores into interpretable prediction patterns. The resultant online prediction tool (<xref ref-type="sec" rid="s11">Supplementary Figure S7</xref>) may support clinical practice by helping identify high-risk individuals and informing further clinical assessment. In practice, clinicians could enter routinely available demographic, clinical, and laboratory variables into the web-based interface to obtain an individualized predicted risk of obesity-depression comorbidity. 
This output may be used to support preliminary risk stratification and to identify patients who may benefit from further psychological or metabolic assessment, rather than to establish a diagnosis independently.</p>
<p>This investigation presents the first geographical heatmap of ODC risk distribution across South Korea (<xref ref-type="fig" rid="F2">Figure 2</xref>), revealing substantial regional heterogeneity (highest burden in South Gyeongsang). Such disparities may originate from inter-regional variations in socioeconomic status, healthcare access, cultural practices (e.g., dietary habits, physical activity norms), or environmental exposures. These findings provide critical epidemiological foundations for South Korea and comparable settings to develop regionalized precision prevention strategies. In high-prevalence regions (e.g., South Gyeongsang, South Chungcheong), community health initiatives should prioritize physical activity promotion, nutritional quality improvement, and enhanced access to integrated metabolic-mental health screening services.</p>
<sec id="s4-1">
<title>Study limitations</title>
<p>Several methodological constraints warrant acknowledgment. First, the development dataset was based on KNHANES 2007&#x2013;2012, and temporal changes in lifestyle patterns, obesity prevalence, mental health awareness, and public health policies over the past decade may limit the model&#x2019;s direct applicability to contemporary populations. Future studies should therefore assess the temporal transportability of the model using more recent datasets and update or recalibrate it as needed. Second, depression was defined using PHQ-9 screening rather than a structured clinical diagnosis. Third, missing data were handled using complete-case analysis without imputation. Although this approach avoided additional assumptions introduced by imputation models, it reduced the effective sample size and may have introduced selection bias if the missingness mechanism was not completely random. Future studies should evaluate the robustness of the findings using multiple imputation or other sensitivity analyses. Fourth, though demonstrating robust performance in the NHANES validation cohort, population-specific genetic backgrounds, cultural contexts, and social structures in South Korea may limit global generalizability, necessitating further validation across diverse populations. Future research should prioritize validation and recalibration in East Asian populations that are more comparable (such as Chinese or Japanese populations). Fifth, feature engineering excluded highly correlated variables (r &#x3e; 0.8); while statistically justified, this process may have omitted biologically relevant indicators (e.g., vitamin A, hematocrit). Sixth, although the model achieved acceptable sensitivity at the optimized decision threshold of 0.1, its precision remained relatively low, indicating a substantial false-positive burden. 
In practical clinical settings, this may reduce efficiency, contribute to alert fatigue among clinicians, and lead to unnecessary follow-up assessments or anxiety in individuals incorrectly classified as high risk. Therefore, the model should be regarded as a preliminary risk stratification tool rather than a standalone screening or diagnostic instrument. Finally, although obesity in the NHANES cohort was defined using U.S.-appropriate criteria, the absence of U.S.-specific recalibration means that the external validation results should still be interpreted primarily in terms of discrimination and transportability rather than calibration equivalence.</p>
</sec>
<sec id="s4-2">
<title>Conclusion</title>
<p>Utilizing a large-scale population-based dataset, this study developed and validated an interpretable XGBoost model for ODC risk prediction in physically inactive adults. SHAP analysis identified insulin as the most influential predictor within the model, indicating that metabolic variables may play an important role in model-based risk stratification. However, these feature-attribution results reflect predictive relevance within the algorithm rather than confirmed biological mechanisms. Accordingly, the findings should be interpreted as hypothesis-generating and supportive of further prospective, experimental, and interventional studies.</p>
</sec>
</sec>
</body>
<back>
<sec sec-type="data-availability" id="s5">
<title>Data availability statement</title>
<p>The data used in this study are publicly available and can be freely downloaded from the KNHANES website (<ext-link ext-link-type="uri" xlink:href="https://knhanes.kdca.go.kr/">https://knhanes.kdca.go.kr/</ext-link>).</p>
</sec>
<sec sec-type="ethics-statement" id="s6">
<title>Ethics statement</title>
<p>The Korea National Health and Nutrition Examination Survey (KNHANES) was approved by the Institutional Review Board (IRB) of the Korea Centers for Disease Control and Prevention. Written informed consent was obtained from all participants. This study was conducted in accordance with the ethical principles of the Declaration of Helsinki for medical research involving human subjects.</p>
</sec>
<sec sec-type="author-contributions" id="s7">
<title>Author contributions</title>
<p>Conceptualization: YS and ZL; Methodology: YS; Data curation: YS; Formal analysis: ZL; Writing &#x2013; original draft: KJ; Visualization: FC and JZ; Writing &#x2013; review and editing: YC and KW; Supervision: FC; Funding acquisition: YZ. All authors contributed to the article and approved the submitted version.</p>
</sec>
<ack>
<title>Acknowledgements</title>
<p>The authors thank colleagues for their contributions.</p>
</ack>
<sec sec-type="COI-statement" id="s9">
<title>Conflict of interest</title>
<p>The authors declare that they do not have any conflicts of interest.</p>
</sec>
<sec sec-type="ai-statement" id="s10">
<title>Generative AI statement</title>
<p>The author(s) declared that generative AI was not used in the creation of this manuscript.</p>
<p>Any alternative text (alt text) provided alongside figures in this article has been generated by Frontiers with the support of artificial intelligence and reasonable efforts have been made to ensure accuracy, including review by the authors wherever possible. If you identify any issues, please contact us.</p>
</sec>
<sec sec-type="supplementary-material" id="s11">
<title>Supplementary material</title>
<p>The Supplementary Material for this article can be found online at: <ext-link ext-link-type="uri" xlink:href="https://www.ssph-journal.org/articles/10.3389/ijph.2026.1609153/full#supplementary-material">https://www.ssph-journal.org/articles/10.3389/ijph.2026.1609153/full&#x23;supplementary-material</ext-link>
</p>
<supplementary-material xlink:href="Supplementaryfile1.docx" id="SM1" mimetype="application" mime-subtype="vnd.openxmlformats-officedocument.wordprocessingml.document" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</sec>
<ref-list>
<title>References</title>
<ref id="B1">
<label>1.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Perdomo</surname>
<given-names>CM</given-names>
</name>
<name>
<surname>Cohen</surname>
<given-names>RV</given-names>
</name>
<name>
<surname>Sumithran</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Cl&#xe9;ment</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Fr&#xfc;hbeck</surname>
<given-names>G</given-names>
</name>
</person-group>. <article-title>Contemporary medical, device, and surgical therapies for obesity in adults</article-title>. <source>Lancet</source> (<year>2023</year>) <volume>401</volume>(<issue>10382</issue>):<fpage>1116</fpage>&#x2013;<lpage>30</lpage>. <pub-id pub-id-type="doi">10.1016/S0140-6736(22)02403-5</pub-id>
<pub-id pub-id-type="pmid">36774932</pub-id>
</mixed-citation>
</ref>
<ref id="B2">
<label>2.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Richardson</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Patterson</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Meltzer-Brody</surname>
<given-names>S</given-names>
</name>
<name>
<surname>McClure</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Tow</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Transformative therapies for depression: postpartum depression, major depressive disorder, and treatment-resistant depression</article-title>. <source>Annu Rev Med</source> (<year>2025</year>) <volume>76</volume>(<issue>1</issue>):<fpage>81</fpage>&#x2013;<lpage>93</lpage>. <pub-id pub-id-type="doi">10.1146/annurev-med-050423-095712</pub-id>
<pub-id pub-id-type="pmid">39527720</pub-id>
</mixed-citation>
</ref>
<ref id="B3">
<label>3.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gerardo</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Peterson</surname>
<given-names>N</given-names>
</name>
<name>
<surname>Goodpaster</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Heinberg</surname>
<given-names>L</given-names>
</name>
</person-group>. <article-title>Depression and obesity</article-title>. <source>Curr Obes Rep</source> (<year>2025</year>) <volume>14</volume>(<issue>1</issue>):<fpage>5</fpage>. <pub-id pub-id-type="doi">10.1007/s13679-024-00603-x</pub-id>
<pub-id pub-id-type="pmid">39752052</pub-id>
</mixed-citation>
</ref>
<ref id="B4">
<label>4.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Zou</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Pitchumoni</surname>
<given-names>CS</given-names>
</name>
</person-group>. <article-title>Obesity, obesities and gastrointestinal cancers</article-title>. <source>Dis Mon</source> (<year>2023</year>) <volume>69</volume>(<issue>12</issue>):<fpage>101592</fpage>. <pub-id pub-id-type="doi">10.1016/j.disamonth.2023.101592</pub-id>
<pub-id pub-id-type="pmid">37308362</pub-id>
</mixed-citation>
</ref>
<ref id="B5">
<label>5.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Fabricatore</surname>
<given-names>AN</given-names>
</name>
<name>
<surname>Wadden</surname>
<given-names>TA</given-names>
</name>
</person-group>. <article-title>Obesity</article-title>. <source>Annu Rev Clin Psychol</source> (<year>2006</year>) <volume>2</volume>:<fpage>357</fpage>&#x2013;<lpage>77</lpage>. <pub-id pub-id-type="doi">10.1146/annurev.clinpsy.2.022305.095249</pub-id>
<pub-id pub-id-type="pmid">17716074</pub-id>
</mixed-citation>
</ref>
<ref id="B6">
<label>6.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lin</surname>
<given-names>Z</given-names>
</name>
<name>
<surname>Lawrence</surname>
<given-names>WR</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>Q</given-names>
</name>
<name>
<surname>Gao</surname>
<given-names>Y</given-names>
</name>
</person-group>. <article-title>Classifying depression using blood biomarkers: a large population study</article-title>. <source>J Psychiatr Res</source> (<year>2021</year>) <volume>140</volume>:<fpage>364</fpage>&#x2013;<lpage>72</lpage>. <pub-id pub-id-type="doi">10.1016/j.jpsychires.2021.05.070</pub-id>
<pub-id pub-id-type="pmid">34144440</pub-id>
</mixed-citation>
</ref>
<ref id="B7">
<label>7.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Milaneschi</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Simmons</surname>
<given-names>WK</given-names>
</name>
<name>
<surname>van Rossum</surname>
<given-names>EFC</given-names>
</name>
<name>
<surname>Penninx</surname>
<given-names>BW</given-names>
</name>
</person-group>. <article-title>Depression and obesity: evidence of shared biological mechanisms</article-title>. <source>Mol Psychiatry</source> (<year>2019</year>) <volume>24</volume>(<issue>1</issue>):<fpage>18</fpage>&#x2013;<lpage>33</lpage>. <pub-id pub-id-type="doi">10.1038/s41380-018-0017-5</pub-id>
<pub-id pub-id-type="pmid">29453413</pub-id>
</mixed-citation>
</ref>
<ref id="B8">
<label>8.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Park</surname>
<given-names>JH</given-names>
</name>
<name>
<surname>Moon</surname>
<given-names>JH</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>HJ</given-names>
</name>
<name>
<surname>Kong</surname>
<given-names>MH</given-names>
</name>
<name>
<surname>Oh</surname>
<given-names>YH</given-names>
</name>
</person-group>. <article-title>Sedentary lifestyle: overview of updated evidence of potential health risks</article-title>. <source>Korean J Fam Med</source> (<year>2020</year>) <volume>41</volume>(<issue>6</issue>):<fpage>365</fpage>&#x2013;<lpage>73</lpage>. <pub-id pub-id-type="doi">10.4082/kjfm.20.0165</pub-id>
<pub-id pub-id-type="pmid">33242381</pub-id>
</mixed-citation>
</ref>
<ref id="B9">
<label>9.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Moulton</surname>
<given-names>CD</given-names>
</name>
<name>
<surname>Tharmaraja</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Hopkins</surname>
<given-names>CWP</given-names>
</name>
</person-group>. <article-title>Collaborative care for adults with obesity and depression</article-title>. <source>JAMA</source> (<year>2019</year>) <volume>322</volume>(<issue>4</issue>):<fpage>367</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1001/jama.2019.6774</pub-id>
<pub-id pub-id-type="pmid">31334784</pub-id>
</mixed-citation>
</ref>
<ref id="B10">
<label>10.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lasserre</surname>
<given-names>AM</given-names>
</name>
<name>
<surname>Glaus</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Vandeleur</surname>
<given-names>CL</given-names>
</name>
<name>
<surname>Marques-Vidal</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Vaucher</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Bastardot</surname>
<given-names>F</given-names>
</name>
<etal/>
</person-group> <article-title>Depression with atypical features and increase in obesity, body mass index, waist circumference, and fat mass: a prospective, population-based study</article-title>. <source>JAMA Psychiatry</source> (<year>2014</year>) <volume>71</volume>(<issue>8</issue>):<fpage>880</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2014.411</pub-id>
<pub-id pub-id-type="pmid">24898270</pub-id>
</mixed-citation>
</ref>
<ref id="B11">
<label>11.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Marchitelli</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Mazza</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Ricci</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Faia</surname>
<given-names>V</given-names>
</name>
<name>
<surname>Biondi</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Colasanti</surname>
<given-names>M</given-names>
</name>
<etal/>
</person-group> <article-title>Identification of psychological treatment dropout predictors using machine learning models on Italian patients living with overweight and obesity ineligible for bariatric surgery</article-title>. <source>Nutrients</source> (<year>2024</year>) <volume>16</volume>(<issue>16</issue>):<fpage>2605</fpage>. <pub-id pub-id-type="doi">10.3390/nu16162605</pub-id>
<pub-id pub-id-type="pmid">39203742</pub-id>
</mixed-citation>
</ref>
<ref id="B12">
<label>12.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Otte</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Chae</surname>
<given-names>WR</given-names>
</name>
<name>
<surname>Dogan</surname>
<given-names>DY</given-names>
</name>
<name>
<surname>Piber</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Roepke</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Cho</surname>
<given-names>AB</given-names>
</name>
<etal/>
</person-group> <article-title>Simvastatin as add-on treatment to escitalopram in patients with major depression and obesity: a randomized clinical trial</article-title>. <source>JAMA Psychiatry</source> (<year>2025</year>) <volume>82</volume>(<issue>8</issue>):<fpage>759</fpage>&#x2013;<lpage>67</lpage>. <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2025.0801</pub-id>
<pub-id pub-id-type="pmid">40465256</pub-id>
</mixed-citation>
</ref>
<ref id="B13">
<label>13.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Jitte</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Keluth</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Bisht</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Wal</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Singh</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Murti</surname>
<given-names>K</given-names>
</name>
<etal/>
</person-group> <article-title>Obesity and depression: common link and possible targets</article-title>. <source>CNS Neurol Disord Drug Targets</source> (<year>2024</year>) <volume>23</volume>(<issue>12</issue>):<fpage>1425</fpage>&#x2013;<lpage>49</lpage>. <pub-id pub-id-type="doi">10.2174/0118715273291985240430074053</pub-id>
<pub-id pub-id-type="pmid">38747226</pub-id>
</mixed-citation>
</ref>
<ref id="B14">
<label>14.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>P&#xe9;rez-Guti&#xe9;rrez</surname>
<given-names>AM</given-names>
</name>
<name>
<surname>Carmona</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Loucera</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Cervilla</surname>
<given-names>JA</given-names>
</name>
<name>
<surname>Guti&#xe9;rrez</surname>
<given-names>B</given-names>
</name>
<name>
<surname>Molina</surname>
<given-names>E</given-names>
</name>
<etal/>
</person-group> <article-title>Mutational landscape of risk variants in comorbid depression and obesity: a next-generation sequencing approach</article-title>. <source>Mol Psychiatry</source> (<year>2024</year>) <volume>29</volume>(<issue>11</issue>):<fpage>3553</fpage>&#x2013;<lpage>66</lpage>. <pub-id pub-id-type="doi">10.1038/s41380-024-02609-2</pub-id>
<pub-id pub-id-type="pmid">38806690</pub-id>
</mixed-citation>
</ref>
<ref id="B15">
<label>15.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Hruby</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Manson</surname>
<given-names>JE</given-names>
</name>
<name>
<surname>Qi</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Malik</surname>
<given-names>VS</given-names>
</name>
<name>
<surname>Rimm</surname>
<given-names>EB</given-names>
</name>
<name>
<surname>Sun</surname>
<given-names>Q</given-names>
</name>
<etal/>
</person-group> <article-title>Determinants and consequences of obesity</article-title>. <source>Am J Public Health</source> (<year>2016</year>) <volume>106</volume>(<issue>9</issue>):<fpage>1656</fpage>&#x2013;<lpage>62</lpage>. <pub-id pub-id-type="doi">10.2105/AJPH.2016.303326</pub-id>
<pub-id pub-id-type="pmid">27459460</pub-id>
</mixed-citation>
</ref>
<ref id="B16">
<label>16.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Casanova</surname>
<given-names>F</given-names>
</name>
<name>
<surname>O&#x27;Loughlin</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Karageorgiou</surname>
<given-names>V</given-names>
</name>
<name>
<surname>Beaumont</surname>
<given-names>RN</given-names>
</name>
<name>
<surname>Bowden</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Wood</surname>
<given-names>AR</given-names>
</name>
<etal/>
</person-group> <article-title>Effects of physical activity and sedentary time on depression, anxiety and well-being: a bidirectional Mendelian randomisation study</article-title>. <source>BMC Med</source> (<year>2023</year>) <volume>21</volume>(<issue>1</issue>):<fpage>501</fpage>. <pub-id pub-id-type="doi">10.1186/s12916-023-03211-z</pub-id>
<pub-id pub-id-type="pmid">38110912</pub-id>
</mixed-citation>
</ref>
<ref id="B17">
<label>17.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Curtiss</surname>
<given-names>J</given-names>
</name>
<name>
<surname>DiPietro</surname>
<given-names>C</given-names>
</name>
</person-group>. <article-title>Machine learning in the prediction of treatment response for emotional disorders: a systematic review and meta-analysis</article-title>. <source>Clin Psychol Rev</source> (<year>2025</year>) <volume>120</volume>:<fpage>102593</fpage>. <pub-id pub-id-type="doi">10.1016/j.cpr.2025.102593</pub-id>
<pub-id pub-id-type="pmid">40493989</pub-id>
</mixed-citation>
</ref>
<ref id="B18">
<label>18.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Qi</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Wang</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>C</given-names>
</name>
<name>
<surname>Jia</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Lin</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Yuan</surname>
<given-names>T</given-names>
</name>
</person-group>. <article-title>Machine learning and SHAP value interpretation for predicting comorbidity of cardiovascular disease and cancer with dietary antioxidants</article-title>. <source>Redox Biol</source> (<year>2025</year>) <volume>79</volume>:<fpage>103470</fpage>. <pub-id pub-id-type="doi">10.1016/j.redox.2024.103470</pub-id>
<pub-id pub-id-type="pmid">39700695</pub-id>
</mixed-citation>
</ref>
<ref id="B19">
<label>19.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Lee</surname>
<given-names>HA</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>HR</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Jung</surname>
<given-names>SY</given-names>
</name>
<name>
<surname>Jeon</surname>
<given-names>JP</given-names>
</name>
<name>
<surname>Park</surname>
<given-names>B</given-names>
</name>
<etal/>
</person-group> <article-title>Data resource profile: the statistics of the Korea National Health and Nutrition Examination Survey (KNHANES) biobank project</article-title>. <source>J Korean Med Sci</source> (<year>2025</year>) <volume>40</volume>(<issue>23</issue>):<fpage>e189</fpage>. <pub-id pub-id-type="doi">10.3346/jkms.2025.40.e189</pub-id>
<pub-id pub-id-type="pmid">40524629</pub-id>
</mixed-citation>
</ref>
<ref id="B20">
<label>20.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Kweon</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Jang</surname>
<given-names>MJ</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Kim</surname>
<given-names>K</given-names>
</name>
<name>
<surname>Choi</surname>
<given-names>S</given-names>
</name>
<etal/>
</person-group> <article-title>Data resource profile: the Korea National Health and Nutrition Examination Survey (KNHANES)</article-title>. <source>Int J Epidemiol</source> (<year>2014</year>) <volume>43</volume>(<issue>1</issue>):<fpage>69</fpage>&#x2013;<lpage>77</lpage>. <pub-id pub-id-type="doi">10.1093/ije/dyt228</pub-id>
<pub-id pub-id-type="pmid">24585853</pub-id>
</mixed-citation>
</ref>
<ref id="B21">
<label>21.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gu</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Chen</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Fang</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Xu</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Zhang</surname>
<given-names>Y</given-names>
</name>
<name>
<surname>Bian</surname>
<given-names>C</given-names>
</name>
<etal/>
</person-group> <article-title>Associations of physical activity with the risks of osteoarthritis and subtypes: a population-based cohort study of UK Biobank data</article-title>. <source>Bone Joint Res</source> (<year>2025</year>) <volume>14</volume>(<issue>7</issue>):<fpage>656</fpage>&#x2013;<lpage>65</lpage>. <pub-id pub-id-type="doi">10.1302/2046-3758.147.BJR-2024-0529.R1</pub-id>
<pub-id pub-id-type="pmid">40709816</pub-id>
</mixed-citation>
</ref>
<ref id="B22">
<label>22.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Vilar-Gomez</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Nephew</surname>
<given-names>LD</given-names>
</name>
<name>
<surname>Vuppalanchi</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Gawrieh</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Mladenovic</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Pike</surname>
<given-names>F</given-names>
</name>
<etal/>
</person-group> <article-title>High-quality diet, physical activity, and college education are associated with low risk of NAFLD among the US population</article-title>. <source>Hepatology</source> (<year>2022</year>) <volume>75</volume>(<issue>6</issue>):<fpage>1491</fpage>&#x2013;<lpage>506</lpage>. <pub-id pub-id-type="doi">10.1002/hep.32207</pub-id>
<pub-id pub-id-type="pmid">34668597</pub-id>
</mixed-citation>
</ref>
<ref id="B23">
<label>23.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chudasama</surname>
<given-names>YV</given-names>
</name>
<name>
<surname>Khunti</surname>
<given-names>KK</given-names>
</name>
<name>
<surname>Zaccardi</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Rowlands</surname>
<given-names>AV</given-names>
</name>
<name>
<surname>Yates</surname>
<given-names>T</given-names>
</name>
<name>
<surname>Gillies</surname>
<given-names>CL</given-names>
</name>
<etal/>
</person-group> <article-title>Physical activity, multimorbidity, and life expectancy: a UK Biobank longitudinal study</article-title>. <source>BMC Med</source> (<year>2019</year>) <volume>17</volume>(<issue>1</issue>):<fpage>108</fpage>. <pub-id pub-id-type="doi">10.1186/s12916-019-1339-0</pub-id>
<pub-id pub-id-type="pmid">31186007</pub-id>
</mixed-citation>
</ref>
<ref id="B24">
<label>24.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Steffens</surname>
<given-names>DC</given-names>
</name>
</person-group>. <article-title>Treatment-resistant depression in older adults</article-title>. <source>N Engl J Med</source> (<year>2024</year>) <volume>390</volume>(<issue>7</issue>):<fpage>630</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1056/NEJMcp2305428</pub-id>
<pub-id pub-id-type="pmid">38354142</pub-id>
</mixed-citation>
</ref>
<ref id="B25">
<label>25.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Martinez</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Teklu</surname>
<given-names>SM</given-names>
</name>
<name>
<surname>Tahir</surname>
<given-names>P</given-names>
</name>
<name>
<surname>Garcia</surname>
<given-names>ME</given-names>
</name>
</person-group>. <article-title>Validity of the Spanish-language Patient Health Questionnaires 2 and 9: a systematic review and meta-analysis</article-title>. <source>JAMA Netw Open</source> (<year>2023</year>) <volume>6</volume>(<issue>10</issue>):<fpage>e2336529</fpage>. <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.36529</pub-id>
<pub-id pub-id-type="pmid">37847505</pub-id>
</mixed-citation>
</ref>
<ref id="B26">
<label>26.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Shao</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Liu</surname>
<given-names>X</given-names>
</name>
<name>
<surname>Zong</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Song</surname>
<given-names>Q</given-names>
</name>
</person-group>. <article-title>Optimization of diabetes prediction methods based on combinatorial balancing algorithm</article-title>. <source>Nutr Diabetes</source> (<year>2024</year>) <volume>14</volume>(<issue>1</issue>):<fpage>63</fpage>. <pub-id pub-id-type="doi">10.1038/s41387-024-00324-z</pub-id>
<pub-id pub-id-type="pmid">39143066</pub-id>
</mixed-citation>
</ref>
<ref id="B27">
<label>27.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Watson</surname>
<given-names>KT</given-names>
</name>
<name>
<surname>Simard</surname>
<given-names>JF</given-names>
</name>
<name>
<surname>Henderson</surname>
<given-names>VW</given-names>
</name>
<name>
<surname>Nutkiewicz</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Lamers</surname>
<given-names>F</given-names>
</name>
<name>
<surname>Rasgon</surname>
<given-names>N</given-names>
</name>
<etal/>
</person-group> <article-title>Association of insulin resistance with depression severity and remission status: defining a metabolic endophenotype of depression</article-title>. <source>JAMA Psychiatry</source> (<year>2021</year>) <volume>78</volume>(<issue>4</issue>):<fpage>439</fpage>&#x2013;<lpage>41</lpage>. <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2020.3669</pub-id>
<pub-id pub-id-type="pmid">33263725</pub-id>
</mixed-citation>
</ref>
<ref id="B28">
<label>28.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ehrmann</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Krause-Steinrauf</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Uschner</surname>
<given-names>D</given-names>
</name>
<name>
<surname>Wen</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Hoogendoorn</surname>
<given-names>CJ</given-names>
</name>
<name>
<surname>Crespo-Ramos</surname>
<given-names>G</given-names>
</name>
<etal/>
</person-group> <article-title>Differential associations of somatic and cognitive-affective symptoms of depression with inflammation and insulin resistance: cross-sectional and longitudinal results from the emotional distress sub-study of the GRADE study</article-title>. <source>Diabetologia</source> (<year>2025</year>) <volume>68</volume>(<issue>7</issue>):<fpage>1403</fpage>&#x2013;<lpage>15</lpage>. <pub-id pub-id-type="doi">10.1007/s00125-025-06369-8</pub-id>
<pub-id pub-id-type="pmid">39951058</pub-id>
</mixed-citation>
</ref>
<ref id="B29">
<label>29.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Timonen</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Laakso</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Jokelainen</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Rajala</surname>
<given-names>U</given-names>
</name>
<name>
<surname>Meyer-Rochow</surname>
<given-names>VB</given-names>
</name>
<name>
<surname>Kein&#xe4;nen-Kiukaanniemi</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Insulin resistance and depression: cross sectional study</article-title>. <source>BMJ</source> (<year>2005</year>) <volume>330</volume>(<issue>7481</issue>):<fpage>17</fpage>&#x2013;<lpage>8</lpage>. <pub-id pub-id-type="doi">10.1136/bmj.38313.513310.F71</pub-id>
<pub-id pub-id-type="pmid">15604155</pub-id>
</mixed-citation>
</ref>
<ref id="B30">
<label>30.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Gruber</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Hanssen</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Qubad</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Bouzouina</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Schack</surname>
<given-names>V</given-names>
</name>
<name>
<surname>Sochor</surname>
<given-names>H</given-names>
</name>
<etal/>
</person-group> <article-title>Impact of insulin and insulin resistance on brain dopamine signalling and reward processing - an underexplored mechanism in the pathophysiology of depression?</article-title> <source>Neurosci Biobehav Rev</source> (<year>2023</year>) <volume>149</volume>:<fpage>105179</fpage>. <pub-id pub-id-type="doi">10.1016/j.neubiorev.2023.105179</pub-id>
<pub-id pub-id-type="pmid">37059404</pub-id>
</mixed-citation>
</ref>
<ref id="B31">
<label>31.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>de Bartolomeis</surname>
<given-names>A</given-names>
</name>
<name>
<surname>De Simone</surname>
<given-names>G</given-names>
</name>
<name>
<surname>De Prisco</surname>
<given-names>M</given-names>
</name>
<name>
<surname>Barone</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Napoli</surname>
<given-names>R</given-names>
</name>
<name>
<surname>Beguinot</surname>
<given-names>F</given-names>
</name>
<etal/>
</person-group> <article-title>Insulin effects on core neurotransmitter pathways involved in schizophrenia neurobiology: a meta-analysis of preclinical studies. Implications for the treatment</article-title>. <source>Mol Psychiatry</source> (<year>2023</year>) <volume>28</volume>(<issue>7</issue>):<fpage>2811</fpage>&#x2013;<lpage>25</lpage>. <pub-id pub-id-type="doi">10.1038/s41380-023-02065-4</pub-id>
<pub-id pub-id-type="pmid">37085712</pub-id>
</mixed-citation>
</ref>
<ref id="B32">
<label>32.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Choudhary</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Mourya</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Ahuja</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Sah</surname>
<given-names>SP</given-names>
</name>
<name>
<surname>Kumar</surname>
<given-names>A</given-names>
</name>
</person-group>. <article-title>Plausible anti-inflammatory mechanism of resveratrol and caffeic acid against chronic stress-induced insulin resistance in mice</article-title>. <source>Inflammopharmacology</source> (<year>2016</year>) <volume>24</volume>(<issue>6</issue>):<fpage>347</fpage>&#x2013;<lpage>61</lpage>. <pub-id pub-id-type="doi">10.1007/s10787-016-0287-y</pub-id>
<pub-id pub-id-type="pmid">27761692</pub-id>
</mixed-citation>
</ref>
<ref id="B33">
<label>33.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Sarwar</surname>
<given-names>H</given-names>
</name>
<name>
<surname>Rafiqi</surname>
<given-names>SI</given-names>
</name>
<name>
<surname>Ahmad</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Jinna</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Khan</surname>
<given-names>SA</given-names>
</name>
<name>
<surname>Karim</surname>
<given-names>T</given-names>
</name>
<etal/>
</person-group> <article-title>Hyperinsulinemia associated depression</article-title>. <source>Clin Med Insights Endocrinol Diabetes</source> (<year>2022</year>) <volume>15</volume>:<fpage>11795514221090244</fpage>. <pub-id pub-id-type="doi">10.1177/11795514221090244</pub-id>
<pub-id pub-id-type="pmid">35494421</pub-id>
</mixed-citation>
</ref>
<ref id="B34">
<label>34.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Chen</surname>
<given-names>MH</given-names>
</name>
<name>
<surname>Hsu</surname>
<given-names>JW</given-names>
</name>
<name>
<surname>Huang</surname>
<given-names>KL</given-names>
</name>
<name>
<surname>Tsai</surname>
<given-names>SJ</given-names>
</name>
<name>
<surname>Su</surname>
<given-names>TP</given-names>
</name>
<name>
<surname>Li</surname>
<given-names>CT</given-names>
</name>
<etal/>
</person-group> <article-title>Role of obesity in systemic low-grade inflammation and cognitive function in patients with bipolar I disorder or major depressive disorder</article-title>. <source>CNS Spectr</source> (<year>2021</year>) <volume>26</volume>(<issue>5</issue>):<fpage>521</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1017/S1092852920001534</pub-id>
<pub-id pub-id-type="pmid">32594934</pub-id>
</mixed-citation>
</ref>
<ref id="B35">
<label>35.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Palmer</surname>
<given-names>ER</given-names>
</name>
<name>
<surname>Morales-Mu&#xf1;oz</surname>
<given-names>I</given-names>
</name>
<name>
<surname>Perry</surname>
<given-names>BI</given-names>
</name>
<name>
<surname>Marwaha</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Warwick</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Rogers</surname>
<given-names>JC</given-names>
</name>
<etal/>
</person-group> <article-title>Trajectories of inflammation in youth and risk of mental and cardiometabolic disorders in adulthood</article-title>. <source>JAMA Psychiatry</source> (<year>2024</year>) <volume>81</volume>(<issue>11</issue>):<fpage>1130</fpage>&#x2013;<lpage>7</lpage>. <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2024.2193</pub-id>
<pub-id pub-id-type="pmid">39167392</pub-id>
</mixed-citation>
</ref>
<ref id="B36">
<label>36.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Guillemot-Legris</surname>
<given-names>O</given-names>
</name>
<name>
<surname>Muccioli</surname>
<given-names>GG</given-names>
</name>
</person-group>. <article-title>Obesity-induced neuroinflammation: beyond the hypothalamus</article-title>. <source>Trends Neurosci</source> (<year>2017</year>) <volume>40</volume>(<issue>4</issue>):<fpage>237</fpage>&#x2013;<lpage>53</lpage>. <pub-id pub-id-type="doi">10.1016/j.tins.2017.02.005</pub-id>
<pub-id pub-id-type="pmid">28318543</pub-id>
</mixed-citation>
</ref>
<ref id="B37">
<label>37.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Forman</surname>
<given-names>DE</given-names>
</name>
<name>
<surname>Kuchel</surname>
<given-names>GA</given-names>
</name>
<name>
<surname>Newman</surname>
<given-names>JC</given-names>
</name>
<name>
<surname>Kirkland</surname>
<given-names>JL</given-names>
</name>
<name>
<surname>Volpi</surname>
<given-names>E</given-names>
</name>
<name>
<surname>Taffet</surname>
<given-names>GE</given-names>
</name>
<etal/>
</person-group> <article-title>Impact of geroscience on therapeutic strategies for older adults with cardiovascular disease: JACC scientific statement</article-title>. <source>J Am Coll Cardiol</source> (<year>2023</year>) <volume>82</volume>(<issue>7</issue>):<fpage>631</fpage>&#x2013;<lpage>47</lpage>. <pub-id pub-id-type="doi">10.1016/j.jacc.2023.05.038</pub-id>
<pub-id pub-id-type="pmid">37389519</pub-id>
</mixed-citation>
</ref>
<ref id="B38">
<label>38.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Calder&#xf3;n-Larra&#xf1;aga</surname>
<given-names>A</given-names>
</name>
<name>
<surname>Vetrano</surname>
<given-names>DL</given-names>
</name>
<name>
<surname>Welmer</surname>
<given-names>AK</given-names>
</name>
<name>
<surname>Grande</surname>
<given-names>G</given-names>
</name>
<name>
<surname>Fratiglioni</surname>
<given-names>L</given-names>
</name>
<name>
<surname>Dekhtyar</surname>
<given-names>S</given-names>
</name>
</person-group>. <article-title>Psychological correlates of multimorbidity and disability accumulation in older adults</article-title>. <source>Age Ageing</source> (<year>2019</year>) <volume>48</volume>(<issue>6</issue>):<fpage>789</fpage>&#x2013;<lpage>96</lpage>. <pub-id pub-id-type="doi">10.1093/ageing/afz117</pub-id>
<pub-id pub-id-type="pmid">31579908</pub-id>
</mixed-citation>
</ref>
<ref id="B39">
<label>39.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Dragosloveanu</surname>
<given-names>S</given-names>
</name>
<name>
<surname>Vulpe</surname>
<given-names>DE</given-names>
</name>
<name>
<surname>Andrei</surname>
<given-names>CA</given-names>
</name>
<name>
<surname>Nedelea</surname>
<given-names>DG</given-names>
</name>
<name>
<surname>Garofil</surname>
<given-names>ND</given-names>
</name>
<name>
<surname>Anghel</surname>
<given-names>C</given-names>
</name>
<etal/>
</person-group> <article-title>Predicting periprosthetic joint infection: evaluating supervised machine learning models for clinical application</article-title>. <source>J Orthop Translat</source> (<year>2025</year>) <volume>54</volume>:<fpage>51</fpage>&#x2013;<lpage>64</lpage>. <pub-id pub-id-type="doi">10.1016/j.jot.2025.06.016</pub-id>
<pub-id pub-id-type="pmid">40703570</pub-id>
</mixed-citation>
</ref>
<ref id="B40">
<label>40.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ngiam</surname>
<given-names>KY</given-names>
</name>
<name>
<surname>Khor</surname>
<given-names>IW</given-names>
</name>
</person-group>. <article-title>Big data and machine learning algorithms for health-care delivery</article-title>. <source>Lancet Oncol</source> (<year>2019</year>) <volume>20</volume>(<issue>5</issue>):<fpage>e262</fpage>&#x2013;<lpage>e273</lpage>. <pub-id pub-id-type="doi">10.1016/S1470-2045(19)30149-4</pub-id>
<pub-id pub-id-type="pmid">31044724</pub-id>
</mixed-citation>
</ref>
<ref id="B41">
<label>41.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Handelman</surname>
<given-names>GS</given-names>
</name>
<name>
<surname>Kok</surname>
<given-names>HK</given-names>
</name>
<name>
<surname>Chandra</surname>
<given-names>RV</given-names>
</name>
<name>
<surname>Razavi</surname>
<given-names>AH</given-names>
</name>
<name>
<surname>Lee</surname>
<given-names>MJ</given-names>
</name>
<name>
<surname>Asadi</surname>
<given-names>H</given-names>
</name>
</person-group>. <article-title>eDoctor: machine learning and the future of medicine</article-title>. <source>J Intern Med</source> (<year>2018</year>) <volume>284</volume>(<issue>6</issue>):<fpage>603</fpage>&#x2013;<lpage>19</lpage>. <pub-id pub-id-type="doi">10.1111/joim.12822</pub-id>
<pub-id pub-id-type="pmid">30102808</pub-id>
</mixed-citation>
</ref>
<ref id="B42">
<label>42.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Watson</surname>
<given-names>DS</given-names>
</name>
<name>
<surname>Krutzinna</surname>
<given-names>J</given-names>
</name>
<name>
<surname>Bruce</surname>
<given-names>IN</given-names>
</name>
<name>
<surname>Griffiths</surname>
<given-names>CE</given-names>
</name>
<name>
<surname>McInnes</surname>
<given-names>IB</given-names>
</name>
<name>
<surname>Barnes</surname>
<given-names>MR</given-names>
</name>
<etal/>
</person-group> <article-title>Clinical applications of machine learning algorithms: beyond the black box</article-title>. <source>BMJ</source> (<year>2019</year>) <volume>364</volume>:<fpage>l886</fpage>. <pub-id pub-id-type="doi">10.1136/bmj.l886</pub-id>
<pub-id pub-id-type="pmid">30862612</pub-id>
</mixed-citation>
</ref>
<ref id="B43">
<label>43.</label>
<mixed-citation publication-type="journal">
<person-group person-group-type="author">
<name>
<surname>Ozkan</surname>
<given-names>J</given-names>
</name>
</person-group>. <article-title>Thinking outside the black box: CardioPulse takes a look at some of the issues raised by machine learning and artificial intelligence</article-title>. <source>Eur Heart J</source> (<year>2023</year>) <volume>44</volume>(<issue>12</issue>):<fpage>1007</fpage>&#x2013;<lpage>9</lpage>. <pub-id pub-id-type="doi">10.1093/eurheartj/ehac790</pub-id>
<pub-id pub-id-type="pmid">36592115</pub-id>
</mixed-citation>
</ref>
</ref-list>
<fn-group>
<fn fn-type="custom" custom-type="edited-by">
<p>
<bold>Edited by:</bold> <ext-link ext-link-type="uri" xlink:href="https://loop.frontiersin.org/people/1002545/overview">Gabriel Gulis</ext-link>, University of Southern Denmark, Denmark</p>
</fn>
<fn fn-type="custom" custom-type="reviewed-by">
<p>
<bold>Reviewed by:</bold> Two reviewers who chose to remain anonymous</p>
</fn>
</fn-group>
</back>
</article>