@article{1038,
  author   = {He, XingXing and Zhao, Zhongxu and Lan, Yongqi and Li, Yingfang and Zou, Li and Liu, Jun and Martínez, Luis and Li, Tianrui},
  title    = {Integrating a simplified formula graph representation into a graph neural network model for premise selection},
  journal  = {Applied Soft Computing},
  year     = {2025},
  pages    = {113318},
  issn     = {1568-4946},
  doi      = {10.1016/j.asoc.2025.113318},
  url      = {https://www.sciencedirect.com/science/article/pii/S1568494625006295},
  keywords = {Premise selection, Graph similarity, Graph representation, Graph neural network, SAGpool-Term-Walk},
  abstract = {The search space for automatic theorem proving typically experiences exponential growth when attempting to prove a conclusion with numerous axioms. Premise selection presents a novel approach to tackle this challenge. However, one major obstacle lies in enhancing the presentation of logical formula graphs and graph neural network models in existing premise selection methods to preserve potential information from the logical formulas effectively. This study proposes a novel simplified graph representation of logical formulas by eliminating repeated quantifiers, along with a new term-walk graph neural network model incorporating an attention mechanism and attention pooling (ASTGNNS). This model aims to preserve syntax and semantic information of logical formulas, particularly regarding the order of symbols and the scope of quantifiers in logical formulas, thereby improving classification accuracy in premise selection problems. Specifically, we first transform first-order logical conjectures and premise formulas into simplified logical formula graphs by removing repeated quantifiers. Next, we introduce a method based on a common path kernel function to measure graph similarity and validate the interpretability of our simplified logical formula graphs method. Then, an attention mechanism is employed to assign weights to term-walk feature information of nodes for updating node feature representations; meanwhile, attention pooling is utilized for selecting nodes that significantly contribute towards generating the final formula graph vector. Finally, combining the premise graph vector and conjecture graph vector forms a binary classifier for classification purposes. Experimental results demonstrate that our proposed method achieves an accuracy rate of 88.77\% on the MPTP dataset and 85.17\% on the CNF dataset, outperforming the state-of-the-art premise selection method.},
}