php.rs
1 use crate::languages::LanguageSupport; 2 use crate::types::EnvSourceKind; 3 use std::sync::OnceLock; 4 use tracing::error; 5 use tree_sitter::{Language, Node, Query}; 6 7 pub struct Php; 8 9 static REFERENCE_QUERY: OnceLock<Query> = OnceLock::new(); 10 static BINDING_QUERY: OnceLock<Query> = OnceLock::new(); 11 static IMPORT_QUERY: OnceLock<Query> = OnceLock::new(); 12 static COMPLETION_QUERY: OnceLock<Query> = OnceLock::new(); 13 static REASSIGNMENT_QUERY: OnceLock<Query> = OnceLock::new(); 14 static IDENTIFIER_QUERY: OnceLock<Query> = OnceLock::new(); 15 static EXPORT_QUERY: OnceLock<Query> = OnceLock::new(); 16 17 static ASSIGNMENT_QUERY: OnceLock<Query> = OnceLock::new(); 18 static DESTRUCTURE_QUERY: OnceLock<Query> = OnceLock::new(); 19 static SCOPE_QUERY: OnceLock<Query> = OnceLock::new(); 20 21 /// Compiles a tree-sitter query and fails fast on errors to surface invalid language query definitions early. 22 fn compile_query(grammar: &Language, source: &str, query_name: &str) -> Query { 23 match Query::new(grammar, source) { 24 Ok(query) => query, 25 Err(e) => { 26 error!( 27 language = "php", 28 query = query_name, 29 error = %e, 30 "Failed to compile query, failing fast" 31 ); 32 panic!("Failed to compile query '{}': {}", query_name, e) 33 } 34 } 35 } 36 37 impl LanguageSupport for Php { 38 fn id(&self) -> &'static str { 39 "php" 40 } 41 42 fn is_standard_env_object(&self, name: &str) -> bool { 43 // PHP uses global superglobals $_ENV and $_SERVER 44 matches!(name, "$_ENV" | "$_SERVER" | "getenv" | "env") 45 } 46 47 fn default_env_object_name(&self) -> Option<&'static str> { 48 Some("$_ENV") 49 } 50 51 fn is_scope_node(&self, node: Node) -> bool { 52 matches!( 53 node.kind(), 54 "program" 55 | "function_definition" 56 | "method_declaration" 57 | "class_declaration" 58 | "anonymous_function" 59 | "arrow_function" 60 | "for_statement" 61 | "foreach_statement" 62 | "if_statement" 63 | "try_statement" 64 | "while_statement" 65 | "do_statement" 66 | "switch_statement" 67 ) 68 } 69 70 fn extensions(&self) -> &'static [&'static str] { 71 &["php", "phtml", "php3", "php4", "php5", "phps"] 72 } 73 74 fn language_ids(&self) -> &'static [&'static str] { 75 &["php"] 76 } 77 78 fn grammar(&self) -> Language { 79 tree_sitter_php::LANGUAGE_PHP.into() 80 } 81 82 fn reference_query(&self) -> &Query { 83 REFERENCE_QUERY.get_or_init(|| { 84 compile_query( 85 &self.grammar(), 86 include_str!("../../queries/php/references.scm"), 87 "references", 88 ) 89 }) 90 } 91 92 fn binding_query(&self) -> Option<&Query> { 93 Some(BINDING_QUERY.get_or_init(|| { 94 compile_query( 95 &self.grammar(), 96 include_str!("../../queries/php/bindings.scm"), 97 "bindings", 98 ) 99 })) 100 } 101 102 fn import_query(&self) -> Option<&Query> { 103 Some(IMPORT_QUERY.get_or_init(|| { 104 compile_query( 105 &self.grammar(), 106 include_str!("../../queries/php/imports.scm"), 107 "imports", 108 ) 109 })) 110 } 111 112 fn completion_query(&self) -> Option<&Query> { 113 Some(COMPLETION_QUERY.get_or_init(|| { 114 compile_query( 115 &self.grammar(), 116 include_str!("../../queries/php/completion.scm"), 117 "completion", 118 ) 119 })) 120 } 121 122 fn reassignment_query(&self) -> Option<&Query> { 123 Some(REASSIGNMENT_QUERY.get_or_init(|| { 124 compile_query( 125 &self.grammar(), 126 include_str!("../../queries/php/reassignments.scm"), 127 "reassignments", 128 ) 129 })) 130 } 131 132 fn identifier_query(&self) -> Option<&Query> { 133 Some(IDENTIFIER_QUERY.get_or_init(|| { 134 compile_query( 135 &self.grammar(), 136 include_str!("../../queries/php/identifiers.scm"), 137 "identifiers", 138 ) 139 })) 140 } 141 142 fn export_query(&self) -> Option<&Query> { 143 Some(EXPORT_QUERY.get_or_init(|| { 144 compile_query( 145 &self.grammar(), 146 include_str!("../../queries/php/exports.scm"), 147 "exports", 148 ) 149 })) 150 } 151 152 fn assignment_query(&self) -> Option<&Query> { 153 Some(ASSIGNMENT_QUERY.get_or_init(|| { 154 compile_query( 155 &self.grammar(), 156 include_str!("../../queries/php/assignments.scm"), 157 "assignments", 158 ) 159 })) 160 } 161 162 fn destructure_query(&self) -> Option<&Query> { 163 Some(DESTRUCTURE_QUERY.get_or_init(|| { 164 compile_query( 165 &self.grammar(), 166 include_str!("../../queries/php/destructures.scm"), 167 "destructures", 168 ) 169 })) 170 } 171 172 fn scope_query(&self) -> Option<&Query> { 173 Some(SCOPE_QUERY.get_or_init(|| { 174 compile_query( 175 &self.grammar(), 176 include_str!("../../queries/php/scopes.scm"), 177 "scopes", 178 ) 179 })) 180 } 181 182 fn is_env_source_node(&self, node: Node, source: &[u8]) -> Option<EnvSourceKind> { 183 // Detect $_ENV and $_SERVER superglobals 184 if node.kind() == "variable_name" { 185 let text = node.utf8_text(source).ok()?; 186 if text == "$_ENV" || text == "_ENV" { 187 return Some(EnvSourceKind::Object { 188 canonical_name: "$_ENV".into(), 189 }); 190 } 191 if text == "$_SERVER" || text == "_SERVER" { 192 return Some(EnvSourceKind::Object { 193 canonical_name: "$_SERVER".into(), 194 }); 195 } 196 } 197 198 None 199 } 200 201 fn known_env_modules(&self) -> &'static [&'static str] { 202 // PHP doesn't have modules in the same way, but these are common env-related patterns 203 &[] 204 } 205 206 fn completion_trigger_characters(&self) -> &'static [&'static str] { 207 // Trigger on opening quote after array subscript or function call 208 &["[\"", "['", "(\"", "('"] 209 } 210 211 fn strip_quotes<'a>(&self, text: &'a str) -> &'a str { 212 text.trim_matches(|c| c == '"' || c == '\'') 213 } 214 215 fn extract_var_name(&self, node: Node, source: &[u8]) -> Option<compact_str::CompactString> { 216 node.utf8_text(source) 217 .ok() 218 .map(|s| compact_str::CompactString::from(self.strip_quotes(s))) 219 } 220 221 fn extract_property_access( 222 &self, 223 tree: &tree_sitter::Tree, 224 content: &str, 225 byte_offset: usize, 226 ) -> Option<(compact_str::CompactString, compact_str::CompactString)> { 227 let node = tree 228 .root_node() 229 .descendant_for_byte_range(byte_offset, byte_offset)?; 230 231 // In PHP, property access is through member_access_expression 232 let member_access = if node.kind() == "member_access_expression" { 233 node 234 } else if let Some(parent) = node.parent() { 235 if parent.kind() == "member_access_expression" { 236 parent 237 } else { 238 return None; 239 } 240 } else { 241 return None; 242 }; 243 244 let object_node = member_access.child_by_field_name("object")?; 245 let name_node = member_access.child_by_field_name("name")?; 246 247 let object_name = object_node.utf8_text(content.as_bytes()).ok()?; 248 let property_name = name_node.utf8_text(content.as_bytes()).ok()?; 249 250 Some((object_name.into(), property_name.into())) 251 } 252 253 fn comment_node_kinds(&self) -> &'static [&'static str] { 254 &["comment"] 255 } 256 257 fn is_root_node(&self, node: Node) -> bool { 258 node.kind() == "program" 259 } 260 } 261 262 #[cfg(test)] 263 mod tests { 264 use super::*; 265 266 fn get_php() -> Php { 267 Php 268 } 269 270 #[test] 271 fn test_id() { 272 assert_eq!(get_php().id(), "php"); 273 } 274 275 #[test] 276 fn test_extensions() { 277 let exts = get_php().extensions(); 278 assert!(exts.contains(&"php")); 279 assert!(exts.contains(&"phtml")); 280 } 281 282 #[test] 283 fn test_language_ids() { 284 let ids = get_php().language_ids(); 285 assert!(ids.contains(&"php")); 286 } 287 288 #[test] 289 fn test_is_standard_env_object() { 290 let php = get_php(); 291 assert!(php.is_standard_env_object("$_ENV")); 292 assert!(php.is_standard_env_object("$_SERVER")); 293 assert!(php.is_standard_env_object("getenv")); 294 assert!(php.is_standard_env_object("env")); 295 assert!(!php.is_standard_env_object("process")); 296 } 297 298 #[test] 299 fn test_default_env_object_name() { 300 assert_eq!(get_php().default_env_object_name(), Some("$_ENV")); 301 } 302 303 #[test] 304 fn test_grammar_compiles() { 305 let php = get_php(); 306 let _grammar = php.grammar(); 307 } 308 309 #[test] 310 fn test_strip_quotes() { 311 let php = get_php(); 312 assert_eq!(php.strip_quotes("\"hello\""), "hello"); 313 assert_eq!(php.strip_quotes("'world'"), "world"); 314 assert_eq!(php.strip_quotes("noquotes"), "noquotes"); 315 } 316 317 #[test] 318 fn test_is_env_source_node_env() { 319 let php = get_php(); 320 let mut parser = tree_sitter::Parser::new(); 321 parser.set_language(&php.grammar()).unwrap(); 322 323 let code = "<?php\n$x = $_ENV['VAR'];"; 324 let tree = parser.parse(code, None).unwrap(); 325 let root = tree.root_node(); 326 327 fn walk_tree(cursor: &mut tree_sitter::TreeCursor, php: &Php, code: &str) -> bool { 328 loop { 329 let node = cursor.node(); 330 if node.kind() == "variable_name" { 331 if let Some(kind) = php.is_env_source_node(node, code.as_bytes()) { 332 if let EnvSourceKind::Object { canonical_name } = kind { 333 if canonical_name == "$_ENV" { 334 return true; 335 } 336 } 337 } 338 } 339 340 if cursor.goto_first_child() { 341 if walk_tree(cursor, php, code) { 342 return true; 343 } 344 cursor.goto_parent(); 345 } 346 347 if !cursor.goto_next_sibling() { 348 break; 349 } 350 } 351 false 352 } 353 354 let mut cursor = root.walk(); 355 let found = walk_tree(&mut cursor, &php, code); 356 assert!(found, "Should detect $_ENV as env source"); 357 } 358 359 #[test] 360 fn test_is_scope_node() { 361 let php = get_php(); 362 let mut parser = tree_sitter::Parser::new(); 363 parser.set_language(&php.grammar()).unwrap(); 364 365 let code = "<?php\nfunction test() { }"; 366 let tree = parser.parse(code, None).unwrap(); 367 let root = tree.root_node(); 368 369 fn find_node_of_kind<'a>( 370 node: tree_sitter::Node<'a>, 371 kind: &str, 372 ) -> Option<tree_sitter::Node<'a>> { 373 if node.kind() == kind { 374 return Some(node); 375 } 376 for i in 0..node.child_count() { 377 if let Some(child) = node.child(i) { 378 if let Some(found) = find_node_of_kind(child, kind) { 379 return Some(found); 380 } 381 } 382 } 383 None 384 } 385 386 if let Some(func) = find_node_of_kind(root, "function_definition") { 387 assert!(php.is_scope_node(func)); 388 } 389 } 390 }