/*
Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
The copyrights embodied in the content of this file are licensed by
Yahoo! Inc. under the BSD (revised) open source license
@author Dan Vlad Dascalescu <dandv@yahoo-inc.com>
Parse function for PHP. Makes use of the tokenizer from tokenizephp.js.
Based on parsejavascript.js by Marijn Haverbeke.
Features:
+ special "deprecated" style for PHP4 keywords like 'var'
+ support for PHP 5.3 keywords: 'namespace', 'use'
+ 911 predefined constants, 1301 predefined functions, 105 predeclared classes
from a typical PHP installation in a LAMP environment
+ new feature: syntax error flagging, thus enabling strict parsing of:
+ function definitions with explicitly or implicitly typed arguments and default values
+ modifiers (public, static etc.) applied to method and member definitions
+ foreach(array_expression as $key [=> $value]) loops
+ differentiation between single-quoted strings and double-quoted interpolating strings
*/
// Provide Array.prototype.indexOf on JS engines that lack it (e.g. IE).
// Adapted from https://developer.mozilla.org/En/Core_JavaScript_1.5_Reference/Global_Objects/Array/IndexOf
if (!Array.prototype.indexOf) {
  // Returns the first index at or after the optional second argument where
  // an element strictly equal to elt is found, or -1 when there is none.
  Array.prototype.indexOf = function(elt /*, from*/) {
    var length = this.length;
    // Optional start index; anything that is not a usable number becomes 0.
    var start = Number(arguments[1]) || 0;
    // Truncate towards zero.
    start = (start < 0) ? Math.ceil(start) : Math.floor(start);
    // A negative start counts back from the end of the array.
    if (start < 0) {
      start += length;
    }
    for (var i = start; i < length; i++) {
      // The 'in' check skips indices that are not present on the array.
      if (i in this && this[i] === elt) {
        return i;
      }
    }
    return -1;
  };
}
var PHPParser = Editor.Parser = (function() {
// Token types that count as atoms, i.e. that can be operands of operator
// expressions. Used as a set: only the presence of a key matters.
var atomicTypes = {
  "atom": true,
  "number": true,
  "variable": true,
  "string": true
};
// Constructor for the lexical context objects.
//   indented - indentation at the start of the line that opened this scope
//   column   - column at which this scope was opened
//   type     - type of scope: 'stat' (statement), 'form' (special form),
//              '[', '{', or '('
//   align    - whether lines inside the scope line up with its opening
//              character; left unset when null/undefined is passed
//   prev     - parent scope, if any
//   info     - extra information about the scope
function PHPLexical(indented, column, type, align, prev, info) {
  this.indented = indented;
  this.column = column;
  this.type = type;
  // '[', '{', or '(' blocks that have any text after their opening
  // character are said to be 'aligned' -- any lines below are indented
  // all the way to the opening character. The property is only created
  // when a value was actually supplied, so that code can test for it
  // with '"align" in lexical'.
  var alignKnown = !(align === null || align === undefined);
  if (alignKnown) {
    this.align = align;
  }
  this.prev = prev;
  this.info = info;
}
// PHP indentation rules.
// Given a lexical context, returns a function that maps the first characters
// of a line to the indentation that line should get. The context is read
// when the returned function is called, not when it is created.
function indentPHP(lexical) {
  return function(firstChars) {
    var lead = firstChars && firstChars.charAt(0);
    var scopeType = lexical.type;
    var inForm = scopeType == "form";

    // A '{' opening the body of a special form stays at the form's level.
    if (inForm && lead == "{") {
      return lexical.indented;
    }
    // Continuation lines of statements and forms get one extra unit.
    if (inForm || scopeType == "stat") {
      return lexical.indented + indentUnit;
    }

    // True when the line starts with the character that closes this scope.
    var closing = lead == scopeType;

    // Inside a switch: case/default labels get one unit, their bodies two.
    if (lexical.info == "switch" && !closing) {
      var isLabel = /^(?:case|default)\b/.test(firstChars);
      return lexical.indented + (isLabel ? indentUnit : 2 * indentUnit);
    }
    // Aligned scopes line up with the column of the opening character.
    if (lexical.align) {
      return lexical.column - (closing ? 1 : 0);
    }
    return lexical.indented + (closing ? 0 : indentUnit);
  };
}
// The parser-iterator-producing function itself.
function parsePHP(input, basecolumn) {
// Wrap the input in a token stream
var tokens = tokenizePHP(input);
// The parser state. cc is a stack of actions that have to be
// performed to finish the current statement. For example we might
// know that we still need to find a closing parenthesis and a
// semicolon. Actions at the end of the stack go first. It is
// initialized with an infinitely looping action that consumes
// whole statements.
var cc = [statements];
// The lexical scope, used mostly for indentation.
var lexical = new PHPLexical((basecolumn || 0) - indentUnit, 0, "block", false);
// Current column, and the indentation at the start of the current
// line. Used to create lexical scope objects.
var column = 0;
var indented = 0;
// Variables which are used by the mark, cont, and pass functions
// below to communicate with the driver loop in the 'next' function.
var consume, marked;
// The iterator object.
var parser = {next: next, copy: copy};
// Parsing is accomplished by calling next() repeatedly. Each call fetches
// one token from the token stream, updates the column/indentation
// bookkeeping, lets the action stack decide how to handle the token, and
// returns the (possibly restyled) token.
function next(){
  // Start by performing any pending 'lexical' actions (which adjust the
  // lexical variable); otherwise the steps below would work with the wrong
  // lexical state.
  while (cc[cc.length - 1].lex) {
    var lexAction = cc.pop();
    lexAction();
  }

  // Fetch the next token.
  var token = tokens.next();
  var tokenType = token.type;

  // Leading whitespace on a line determines that line's indentation.
  if (tokenType == "whitespace" && column == 0) {
    indented = token.value.length;
  }
  column += token.value.length;

  if (token.content == "\n") {
    indented = column = 0;
    // If the lexical scope's align property is still undefined at the end
    // of the line, it is an un-aligned scope.
    if (!("align" in lexical)) {
      lexical.align = false;
    }
    // Newline tokens get an indentation function associated with them.
    token.indentation = indentPHP(lexical);
  }

  // Meaningless tokens are handed back without consulting the actions.
  var meaningless = tokenType == "whitespace" || tokenType == "comment" ||
                    tokenType == "string_not_terminated";
  if (meaningless) {
    return token;
  }

  // When a meaningful token is found and the lexical scope's align is
  // still undefined, it is an aligned scope.
  if (!("align" in lexical)) {
    lexical.align = true;
  }

  // Execute actions until one 'consumes' the token, at which point it can
  // be returned. An action may set 'marked' to change the token's style.
  for (;;) {
    consume = marked = false;
    // Take and execute the topmost action.
    var action = cc.pop();
    action(token);
    if (!consume) {
      continue;
    }
    if (marked) {
      token.style = marked;
    }
    return token;
  }
  return 1; // Firebug workaround for http://code.google.com/p/fbug/issues/detail?id=1239#c1
}
// Takes a snapshot of the parser state. The stateful variables are kept in
// a closure, and the returned function restores them when called with a new
// input stream. The cc array has to be copied (both when saving and when
// restoring), because it is constantly being modified; the lexical object
// is shared between runs of the parser rather than copied.
function copy(){
  var savedLexical = lexical;
  var savedActions = cc.slice(0);
  var savedTokenState = tokens.state;

  return function copyParser(input){
    lexical = savedLexical;
    // Hand out a fresh copy so the snapshot can be restored more than once.
    cc = savedActions.slice(0);
    column = indented = 0;
    // Resume tokenizing the new input from the saved tokenizer state.
    tokens = tokenizePHP(input, savedTokenState);
    return parser;
  };
}
// Helper that pushes a list of actions onto the cc stack in reverse order,
// so that the first action in the list ends up on top and runs first.
// fs may be any array-like object (it is called with 'arguments').
function push(fs){
  var remaining = fs.length;
  while (remaining-- > 0) {
    cc.push(fs[remaining]);
  }
}
// cont and pass are used by the action functions to add other
// actions to the stack. Both push all of their arguments (in reverse
// order, via push) and then set the 'consume' flag that the driver
// loop in 'next' inspects after running an action.
// cont: the current token is consumed, i.e. 'next' returns it.
function cont(){
push(arguments);
consume = true;
}
// pass: the current token is not consumed; it is left for the next
// action on the stack.
function pass(){
push(arguments);
consume = false;
}
// Used to change the style of the current token: stores the given style
// in 'marked', which the driver loop in 'next' assigns to token.style
// once the token is consumed.
function mark(style){
marked = style;
}
// Add a layer of style to the current token, for example "syntax-error".
// The style is appended, separated by a space, to whatever has been marked
// so far. When nothing has been marked yet (marked is false, undefined or
// empty) the added style is used on its own, so the result never contains
// a stringified placeholder such as "false" or "undefined".
function mark_add(style){
  marked = marked ? marked + ' ' + style : style;
}
// Creates an action that pushes a new lexical context of the given type
// (with optional extra info) on top of the current one. The context is
// built from the indentation and column current at the time the action
// runs. null is passed for 'align', so the new context starts without an
// align property. The action carries the 'lex' flag.
function pushlex(type, info) {
  function pushlexing() {
    lexical = new PHPLexical(indented, column, type, null, lexical, info);
  }
  pushlexing.lex = true;
  return pushlexing;
}
// Pop off the current lexical context: its parent (prev) becomes the
// current context again.
function poplex(){
lexical = lexical.prev;
}
poplex.lex = true;
// The 'lex' flag on these actions (poplex and the actions created by
// pushlex) is used by the 'next' function to know they can (and have to)
// be run before moving on to the next token.
// Creates an action that discards tokens until it finds one of
// the given type. This will ignore (and recover from) syntax errors.
//   wanted - the token type to wait for
// The returned action consumes every token it is given; as long as the
// token is not of the wanted type it re-queues itself so that it also
// receives the following token.
function expect(wanted){
  return function expecting(token){
    if (token.type == wanted) {
      cont(); // found it: consume the token and stop expecting
    }
    else {
      // Re-queue this same action by its own name. (arguments.callee is
      // deprecated and throws a TypeError in strict mode code.)
      cont(expecting);
    }
  };
}
// Require a specific token type, or one of the tokens passed in the 'wanted' array
// Used to detect blatant syntax errors. 'execute' is used to pass extra code
// to be executed if the token is matched. For example, a '(' match could
// 'execute' a cont( compasep(funcarg), require(")") )
function require(wanted, execute){
return function requiring(token){
var ok;
var type = token.type;
if (typeof(wanted) == "string")
ok = (type == wanted) -1;
else
ok = wanted.indexOf(type);
if (ok >= 0) {
if (execute && typeof(execute[ok]) == "function")
execute[ok](token);
cont(); // just consume the token
}
else {
if (!marked) mark(token.style);
mark_add("syntax-error");
cont(arguments.callee);
}
};
}
// Looks for a statement, and then calls itself: without consuming the
// token, it queues 'statement' followed by 'statements' again. This is
// the looping action the cc stack is initialized with.
function statements(token){
return pass(statement, statements);
}
// Dispatches various types of statements based on the type of the current token.
function statement(token){
var type = token.type;
if (type == "keyword a") cont(pushlex("form"), expression, statement, poplex);
else if (type == "keyword b") cont(pushlex("form"), statement, poplex);
else if (type == "{") cont(pushlex("}"), block, poplex);
else if (type == "function") funcdef();
// technically, "class implode {...}" is correct, but we'll flag that as an error because it overrides a predefined function
else if (type == "class") cont(require("t_string"), expect("{"), pushlex("}"), block, poplex);
else if (type == "foreach") cont(pushlex("form"), require("("), pushlex(")"), expression, require("as"), require("variable"), /* => $value */ expect(")"), poplex, statement, poplex);
else if (type == "for") cont(pushlex("form"), require("("), pushlex(")"), expression, require(";"), expression, require(";"), expression, require(")"), poplex, statement, poplex);
// public final function foo(), protected static $bar;
else if (type == "modifier") cont(require(["modifier", "variable", "function"], [null, null, funcdef]));
else if (type == "switch") cont(pushlex("form"), require("("), expression, require(")"), pushlex("}", "switch"), require([":", "{"]), block, poplex, poplex);
else if (type == "case") cont(expression, require(":"));
else if (type == "default") cont(require(":"));
else if (type == "catch") cont(pushlex("form"), require("("), require("t_string"), require("variable"), require(")"), statement, poplex);
else if (type == "const") cont(require("t_string")); // 'const static x=5' is a syntax error
// technically, "namespace implode {...}" is correct, but we'll flag that as an error because it overrides a predefined function
else if (type == "namespace") cont(namespacedef, require(";"));
// $variables may be followed by operators, () for variable function calls, or [] subscripts
else pass(pushlex("stat"), expression, require(";"), poplex);
}
// Dispatch expression types.
function expression(token){
var type = token.type;
if (atomicTypes.hasOwnProperty(type)) cont(maybeoperator);
else if (type == "<<<") cont(require("string"), maybeoperator); // heredoc/nowdoc
else if (type == "t_string") cont(maybe_double_colon, maybeoperator);
else if (type == "keyword c") cont(expression);
// function call or parenthesized expression: $a = ($b + 1) * 2;
else if (type == "(") cont(pushlex(")"), commasep(expression), require(")"), poplex, maybeoperator);
else if (type == "operator") cont(expression);
}
// Called for places where operators, function calls, or subscripts are
// valid. Will skip on to the next action if none is found.
function maybeoperator(token){
var type = token.type;
if (type == "operator") {
if (token.content == "?") cont(expression, require(":"), expression); // ternary operator
else cont(expression);
}
else if (type == "(") cont(pushlex(")"), expression, commasep(expression), require(")"), poplex, maybeoperator /* $varfunc() + 3 */);
else if (type == "[") cont(pushlex("]"), expression, require("]"), maybeoperator /* for multidimensional arrays, or $func[$i]() */, poplex);
}
// A regular use of the double colon to specify a class, as in self::func() or myclass::$var;
// Differs from `namespace` or `use` in that only one class can be the parent; chains (A::B::$var) are a syntax error.
function maybe_double_colon(token) {
if (token.type == "t_double_colon")
// A::$var, A::func(), A::const
cont(require(["t_string", "variable"]), maybeoperator);
else {
// a t_string wasn't followed by ::, such as in a function call: foo()
pass(expression)
}
}
// The declaration or definition of a function. Consumes the current token
// and queues, in order: a required name (t_string), a required "(", a ")"
// lexical context holding a comma-separated list of funcarg items, the
// required ")", the pop of that context, and finally 'block'.
function funcdef() {
cont(require("t_string"), require("("), pushlex(")"), commasep(funcarg), require(")"), poplex, block);
}
// Builds an action that parses a comma-separated list of the things
// recognized by the 'what' action.
function commasep(what){
  // After one item: a comma is consumed and means another item follows;
  // any other token ends the list and is left for the next action.
  var proceed = function proceed(token) {
    if (token.type != ",") {
      return;
    }
    cont(what, proceed);
  };
  // Entry action: parse the first item without consuming the current
  // token, then look for a comma.
  return function commaSeparated() {
    pass(what, proceed);
  };
}
// Look for statements until a closing brace is found. The closing brace
// itself is consumed; for any other token a statement is parsed and then
// block runs again.
function block(token) {
  var atClosingBrace = token.type == "}";
  if (!atClosingBrace) {
    pass(statement, block);
    return;
  }
  cont();
}
// Optional default value of a function parameter: when the token's content
// is "=", it is consumed and an expression is expected next. Any other
// token is left for the next action on the stack.
function maybedefaultparameter(token){
  if (token.content != "=") {
    return;
  }
  cont(expression);
}
// support for default arguments: http://us.php.net/manual/en/functions.arguments.php#functions.arguments.default
function funcarg(token){
// function foo(myclass $obj) {...}
if (token.type == "t_string") cont(require("variable"), maybedefaultparameter);
// function foo($string) {...}
else if (token.type == "variable") cont(maybedefaultparameter);
}
// A namespace definition or use
function maybe_double_colon_def(token) {
if (token.type == "t_double_colon")
cont(namespacedef);
}
function namespacedef(token) {
pass(require("t_string"), maybe_double_colon_def);
}
return parser;
}
return {make: parsePHP, electricChars: "{}:"};
})();
|