|
Ninja
|
00001 // Copyright 2011 Google Inc. All Rights Reserved. 00002 // 00003 // Licensed under the Apache License, Version 2.0 (the "License"); 00004 // you may not use this file except in compliance with the License. 00005 // You may obtain a copy of the License at 00006 // 00007 // http://www.apache.org/licenses/LICENSE-2.0 00008 // 00009 // Unless required by applicable law or agreed to in writing, software 00010 // distributed under the License is distributed on an "AS IS" BASIS, 00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 // See the License for the specific language governing permissions and 00013 // limitations under the License. 00014 00015 #include "parsers.h" 00016 00017 #include <assert.h> 00018 #include <errno.h> 00019 #include <stdio.h> 00020 #include <string.h> 00021 00022 #include "graph.h" 00023 #include "state.h" 00024 #include "util.h" 00025 00026 string Token::AsString() const { 00027 switch (type_) { 00028 case IDENT: return "'" + string(pos_, end_ - pos_) + "'"; 00029 case UNKNOWN: return "unknown '" + string(pos_, end_ - pos_) + "'"; 00030 case NEWLINE: return "newline"; 00031 case EQUALS: return "'='"; 00032 case COLON: return "':'"; 00033 case PIPE: return "'|'"; 00034 case PIPE2: return "'||'"; 00035 case TEOF: return "eof"; 00036 case INDENT: return "indenting in"; 00037 case OUTDENT: return "indenting out"; 00038 case NONE: break; 00039 } 00040 assert(false); 00041 return ""; 00042 } 00043 00044 bool Tokenizer::ErrorAt(const char* pos, const string& message, string* err) { 00045 // Re-scan the input, counting newlines so that we can compute the 00046 // correct position. 00047 int line = 1; 00048 const char* line_start = start_; 00049 for (const char* p = start_; p < pos; ++p) { 00050 if (*p == '\n') { 00051 ++line; 00052 line_start = p + 1; 00053 } 00054 } 00055 int col = pos - line_start + 1; 00056 00057 char buf[1024]; 00058 snprintf(buf, sizeof(buf), 00059 "line %d, col %d: %s", line, col, message.c_str()); 00060 err->assign(buf); 00061 return false; 00062 } 00063 00064 void Tokenizer::Start(const char* start, const char* end) { 00065 cur_line_ = cur_ = start_ = start; 00066 end_ = end; 00067 } 00068 00069 bool Tokenizer::ErrorExpected(const string& expected, string* err) { 00070 return Error("expected " + expected + ", got " + token_.AsString(), err); 00071 } 00072 00073 void Tokenizer::SkipWhitespace(bool newline) { 00074 if (token_.type_ == Token::NEWLINE && newline) 00075 Newline(NULL); 00076 00077 const char kContinuation = makefile_flavor_ ? '\\' : '$'; 00078 00079 while (cur_ < end_) { 00080 if (*cur_ == ' ') { 00081 ++cur_; 00082 } else if (newline && *cur_ == '\n') { 00083 Newline(NULL); 00084 } else if (*cur_ == kContinuation && cur_ + 1 < end_ && cur_[1] == '\n') { 00085 ++cur_; ++cur_; 00086 } else if (*cur_ == '#' && cur_ == cur_line_) { 00087 while (cur_ < end_ && *cur_ != '\n') 00088 ++cur_; 00089 if (cur_ < end_ && *cur_ == '\n') 00090 ++cur_; 00091 cur_line_ = cur_; 00092 } else { 00093 break; 00094 } 00095 } 00096 } 00097 00098 bool Tokenizer::Newline(string* err) { 00099 if (!ExpectToken(Token::NEWLINE, err)) 00100 return false; 00101 00102 return true; 00103 } 00104 00105 /// Return true if |c| is part of an identifier. 00106 static bool IsIdentChar(char c) { 00107 // This function shows up hot on profiles. Instead of the natural 00108 // 'if' statement, use a table as generated by this Python script: 00109 // import string 00110 // cs = set() 00111 // for c in string.ascii_letters + string.digits + r'+,-./\_$': 00112 // cs.add(ord(c)) 00113 // for i in range(128): 00114 // if i in cs: 00115 // print '1,', 00116 // else: 00117 // print '0,', 00118 // if i % 16 == 15: 00119 // print 00120 static const bool kIdents[] = { 00121 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00122 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 00123 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 00124 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 00125 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00126 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 00127 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 00128 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 00129 }; 00130 return kIdents[(int)c]; 00131 } 00132 00133 bool Tokenizer::ExpectToken(Token::Type expected, string* err) { 00134 PeekToken(); 00135 if (token_.type_ != expected) 00136 return ErrorExpected(Token(expected).AsString(), err); 00137 ConsumeToken(); 00138 return true; 00139 } 00140 00141 bool Tokenizer::ExpectIdent(const char* expected, string* err) { 00142 PeekToken(); 00143 if (token_.type_ != Token::IDENT || 00144 strncmp(token_.pos_, expected, token_.end_ - token_.pos_) != 0) { 00145 return ErrorExpected(string("'") + expected + "'", err); 00146 } 00147 ConsumeToken(); 00148 return true; 00149 } 00150 00151 bool Tokenizer::ReadIdent(StringPiece* out) { 00152 PeekToken(); 00153 if (token_.type_ != Token::IDENT) 00154 return false; 00155 out->str_ = token_.pos_; 00156 out->len_ = token_.end_ - token_.pos_; 00157 ConsumeToken(); 00158 return true; 00159 } 00160 00161 bool Tokenizer::ReadIdent(string* out) { 00162 StringPiece token; 00163 if (!ReadIdent(&token)) 00164 return false; 00165 out->assign(token.str_, token.len_); 00166 return true; 00167 } 00168 00169 // A note on backslashes in Makefiles, from reading the docs: 00170 // Backslash-newline is the line continuation character. 00171 // Backslash-# escapes a # (otherwise meaningful as a comment start). 00172 // Backslash-% escapes a % (otherwise meaningful as a special). 00173 // Finally, quoting the GNU manual, "Backslashes that are not in danger 00174 // of quoting ‘%’ characters go unmolested." 00175 // How do you end a line with a backslash? The netbsd Make docs suggest 00176 // reading the result of a shell command echoing a backslash! 00177 // 00178 // Rather than implement the above, we do the simpler thing here. 00179 // If anyone actually has depfiles that rely on the more complicated 00180 // behavior we can adjust this. 00181 bool Tokenizer::ReadToNewline(string *text, string* err, size_t max_length) { 00182 // XXX token_.clear(); 00183 const char kContinuation = makefile_flavor_ ? '\\' : '$'; 00184 while (cur_ < end_ && *cur_ != '\n') { 00185 if (*cur_ == kContinuation) { 00186 // Might be a line continuation; peek ahead to check. 00187 if (cur_ + 1 >= end_) 00188 return Error("unexpected eof", err); 00189 if (*(cur_ + 1) == '\n') { 00190 // Let SkipWhitespace handle the continuation logic. 00191 SkipWhitespace(); 00192 continue; 00193 } 00194 00195 // Otherwise, just treat it like a normal character. 00196 text->push_back(*cur_); 00197 ++cur_; 00198 } else { 00199 text->push_back(*cur_); 00200 ++cur_; 00201 } 00202 if (text->size() >= max_length) { 00203 token_.pos_ = cur_; 00204 return false; 00205 } 00206 } 00207 return Newline(err); 00208 } 00209 00210 Token::Type Tokenizer::PeekToken() { 00211 if (token_.type_ != Token::NONE) 00212 return token_.type_; 00213 00214 token_.pos_ = cur_; 00215 if (!makefile_flavor_ && cur_indent_ == -1) { 00216 cur_indent_ = cur_ - cur_line_; 00217 if (cur_indent_ != last_indent_) { 00218 if (cur_indent_ > last_indent_) { 00219 token_.type_ = Token::INDENT; 00220 } else if (cur_indent_ < last_indent_) { 00221 token_.type_ = Token::OUTDENT; 00222 } 00223 last_indent_ = cur_indent_; 00224 return token_.type_; 00225 } 00226 } 00227 00228 if (cur_ >= end_) { 00229 token_.type_ = Token::TEOF; 00230 return token_.type_; 00231 } 00232 00233 if (IsIdentChar(*cur_)) { 00234 while (cur_ < end_ && IsIdentChar(*cur_)) { 00235 ++cur_; 00236 } 00237 token_.end_ = cur_; 00238 token_.type_ = Token::IDENT; 00239 } else if (*cur_ == ':') { 00240 token_.type_ = Token::COLON; 00241 ++cur_; 00242 } else if (*cur_ == '=') { 00243 token_.type_ = Token::EQUALS; 00244 ++cur_; 00245 } else if (*cur_ == '|') { 00246 if (cur_ + 1 < end_ && cur_[1] == '|') { 00247 token_.type_ = Token::PIPE2; 00248 cur_ += 2; 00249 } else { 00250 token_.type_ = Token::PIPE; 00251 ++cur_; 00252 } 00253 } else if (*cur_ == '\n') { 00254 token_.type_ = Token::NEWLINE; 00255 ++cur_; 00256 cur_line_ = cur_; 00257 cur_indent_ = -1; 00258 } 00259 00260 SkipWhitespace(); 00261 00262 if (token_.type_ == Token::NONE) { 00263 token_.type_ = Token::UNKNOWN; 00264 token_.end_ = cur_ + 1; 00265 } 00266 00267 return token_.type_; 00268 } 00269 00270 void Tokenizer::ConsumeToken() { 00271 token_.Clear(); 00272 } 00273 00274 MakefileParser::MakefileParser() { 00275 tokenizer_.SetMakefileFlavor(); 00276 } 00277 00278 bool MakefileParser::Parse(const string& input, string* err) { 00279 tokenizer_.Start(input.data(), input.data() + input.size()); 00280 00281 tokenizer_.SkipWhitespace(true); 00282 00283 if (!tokenizer_.ReadIdent(&out_)) 00284 return tokenizer_.ErrorExpected("output filename", err); 00285 if (!tokenizer_.ExpectToken(Token::COLON, err)) 00286 return false; 00287 while (tokenizer_.PeekToken() == Token::IDENT) { 00288 StringPiece in; 00289 tokenizer_.ReadIdent(&in); 00290 ins_.push_back(in); 00291 } 00292 if (!tokenizer_.ExpectToken(Token::NEWLINE, err)) 00293 return false; 00294 if (!tokenizer_.ExpectToken(Token::TEOF, err)) 00295 return false; 00296 00297 return true; 00298 } 00299 00300 ManifestParser::ManifestParser(State* state, FileReader* file_reader) 00301 : state_(state), file_reader_(file_reader) { 00302 env_ = &state->bindings_; 00303 } 00304 bool ManifestParser::Load(const string& filename, string* err) { 00305 string contents; 00306 if (!file_reader_->ReadFile(filename, &contents, err)) 00307 return false; 00308 return Parse(contents, err); 00309 } 00310 00311 bool ManifestParser::Parse(const string& input, string* err) { 00312 tokenizer_.Start(input.data(), input.data() + input.size()); 00313 00314 tokenizer_.SkipWhitespace(true); 00315 00316 while (tokenizer_.token().type_ != Token::TEOF) { 00317 switch (tokenizer_.PeekToken()) { 00318 case Token::IDENT: { 00319 const Token& token = tokenizer_.token(); 00320 int len = token.end_ - token.pos_; 00321 if (len == 4 && memcmp(token.pos_, "rule", 4) == 0) { 00322 if (!ParseRule(err)) 00323 return false; 00324 } else if (len == 5 && memcmp(token.pos_, "build", 5) == 0) { 00325 if (!ParseEdge(err)) 00326 return false; 00327 } else if (len == 7 && memcmp(token.pos_, "default", 7) == 0) { 00328 if (!ParseDefaults(err)) 00329 return false; 00330 } else if ((len == 7 && memcmp(token.pos_, "include", 7) == 0) || 00331 (len == 8 && memcmp(token.pos_, "subninja", 8) == 0)) { 00332 if (!ParseFileInclude(err)) 00333 return false; 00334 } else { 00335 string name, value; 00336 if (!ParseLet(&name, &value, err)) 00337 return false; 00338 env_->AddBinding(name, value); 00339 } 00340 break; 00341 } 00342 case Token::TEOF: 00343 continue; 00344 default: 00345 return tokenizer_.Error("unhandled " + tokenizer_.token().AsString(), err); 00346 } 00347 tokenizer_.SkipWhitespace(true); 00348 } 00349 00350 return true; 00351 } 00352 00353 bool ManifestParser::ParseRule(string* err) { 00354 if (!tokenizer_.ExpectIdent("rule", err)) 00355 return false; 00356 string name; 00357 if (!tokenizer_.ReadIdent(&name)) 00358 return tokenizer_.ErrorExpected("rule name", err); 00359 if (!tokenizer_.Newline(err)) 00360 return false; 00361 00362 if (state_->LookupRule(name) != NULL) { 00363 *err = "duplicate rule '" + name + "'"; 00364 return false; 00365 } 00366 00367 Rule* rule = new Rule(name); // XXX scoped_ptr 00368 00369 if (tokenizer_.PeekToken() == Token::INDENT) { 00370 tokenizer_.ConsumeToken(); 00371 00372 while (tokenizer_.PeekToken() != Token::OUTDENT) { 00373 const char* let_loc = tokenizer_.token_.pos_; 00374 00375 string key; 00376 if (!ParseLetKey(&key, err)) 00377 return false; 00378 00379 EvalString* eval_target = NULL; 00380 if (key == "command") { 00381 eval_target = &rule->command_; 00382 } else if (key == "depfile") { 00383 eval_target = &rule->depfile_; 00384 } else if (key == "description") { 00385 eval_target = &rule->description_; 00386 } else if (key == "generator") { 00387 rule->generator_ = true; 00388 string dummy; 00389 if (!tokenizer_.ReadToNewline(&dummy, err)) 00390 return false; 00391 continue; 00392 } else if (key == "restat") { 00393 rule->restat_ = true; 00394 string dummy; 00395 if (!tokenizer_.ReadToNewline(&dummy, err)) 00396 return false; 00397 continue; 00398 } else { 00399 // Die on other keyvals for now; revisit if we want to add a 00400 // scope here. 00401 return tokenizer_.ErrorAt(let_loc, "unexpected variable '" + key + "'", 00402 err); 00403 } 00404 00405 if (!ParseLetValue(eval_target, err)) 00406 return false; 00407 } 00408 tokenizer_.ConsumeToken(); 00409 } 00410 00411 if (rule->command_.unparsed().empty()) 00412 return tokenizer_.Error("expected 'command =' line", err); 00413 00414 state_->AddRule(rule); 00415 return true; 00416 } 00417 00418 bool ManifestParser::ParseLet(string* key, string* value, string* err) { 00419 if (!ParseLetKey(key, err)) 00420 return false; 00421 00422 EvalString eval; 00423 if (!ParseLetValue(&eval, err)) 00424 return false; 00425 00426 *value = eval.Evaluate(env_); 00427 00428 return true; 00429 } 00430 00431 bool ManifestParser::ParseLetKey(string* key, string* err) { 00432 if (!tokenizer_.ReadIdent(key)) 00433 return tokenizer_.ErrorExpected("variable name", err); 00434 if (!tokenizer_.ExpectToken(Token::EQUALS, err)) 00435 return false; 00436 return true; 00437 } 00438 00439 bool ManifestParser::ParseLetValue(EvalString* eval, string* err) { 00440 // Backup the tokenizer state prior to consuming the line, for reporting 00441 // the source location in case of a parse error later. 00442 Tokenizer tokenizer_backup = tokenizer_; 00443 00444 // XXX should we tokenize here? it means we'll need to understand 00445 // command syntax, though... 00446 string value; 00447 if (!tokenizer_.ReadToNewline(&value, err)) 00448 return false; 00449 00450 string eval_err; 00451 size_t err_index; 00452 if (!eval->Parse(value, &eval_err, &err_index)) { 00453 value.clear(); 00454 // Advance the saved tokenizer state up to the error index to report the 00455 // error at the correct source location. 00456 tokenizer_backup.ReadToNewline(&value, err, err_index); 00457 return tokenizer_backup.Error(eval_err, err); 00458 } 00459 00460 return true; 00461 } 00462 00463 bool ManifestParser::ParseDefaults(string* err) { 00464 if (!tokenizer_.ExpectIdent("default", err)) 00465 return false; 00466 00467 string target; 00468 if (!tokenizer_.ReadIdent(&target)) 00469 return tokenizer_.ErrorExpected("target name", err); 00470 00471 do { 00472 EvalString eval; 00473 string eval_err; 00474 if (!eval.Parse(target, &eval_err)) 00475 return tokenizer_.Error(eval_err, err); 00476 string path = eval.Evaluate(env_); 00477 if (!CanonicalizePath(&path, &eval_err)) 00478 return tokenizer_.Error(eval_err, err); 00479 if (!state_->AddDefault(path, &eval_err)) 00480 return tokenizer_.Error(eval_err, err); 00481 } while (tokenizer_.ReadIdent(&target)); 00482 00483 if (!tokenizer_.Newline(err)) 00484 return false; 00485 00486 return true; 00487 } 00488 00489 bool ManifestParser::ParseEdge(string* err) { 00490 vector<string> ins, outs; 00491 00492 if (!tokenizer_.ExpectIdent("build", err)) 00493 return false; 00494 00495 for (;;) { 00496 if (tokenizer_.PeekToken() == Token::COLON) { 00497 tokenizer_.ConsumeToken(); 00498 break; 00499 } 00500 00501 string out; 00502 if (!tokenizer_.ReadIdent(&out)) 00503 return tokenizer_.ErrorExpected("output file list", err); 00504 outs.push_back(out); 00505 } 00506 // XXX check outs not empty 00507 00508 string rule_name; 00509 if (!tokenizer_.ReadIdent(&rule_name)) 00510 return tokenizer_.ErrorExpected("build command name", err); 00511 00512 const Rule* rule = state_->LookupRule(rule_name); 00513 if (!rule) 00514 return tokenizer_.Error("unknown build rule '" + rule_name + "'", err); 00515 00516 for (;;) { 00517 string in; 00518 if (!tokenizer_.ReadIdent(&in)) 00519 break; 00520 ins.push_back(in); 00521 } 00522 00523 // Add all order-only deps, counting how many as we go. 00524 int implicit = 0; 00525 if (tokenizer_.PeekToken() == Token::PIPE) { 00526 tokenizer_.ConsumeToken(); 00527 for (;;) { 00528 string in; 00529 if (!tokenizer_.ReadIdent(&in)) 00530 break; 00531 ins.push_back(in); 00532 ++implicit; 00533 } 00534 } 00535 00536 // Add all order-only deps, counting how many as we go. 00537 int order_only = 0; 00538 if (tokenizer_.PeekToken() == Token::PIPE2) { 00539 tokenizer_.ConsumeToken(); 00540 for (;;) { 00541 string in; 00542 if (!tokenizer_.ReadIdent(&in)) 00543 break; 00544 ins.push_back(in); 00545 ++order_only; 00546 } 00547 } 00548 00549 if (!tokenizer_.Newline(err)) 00550 return false; 00551 00552 // Default to using outer env. 00553 BindingEnv* env = env_; 00554 00555 // But use a nested env if there are variables in scope. 00556 if (tokenizer_.PeekToken() == Token::INDENT) { 00557 tokenizer_.ConsumeToken(); 00558 00559 // XXX scoped_ptr to handle error case. 00560 env = new BindingEnv; 00561 env->parent_ = env_; 00562 while (tokenizer_.PeekToken() != Token::OUTDENT) { 00563 string key, val; 00564 if (!ParseLet(&key, &val, err)) 00565 return false; 00566 env->AddBinding(key, val); 00567 } 00568 tokenizer_.ConsumeToken(); 00569 } 00570 00571 // Evaluate all variables in paths. 00572 // XXX: fast path skip the eval parse if there's no $ in the path? 00573 vector<string>* paths[2] = { &ins, &outs }; 00574 for (int p = 0; p < 2; ++p) { 00575 for (vector<string>::iterator i = paths[p]->begin(); 00576 i != paths[p]->end(); ++i) { 00577 EvalString eval; 00578 string eval_err; 00579 if (!eval.Parse(*i, &eval_err)) 00580 return tokenizer_.Error(eval_err, err); 00581 string path = eval.Evaluate(env); 00582 if (!CanonicalizePath(&path, &eval_err)) 00583 return tokenizer_.Error(eval_err, err); 00584 *i = path; 00585 } 00586 } 00587 00588 Edge* edge = state_->AddEdge(rule); 00589 edge->env_ = env; 00590 for (vector<string>::iterator i = ins.begin(); i != ins.end(); ++i) 00591 state_->AddIn(edge, *i); 00592 for (vector<string>::iterator i = outs.begin(); i != outs.end(); ++i) 00593 state_->AddOut(edge, *i); 00594 edge->implicit_deps_ = implicit; 00595 edge->order_only_deps_ = order_only; 00596 00597 return true; 00598 } 00599 00600 bool ManifestParser::ParseFileInclude(string* err) { 00601 string type; 00602 tokenizer_.ReadIdent(&type); 00603 00604 string path; 00605 if (!tokenizer_.ReadIdent(&path)) 00606 return tokenizer_.ErrorExpected("path to ninja file", err); 00607 00608 string contents; 00609 string read_err; 00610 if (!file_reader_->ReadFile(path, &contents, &read_err)) 00611 return tokenizer_.Error("loading " + path + ": " + read_err, err); 00612 00613 ManifestParser subparser(state_, file_reader_); 00614 if (type == "subninja") { 00615 // subninja: Construct a new scope for the new parser. 00616 subparser.env_ = new BindingEnv; 00617 subparser.env_->parent_ = env_; 00618 } else { 00619 // include: Reuse the current scope. 00620 subparser.env_ = env_; 00621 } 00622 00623 string sub_err; 00624 if (!subparser.Parse(contents, &sub_err)) 00625 return tokenizer_.Error("in '" + path + "': " + sub_err, err); 00626 00627 if (!tokenizer_.Newline(err)) 00628 return false; 00629 00630 return true; 00631 }
1.7.5.1