Ninja
parsers.cc
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //     http://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 
00015 #include "parsers.h"
00016 
00017 #include <assert.h>
00018 #include <errno.h>
00019 #include <stdio.h>
00020 #include <string.h>
00021 
00022 #include "graph.h"
00023 #include "state.h"
00024 #include "util.h"
00025 
00026 string Token::AsString() const {
00027   switch (type_) {
00028   case IDENT:    return "'" + string(pos_, end_ - pos_) + "'";
00029   case UNKNOWN:  return "unknown '" + string(pos_, end_ - pos_) + "'";
00030   case NEWLINE:  return "newline";
00031   case EQUALS:   return "'='";
00032   case COLON:    return "':'";
00033   case PIPE:     return "'|'";
00034   case PIPE2:    return "'||'";
00035   case TEOF:     return "eof";
00036   case INDENT:   return "indenting in";
00037   case OUTDENT:  return "indenting out";
00038   case NONE:     break;
00039   }
00040   assert(false);
00041   return "";
00042 }
00043 
00044 bool Tokenizer::ErrorAt(const char* pos, const string& message, string* err) {
00045   // Re-scan the input, counting newlines so that we can compute the
00046   // correct position.
00047   int line = 1;
00048   const char* line_start = start_;
00049   for (const char* p = start_; p < pos; ++p) {
00050     if (*p == '\n') {
00051       ++line;
00052       line_start = p + 1;
00053     }
00054   }
00055   int col = pos - line_start + 1;
00056 
00057   char buf[1024];
00058   snprintf(buf, sizeof(buf),
00059            "line %d, col %d: %s", line, col, message.c_str());
00060   err->assign(buf);
00061   return false;
00062 }
00063 
00064 void Tokenizer::Start(const char* start, const char* end) {
00065   cur_line_ = cur_ = start_ = start;
00066   end_ = end;
00067 }
00068 
00069 bool Tokenizer::ErrorExpected(const string& expected, string* err) {
00070   return Error("expected " + expected + ", got " + token_.AsString(), err);
00071 }
00072 
00073 void Tokenizer::SkipWhitespace(bool newline) {
00074   if (token_.type_ == Token::NEWLINE && newline)
00075     Newline(NULL);
00076 
00077   const char kContinuation = makefile_flavor_ ? '\\' : '$';
00078 
00079   while (cur_ < end_) {
00080     if (*cur_ == ' ') {
00081       ++cur_;
00082     } else if (newline && *cur_ == '\n') {
00083       Newline(NULL);
00084     } else if (*cur_ == kContinuation && cur_ + 1 < end_ && cur_[1] == '\n') {
00085       ++cur_; ++cur_;
00086     } else if (*cur_ == '#' && cur_ == cur_line_) {
00087       while (cur_ < end_ && *cur_ != '\n')
00088         ++cur_;
00089       if (cur_ < end_ && *cur_ == '\n')
00090         ++cur_;
00091       cur_line_ = cur_;
00092     } else {
00093       break;
00094     }
00095   }
00096 }
00097 
00098 bool Tokenizer::Newline(string* err) {
00099   if (!ExpectToken(Token::NEWLINE, err))
00100     return false;
00101 
00102   return true;
00103 }
00104 
/// Return true if |c| is part of an identifier.
///
/// Identifier characters are the ASCII letters and digits plus the
/// punctuation characters + , - . / \ _ $.  Bytes outside the 7-bit ASCII
/// range are never identifier characters.
static bool IsIdentChar(char c) {
  // This function shows up hot on profiles.  Instead of the natural
  // 'if' statement, use a table as generated by this Python script:
  //    import string
  //    cs = set()
  //    for c in string.ascii_letters + string.digits + r'+,-./\_$':
  //        cs.add(ord(c))
  //    for i in range(128):
  //        if i in cs:
  //            print '1,',
  //        else:
  //            print '0,',
  //        if i % 16 == 15:
  //            print
  static const bool kIdents[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
  };
  // The table only covers 0..127.  |char| may be signed, in which case a
  // byte >= 0x80 would turn into a negative index; if it is unsigned the
  // index would run past the end.  Convert through unsigned char and
  // bounds-check so such bytes are rejected instead of read out of bounds.
  const unsigned char index = static_cast<unsigned char>(c);
  return index < sizeof(kIdents) / sizeof(kIdents[0]) && kIdents[index];
}
00132 
00133 bool Tokenizer::ExpectToken(Token::Type expected, string* err) {
00134   PeekToken();
00135   if (token_.type_ != expected)
00136     return ErrorExpected(Token(expected).AsString(), err);
00137   ConsumeToken();
00138   return true;
00139 }
00140 
00141 bool Tokenizer::ExpectIdent(const char* expected, string* err) {
00142   PeekToken();
00143   if (token_.type_ != Token::IDENT ||
00144       strncmp(token_.pos_, expected, token_.end_ - token_.pos_) != 0) {
00145     return ErrorExpected(string("'") + expected + "'", err);
00146   }
00147   ConsumeToken();
00148   return true;
00149 }
00150 
00151 bool Tokenizer::ReadIdent(StringPiece* out) {
00152   PeekToken();
00153   if (token_.type_ != Token::IDENT)
00154     return false;
00155   out->str_ = token_.pos_;
00156   out->len_ = token_.end_ - token_.pos_;
00157   ConsumeToken();
00158   return true;
00159 }
00160 
00161 bool Tokenizer::ReadIdent(string* out) {
00162   StringPiece token;
00163   if (!ReadIdent(&token))
00164     return false;
00165   out->assign(token.str_, token.len_);
00166   return true;
00167 }
00168 
00169 // A note on backslashes in Makefiles, from reading the docs:
00170 // Backslash-newline is the line continuation character.
00171 // Backslash-# escapes a # (otherwise meaningful as a comment start).
00172 // Backslash-% escapes a % (otherwise meaningful as a special).
00173 // Finally, quoting the GNU manual, "Backslashes that are not in danger
00174 // of quoting ‘%’ characters go unmolested."
00175 // How do you end a line with a backslash?  The netbsd Make docs suggest
00176 // reading the result of a shell command echoing a backslash!
00177 //
00178 // Rather than implement the above, we do the simpler thing here.
00179 // If anyone actually has depfiles that rely on the more complicated
00180 // behavior we can adjust this.
00181 bool Tokenizer::ReadToNewline(string *text, string* err, size_t max_length) {
00182   // XXX token_.clear();
00183   const char kContinuation = makefile_flavor_ ? '\\' : '$';
00184   while (cur_ < end_ && *cur_ != '\n') {
00185     if (*cur_ == kContinuation) {
00186       // Might be a line continuation; peek ahead to check.
00187       if (cur_ + 1 >= end_)
00188         return Error("unexpected eof", err);
00189       if (*(cur_ + 1) == '\n') {
00190         // Let SkipWhitespace handle the continuation logic.
00191         SkipWhitespace();
00192         continue;
00193       }
00194 
00195       // Otherwise, just treat it like a normal character.
00196       text->push_back(*cur_);
00197       ++cur_;
00198     } else {
00199       text->push_back(*cur_);
00200       ++cur_;
00201     }
00202     if (text->size() >= max_length) {
00203       token_.pos_ = cur_;
00204       return false;
00205     }
00206   }
00207   return Newline(err);
00208 }
00209 
/// Return the type of the next token without consuming it.
///
/// If a token is already pending (its type is not NONE) that type is
/// returned unchanged.  Otherwise a token is scanned starting at cur_ and
/// stored in token_; it stays pending until ConsumeToken() is called.
Token::Type Tokenizer::PeekToken() {
  if (token_.type_ != Token::NONE)
    return token_.type_;

  token_.pos_ = cur_;
  // Indentation tokens exist only outside makefile flavor.  cur_indent_ is
  // reset to -1 whenever a newline is tokenized (see below), so this branch
  // runs for the first token scanned after a newline.
  if (!makefile_flavor_ && cur_indent_ == -1) {
    cur_indent_ = cur_ - cur_line_;
    if (cur_indent_ != last_indent_) {
      // The indentation differs from the previously recorded one: report
      // the change as its own token before anything on the line.
      if (cur_indent_ > last_indent_) {
        token_.type_ = Token::INDENT;
      } else if (cur_indent_ < last_indent_) {
        token_.type_ = Token::OUTDENT;
      }
      last_indent_ = cur_indent_;
      return token_.type_;
    }
  }

  if (cur_ >= end_) {
    token_.type_ = Token::TEOF;
    return token_.type_;
  }

  if (IsIdentChar(*cur_)) {
    // An identifier is a maximal run of identifier characters.
    while (cur_ < end_ && IsIdentChar(*cur_)) {
      ++cur_;
    }
    token_.end_ = cur_;
    token_.type_ = Token::IDENT;
  } else if (*cur_ == ':') {
    token_.type_ = Token::COLON;
    ++cur_;
  } else if (*cur_ == '=') {
    token_.type_ = Token::EQUALS;
    ++cur_;
  } else if (*cur_ == '|') {
    // "||" is a single PIPE2 token; a lone "|" is PIPE.
    if (cur_ + 1 < end_ && cur_[1] == '|') {
      token_.type_ = Token::PIPE2;
      cur_ += 2;
    } else {
      token_.type_ = Token::PIPE;
      ++cur_;
    }
  } else if (*cur_ == '\n') {
    token_.type_ = Token::NEWLINE;
    ++cur_;
    // A new line starts here; -1 requests a fresh indentation measurement
    // on the next scan.
    cur_line_ = cur_;
    cur_indent_ = -1;
  }

  // Skip what follows the token so cur_ rests on the start of the next one.
  SkipWhitespace();

  if (token_.type_ == Token::NONE) {
    // Nothing above matched.  Note that cur_ is not advanced past the
    // offending character by this function.
    token_.type_ = Token::UNKNOWN;
    token_.end_ = cur_ + 1;
  }

  return token_.type_;
}
00269 
/// Discard the pending token by clearing token_.  PeekToken() only scans a
/// new token when token_'s type is NONE -- presumably Clear() resets it to
/// that; confirm against the Token definition.
void Tokenizer::ConsumeToken() {
  token_.Clear();
}
00273 
/// Construct a parser for Makefile-style dependency files.  Switches the
/// tokenizer to makefile flavor; in this file that flavor selects '\\' as
/// the line-continuation character and disables indentation tokens
/// (assuming SetMakefileFlavor() sets makefile_flavor_ -- it is defined
/// elsewhere).
MakefileParser::MakefileParser() {
  tokenizer_.SetMakefileFlavor();
}
00277 
00278 bool MakefileParser::Parse(const string& input, string* err) {
00279   tokenizer_.Start(input.data(), input.data() + input.size());
00280 
00281   tokenizer_.SkipWhitespace(true);
00282 
00283   if (!tokenizer_.ReadIdent(&out_))
00284     return tokenizer_.ErrorExpected("output filename", err);
00285   if (!tokenizer_.ExpectToken(Token::COLON, err))
00286     return false;
00287   while (tokenizer_.PeekToken() == Token::IDENT) {
00288     StringPiece in;
00289     tokenizer_.ReadIdent(&in);
00290     ins_.push_back(in);
00291   }
00292   if (!tokenizer_.ExpectToken(Token::NEWLINE, err))
00293     return false;
00294   if (!tokenizer_.ExpectToken(Token::TEOF, err))
00295     return false;
00296 
00297   return true;
00298 }
00299 
00300 ManifestParser::ManifestParser(State* state, FileReader* file_reader)
00301   : state_(state), file_reader_(file_reader) {
00302   env_ = &state->bindings_;
00303 }
00304 bool ManifestParser::Load(const string& filename, string* err) {
00305   string contents;
00306   if (!file_reader_->ReadFile(filename, &contents, err))
00307     return false;
00308   return Parse(contents, err);
00309 }
00310 
00311 bool ManifestParser::Parse(const string& input, string* err) {
00312   tokenizer_.Start(input.data(), input.data() + input.size());
00313 
00314   tokenizer_.SkipWhitespace(true);
00315 
00316   while (tokenizer_.token().type_ != Token::TEOF) {
00317     switch (tokenizer_.PeekToken()) {
00318       case Token::IDENT: {
00319         const Token& token = tokenizer_.token();
00320         int len = token.end_ - token.pos_;
00321         if (len == 4 && memcmp(token.pos_, "rule", 4) == 0) {
00322           if (!ParseRule(err))
00323             return false;
00324         } else if (len == 5 && memcmp(token.pos_, "build", 5) == 0) {
00325           if (!ParseEdge(err))
00326             return false;
00327         } else if (len == 7 && memcmp(token.pos_, "default", 7) == 0) {
00328           if (!ParseDefaults(err))
00329             return false;
00330         } else if ((len == 7 && memcmp(token.pos_, "include", 7) == 0) ||
00331                    (len == 8 && memcmp(token.pos_, "subninja", 8) == 0)) {
00332           if (!ParseFileInclude(err))
00333             return false;
00334         } else {
00335           string name, value;
00336           if (!ParseLet(&name, &value, err))
00337             return false;
00338           env_->AddBinding(name, value);
00339         }
00340         break;
00341       }
00342       case Token::TEOF:
00343         continue;
00344       default:
00345         return tokenizer_.Error("unhandled " + tokenizer_.token().AsString(), err);
00346     }
00347     tokenizer_.SkipWhitespace(true);
00348   }
00349 
00350   return true;
00351 }
00352 
00353 bool ManifestParser::ParseRule(string* err) {
00354   if (!tokenizer_.ExpectIdent("rule", err))
00355     return false;
00356   string name;
00357   if (!tokenizer_.ReadIdent(&name))
00358     return tokenizer_.ErrorExpected("rule name", err);
00359   if (!tokenizer_.Newline(err))
00360     return false;
00361 
00362   if (state_->LookupRule(name) != NULL) {
00363     *err = "duplicate rule '" + name + "'";
00364     return false;
00365   }
00366 
00367   Rule* rule = new Rule(name);  // XXX scoped_ptr
00368 
00369   if (tokenizer_.PeekToken() == Token::INDENT) {
00370     tokenizer_.ConsumeToken();
00371 
00372     while (tokenizer_.PeekToken() != Token::OUTDENT) {
00373       const char* let_loc = tokenizer_.token_.pos_;
00374 
00375       string key;
00376       if (!ParseLetKey(&key, err))
00377         return false;
00378 
00379       EvalString* eval_target = NULL;
00380       if (key == "command") {
00381         eval_target = &rule->command_;
00382       } else if (key == "depfile") {
00383         eval_target = &rule->depfile_;
00384       } else if (key == "description") {
00385         eval_target = &rule->description_;
00386       } else if (key == "generator") {
00387         rule->generator_ = true;
00388         string dummy;
00389         if (!tokenizer_.ReadToNewline(&dummy, err))
00390           return false;
00391         continue;
00392       } else if (key == "restat") {
00393         rule->restat_ = true;
00394         string dummy;
00395         if (!tokenizer_.ReadToNewline(&dummy, err))
00396           return false;
00397         continue;
00398       } else {
00399         // Die on other keyvals for now; revisit if we want to add a
00400         // scope here.
00401         return tokenizer_.ErrorAt(let_loc, "unexpected variable '" + key + "'",
00402                                   err);
00403       }
00404 
00405       if (!ParseLetValue(eval_target, err))
00406         return false;
00407     }
00408     tokenizer_.ConsumeToken();
00409   }
00410 
00411   if (rule->command_.unparsed().empty())
00412     return tokenizer_.Error("expected 'command =' line", err);
00413 
00414   state_->AddRule(rule);
00415   return true;
00416 }
00417 
00418 bool ManifestParser::ParseLet(string* key, string* value, string* err) {
00419   if (!ParseLetKey(key, err))
00420     return false;
00421 
00422   EvalString eval;
00423   if (!ParseLetValue(&eval, err))
00424     return false;
00425 
00426   *value = eval.Evaluate(env_);
00427 
00428   return true;
00429 }
00430 
00431 bool ManifestParser::ParseLetKey(string* key, string* err) {
00432   if (!tokenizer_.ReadIdent(key))
00433     return tokenizer_.ErrorExpected("variable name", err);
00434   if (!tokenizer_.ExpectToken(Token::EQUALS, err))
00435     return false;
00436   return true;
00437 }
00438 
00439 bool ManifestParser::ParseLetValue(EvalString* eval, string* err) {
00440   // Backup the tokenizer state prior to consuming the line, for reporting
00441   // the source location in case of a parse error later.
00442   Tokenizer tokenizer_backup = tokenizer_;
00443 
00444   // XXX should we tokenize here?  it means we'll need to understand
00445   // command syntax, though...
00446   string value;
00447   if (!tokenizer_.ReadToNewline(&value, err))
00448     return false;
00449 
00450   string eval_err;
00451   size_t err_index;
00452   if (!eval->Parse(value, &eval_err, &err_index)) {
00453     value.clear();
00454     // Advance the saved tokenizer state up to the error index to report the
00455     // error at the correct source location.
00456     tokenizer_backup.ReadToNewline(&value, err, err_index);
00457     return tokenizer_backup.Error(eval_err, err);
00458   }
00459 
00460   return true;
00461 }
00462 
00463 bool ManifestParser::ParseDefaults(string* err) {
00464   if (!tokenizer_.ExpectIdent("default", err))
00465     return false;
00466 
00467   string target;
00468   if (!tokenizer_.ReadIdent(&target))
00469     return tokenizer_.ErrorExpected("target name", err);
00470 
00471   do {
00472     EvalString eval;
00473     string eval_err;
00474     if (!eval.Parse(target, &eval_err))
00475       return tokenizer_.Error(eval_err, err);
00476     string path = eval.Evaluate(env_);
00477     if (!CanonicalizePath(&path, &eval_err))
00478       return tokenizer_.Error(eval_err, err);
00479     if (!state_->AddDefault(path, &eval_err))
00480       return tokenizer_.Error(eval_err, err);
00481   } while (tokenizer_.ReadIdent(&target));
00482 
00483   if (!tokenizer_.Newline(err))
00484     return false;
00485 
00486   return true;
00487 }
00488 
00489 bool ManifestParser::ParseEdge(string* err) {
00490   vector<string> ins, outs;
00491 
00492   if (!tokenizer_.ExpectIdent("build", err))
00493     return false;
00494 
00495   for (;;) {
00496     if (tokenizer_.PeekToken() == Token::COLON) {
00497       tokenizer_.ConsumeToken();
00498       break;
00499     }
00500 
00501     string out;
00502     if (!tokenizer_.ReadIdent(&out))
00503       return tokenizer_.ErrorExpected("output file list", err);
00504     outs.push_back(out);
00505   }
00506   // XXX check outs not empty
00507 
00508   string rule_name;
00509   if (!tokenizer_.ReadIdent(&rule_name))
00510     return tokenizer_.ErrorExpected("build command name", err);
00511 
00512   const Rule* rule = state_->LookupRule(rule_name);
00513   if (!rule)
00514     return tokenizer_.Error("unknown build rule '" + rule_name + "'", err);
00515 
00516   for (;;) {
00517     string in;
00518     if (!tokenizer_.ReadIdent(&in))
00519       break;
00520     ins.push_back(in);
00521   }
00522 
00523   // Add all order-only deps, counting how many as we go.
00524   int implicit = 0;
00525   if (tokenizer_.PeekToken() == Token::PIPE) {
00526     tokenizer_.ConsumeToken();
00527     for (;;) {
00528       string in;
00529       if (!tokenizer_.ReadIdent(&in))
00530         break;
00531       ins.push_back(in);
00532       ++implicit;
00533     }
00534   }
00535 
00536   // Add all order-only deps, counting how many as we go.
00537   int order_only = 0;
00538   if (tokenizer_.PeekToken() == Token::PIPE2) {
00539     tokenizer_.ConsumeToken();
00540     for (;;) {
00541       string in;
00542       if (!tokenizer_.ReadIdent(&in))
00543         break;
00544       ins.push_back(in);
00545       ++order_only;
00546     }
00547   }
00548 
00549   if (!tokenizer_.Newline(err))
00550     return false;
00551 
00552   // Default to using outer env.
00553   BindingEnv* env = env_;
00554 
00555   // But use a nested env if there are variables in scope.
00556   if (tokenizer_.PeekToken() == Token::INDENT) {
00557     tokenizer_.ConsumeToken();
00558 
00559     // XXX scoped_ptr to handle error case.
00560     env = new BindingEnv;
00561     env->parent_ = env_;
00562     while (tokenizer_.PeekToken() != Token::OUTDENT) {
00563       string key, val;
00564       if (!ParseLet(&key, &val, err))
00565         return false;
00566       env->AddBinding(key, val);
00567     }
00568     tokenizer_.ConsumeToken();
00569   }
00570 
00571   // Evaluate all variables in paths.
00572   // XXX: fast path skip the eval parse if there's no $ in the path?
00573   vector<string>* paths[2] = { &ins, &outs };
00574   for (int p = 0; p < 2; ++p) {
00575     for (vector<string>::iterator i = paths[p]->begin();
00576          i != paths[p]->end(); ++i) {
00577       EvalString eval;
00578       string eval_err;
00579       if (!eval.Parse(*i, &eval_err))
00580         return tokenizer_.Error(eval_err, err);
00581       string path = eval.Evaluate(env);
00582       if (!CanonicalizePath(&path, &eval_err))
00583         return tokenizer_.Error(eval_err, err);
00584       *i = path;
00585     }
00586   }
00587 
00588   Edge* edge = state_->AddEdge(rule);
00589   edge->env_ = env;
00590   for (vector<string>::iterator i = ins.begin(); i != ins.end(); ++i)
00591     state_->AddIn(edge, *i);
00592   for (vector<string>::iterator i = outs.begin(); i != outs.end(); ++i)
00593     state_->AddOut(edge, *i);
00594   edge->implicit_deps_ = implicit;
00595   edge->order_only_deps_ = order_only;
00596 
00597   return true;
00598 }
00599 
00600 bool ManifestParser::ParseFileInclude(string* err) {
00601   string type;
00602   tokenizer_.ReadIdent(&type);
00603 
00604   string path;
00605   if (!tokenizer_.ReadIdent(&path))
00606     return tokenizer_.ErrorExpected("path to ninja file", err);
00607 
00608   string contents;
00609   string read_err;
00610   if (!file_reader_->ReadFile(path, &contents, &read_err))
00611     return tokenizer_.Error("loading " + path + ": " + read_err, err);
00612 
00613   ManifestParser subparser(state_, file_reader_);
00614   if (type == "subninja") {
00615     // subninja: Construct a new scope for the new parser.
00616     subparser.env_ = new BindingEnv;
00617     subparser.env_->parent_ = env_;
00618   } else {
00619     // include: Reuse the current scope.
00620     subparser.env_ = env_;
00621   }
00622 
00623   string sub_err;
00624   if (!subparser.Parse(contents, &sub_err))
00625     return tokenizer_.Error("in '" + path + "': " + sub_err, err);
00626 
00627   if (!tokenizer_.Newline(err))
00628     return false;
00629 
00630   return true;
00631 }