class PuppetLint::Lexer
Internal: The puppet-lint lexer. Converts your manifest into its tokenised form.
Constants
- APP_MANAGEMENT_TOKENS
Internal: A Hash whose keys are Strings representing reserved keywords in the Puppet DSL when Application Management is enabled. From github.com/puppetlabs/puppet/blob/master/lib/puppet/pops/parser/lexer2.rb#L142-L159 or thereabouts. Currently unused.
- FORMATTING_TOKENS
Internal: A Hash whose keys are Symbols representing token types which are considered to be formatting tokens (i.e. tokens that don’t contain code).
- KEYWORDS
Internal: A Hash whose keys are Strings representing reserved keywords in the Puppet DSL. From github.com/puppetlabs/puppet/blob/master/lib/puppet/pops/parser/lexer2.rb#L116-L137 or thereabouts
- KNOWN_TOKENS
Internal: An Array of Arrays containing tokens that can be described by a single regular expression. Each sub-Array contains 2 elements, the name of the token as a Symbol and a regular expression describing the value of the token.
- LINE_END_RE
- NAME_RE
- REGEX_PREV_TOKENS
Internal: A Hash whose keys are Symbols representing token types which a regular expression can follow.
- WHITESPACE_RE
Internal: some commonly used regular expressions. \t == tab, \v == vertical tab, \f == form feed, \p{Zs} == ASCII + Unicode non-linebreaking whitespace.
Public Class Methods
# File lib/puppet-lint/lexer.rb, line 46 def self.heredoc_queue @heredoc_queue ||= [] end
# File lib/puppet-lint/lexer.rb, line 41 def initialize @line_no = 1 @column = 1 end
Public Instance Methods
# File lib/puppet-lint/lexer.rb, line 50 def heredoc_queue self.class.heredoc_queue end
Internal: Create a new PuppetLint::Lexer::Token
object, calculate its line number and column and then add it to the Linked List of tokens.
type - The Symbol token type. value - The token value. opts - A Hash of additional values required to determine line number and
column: :line - The Integer line number if calculated externally. :column - The Integer column number if calculated externally. :raw - The String raw value of the token (if necessary).
Returns the instantiated PuppetLint::Lexer::Token
object.
# File lib/puppet-lint/lexer.rb, line 349 def new_token(type, value, *args) # This bit of magic is used instead of an "opts = {}" argument so that we # can safely deprecate the old "length" parameter that might still be # passed by 3rd party plugins that haven't updated yet. opts = args.last.is_a?(Hash) ? args.last : {} # column number is calculated at the end of this method by calling # to_manifest on the token. Because the string tokens (DQPRE, DQMID etc) # are parsed before the variable token, they default to assuming that # they are followed by an enclosed variable and we need to remove 2 from # the column number if we encounter an unenclosed variable because of the # missing ${ at the end of the token value. @column -= 2 if type == :UNENC_VARIABLE column = opts[:column] || @column line_no = opts[:line] || @line_no token = Token.new(type, value, line_no, column) unless tokens.last.nil? token.prev_token = tokens.last tokens.last.next_token = token unless FORMATTING_TOKENS.include?(token.type) prev_nf_idx = tokens.rindex { |r| !FORMATTING_TOKENS.include?(r.type) } unless prev_nf_idx.nil? prev_nf_token = tokens[prev_nf_idx] prev_nf_token.next_code_token = token token.prev_code_token = prev_nf_token end end end token.raw = opts[:raw] if opts[:raw] if type == :NEWLINE @line_no += 1 @column = 1 else lines = token.to_manifest.split(LINE_END_RE, -1) @line_no += lines.length - 1 if lines.length > 1 # if the token renders to multiple lines, set the column state to the # length of the last line plus 1 (because column numbers are # 1 indexed) @column = lines.last.size + 1 else @column += (lines.last || '').size end end token end
Internal: Given the tokens already processed, determine if the next token could be a regular expression.
Returns true if the next token could be a regex, otherwise return false.
# File lib/puppet-lint/lexer.rb, line 327 def possible_regex? prev_token = tokens.reject { |r| FORMATTING_TOKENS.include?(r.type) }.last return true if prev_token.nil? REGEX_PREV_TOKENS.include?(prev_token.type) end
# File lib/puppet-lint/lexer.rb, line 432 def process_heredoc_segments(segments) return if segments.empty? end_tag = segments.delete_at(-1) if segments.length == 1 tokens << new_token(:HEREDOC, segments[0][1], :raw => "#{segments[0][1]}#{end_tag[1]}") return end pre_segment = segments.delete_at(0) post_segment = segments.delete_at(-1) tokens << new_token(:HEREDOC_PRE, pre_segment[1]) segments.each do |segment| case segment[0] when :INTERP lexer = PuppetLint::Lexer.new lexer.tokenise(segment[1]) lexer.tokens.each_with_index do |t, i| type = i.zero? && t.type == :NAME ? :VARIABLE : t.type tokens << new_token(type, t.value, :raw => t.raw) end when :UNENC_VAR tokens << new_token(:UNENC_VARIABLE, segment[1].gsub(%r{\A\$}, '')) else tokens << new_token(:HEREDOC_MID, segment[1]) end end tokens << new_token(:HEREDOC_POST, post_segment[1], :raw => "#{post_segment[1]}#{end_tag[1]}") end
# File lib/puppet-lint/lexer.rb, line 402 def process_string_segments(segments) return if segments.empty? if segments.length == 1 tokens << new_token(:STRING, segments[0][1]) return end pre_segment = segments.delete_at(0) post_segment = segments.delete_at(-1) tokens << new_token(:DQPRE, pre_segment[1]) segments.each do |segment| case segment[0] when :INTERP lexer = PuppetLint::Lexer.new lexer.tokenise(segment[1]) lexer.tokens.each_with_index do |t, i| type = i.zero? && (t.type == :NAME || KEYWORDS.include?(t.type.to_s.downcase)) ? :VARIABLE : t.type tokens << new_token(type, t.value, :raw => t.raw) end when :UNENC_VAR tokens << new_token(:UNENC_VARIABLE, segment[1].gsub(%r{\A\$}, '')) else tokens << new_token(:DQMID, segment[1]) end end tokens << new_token(:DQPOST, post_segment[1]) end
Internal: Convert a Puppet manifest into tokens.
code - The Puppet manifest to be tokenised as a String
.
Returns an Array of PuppetLint::Lexer::Token
objects. Raises PuppetLint::LexerError
if it encounters unexpected characters (usually the result of syntax errors).
# File lib/puppet-lint/lexer.rb, line 200
# Internal: Convert a Puppet manifest into tokens.
#
# code - The Puppet manifest to be tokenised as a String.
#
# Walks the manifest one chunk at a time: simple tokens are matched against
# KNOWN_TOKENS first, then the harder cases (variables, strings, heredocs,
# comments, regexes, line endings) are handled by the if/elsif chain below.
#
# Returns an Array of PuppetLint::Lexer::Token objects.
# Raises PuppetLint::LexerError if it encounters unexpected characters
# (usually the result of syntax errors).
def tokenise(code)
  i = 0

  while i < code.size
    chunk = code[i..-1]

    found = false

    # First try the single-regex token types. NAMEs that are reserved words
    # are promoted to their own keyword token type (e.g. :CLASS).
    KNOWN_TOKENS.each do |type, regex|
      value = chunk[regex, 1]
      next if value.nil?

      i += value.size
      tokens << if type == :NAME && KEYWORDS.include?(value)
                  new_token(value.upcase.to_sym, value)
                else
                  new_token(type, value)
                end
      found = true
      break
    end
    next if found

    if var_name = chunk[%r{\A\$((::)?(\w+(-\w+)*::)*\w+(-\w+)*(\[.+?\])*)}, 1]
      # Enclosed or namespaced variable, e.g. $::foo::bar[0].
      length = var_name.size + 1
      opts = if chunk.start_with?('$')
               { :raw => "$#{var_name}" }
             else
               {}
             end
      tokens << new_token(:VARIABLE, var_name, opts)

    elsif chunk =~ %r{\A'.*?'}m
      # Single-quoted string; scan to the first unescaped closing quote.
      str_content = StringScanner.new(code[i + 1..-1]).scan_until(%r{(\A|[^\\])(\\\\)*'}m)
      length = str_content.size + 1
      tokens << new_token(:SSTRING, str_content[0..-2])

    elsif chunk.start_with?('"')
      # Double-quoted string; delegate to the slurper, which also splits
      # out interpolated segments.
      slurper = PuppetLint::Lexer::StringSlurper.new(code[i + 1..-1])
      begin
        string_segments = slurper.parse
        process_string_segments(string_segments)
        length = slurper.consumed_bytes + 1
      rescue PuppetLint::Lexer::StringSlurper::UnterminatedStringError
        raise PuppetLint::LexerError.new(@line_no, @column, 'unterminated string')
      end

    elsif heredoc_name = chunk[%r{\A@\(("?.+?"?(:.+?)?#{WHITESPACE_RE}*(/.*?)?)\)}, 1]
      # Heredoc opening tag, e.g. @("EOT"/n). The body is consumed later,
      # when the next line ending is reached (see the NEWLINE branches).
      heredoc_queue << heredoc_name
      tokens << new_token(:HEREDOC_OPEN, heredoc_name)
      length = heredoc_name.size + 3

    elsif comment = chunk[%r{\A(#[^\r\n]*)#{LINE_END_RE}?}, 1]
      # length is taken before sub! strips the leading # marker.
      length = comment.size
      comment.sub!(%r{#}, '')
      tokens << new_token(:COMMENT, comment)

    elsif slash_comment = chunk[%r{\A(//[^\r\n]*)#{LINE_END_RE}?}, 1]
      length = slash_comment.size
      slash_comment.sub!(%r{//}, '')
      tokens << new_token(:SLASH_COMMENT, slash_comment)

    elsif mlcomment = chunk[%r{\A(/\*.*?\*/)}m, 1]
      # Multi-line /* ... */ comment; keep the raw text, but strip the
      # delimiters and leading asterisks for the token value.
      length = mlcomment.size
      mlcomment_raw = mlcomment.dup
      mlcomment.sub!(%r{\A/\* ?}, '')
      mlcomment.sub!(%r{ ?\*/\Z}, '')
      mlcomment.gsub!(%r{^ *\*}, '')
      tokens << new_token(:MLCOMMENT, mlcomment, :raw => mlcomment_raw)

    elsif chunk.match(%r{\A/.*?/}m) && possible_regex?
      # A / is only a regex delimiter when the previous code token allows
      # it (otherwise it falls through to :DIV below).
      str_content = StringScanner.new(code[i + 1..-1]).scan_until(%r{(\A|[^\\])(\\\\)*/}m)
      length = str_content.size + 1
      tokens << new_token(:REGEX, str_content[0..-2])

    elsif eolindent = chunk[%r{\A(#{LINE_END_RE}#{WHITESPACE_RE}+)}m, 1]
      # Line ending followed by indentation.
      eol = eolindent[%r{\A(#{LINE_END_RE})}m, 1]
      tokens << new_token(:NEWLINE, eol)
      length = eol.size

      if heredoc_queue.empty?
        indent = eolindent[%r{\A#{LINE_END_RE}+(#{WHITESPACE_RE}+)}m, 1]
        tokens << new_token(:INDENT, indent)
        length += indent.size
      else
        # A heredoc was opened on this line: its body starts here, so slurp
        # it instead of emitting an INDENT token.
        heredoc_tag = heredoc_queue.shift
        slurper = PuppetLint::Lexer::StringSlurper.new(code[i + length..-1])
        heredoc_segments = slurper.parse_heredoc(heredoc_tag)
        process_heredoc_segments(heredoc_segments)
        length += slurper.consumed_bytes
      end

    elsif eol = chunk[%r{\A(#{LINE_END_RE})}, 1]
      # Bare line ending with no following indentation.
      length = eol.size
      tokens << new_token(:NEWLINE, eol)

      unless heredoc_queue.empty?
        heredoc_tag = heredoc_queue.shift
        slurper = PuppetLint::Lexer::StringSlurper.new(code[i + length..-1])
        heredoc_segments = slurper.parse_heredoc(heredoc_tag)
        process_heredoc_segments(heredoc_segments)
        length += slurper.consumed_bytes
      end

    elsif chunk.start_with?('/')
      length = 1
      tokens << new_token(:DIV, '/')

    elsif chunk.start_with?('@')
      length = 1
      tokens << new_token(:AT, '@')

    else
      raise PuppetLint::LexerError.new(@line_no, @column)
    end

    i += length
  end

  tokens
end
Internal: Access the internal token storage.
Returns an Array of PuppetLint::Lexer::Token objects.
# File lib/puppet-lint/lexer.rb, line 189 def tokens @tokens ||= [] end