#!/usr/bin/mawk -f
# $MawkId: gdecl.awk,v 1.6 2020/09/19 14:04:25 tom Exp $
# parse a C declaration by recursive descent
#
# decl.awk with extra escapes \
################################################
############################################
# lexical scanner -- gobble()
# input : string s -- treated as a regular expression
# gobble eats SPACE, then eats longest match of s off front
# of global variable line.
# Cuts the matched part off of line
# gobble(s) -- lexical scanner primitive.
#   s  : a regular expression given as a string
#   xg : local scratch variable (callers do not pass it)
# Drops one leading SPACE from the global variable `line` (this happens
# whether or not the match below succeeds), then tries to match s at the
# very front of `line`.  On success the matched text is removed from
# `line` and returned; otherwise "" is returned.
function gobble(s, xg)
{
    sub(/^ /, "", line)

    # s is wrapped in parentheses so that the ^ anchor applies to the
    # whole expression, including every branch of an alternation.
    if (!match(line, "^" "(" s ")"))
        return ""

    xg = substr(line, 1, RLENGTH)
    # substr() yields "" when the start index is past the end of line
    line = substr(line, RLENGTH + 1)
    return xg
}
# ptr_to(n) -- build the phrase "pointer to" repeated n times, with the
# repetitions separated by single spaces.
#   n  : repetition count; truncated to an integer first
#   xp : local accumulator (callers do not pass it)
# Returns "" when n is zero or negative.
function ptr_to(n, xp)
{
    n = int(n)
    xp = ""
    while (n > 0) {
        # the first repetition has no leading separator
        xp = (xp == "") ? "pointer to" : xp " pointer to"
        n--
    }
    return xp
}
#recursively get a decl
# returns an english description of the declaration or
# "" if not a C declaration.
# decl() -- parse one declarator from the front of the global `line`.
#   xd, t, ptr_part : local variables (callers do not pass them)
# Uses the global patterns id, funct_mark and array_mark.
# Returns the English description built so far, or "" when the text is
# not a declarator this parser recognises.
function decl( xd, t, ptr_part)
{
    # Leading run of '*' (possibly mixed with spaces): one "pointer to"
    # per star.  The phrase is appended last, after any suffixes.
    xd = gobble("[* ]+")
    gsub(/ /, "", xd)
    ptr_part = ptr_to(length(xd))

    if (gobble("\\(") != "") {
        # Parenthesised declarator: recurse, then require the closing ')'.
        xd = decl()
        if (xd == "")
            return ""
        if (gobble("\\)") == "")
            return ""
    } else {
        # Otherwise an identifier must come next.
        xd = gobble(id)
        if (xd == "")
            return ""
        xd = xd ":"
    }

    # Any number of trailing "()" or "[ opt_size ]" suffixes.
    for (;;) {
        if (gobble(funct_mark) != "") {
            xd = xd " function returning"
            continue
        }
        t = gobble(array_mark)
        if (t == "")
            break
        gsub(/ /, "", t)
        xd = xd " array" t " of"
    }

    return xd " " ptr_part
}
# Set up the global regular expressions (kept as strings) used by the
# scanner and the parser.
BEGIN {
    # Type names.  The heuristic: a type is a C keyword, or written in
    # all CAPS, or ends in _t.  Other conventions could be added here.
    type0 = "int|char|short|long|double|float|void"
    type1 = "[_A-Z][_A-Z0-9]*" # types are CAPS
    type2 = "[_A-Za-z][_A-Za-z0-9]*_t" # end in _t
    types = "(" type0 "|" type1 "|" type2 ")"

    # Declarator pieces: an identifier, an empty parameter list "()",
    # and an array suffix "[ opt_size ]".
    id = "[_A-Za-z][_A-Za-z0-9]*"
    funct_mark = "\\([ \t]*\\)"
    array_mark = "\\[[ \t]*[_A-Za-z0-9]*[ \t]*\\]"
}
# Main rule: runs for every input line.  Strips comments, normalises
# white space, then tries to read  [scope] type declarator ;  from the
# line and prints an English description of it.  Lines on which no type
# is recognised, or whose declarator does not parse, print nothing.
{
    # Remove C comments.  A comment body is any run of non-star
    # characters, or stars followed by something that is neither '*' nor
    # '/'.  The comment ends at a run of stars followed by '/', or at
    # end of line when it is not closed on this line.  Handling star
    # runs explicitly keeps a closer such as "**/" from being missed,
    # which would otherwise swallow the code that follows the comment.
    gsub( /\/\*([^*]|\*+[^*\/])*(\*+\/|\**$)/ , " ") # remove comments
    gsub( /[ \t]+/, " ") # squeeze white space to a single space

    line = $0 # gobble() and decl() consume this global

    scope = gobble( "extern|static" )

    if ( ( type = gobble("(struct|union|enum) ") ) != "" )
    {
        type = type gobble(id) # get the tag
    }
    else
    {
        type = gobble("(un)?signed ") gobble( types )
    }

    if ( ! type ) next # no recognisable type: skip this line

    # A declarator must follow, terminated by ';'
    if ( ( (x = decl()) != "" ) && gobble( ";") )
    {
        x = x " " type
        if ( scope ) x = x " (" scope ")"
        gsub( / +/, " ", x) # collapse the doubled spaces left by joining
        print x
    }
}
|