Marlin/buildroot/share/PlatformIO/scripts/schema.py

532 lines
24 KiB
Python
Executable file

#!/usr/bin/env python3
"""
schema.py
Extract firmware configuration into structured JSON or YAML schema format.
Used by signature.py via common-dependencies.py to generate a schema file during the
PlatformIO build when CONFIG_EXPORT is defined in the configuration.
This script can also be run standalone from within the Marlin repo, and is a companion to
abm/js/schema.js in the MarlinFirmware/AutoBuildMarlin project, which has been extended to
evaluate conditions and can determine what options are actually enabled, not just which
options are uncommented. That will be migrated to this script for standalone migration.
Usage: schema.py [-h] [some|json|jsons|group|yml|yaml]
Process Marlin firmware configuration files (Configuration.h and Configuration_adv.h)
to produce structured output suitable for documentation, tooling, or automated processing.
Positional arguments:
some Generate both JSON and YAML output (schema.json and schema.yml)
json Generate JSON output (schema.json)
jsons Generate grouped JSON output with wildcard options (schema.json and schema_grouped.json)
group Generate grouped JSON output only (schema_grouped.json)
yml Generate YAML output (schema.yml)
yaml Same as 'yml'
Optional arguments:
-h, --help Show this help message and exit
"""
import re, json
from pathlib import Path
def extend_dict(d:dict, k:tuple):
if len(k) >= 1 and k[0] not in d:
d[k[0]] = {}
if len(k) >= 2 and k[1] not in d[k[0]]:
d[k[0]][k[1]] = {}
if len(k) >= 3 and k[2] not in d[k[0]][k[1]]:
d[k[0]][k[1]][k[2]] = {}
grouping_patterns = [
re.compile(r'^([XYZIJKUVW]|[XYZ]2|Z[34]|E[0-7])$'),
re.compile(r'^AXIS\d$'),
re.compile(r'^(MIN|MAX)$'),
re.compile(r'^[0-8]$'),
re.compile(r'^HOTEND[0-7]$'),
re.compile(r'^(HOTENDS|BED|PROBE|COOLER)$'),
re.compile(r'^[XYZIJKUVW]M(IN|AX)$')
]
# If the indexed part of the option name matches a pattern
# then add it to the dictionary.
def find_grouping(gdict, filekey, sectkey, optkey, pindex):
optparts = optkey.split('_')
if 1 < len(optparts) > pindex:
for patt in grouping_patterns:
if patt.match(optparts[pindex]):
subkey = optparts[pindex]
modkey = '_'.join(optparts)
optparts[pindex] = '*'
wildkey = '_'.join(optparts)
kkey = f'{filekey}|{sectkey}|{wildkey}'
if kkey not in gdict: gdict[kkey] = []
gdict[kkey].append((subkey, modkey))
# Build a list of potential groups. Only those with multiple items will be grouped.
def group_options(schema):
for pindex in range(10, -1, -1):
found_groups = {}
for filekey, f in schema.items():
for sectkey, s in f.items():
for optkey in s:
find_grouping(found_groups, filekey, sectkey, optkey, pindex)
fkeys = [ k for k in found_groups.keys() ]
for kkey in fkeys:
items = found_groups[kkey]
if len(items) > 1:
f, s, w = kkey.split('|')
extend_dict(schema, (f, s, w)) # Add wildcard group to schema
for subkey, optkey in items: # Add all items to wildcard group
schema[f][s][w][subkey] = schema[f][s][optkey] # Move non-wildcard item to wildcard group
del schema[f][s][optkey]
del found_groups[kkey]
# Extract all board names from boards.h
def load_boards():
bpath = Path("Marlin/src/core/boards.h")
if bpath.is_file():
with bpath.open(encoding='utf-8') as bfile:
boards = []
for line in bfile:
if line.startswith("#define BOARD_"):
bname = line.split()[1]
if bname != "BOARD_UNKNOWN": boards.append(bname)
return "['" + "','".join(boards) + "']"
return ''
#
# Extract the specified configuration files in the form of a structured schema.
# Contains the full schema for the configuration files, not just the enabled options,
# Contains the current values of the options, not just data structure, so "schema" is a slight misnomer.
#
# The returned object is a nested dictionary with the following indexing:
#
# - schema[filekey][section][define_name] = define_info
#
# Where the define_info contains the following keyed fields:
# - section = The @section the define is in
# - name = The name of the define
# - enabled = True if the define is enabled (not commented out)
# - line = The line number of the define
# - sid = A serial ID for the define
# - value = The value of the define, if it has one
# - type = The type of the define, if it has one
# - requires = The conditions that must be met for the define to be enabled
# - comment = The comment for the define, if it has one
# - units = The units for the define, if it has one
# - options = The options for the define, if it has any
#
def extract_files(filekey):
# Load board names from boards.h
boards = load_boards()
# Parsing states
class Parse:
NORMAL = 0 # No condition yet
BLOCK_COMMENT = 1 # Looking for the end of the block comment
EOL_COMMENT = 2 # EOL comment started, maybe add the next comment?
SLASH_COMMENT = 3 # Block-like comment, starting with aligned //
GET_SENSORS = 4 # Gathering temperature sensor options
ERROR = 9 # Syntax error
# A JSON object to store the data
sch_out = { key:{} for key in filekey.values() }
# Regex for #define NAME [VALUE] [COMMENT] with sanitized line
defgrep = re.compile(r'^(//)?\s*(#define)\s+([A-Za-z0-9_]+)\s*(.*?)\s*(//.+)?$')
# Pattern to match a float value
flt = r'[-+]?\s*(\d+\.|\d*\.\d+)([eE][-+]?\d+)?[fF]?'
# Start with unknown state
state = Parse.NORMAL
# Serial ID
sid = 0
# Loop through files and parse them line by line
for fn, fk in filekey.items():
with Path("Marlin", fn).open(encoding='utf-8') as fileobj:
section = 'none' # Current Settings section
line_number = 0 # Counter for the line number of the file
conditions = [] # Create a condition stack for the current file
comment_buff = [] # A temporary buffer for comments
prev_comment = '' # Copy before reset for an EOL comment
options_json = '' # A buffer for the most recent options JSON found
eol_options = False # The options came from end of line, so only apply once
join_line = False # A flag that the line should be joined with the previous one
line = '' # A line buffer to handle \ continuation
last_added_ref = {} # Reference to the last added item
# Loop through the lines in the file
for the_line in fileobj.readlines():
line_number += 1
# Clean the line for easier parsing
the_line = the_line.strip()
if join_line: # A previous line is being made longer
line += (' ' if line else '') + the_line
else: # Otherwise, start the line anew
line, line_start = the_line, line_number
# If the resulting line ends with a \, don't process now.
# Strip the end off. The next line will be joined with it.
join_line = line.endswith("\\")
if join_line:
line = line[:-1].strip()
continue
else:
line_end = line_number
defmatch = defgrep.match(line)
# Special handling for EOL comments after a #define.
# At this point the #define is already digested and inserted,
# so we have to extend it
if state == Parse.EOL_COMMENT:
# If the line is not a comment, we're done with the EOL comment
if not defmatch and the_line.startswith('//'):
comment_buff.append(the_line[2:].strip())
else:
state = Parse.NORMAL
cline = ' '.join(comment_buff)
comment_buff = []
if cline != '':
# A (block or slash) comment was already added
cfield = 'notes' if 'comment' in last_added_ref else 'comment'
last_added_ref[cfield] = cline
#
# Add the given comment line to the comment buffer, unless:
# - The line starts with ':' and JSON values to assign to 'opt'.
# - The line starts with '@section' so a new section needs to be returned.
# - The line starts with '======' so just skip it.
#
def use_comment(c, opt, sec, bufref):
'''
c - The comment line to parse
opt - Options JSON string to return (if not updated)
sec - Section to return (if not updated)
bufref - The comment buffer to add to
'''
sc = c.strip() # Strip for special patterns
if sc.startswith(':'): # If the comment starts with : then it has magic JSON
d = sc[1:].strip() # Strip the leading : and spaces
# Look for a JSON container
cbr = sc.rindex('}') if d.startswith('{') else sc.rindex(']') if d.startswith('[') else 0
if cbr:
opt, cmt = sc[1:cbr+1].strip(), sc[cbr+1:].strip()
if cmt != '': bufref.append(cmt)
else:
opt = sc[1:].strip() # Some literal value not in a JSON container?
else:
m = re.match(r'@section\s*(.+)', sc) # Start a new section?
if m:
sec = m[1]
elif not sc.startswith('========'):
bufref.append(c) # Anything else is part of the comment
return opt, sec
# For slash comments, capture consecutive slash comments.
# The comment will be applied to the next #define.
if state == Parse.SLASH_COMMENT:
if not defmatch and the_line.startswith('//'):
options_json, section = use_comment(the_line[2:].strip(), options_json, section, comment_buff)
continue
else:
state = Parse.NORMAL
# In a block comment, capture lines up to the end of the comment.
# Assume nothing follows the comment closure.
if state in (Parse.BLOCK_COMMENT, Parse.GET_SENSORS):
endpos = line.find('*/')
if endpos < 0:
cline = line
else:
cline, line = line[:endpos].strip(), line[endpos+2:].strip()
# Temperature sensors are done
if state == Parse.GET_SENSORS:
options_json = f'[ {options_json[:-2]} ]'
state = Parse.NORMAL
# Strip the leading '* ' from block comments
cline = re.sub(r'^\* ?', '', cline)
# Collect temperature sensors
if state == Parse.GET_SENSORS:
sens = re.match(r'^\s*(-?\d+)\s*:\s*(.+)$', cline)
if sens:
s2 = sens[2].replace("'", "''")
options_json += f"{sens[1]}:'{sens[1]} - {s2}', "
elif state == Parse.BLOCK_COMMENT:
# Look for temperature sensors
if re.match(r'temperature sensors.*:', cline, re.IGNORECASE):
state, cline = Parse.GET_SENSORS, "Temperature Sensors"
options_json, section = use_comment(cline, options_json, section, comment_buff)
# For the normal state we're looking for any non-blank line
elif state == Parse.NORMAL:
# Skip a commented define when evaluating comment opening
st = 2 if re.match(r'^//\s*#define', line) else 0
cpos1 = line.find('/*') # Start a block comment on the line?
cpos2 = line.find('//', st) # Start an end of line comment on the line?
# Only the first comment starter gets evaluated
cpos = -1
if cpos1 != -1 and (cpos1 < cpos2 or cpos2 == -1):
cpos = cpos1
comment_buff = []
state = Parse.BLOCK_COMMENT
eol_options = False
elif cpos2 != -1 and (cpos2 < cpos1 or cpos1 == -1):
cpos = cpos2
# Comment after a define may be continued on the following lines
if defmatch is not None and cpos > 10:
state = Parse.EOL_COMMENT
prev_comment = '\n'.join(comment_buff)
comment_buff = []
else:
state = Parse.SLASH_COMMENT
# Process the start of a new comment
if cpos != -1:
comment_buff = []
cline, line = line[cpos+2:].strip(), line[:cpos].strip()
if state == Parse.BLOCK_COMMENT:
# Strip leading '*' from block comments
cline = re.sub(r'^\* ?', '', cline)
else:
# Expire end-of-line options after first use
if cline.startswith(':'): eol_options = True
# Buffer a non-empty comment start
if cline != '':
options_json, section = use_comment(cline, options_json, section, comment_buff)
# If the line has nothing before the comment, go to the next line
if line == '':
options_json = ''
continue
# Parenthesize the given expression if needed
def atomize(s):
if s == '' \
or re.match(r'^[A-Za-z0-9_]*(\([^)]+\))?$', s) \
or re.match(r'^[A-Za-z0-9_]+ == \d+?$', s):
return s
return f'({s})'
#
# The conditions stack is an array containing condition-arrays.
# Each condition-array lists the conditions for the current block.
# IF/N/DEF adds a new condition-array to the stack.
# ELSE/ELIF/ENDIF pop the condition-array.
# ELSE/ELIF negate the last item in the popped condition-array.
# ELIF adds a new condition to the end of the array.
# ELSE/ELIF re-push the condition-array.
#
cparts = line.split()
iselif, iselse = cparts[0] == '#elif', cparts[0] == '#else'
if iselif or iselse or cparts[0] == '#endif':
if len(conditions) == 0:
raise Exception(f'no #if block at line {line_number}')
# Pop the last condition-array from the stack
prev = conditions.pop()
if iselif or iselse:
prev[-1] = '!' + prev[-1] # Invert the last condition
if iselif: prev.append(atomize(line[5:].strip()))
conditions.append(prev)
elif cparts[0] == '#if':
conditions.append([ atomize(line[3:].strip()) ])
elif cparts[0] == '#ifdef':
conditions.append([ f'defined({line[6:].strip()})' ])
elif cparts[0] == '#ifndef':
conditions.append([ f'!defined({line[7:].strip()})' ])
# Handle a complete #define line
elif defmatch is not None:
# Get the match groups into vars
enabled, define_name, val = defmatch[1] is None, defmatch[3], defmatch[4]
# Increment the serial ID
sid += 1
# Create a new dictionary for the current #define
define_info = {
'section': section,
'name': define_name,
'enabled': enabled,
'line': line_start,
'sid': sid
}
# Type is based on the value
value_type = \
'switch' if val == '' \
else 'int' if re.match(r'^[-+]?\s*\d+$', val) \
else 'ints' if re.match(r'^([-+]?\s*\d+)(\s*,\s*[-+]?\s*\d+)+$', val) \
else 'floats' if re.match(rf'({flt}(\s*,\s*{flt})+)', val) \
else 'float' if re.match(f'^({flt})$', val) \
else 'string' if val[0] == '"' \
else 'char' if val[0] == "'" \
else 'bool' if val in ('true', 'false') \
else 'state' if val in ('HIGH', 'LOW') \
else 'enum' if re.match(r'^[A-Za-z0-9_]{3,}$', val) \
else 'int[]' if re.match(r'^{\s*[-+]?\s*\d+(\s*,\s*[-+]?\s*\d+)*\s*}$', val) \
else 'float[]' if re.match(r'^{{\s*{flt}(\s*,\s*{flt})*\s*}}$', val) \
else 'array' if val[0] == '{' \
else ''
val = (val == 'true') if value_type == 'bool' \
else int(val) if value_type == 'int' \
else val.replace('f','') if value_type == 'floats' \
else float(val.replace('f','')) if value_type == 'float' \
else val
if val != '': define_info['value'] = val
if value_type != '': define_info['type'] = value_type
# Join up accumulated conditions with &&
if conditions: define_info['requires'] = '(' + ') && ('.join(sum(conditions, [])) + ')'
# If the comment_buff is not empty, add the comment to the info
if comment_buff:
full_comment = '\n'.join(comment_buff).strip()
# An EOL comment will be added later
# The handling could go here instead of above
if state == Parse.EOL_COMMENT:
define_info['comment'] = ''
else:
define_info['comment'] = full_comment
comment_buff = []
# If the comment specifies units, add that to the info
units = re.match(r'^\(([^)]+)\)', full_comment)
if units:
units = units[1]
if units in ('s', 'sec'): units = 'seconds'
define_info['units'] = units
if 'comment' not in define_info or define_info['comment'] == '':
if prev_comment:
define_info['comment'] = prev_comment
prev_comment = ''
if 'comment' in define_info and define_info['comment'] == '':
del define_info['comment']
# Set the options for the current #define
if define_name == "MOTHERBOARD" and boards != '':
define_info['options'] = boards
elif options_json != '':
define_info['options'] = options_json
if eol_options: options_json = ''
# Create section dict if it doesn't exist yet
if section not in sch_out[fk]: sch_out[fk][section] = {}
# If define has already been seen...
if define_name in sch_out[fk][section]:
info = sch_out[fk][section][define_name]
if isinstance(info, dict): info = [ info ] # Convert a single dict into a list
info.append(define_info) # Add to the list
else:
# Add the define dict with name as key
sch_out[fk][section][define_name] = define_info
if state == Parse.EOL_COMMENT:
last_added_ref = define_info
return sch_out
#
# Extract the current configuration files in the form of a structured schema.
#
def extract():
# List of files to process, with shorthand
return extract_files({ 'Configuration.h':'basic', 'Configuration_adv.h':'advanced' })
def dump_json(schema:dict, jpath:Path):
with jpath.open('w', encoding='utf-8') as jfile:
json.dump(schema, jfile, ensure_ascii=False, indent=2)
def dump_yaml(schema:dict, ypath:Path):
import yaml
# Custom representer for all multi-line strings
def str_literal_representer(dumper, data):
if '\n' in data: # Check for multi-line strings
# Add a newline to trigger '|+'
if not data.endswith('\n'): data += '\n'
return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
return dumper.represent_scalar('tag:yaml.org,2002:str', data)
yaml.add_representer(str, str_literal_representer)
with ypath.open('w', encoding='utf-8') as yfile:
yaml.dump(schema, yfile, default_flow_style=False, width=120, indent=2)
def main():
try:
schema = extract()
except Exception as exc:
print("Error: " + str(exc))
schema = None
if schema:
# Get the command line arguments after the script name
import sys
args = sys.argv[1:]
if len(args) == 0: args = ['some']
# Does the given array intersect at all with args?
def inargs(c): return len(set(args) & set(c)) > 0
# Help / Unknown option
unk = not inargs(['some','json','jsons','group','yml','yaml', '-h', '--help'])
if (unk): print(f"Unknown option: '{args[0]}'")
if inargs(['-h', '--help']) or unk:
print("Usage: schema.py [-h] [some|json|jsons|group|yml|yaml]")
print(" some = json + yml")
print(" jsons = json + group")
return
# JSON schema
if inargs(['some', 'json', 'jsons']):
print("Generating JSON ...")
dump_json(schema, Path('schema.json'))
# JSON schema (wildcard names)
if inargs(['group', 'jsons']):
group_options(schema)
dump_json(schema, Path('schema_grouped.json'))
# YAML
if inargs(['some', 'yml', 'yaml']):
try:
import yaml
except ImportError:
print("Installing YAML module ...")
import subprocess
try:
subprocess.run(['python3', '-m', 'pip', 'install', 'pyyaml'])
import yaml
except:
print("Failed to install YAML module")
return
print("Generating YML ...")
dump_yaml(schema, Path('schema.yml'))
if __name__ == '__main__':
main()