#!/usr/bin/env python3
"""
schema.py

Extract firmware configuration into structured JSON or YAML schema format.
Used by signature.py via common-dependencies.py to generate a schema file
during the PlatformIO build when CONFIG_EXPORT is defined in the configuration.

This script can also be run standalone from within the Marlin repo, and is a
companion to abm/js/schema.js in the MarlinFirmware/AutoBuildMarlin project,
which has been extended to evaluate conditions and can determine what options
are actually enabled, not just which options are uncommented. That will be
migrated to this script for standalone migration.

Usage: schema.py [-h] [some|json|jsons|group|yml|yaml]

Process Marlin firmware configuration files (Configuration.h and
Configuration_adv.h) to produce structured output suitable for documentation,
tooling, or automated processing.

Positional arguments:
  some    Generate both JSON and YAML output (schema.json and schema.yml)
  json    Generate JSON output (schema.json)
  jsons   Generate grouped JSON output with wildcard options
          (schema.json and schema_grouped.json)
  group   Generate grouped JSON output only (schema_grouped.json)
  yml     Generate YAML output (schema.yml)
  yaml    Same as 'yml'

Optional arguments:
  -h, --help  Show this help message and exit
"""
import re, json
from pathlib import Path

def extend_dict(d:dict, k:tuple):
    """Ensure the nested-dict path named by (up to) the first three elements
    of 'k' exists inside 'd', creating empty dict levels as needed."""
    node = d
    for level_key in k[:3]:
        node = node.setdefault(level_key, {})

# Patterns matching the "indexed" portion of an option name (axis letters,
# numeric indexes, MIN/MAX, hotend numbers, etc.) used for wildcard grouping.
grouping_patterns = [
    re.compile(r'^([XYZIJKUVW]|[XYZ]2|Z[34]|E[0-7])$'),
    re.compile(r'^AXIS\d$'),
    re.compile(r'^(MIN|MAX)$'),
    re.compile(r'^[0-8]$'),
    re.compile(r'^HOTEND[0-7]$'),
    re.compile(r'^(HOTENDS|BED|PROBE|COOLER)$'),
    re.compile(r'^[XYZIJKUVW]M(IN|AX)$')
]

# If the indexed part of the option name matches a pattern
# then add it to the dictionary.
def find_grouping(gdict, filekey, sectkey, optkey, pindex):
    """If the part of 'optkey' at underscore-index 'pindex' matches one of the
    grouping_patterns, record it in 'gdict' under a wildcard key of the form
    'filekey|sectkey|WILD_KEY' (with '*' substituted at pindex)."""
    optparts = optkey.split('_')
    # Chained comparison: more than one part AND enough parts to index pindex
    if 1 < len(optparts) > pindex:
        for patt in grouping_patterns:
            if patt.match(optparts[pindex]):
                subkey = optparts[pindex]           # The matched (indexed) part
                modkey = '_'.join(optparts)         # The original option name
                optparts[pindex] = '*'              # Substitute a wildcard
                wildkey = '_'.join(optparts)        # The wildcard group name
                kkey = f'{filekey}|{sectkey}|{wildkey}'
                if kkey not in gdict: gdict[kkey] = []
                gdict[kkey].append((subkey, modkey))

# Build a list of potential groups. Only those with multiple items will be grouped.
def group_options(schema):
    """Scan the whole schema for options that differ only in one underscore-
    delimited part and fold them into wildcard groups, in place.
    Scans part-indexes from 10 down to 0."""
    for pindex in range(10, -1, -1):
        found_groups = {}
        for filekey, f in schema.items():
            for sectkey, s in f.items():
                for optkey in s:
                    find_grouping(found_groups, filekey, sectkey, optkey, pindex)

        # Snapshot the keys so entries may be deleted while iterating
        fkeys = [ k for k in found_groups.keys() ]
        for kkey in fkeys:
            items = found_groups[kkey]
            if len(items) > 1:
                f, s, w = kkey.split('|')
                extend_dict(schema, (f, s, w))  # Add wildcard group to schema
                for subkey, optkey in items:    # Add all items to wildcard group
                    schema[f][s][w][subkey] = schema[f][s][optkey]
                    # Move non-wildcard item to wildcard group
                    del schema[f][s][optkey]
                del found_groups[kkey]

# Extract all board names from boards.h
def load_boards():
    """Return the board names from Marlin/src/core/boards.h as a JSON-ish
    list literal string (e.g. "['BOARD_A','BOARD_B']"), or '' if not found."""
    bpath = Path("Marlin/src/core/boards.h")
    if bpath.is_file():
        with bpath.open(encoding='utf-8') as bfile:
            boards = []
            for line in bfile:
                if line.startswith("#define BOARD_"):
                    bname = line.split()[1]
                    # BOARD_UNKNOWN is a sentinel, not a real board
                    if bname != "BOARD_UNKNOWN": boards.append(bname)
            return "['" + "','".join(boards) + "']"
    return ''

#
# Extract the specified configuration files in the form of a structured schema.
# Contains the full schema for the configuration files, not just the enabled options,
# Contains the current values of the options, not just data structure, so "schema" is a slight misnomer.
#
# The returned object is a nested dictionary with the following indexing:
#
# - schema[filekey][section][define_name] = define_info
#
# Where the define_info contains the following keyed fields:
#   - section  = The @section the define is in
#   - name     = The name of the define
#   - enabled  = True if the define is enabled (not commented out)
#   - line     = The line number of the define
#   - sid      = A serial ID for the define
#   - value    = The value of the define, if it has one
#   - type     = The type of the define, if it has one
#   - requires = The conditions that must be met for the define to be enabled
#   - comment  = The comment for the define, if it has one
#   - units    = The units for the define, if it has one
#   - options  = The options for the define, if it has any
#
def extract_files(filekey):
    """Parse the given configuration files into a structured schema dict.

    filekey - Map of { filename: shorthand-key }; files are read from 'Marlin/'.
    Returns the nested schema dict described in the comment block above.
    """
    # Load board names from boards.h
    boards = load_boards()

    # Parsing states
    class Parse:
        NORMAL = 0          # No condition yet
        BLOCK_COMMENT = 1   # Looking for the end of the block comment
        EOL_COMMENT = 2     # EOL comment started, maybe add the next comment?
        SLASH_COMMENT = 3   # Block-like comment, starting with aligned //
        GET_SENSORS = 4     # Gathering temperature sensor options
        ERROR = 9           # Syntax error

    # A JSON object to store the data
    sch_out = { key:{} for key in filekey.values() }
    # Regex for #define NAME [VALUE] [COMMENT] with sanitized line
    defgrep = re.compile(r'^(//)?\s*(#define)\s+([A-Za-z0-9_]+)\s*(.*?)\s*(//.+)?$')
    # Pattern to match a float value
    flt = r'[-+]?\s*(\d+\.|\d*\.\d+)([eE][-+]?\d+)?[fF]?'
    # Start with unknown state
    state = Parse.NORMAL
    # Serial ID
    sid = 0

    # Loop through files and parse them line by line
    for fn, fk in filekey.items():
        with Path("Marlin", fn).open(encoding='utf-8') as fileobj:
            section = 'none'        # Current Settings section
            line_number = 0         # Counter for the line number of the file
            conditions = []         # Create a condition stack for the current file
            comment_buff = []       # A temporary buffer for comments
            prev_comment = ''       # Copy before reset for an EOL comment
            options_json = ''       # A buffer for the most recent options JSON found
            eol_options = False     # The options came from end of line, so only apply once
            join_line = False       # A flag that the line should be joined with the previous one
            line = ''               # A line buffer to handle \ continuation
            last_added_ref = {}     # Reference to the last added item
            # Loop through the lines in the file
            for the_line in fileobj.readlines():
                line_number += 1

                # Clean the line for easier parsing
                the_line = the_line.strip()

                if join_line:   # A previous line is being made longer
                    line += (' ' if line else '') + the_line
                else:           # Otherwise, start the line anew
                    line, line_start = the_line, line_number

                # If the resulting line ends with a \, don't process now.
                # Strip the end off. The next line will be joined with it.
                join_line = line.endswith("\\")
                if join_line:
                    line = line[:-1].strip()
                    continue
                else:
                    line_end = line_number

                defmatch = defgrep.match(line)

                # Special handling for EOL comments after a #define.
                # At this point the #define is already digested and inserted,
                # so we have to extend it
                if state == Parse.EOL_COMMENT:
                    # If the line is not a comment, we're done with the EOL comment
                    if not defmatch and the_line.startswith('//'):
                        comment_buff.append(the_line[2:].strip())
                    else:
                        state = Parse.NORMAL
                        cline = ' '.join(comment_buff)
                        comment_buff = []
                        if cline != '':
                            # A (block or slash) comment was already added
                            cfield = 'notes' if 'comment' in last_added_ref else 'comment'
                            last_added_ref[cfield] = cline

                #
                # Add the given comment line to the comment buffer, unless:
                #   - The line starts with ':' and JSON values to assign to 'opt'.
                #   - The line starts with '@section' so a new section needs to be returned.
                #   - The line starts with '======' so just skip it.
                #
                def use_comment(c, opt, sec, bufref):
                    '''
                    c - The comment line to parse
                    opt - Options JSON string to return (if not updated)
                    sec - Section to return (if not updated)
                    bufref - The comment buffer to add to
                    '''
                    sc = c.strip() # Strip for special patterns
                    if sc.startswith(':'): # If the comment starts with : then it has magic JSON
                        d = sc[1:].strip() # Strip the leading : and spaces
                        # Look for a JSON container
                        cbr = sc.rindex('}') if d.startswith('{') else sc.rindex(']') if d.startswith('[') else 0
                        if cbr:
                            opt, cmt = sc[1:cbr+1].strip(), sc[cbr+1:].strip()
                            if cmt != '': bufref.append(cmt)
                        else:
                            opt = sc[1:].strip() # Some literal value not in a JSON container?
                    else:
                        m = re.match(r'@section\s*(.+)', sc) # Start a new section?
                        if m:
                            sec = m[1]
                        elif not sc.startswith('========'):
                            bufref.append(c) # Anything else is part of the comment
                    return opt, sec

                # For slash comments, capture consecutive slash comments.
                # The comment will be applied to the next #define.
                if state == Parse.SLASH_COMMENT:
                    if not defmatch and the_line.startswith('//'):
                        options_json, section = use_comment(the_line[2:].strip(), options_json, section, comment_buff)
                        continue
                    else:
                        state = Parse.NORMAL

                # In a block comment, capture lines up to the end of the comment.
                # Assume nothing follows the comment closure.
                if state in (Parse.BLOCK_COMMENT, Parse.GET_SENSORS):
                    endpos = line.find('*/')
                    if endpos < 0:
                        cline = line
                    else:
                        cline, line = line[:endpos].strip(), line[endpos+2:].strip()

                        # Temperature sensors are done
                        if state == Parse.GET_SENSORS:
                            options_json = f'[ {options_json[:-2]} ]'

                        state = Parse.NORMAL

                    # Strip the leading '* ' from block comments
                    cline = re.sub(r'^\* ?', '', cline)

                    # Collect temperature sensors
                    if state == Parse.GET_SENSORS:
                        sens = re.match(r'^\s*(-?\d+)\s*:\s*(.+)$', cline)
                        if sens:
                            s2 = sens[2].replace("'", "''")
                            options_json += f"{sens[1]}:'{sens[1]} - {s2}', "

                    elif state == Parse.BLOCK_COMMENT:
                        # Look for temperature sensors
                        if re.match(r'temperature sensors.*:', cline, re.IGNORECASE):
                            state, cline = Parse.GET_SENSORS, "Temperature Sensors"

                        options_json, section = use_comment(cline, options_json, section, comment_buff)

                # For the normal state we're looking for any non-blank line
                elif state == Parse.NORMAL:
                    # Skip a commented define when evaluating comment opening
                    st = 2 if re.match(r'^//\s*#define', line) else 0
                    cpos1 = line.find('/*')      # Start a block comment on the line?
                    cpos2 = line.find('//', st)  # Start an end of line comment on the line?

                    # Only the first comment starter gets evaluated
                    cpos = -1
                    if cpos1 != -1 and (cpos1 < cpos2 or cpos2 == -1):
                        cpos = cpos1
                        comment_buff = []
                        state = Parse.BLOCK_COMMENT
                        eol_options = False

                    elif cpos2 != -1 and (cpos2 < cpos1 or cpos1 == -1):
                        cpos = cpos2

                        # Comment after a define may be continued on the following lines
                        if defmatch is not None and cpos > 10:
                            state = Parse.EOL_COMMENT
                            prev_comment = '\n'.join(comment_buff)
                            comment_buff = []
                        else:
                            state = Parse.SLASH_COMMENT

                    # Process the start of a new comment
                    if cpos != -1:
                        comment_buff = []
                        cline, line = line[cpos+2:].strip(), line[:cpos].strip()

                        if state == Parse.BLOCK_COMMENT:
                            # Strip leading '*' from block comments
                            cline = re.sub(r'^\* ?', '', cline)
                        else:
                            # Expire end-of-line options after first use
                            if cline.startswith(':'): eol_options = True

                        # Buffer a non-empty comment start
                        if cline != '':
                            options_json, section = use_comment(cline, options_json, section, comment_buff)

                    # If the line has nothing before the comment, go to the next line
                    if line == '':
                        options_json = ''
                        continue

                    # Parenthesize the given expression if needed
                    def atomize(s):
                        if s == '' \
                        or re.match(r'^[A-Za-z0-9_]*(\([^)]+\))?$', s) \
                        or re.match(r'^[A-Za-z0-9_]+ == \d+?$', s):
                            return s
                        return f'({s})'

                    #
                    # The conditions stack is an array containing condition-arrays.
                    # Each condition-array lists the conditions for the current block.
                    # IF/N/DEF adds a new condition-array to the stack.
                    # ELSE/ELIF/ENDIF pop the condition-array.
                    # ELSE/ELIF negate the last item in the popped condition-array.
                    # ELIF adds a new condition to the end of the array.
                    # ELSE/ELIF re-push the condition-array.
                    #
                    cparts = line.split()
                    iselif, iselse = cparts[0] == '#elif', cparts[0] == '#else'
                    if iselif or iselse or cparts[0] == '#endif':
                        if len(conditions) == 0:
                            raise Exception(f'no #if block at line {line_number}')

                        # Pop the last condition-array from the stack
                        prev = conditions.pop()
                        if iselif or iselse:
                            prev[-1] = '!' + prev[-1] # Invert the last condition
                            if iselif: prev.append(atomize(line[5:].strip()))
                            conditions.append(prev)

                    elif cparts[0] == '#if':
                        conditions.append([ atomize(line[3:].strip()) ])
                    elif cparts[0] == '#ifdef':
                        conditions.append([ f'defined({line[6:].strip()})' ])
                    elif cparts[0] == '#ifndef':
                        conditions.append([ f'!defined({line[7:].strip()})' ])

                    # Handle a complete #define line
                    elif defmatch is not None:
                        # Get the match groups into vars
                        enabled, define_name, val = defmatch[1] is None, defmatch[3], defmatch[4]

                        # Increment the serial ID
                        sid += 1

                        # Create a new dictionary for the current #define
                        define_info = {
                            'section': section, 'name': define_name, 'enabled': enabled,
                            'line': line_start, 'sid': sid
                        }

                        # Type is based on the value.
                        # FIX: The 'float[]' pattern was a plain r-string, so '{flt}' was never
                        # interpolated and the pattern could not match; it is now an rf-string
                        # like the 'floats' pattern above it.
                        value_type = \
                            'switch' if val == '' \
                            else 'int' if re.match(r'^[-+]?\s*\d+$', val) \
                            else 'ints' if re.match(r'^([-+]?\s*\d+)(\s*,\s*[-+]?\s*\d+)+$', val) \
                            else 'floats' if re.match(rf'({flt}(\s*,\s*{flt})+)', val) \
                            else 'float' if re.match(f'^({flt})$', val) \
                            else 'string' if val[0] == '"' \
                            else 'char' if val[0] == "'" \
                            else 'bool' if val in ('true', 'false') \
                            else 'state' if val in ('HIGH', 'LOW') \
                            else 'enum' if re.match(r'^[A-Za-z0-9_]{3,}$', val) \
                            else 'int[]' if re.match(r'^{\s*[-+]?\s*\d+(\s*,\s*[-+]?\s*\d+)*\s*}$', val) \
                            else 'float[]' if re.match(rf'^{{\s*{flt}(\s*,\s*{flt})*\s*}}$', val) \
                            else 'array' if val[0] == '{' \
                            else ''

                        # Coerce the value string to a native type where possible
                        val = (val == 'true') if value_type == 'bool' \
                            else int(val) if value_type == 'int' \
                            else val.replace('f','') if value_type == 'floats' \
                            else float(val.replace('f','')) if value_type == 'float' \
                            else val

                        if val != '': define_info['value'] = val
                        if value_type != '': define_info['type'] = value_type

                        # Join up accumulated conditions with &&
                        if conditions: define_info['requires'] = '(' + ') && ('.join(sum(conditions, [])) + ')'

                        # If the comment_buff is not empty, add the comment to the info
                        if comment_buff:
                            full_comment = '\n'.join(comment_buff).strip()

                            # An EOL comment will be added later
                            # The handling could go here instead of above
                            if state == Parse.EOL_COMMENT:
                                define_info['comment'] = ''
                            else:
                                define_info['comment'] = full_comment
                                comment_buff = []

                            # If the comment specifies units, add that to the info
                            units = re.match(r'^\(([^)]+)\)', full_comment)
                            if units:
                                units = units[1]
                                if units in ('s', 'sec'): units = 'seconds'
                                define_info['units'] = units

                        # Fall back to a previously-captured comment, if any
                        if 'comment' not in define_info or define_info['comment'] == '':
                            if prev_comment:
                                define_info['comment'] = prev_comment
                                prev_comment = ''

                        if 'comment' in define_info and define_info['comment'] == '':
                            del define_info['comment']

                        # Set the options for the current #define
                        if define_name == "MOTHERBOARD" and boards != '':
                            define_info['options'] = boards
                        elif options_json != '':
                            define_info['options'] = options_json
                            if eol_options: options_json = ''

                        # Create section dict if it doesn't exist yet
                        if section not in sch_out[fk]: sch_out[fk][section] = {}

                        # If define has already been seen, keep all entries as a list
                        if define_name in sch_out[fk][section]:
                            info = sch_out[fk][section][define_name]
                            if isinstance(info, dict): info = [ info ]  # Convert a single dict into a list
                            info.append(define_info)                    # Add to the list
                            # FIX: Store the (possibly new) list back into the schema.
                            # Previously the first duplicate built a fresh list that was
                            # appended to and then discarded, losing the duplicate entry.
                            sch_out[fk][section][define_name] = info
                        else:
                            # Add the define dict with name as key
                            sch_out[fk][section][define_name] = define_info

                        if state == Parse.EOL_COMMENT: last_added_ref = define_info

    return sch_out
#
def extract():
    """Extract the schema from the standard configuration files."""
    # List of files to process, with shorthand
    return extract_files({ 'Configuration.h':'basic', 'Configuration_adv.h':'advanced' })

def dump_json(schema:dict, jpath:Path):
    """Write the schema dictionary to 'jpath' as pretty-printed UTF-8 JSON."""
    with jpath.open('w', encoding='utf-8') as jfile:
        json.dump(schema, jfile, ensure_ascii=False, indent=2)

def dump_yaml(schema:dict, ypath:Path):
    """Write the schema dictionary to 'ypath' as YAML, using literal block
    style ('|') for multi-line strings. Requires the 'yaml' (PyYAML) module."""
    import yaml

    # Custom representer for all multi-line strings
    def str_literal_representer(dumper, data):
        if '\n' in data:  # Check for multi-line strings
            # Add a newline to trigger '|+'
            if not data.endswith('\n'): data += '\n'
            return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|')
        return dumper.represent_scalar('tag:yaml.org,2002:str', data)

    yaml.add_representer(str, str_literal_representer)

    with ypath.open('w', encoding='utf-8') as yfile:
        yaml.dump(schema, yfile, default_flow_style=False, width=120, indent=2)

def main():
    """Command-line entry point: extract the schema then emit the formats
    requested on the command line (default 'some' = json + yml)."""
    try:
        schema = extract()
    except Exception as exc:
        print("Error: " + str(exc))
        schema = None

    if schema:
        # Get the command line arguments after the script name
        import sys
        args = sys.argv[1:]
        if len(args) == 0: args = ['some']

        # Does the given array intersect at all with args?
        def inargs(c): return len(set(args) & set(c)) > 0

        # Help / Unknown option
        unk = not inargs(['some','json','jsons','group','yml','yaml', '-h', '--help'])
        if unk: print(f"Unknown option: '{args[0]}'")
        if inargs(['-h', '--help']) or unk:
            print("Usage: schema.py [-h] [some|json|jsons|group|yml|yaml]")
            print(" some = json + yml")
            print(" jsons = json + group")
            return

        # JSON schema
        if inargs(['some', 'json', 'jsons']):
            print("Generating JSON ...")
            dump_json(schema, Path('schema.json'))

        # JSON schema (wildcard names)
        if inargs(['group', 'jsons']):
            group_options(schema)
            dump_json(schema, Path('schema_grouped.json'))

        # YAML
        if inargs(['some', 'yml', 'yaml']):
            try:
                import yaml
            except ImportError:
                print("Installing YAML module ...")
                import subprocess
                try:
                    # FIX: Install into the interpreter actually running this script
                    # (sys.executable) instead of whatever 'python3' is on PATH —
                    # the hard-coded name fails on Windows and in some venvs.
                    subprocess.run([sys.executable, '-m', 'pip', 'install', 'pyyaml'])
                    import yaml
                except Exception:
                    # FIX: was a bare 'except:' which also swallowed
                    # KeyboardInterrupt/SystemExit.
                    print("Failed to install YAML module")
                    return

            print("Generating YML ...")
            dump_yaml(schema, Path('schema.yml'))

if __name__ == '__main__':
    main()