mirror of
https://github.com/Motorhead1991/qemu.git
synced 2025-08-04 16:23:55 -06:00

This started as a simple script that scanned for regular expressions, but became more and more complex when exceptions to the rules were found. I don't know if this should be maintained in the QEMU source tree long term (maybe it can be reused for other code transformations that Coccinelle can't handle). In either case, this is included as part of the patch series to document how exactly the automated code transformations in the next patches were done. Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Message-Id: <20200831210740.126168-7-ehabkost@redhat.com> Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
118 lines
3.3 KiB
Python
118 lines
3.3 KiB
Python
# Copyright (C) 2020 Red Hat Inc.
|
|
#
|
|
# Authors:
|
|
# Eduardo Habkost <ehabkost@redhat.com>
|
|
#
|
|
# This work is licensed under the terms of the GNU GPL, version 2. See
|
|
# the COPYING file in the top-level directory.
|
|
"""Helpers for creation of regular expressions"""
|
|
import re
|
|
|
|
import logging
|
|
logger = logging.getLogger(__name__)
|
|
DBG = logger.debug
|
|
INFO = logger.info
|
|
WARN = logger.warning
|
|
|
|
def S(*regexps) -> str:
|
|
"""Just a shortcut to concatenate multiple regexps more easily"""
|
|
return ''.join(regexps)
|
|
|
|
def P(*regexps, name=None, capture=False, repeat='') -> str:
|
|
"""Just add parenthesis around regexp(s), with optional name or repeat suffix"""
|
|
s = S(*regexps)
|
|
if name:
|
|
return f'(?P<{name}>{s}){repeat}'
|
|
elif capture:
|
|
return f'({s}){repeat}'
|
|
else:
|
|
return f'(?:{s}){repeat}'
|
|
|
|
def NAMED(name, *regexps) -> str:
|
|
"""Make named group using <P<name>...) syntax
|
|
|
|
>>> NAMED('mygroup', 'xyz', 'abc')
|
|
'(?P<mygroup>xyzabc)'
|
|
"""
|
|
return P(*regexps, name=name)
|
|
|
|
def OR(*regexps, **kwargs) -> str:
|
|
"""Build (a|b|c) regexp"""
|
|
return P('|'.join(regexps), **kwargs)
|
|
|
|
def M(*regexps, n='*', name=None) -> str:
|
|
"""Add repetition qualifier to regexp(s)
|
|
|
|
>>> M('a', 'b')
|
|
'(?:ab)*'
|
|
>>> M('a' , 'b', n='+')
|
|
'(?:ab)+'
|
|
>>> M('a' , 'b', n='{2,3}', name='name')
|
|
'(?P<name>(?:ab){2,3})'
|
|
"""
|
|
r = P(*regexps, repeat=n)
|
|
if name:
|
|
r = NAMED(name, r)
|
|
return r
|
|
|
|
# helper to make parenthesis optional around regexp
|
|
OPTIONAL_PARS = lambda R: OR(S(r'\(\s*', R, r'\s*\)'), R)
|
|
def test_optional_pars():
|
|
r = OPTIONAL_PARS('abc')+'$'
|
|
assert re.match(r, 'abc')
|
|
assert re.match(r, '(abc)')
|
|
assert not re.match(r, '(abcd)')
|
|
assert not re.match(r, '(abc')
|
|
assert not re.match(r, 'abc)')
|
|
|
|
|
|
# this disables the MULTILINE flag, so it will match at the
|
|
# beginning of the file:
|
|
RE_FILE_BEGIN = r'(?-m:^)'
|
|
|
|
# C primitives:
|
|
|
|
SP = r'\s*'
|
|
|
|
RE_COMMENT = r'//[^\n]*$|/\*([^*]|\*[^/])*\*/'
|
|
RE_COMMENTS = M(RE_COMMENT + SP)
|
|
|
|
RE_IDENTIFIER = r'[a-zA-Z_][a-zA-Z0-9_]*(?![a-zA-Z0-9])'
|
|
RE_STRING = r'\"([^\"\\]|\\[a-z\"])*\"'
|
|
RE_NUMBER = r'[0-9]+|0x[0-9a-fA-F]+'
|
|
|
|
# space or escaped newlines:
|
|
CPP_SPACE = OR(r'\s', r'\\\n', repeat='+')
|
|
|
|
RE_PATH = '[a-zA-Z0-9/_.-]+'
|
|
|
|
RE_INCLUDEPATH = OR(S(r'\"', RE_PATH, r'\"'),
|
|
S(r'<', RE_PATH, r'>'))
|
|
|
|
RE_INCLUDE = S(r'^[ \t]*#[ \t]*include[ \t]+', NAMED('includepath', RE_INCLUDEPATH), r'[ \t]*\n')
|
|
RE_SIMPLEDEFINE = S(r'^[ \t]*#[ \t]*define[ \t]+', RE_IDENTIFIER, r'[ \t]*\n')
|
|
|
|
RE_STRUCT_TYPE = S(r'struct\s+', RE_IDENTIFIER)
|
|
RE_TYPE = OR(RE_IDENTIFIER, RE_STRUCT_TYPE)
|
|
|
|
RE_MACRO_CONCAT = M(S(OR(RE_IDENTIFIER, RE_STRING), SP), n='{2,}')
|
|
|
|
RE_SIMPLE_VALUE = OR(RE_IDENTIFIER, RE_STRING, RE_NUMBER)
|
|
|
|
RE_FUN_CALL = S(RE_IDENTIFIER, r'\s*\(\s*', RE_SIMPLE_VALUE, r'\s*\)')
|
|
RE_SIZEOF = S(r'sizeof\s*\(\s*', NAMED('sizeoftype', RE_TYPE), r'\s*\)')
|
|
|
|
RE_ADDRESS = S(r'&\s*', RE_IDENTIFIER)
|
|
|
|
RE_ARRAY_ITEM = S(r'{\s*', NAMED('arrayitem', M(RE_SIMPLE_VALUE, n='?')), r'\s*}\s*,?')
|
|
RE_ARRAY_CAST = S(r'\(\s*', RE_IDENTIFIER, r'\s*\[\s*\]\)')
|
|
RE_ARRAY_ITEMS = M(S(RE_ARRAY_ITEM, SP))
|
|
RE_ARRAY = S(M(RE_ARRAY_CAST, n='?'), r'\s*{\s*',
|
|
NAMED('arrayitems', RE_ARRAY_ITEMS),
|
|
r'}')
|
|
|
|
# NOTE: this covers a very small subset of valid expressions
|
|
|
|
RE_EXPRESSION = OR(RE_SIZEOF, RE_FUN_CALL, RE_MACRO_CONCAT, RE_SIMPLE_VALUE,
|
|
RE_ARRAY, RE_ADDRESS)
|
|
|