[Bug 16076][emscripten] Generate Emterpreter whitelist more cleverly

peter-b · peter-b · commit 7e57b9be6ca9 · 2015-12-08T17:00:09.000Z
Apply two sets of regular expressions to the symbols in the engine:

- Only symbols that match a whitelist expression are compiled with
  Emterpreter

- No symbol that matches a blacklist expression is compiled with
  Emterpreter

The set of regular expressions provided in this patch seems to cover
most possible ways that the engine could end up waiting, but it's not
guaranteed to be exhaustive.  Just compiling everything in the core
engine to Emterpreter bytecode made the engine very large and very
slow, so a more selective approach was required.
diff --git a/docs/notes/bugfix-16076.md b/docs/notes/bugfix-16076.md
@@ -0,0 +1 @@
+# Enable "wait" syntax in HTML5 standalones
diff --git a/engine/engine.gyp b/engine/engine.gyp
@@ -764,6 +764,38 @@
 
 						'actions':
 						[
+							{
+								'action_name': 'genwhitelist',
+								'message': 'Generating the Emterpreter whitelist',
+
+								'inputs':
+								[
+									'../util/emscripten-genwhitelist.py',
+									'<(PRODUCT_DIR)/standalone-community.bc',
+									'src/em-whitelist.json',
+									'src/em-blacklist.json',
+								],
+
+								'outputs':
+								[
+									'<(PRODUCT_DIR)/standalone-community-whitelist.json',
+									'<(PRODUCT_DIR)/standalone-community-blacklist.json',
+								],
+
+								'action':
+								[
+									'../util/emscripten-genwhitelist.py',
+									'--input',
+									'<(PRODUCT_DIR)/standalone-community.bc',
+									'--output',
+									'<(PRODUCT_DIR)/standalone-community-whitelist.json',
+									'<(PRODUCT_DIR)/standalone-community-blacklist.json',
+									'--include',
+									'src/em-whitelist.json',
+									'--exclude',
+									'src/em-blacklist.json',
+								],
+							},
 							{
 								'action_name': 'javascriptify',
 								'message': 'Javascript-ifying the Emscripten engine',
@@ -773,7 +805,7 @@
 									'../util/emscripten-javascriptify.py',
 									'<(PRODUCT_DIR)/standalone-community.bc',
 									'rsrc/emscripten-html-template.html',
-									'src/em-whitelist.json',
+									'<(PRODUCT_DIR)/standalone-community-whitelist.json',
 									'src/em-preamble.js',
 									'src/em-preamble-overlay.js',
 									'src/em-util.js',
@@ -802,7 +834,7 @@
 									'--shell-file',
 									'rsrc/emscripten-html-template.html',
 									'--whitelist',
-									'src/em-whitelist.json',
+									'<(PRODUCT_DIR)/standalone-community-whitelist.json',
 									'--pre-js',
 									'src/em-preamble.js',
 									'src/em-preamble-overlay.js',
diff --git a/engine/src/em-blacklist.json b/engine/src/em-blacklist.json
@@ -0,0 +1,4 @@
+[
+    "ER13MCScriptPoint",
+    "D[0-9]+Ev"
+]
diff --git a/engine/src/em-whitelist.json b/engine/src/em-whitelist.json
@@ -1,33 +1,56 @@
 [
-    "_main",
-    "__Z13platform_mainiPPcS0_",
-    "__Z21X_main_loop_iterationv",
-    "__ZN10MCScreenDC4waitEdhh",
-
-    "_MCEventQueueDispatch",
-    "__ZL25MCEventQueueDispatchEventP7MCEvent",
-
-    "__ZN6MCCard3mupEtb",
-    "__ZN7MCStack3mupEtb",
-    "__ZN8MCButton3mupEtb",
-
-    "__ZN8MCObject26message_with_valueref_argsEP8__MCNamePv",
-    "__ZN8MCObject7messageEP8__MCNameP11MCParameterhhh",
-    "__ZN8MCObject6handleE12Handler_typeP8__MCNameP11MCParameterPS_",
-    "__ZN8MCObject10handleselfE12Handler_typeP8__MCNameP11MCParameter",
-    "__ZN8MCObject11exechandlerEP9MCHandlerP11MCParameter",
-
-    "__ZN9MCHandler4execER13MCExecContextP11MCParameter",
-
-    "__ZN5MCPut9exec_ctxtER13MCExecContext",
-
-    "__ZN13MCExecContext18EvaluateExpressionEP12MCExpression11Exec_errorsR11MCExecValue",
-
-    "__ZN7MCChunk9eval_ctxtER13MCExecContextR11MCExecValue",
-
-    "__Z10MCU_geturlR13MCExecContextP10__MCStringRPv",
-
-    "__Z10MCS_geturlP8MCObjectP10__MCString",
-
-    "_MCEmscriptenAsyncYield"
+    "^_main$",
+    "^__Z6X_initiPP10__MCStringiS1_$",
+    "^__Z6X_openiPP10__MCStringS1_$",
+    "^__ZN10MCDispatch7startupEv$",
+    "^__Z20send_startup_messageb",
+    "^__Z13platform_mainiPPcS0_$",
+    "^__Z21X_main_loop_iterationv$",
+
+    "^_MCEventQueueDispatch$",
+    "^__ZL25MCEventQueueDispatchEventP7MCEvent$",
+    "^_MCEmscriptenAsyncYield$",
+
+    "MCWidgetExec",
+    "MCWidgetOn",
+
+    "AddRunloopAction",
+    "DoRunloopActions",
+
+    "(exec|eval)_ctxt",
+    "4wait",
+    "26message_with_valueref_args",
+    "addmessage",
+    "7message",
+    "6handle",
+    "13handlepending",
+    "10handleself",
+    "11exechandler",
+    "[0-9]+close",
+    "3del",
+    "help",
+    "[0-9]+(k|m)(focus|down|up)",
+    "mdrag",
+    "paste",
+    "doubledown",
+    "layerchanged",
+    "resizeparent",
+    "sync_mfocus",
+    "toolchanged",
+    "wdoubledown",
+    "wmdragenter",
+    "wmdragleave",
+    "wmfocus_stack",
+
+    "MCExecContext[0-9]+(TryTo)?[Ee]val",
+    "MCEngineEvalValue",
+    "MCEngine\\w*Wait",
+    "MCEngineExecDispatch",
+    "MCEngineExecDo",
+    "MCEngineExecSend",
+
+    "MC[SU]_\\w*url(?!(en|de)code)",
+
+    "__ZN9MCHandler4exec",
+    "MCKeywordsExec"
 ]
diff --git a/tests/lcs/core/engine/engine.livecodescript b/tests/lcs/core/engine/engine.livecodescript
@@ -287,10 +287,6 @@ end TestVersion
 
 
 on TestCancelPendingMessage
-if the platform is "HTML" then
-TestSkip "cancel message", "bug 16076"
-exit TestCancelPendingMessage
-end if
 
 create button
 
@@ -429,10 +425,6 @@ end TestReleaseStackMessage
 
 
 on TestMilliseconds
-if the platform is "HTML" then
-TestSkip "cancel message", "bug 16076"
-exit TestMilliseconds
-end if
 
 local tTime
 
@@ -446,10 +438,6 @@ end TestMilliseconds
 
 
 on TestSeconds
-if the platform is "HTML" then
-TestSkip "cancel message", "bug 16076"
-exit TestSeconds
-end if
 
 local tTime
 
@@ -463,10 +451,6 @@ end TestSeconds
 
 
 on TestTicks
-if the platform is "HTML" then
-TestSkip "cancel message", "bug 16076"
-exit TestTicks
-end if
 
 local tTime
 
diff --git a/tests/lcs/core/interface/interface.livecodescript b/tests/lcs/core/interface/interface.livecodescript
@@ -228,10 +228,6 @@ end TestMouse
 
 
 on TestMovingControls
-   if the platform is "HTML" then
-      TestSkip "moving button", "bug 16076"
-      exit TestMovingControls
-   end if
 
    TestAssert "No moving controls", the movingControls is empty
    create button
@@ -699,10 +695,6 @@ end TestSortField
 
 
 on TestMoveControl
-   if the platform is "HTML" then
-      TestSkip "stop moving button", "bug 16076"
-      exit TestMoveControl
-   end if
 
    create button
    
diff --git a/util/emscripten-genwhitelist.py b/util/emscripten-genwhitelist.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+# This script extracts symbols from an LLVM bitcode file, and uses
+# them to generate a whitelist of symbols that should be compiled with
+# Emterpreter.
+#
+# There are two sets of regular expressions loaded from JSON files:
+# "include" expressions are used to find functions that should be
+# Emterpreted, and "exclude" expressions are used to prune out
+# functions that are too greedily included.
+
+import sys
+import os
+import subprocess
+import re
+import json
+
+# Get any relevant info from the environment
+# ------------------------------------------
+
+env_verbose = os.getenv('V', '0').strip()
+env_nm = os.getenv('NM', 'llvm-nm')
+
+# Separate out separate elements of command line
+nm = env_nm.split()
+
+# Verbosity
+verbose = (env_verbose.strip() is not '0')
+
+# Process command line options
+# ----------------------------
+#
+# Each option absorbs all subsequent arguments up to the next option.
+# Options are identified by the fact they start with "--".
+
+option = None
+options = {}
+for arg in sys.argv[1:]:
+    if arg.startswith('--'):
+        option = arg[2:]
+        options[option] = []
+    else:
+        if option is None:
+            print('ERROR: unrecognized option \'{}\''.format(arg))
+            sys.exit(1)
+        options[option].append(arg)
+
+# Generate include/exclude predicate
+# ----------------------------------
+
+def build_regexp_from_json_files(paths):
+    expressions = []
+    for p in paths:
+        with file(p) as fp:
+            expressions += json.load(fp)
+    if len(expressions) is 0:
+        return None
+    return '(' + '|'.join(expressions) + ')'
+
+exclude_re = None
+if 'exclude' in options:
+    exclude_re = build_regexp_from_json_files(options['exclude'])
+if exclude_re is not None:
+    exclude_re = re.compile(exclude_re)
+
+include_re = None
+if 'include' in options:
+    include_re = build_regexp_from_json_files(options['include'])
+if include_re is not None:
+    include_re = re.compile(include_re)
+
+def is_emterpreted(symbol):
+    if include_re is not None:
+        if include_re.search(symbol) is None:
+            return False
+    if exclude_re is not None:
+        if exclude_re.search(symbol) is not None:
+            return False
+    return True
+
+# Generate emterpreter whitelist
+# ------------------------------
+
+# Run llvm-nm and yield symbol names from its standard output
+def iter_archive_symbols(archive):
+    command = nm + ['-B', archive]
+    if verbose:
+        print(' '.join(command))
+    output = subprocess.check_output(command)
+    for line in output.splitlines():
+        line = line.strip()
+
+        if len(line) is 0:
+            # Empty line
+            continue
+
+        if line.endswith(':'):
+            # This line names a code object file (foo.o), so ignore it
+            continue
+
+        # Split the line into elements.  The layout of the line should
+        # be something like: "[<address>] <type> <name>"
+        line = line.split()
+
+        # Only "text" symbols, i.e. code defined in the current
+        # object, should be emterpreted.
+        if len(line) < 3 or line[1].lower() != 't':
+            continue
+
+        # Put an "_" before each symbol to get the name as generated
+        # by emscripten
+        yield ('_' + line[2])
+
+
+# Generate list of all symbols that should be whitelisted
+object_path = options['input'][0]
+whitelist_symbols = []
+blacklist_symbols = []
+for symbol in iter_archive_symbols(object_path):
+    if is_emterpreted(symbol):
+        whitelist_symbols.append(symbol)
+    else:
+        blacklist_symbols.append(symbol)
+
+print("Compiling {} functions".format(len(blacklist_symbols)))
+print("Emterpreting {} functions".format(len(whitelist_symbols)))
+
+# Put into a JSON file
+if len(options['output']) > 0:
+    json_path = options['output'][0]
+    with file(json_path, 'w') as fp:
+        json.dump(whitelist_symbols, fp, indent=4, separators=(',', ': '))
+if len(options['output']) > 1:
+    json_path = options['output'][1]
+    with file(json_path, 'w') as fp:
+        json.dump(blacklist_symbols, fp, indent=4, separators=(',', ': '))

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+# Enable "wait" syntax in HTML5 standalones`
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +[
 +    "ER13MCScriptPoint",
 +    "D[0-9]+Ev"
 +]