forked from FG42/FG42
761 lines
29 KiB
Nix
761 lines
29 KiB
Nix
# This file is copied from https://github.com/talyz/fromElisp which
|
|
# distributes under the compatible license MIT.
|
|
#
|
|
# We have changed this according to our needs
|
|
{ lib
|
|
, commentMaxLength ? 300
|
|
, stringMaxLength ? 3000
|
|
, characterMaxLength ? 50
|
|
, integerMaxLength ? 50
|
|
, floatMaxLength ? 50
|
|
, boolVectorMaxLength ? 50
|
|
, symbolMaxLength ? 50
|
|
, orgModeBabelCodeBlockHeaderMaxLength ? 200
|
|
, orgModeBabelCodeBlockArgMaxLength ? 30
|
|
}:
|
|
|
|
with lib;
|
|
with builtins;
|
|
|
|
let
|
|
|
|
# Create a matcher from a regex string and maximum length. A
|
|
# matcher takes a string and returns the first match produced by
|
|
# running its regex on it, or null if the match is unsuccessful,
|
|
# but only as far in as specified by maxLength.
|
|
mkMatcher = regex: maxLength:
|
|
string:
|
|
let
|
|
substr = substring 0 maxLength string;
|
|
matched = match regex substr;
|
|
in
|
|
if matched != null then head matched else null;
|
|
|
|
removeStrings = stringsToRemove: string:
|
|
let
|
|
len = length stringsToRemove;
|
|
listOfNullStrings = genList (const "") len;
|
|
in
|
|
replaceStrings stringsToRemove listOfNullStrings string;
|
|
|
|
# Split a string of elisp into individual tokens and add useful
|
|
# metadata.
|
|
tokenizeElisp' = { elisp, startLineNumber ? 1 }:
|
|
let
|
|
# These are the only characters that can not be unescaped in a
|
|
# symbol name. We match the inverse of these to get the actual
|
|
# symbol characters and use them to differentiate between
|
|
# symbols and tokens that could potentially look like symbols,
|
|
# such as numbers. Due to the leading bracket, this has to be
|
|
# placed _first_ inside a bracket expression.
|
|
notInSymbol = '']["'`,#;\\()[:space:][:cntrl:]'';
|
|
|
|
matchComment = mkMatcher "(;[^\n]*).*" commentMaxLength;
|
|
|
|
matchString = mkMatcher ''("([^"\\]|\\.)*").*'' stringMaxLength;
|
|
|
|
matchCharacter = mkMatcher ''([?]((\\[sSHMAC]-)|\\\^)*(([^][\\()]|\\[][\\()])|\\[^^SHMACNuUx0-7]|\\[uU][[:digit:]a-fA-F]+|\\x[[:digit:]a-fA-F]*|\\[0-7]{1,3}|\\N\{[^}]+}))([${notInSymbol}?]|$).*'' characterMaxLength;
|
|
|
|
matchNonBase10Integer = mkMatcher ''(#([BbOoXx]|[[:digit:]]{1,2}r)[[:digit:]a-fA-F]+)([${notInSymbol}]|$).*'' integerMaxLength;
|
|
|
|
matchInteger = mkMatcher ''([+-]?[[:digit:]]+[.]?)([${notInSymbol}]|$).*'' integerMaxLength;
|
|
|
|
matchBoolVector = mkMatcher ''(#&[[:digit:]]+"([^"\\]|\\.)*").*'' boolVectorMaxLength;
|
|
|
|
matchFloat = mkMatcher ''([+-]?([[:digit:]]*[.][[:digit:]]+|([[:digit:]]*[.])?[[:digit:]]+e([+-]?[[:digit:]]+|[+](INF|NaN))))([${notInSymbol}]|$).*'' floatMaxLength;
|
|
|
|
matchDot = mkMatcher ''([.])([${notInSymbol}]|$).*'' 2;
|
|
|
|
# Symbols can contain pretty much any characters - the general
|
|
# rule is that if nothing else matches, it's a symbol, so we
|
|
# should be pretty generous here and match for symbols last. See
|
|
# https://www.gnu.org/software/emacs/manual/html_node/elisp/Symbol-Type.html
|
|
matchSymbol =
|
|
let
|
|
symbolChar = ''([^${notInSymbol}]|\\.)'';
|
|
in
|
|
mkMatcher ''(${symbolChar}+)([${notInSymbol}]|$).*'' symbolMaxLength;
|
|
|
|
maxTokenLength = foldl' max 0 [
|
|
commentMaxLength
|
|
stringMaxLength
|
|
characterMaxLength
|
|
integerMaxLength
|
|
floatMaxLength
|
|
boolVectorMaxLength
|
|
symbolMaxLength
|
|
];
|
|
|
|
# Fold over all the characters in a string, checking for
|
|
# matching tokens.
|
|
#
|
|
# The implementation is a bit obtuse, for optimization reasons:
|
|
# nix doesn't have tail-call optimization, thus a strict fold,
|
|
# which should essentially force a limited version of tco when
|
|
# iterating a list, is our best alternative.
|
|
#
|
|
# The string read from is split into a list of its constituent
|
|
# characters, which is then folded over. Each character is then
|
|
# used to determine a likely matching regex "matcher" to run on
|
|
# the string, starting at the position of the aforementioned
|
|
# character. When an appropriate matcher has been found and run
|
|
# successfully on the string, its result is added to
|
|
# `state.acc`, a list of all matched tokens. The length of the
|
|
# matched token is determined and passed on to the following
|
|
# iteration through `state.skip`. If `state.skip` is positive,
|
|
# nothing will be done in the current iteration, except
|
|
# decrementing `state.skip` for the next one: this skips the
|
|
# characters we've already matched. At each iteration,
|
|
# `state.pos` is also incremented, to keep track of the current
|
|
# string position.
|
|
#
|
|
# The order of the matches is significant - matchSymbol will,
|
|
# for example, also match numbers and characters, so we check
|
|
# for symbols last.
|
|
readToken = state: char:
|
|
let
|
|
rest = substring state.pos maxTokenLength elisp;
|
|
comment = matchComment rest;
|
|
character = matchCharacter rest;
|
|
nonBase10Integer = matchNonBase10Integer rest;
|
|
integer = matchInteger rest;
|
|
float = matchFloat rest;
|
|
function = matchFunction rest;
|
|
boolVector = matchBoolVector rest;
|
|
string = matchString rest;
|
|
dot = matchDot rest;
|
|
symbol = matchSymbol rest;
|
|
in
|
|
if state.skip > 0 then
|
|
state // {
|
|
pos = state.pos + 1;
|
|
skip = state.skip - 1;
|
|
line = if char == "\n" then state.line + 1 else state.line;
|
|
}
|
|
else if char == "\n" then
|
|
let
|
|
mod = state.line / 1000;
|
|
newState = {
|
|
pos = state.pos + 1;
|
|
line = state.line + 1;
|
|
inherit mod;
|
|
};
|
|
in
|
|
state // (
|
|
# Force evaluation of old state every 1000 lines. Nix
|
|
# doesn't have a modulo builtin, so we have to save
|
|
# the result of an integer division and compare
|
|
# between runs.
|
|
if mod > state.mod then
|
|
seq state.acc newState
|
|
else
|
|
newState
|
|
)
|
|
else if elem char [ " " "\t" "\r" ] then
|
|
state // {
|
|
pos = state.pos + 1;
|
|
inherit (state) line;
|
|
}
|
|
else if char == ";" then
|
|
if comment != null then
|
|
state // {
|
|
pos = state.pos + 1;
|
|
skip = (stringLength comment) - 1;
|
|
}
|
|
else throw "Unrecognized token on line ${toString state.line}: ${rest}"
|
|
else if char == "(" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "openParen"; value = "("; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if char == ")" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "closeParen"; value = ")"; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if char == "[" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "openBracket"; value = "["; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if char == "]" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "closeBracket"; value = "]"; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if char == "'" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "quote"; value = "'"; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if char == ''"'' then
|
|
if string != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "string"; value = string; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength string) - 1;
|
|
}
|
|
else throw "Unrecognized token on line ${toString state.line}: ${rest}"
|
|
else if char == "#" then
|
|
let nextChar = substring 1 1 rest;
|
|
in
|
|
if nextChar == "'" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "function"; value = "#'"; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = 1;
|
|
}
|
|
else if nextChar == "&" then
|
|
if boolVector != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "boolVector"; value = boolVector; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength boolVector) - 1;
|
|
}
|
|
else throw "Unrecognized token on line ${toString state.line}: ${rest}"
|
|
else if nextChar == "s" then
|
|
if substring 2 1 rest == "(" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "record"; value = "#s"; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = 1;
|
|
}
|
|
else throw "List must follow #s in record on line ${toString state.line}: ${rest}"
|
|
else if nextChar == "[" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "byteCode"; value = "#"; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if nonBase10Integer != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "nonBase10Integer"; value = nonBase10Integer; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength nonBase10Integer) - 1;
|
|
}
|
|
else throw "Unrecognized token on line ${toString state.line}: ${rest}"
|
|
else if elem char [ "+" "-" "." "0" "1" "2" "3" "4" "5" "6" "7" "8" "9" ] then
|
|
if integer != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "integer"; value = integer; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength integer) - 1;
|
|
}
|
|
else if float != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "float"; value = float; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength float) - 1;
|
|
}
|
|
else if dot != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "dot"; value = dot; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength dot) - 1;
|
|
}
|
|
else if symbol != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "symbol"; value = symbol; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength symbol) - 1;
|
|
}
|
|
else throw "Unrecognized token on line ${toString state.line}: ${rest}"
|
|
else if char == "?" then
|
|
if character != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "character"; value = character; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength character) - 1;
|
|
}
|
|
else throw "Unrecognized token on line ${toString state.line}: ${rest}"
|
|
else if char == "`" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "backquote"; value = "`"; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if char == "," then
|
|
if substring 1 1 rest == "@" then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "slice"; value = ",@"; inherit (state) line; }];
|
|
skip = 1;
|
|
pos = state.pos + 1;
|
|
}
|
|
else
|
|
state // {
|
|
acc = state.acc ++ [{ type = "expand"; value = ","; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
}
|
|
else if symbol != null then
|
|
state // {
|
|
acc = state.acc ++ [{ type = "symbol"; value = symbol; inherit (state) line; }];
|
|
pos = state.pos + 1;
|
|
skip = (stringLength symbol) - 1;
|
|
}
|
|
else
|
|
throw "Unrecognized token on line ${toString state.line}: ${rest}";
|
|
in
|
|
(builtins.foldl' readToken { acc = [ ]; pos = 0; skip = 0; line = startLineNumber; mod = 0; } (stringToCharacters elisp)).acc;
|
|
|
|
tokenizeElisp = elisp:
|
|
tokenizeElisp' { inherit elisp; };
|
|
|
|
# Produce an AST from a list of tokens produced by `tokenizeElisp`.
|
|
parseElisp' = tokens:
|
|
let
|
|
# Convert literal value tokens in a flat list to their
|
|
# corresponding nix representation.
|
|
parseValues = tokens:
|
|
map
|
|
(token:
|
|
if token.type == "string" then
|
|
token // {
|
|
value = substring 1 (stringLength token.value - 2) token.value;
|
|
}
|
|
else if token.type == "integer" then
|
|
token // {
|
|
value = fromJSON (removeStrings [ "+" "." ] token.value);
|
|
}
|
|
else if token.type == "symbol" && token.value == "t" then
|
|
token // {
|
|
value = true;
|
|
}
|
|
else if token.type == "float" then
|
|
let
|
|
initial = head (match "([+-]?([[:digit:]]*[.])?[[:digit:]]+(e([+-]?[[:digit:]]+|[+](INF|NaN)))?)" token.value);
|
|
isSpecial = (match "(.+(e[+](INF|NaN)))" initial) != null;
|
|
withoutPlus = removeStrings [ "+" ] initial;
|
|
withPrefix =
|
|
if substring 0 1 withoutPlus == "." then
|
|
"0" + withoutPlus
|
|
else if substring 0 2 withoutPlus == "-." then
|
|
"-0" + removeStrings [ "-" ] withoutPlus
|
|
else
|
|
withoutPlus;
|
|
in
|
|
if !isSpecial && withPrefix != null then
|
|
token // {
|
|
value = fromJSON withPrefix;
|
|
}
|
|
else
|
|
token
|
|
else
|
|
token
|
|
)
|
|
tokens;
|
|
|
|
# Convert pairs of opening and closing tokens to their
|
|
# respective collection types, i.e. lists and vectors. Also,
|
|
# normalize the forms of nil, which can be written as either
|
|
# `nil` or `()`, to empty lists.
|
|
#
|
|
# For performance reasons, this is implemented as a fold over
|
|
# the list of tokens, rather than as a recursive function. To
|
|
# keep track of list depth when sublists are parsed, a list,
|
|
# `state.acc`, is used as a stack. When entering a sublist, an
|
|
# empty list is pushed to `state.acc`, and items in the sublist
|
|
# are subsequently added to this list. When exiting the list,
|
|
# `state.acc` is popped and the completed list is added to the
|
|
# new head of `state.acc`, i.e. the outer list, which we were
|
|
# parsing before entering the sublist.
|
|
#
|
|
# Evaluation of old state is forced with `seq` in a few places,
|
|
# because nix otherwise keeps it around, eventually resulting in
|
|
# a stack overflow.
|
|
parseCollections = tokens:
|
|
let
|
|
parseToken = state: token:
|
|
let
|
|
openColl = if token.type == "openParen" then "list" else if token.type == "openBracket" then "vector" else null;
|
|
closeColl = if token.type == "closeParen" then "list" else if token.type == "closeBracket" then "vector" else null;
|
|
in
|
|
if openColl != null then
|
|
state // {
|
|
acc = [ [ ] ] ++ seq (head state.acc) state.acc;
|
|
inColl = [ openColl ] ++ state.inColl;
|
|
depth = state.depth + 1;
|
|
line = [ token.line ] ++ state.line;
|
|
}
|
|
else if closeColl != null then
|
|
if (head state.inColl) == closeColl then
|
|
let
|
|
outerColl = elemAt state.acc 1;
|
|
currColl = {
|
|
type = closeColl;
|
|
value = head state.acc;
|
|
line = head state.line;
|
|
inherit (state) depth;
|
|
};
|
|
rest = tail (tail state.acc);
|
|
in
|
|
state // seq state.acc {
|
|
acc = [ (outerColl ++ [ currColl ]) ] ++ rest;
|
|
inColl = tail state.inColl;
|
|
depth = state.depth - 1;
|
|
line = tail state.line;
|
|
}
|
|
else
|
|
throw "Unmatched ${token.type} on line ${toString token.line}"
|
|
else if token.type == "symbol" && token.value == "nil" then
|
|
let
|
|
currColl = head state.acc;
|
|
rest = tail state.acc;
|
|
emptyList = {
|
|
type = "list";
|
|
depth = state.depth + 1;
|
|
value = [ ];
|
|
};
|
|
in
|
|
state // seq currColl { acc = [ (currColl ++ [ emptyList ]) ] ++ rest; }
|
|
else
|
|
let
|
|
currColl = head state.acc;
|
|
rest = tail state.acc;
|
|
in
|
|
state // seq currColl { acc = [ (currColl ++ [ token ]) ] ++ rest; };
|
|
in
|
|
head (builtins.foldl' parseToken { acc = [ [ ] ]; inColl = [ null ]; depth = -1; line = [ ]; } tokens).acc;
|
|
|
|
# Handle dotted pair notation, a syntax where the car and cdr
|
|
# are represented explicitly. See
|
|
# https://www.gnu.org/software/emacs/manual/html_node/elisp/Dotted-Pair-Notation.html#Dotted-Pair-Notation
|
|
# for more info.
|
|
#
|
|
# This mainly entails handling lists that are the cdrs of a
|
|
# dotted pairs, concatenating the lexically distinct lists into
|
|
# the logical list they actually represent.
|
|
#
|
|
# For example:
|
|
# (a . (b . (c . nil))) -> (a b c)
|
|
parseDots = tokens:
|
|
let
|
|
parseToken = state: token:
|
|
if token.type == "dot" then
|
|
if state.inList then
|
|
state // {
|
|
dotted = true;
|
|
depthReduction = state.depthReduction + 1;
|
|
}
|
|
else
|
|
throw ''"Dotted pair notation"-dot outside list on line ${toString token.line}''
|
|
else if isList token.value then
|
|
let
|
|
collectionContents = foldl' parseToken
|
|
{
|
|
acc = [ ];
|
|
dotted = false;
|
|
inList = token.type == "list";
|
|
inherit (state) depthReduction;
|
|
}
|
|
token.value;
|
|
in
|
|
state // {
|
|
acc = state.acc ++ (
|
|
if state.dotted then
|
|
collectionContents.acc
|
|
else
|
|
[
|
|
(token // {
|
|
value = collectionContents.acc;
|
|
depth = token.depth - state.depthReduction;
|
|
})
|
|
]
|
|
);
|
|
dotted = false;
|
|
}
|
|
else
|
|
state // {
|
|
acc = state.acc ++ [ token ];
|
|
};
|
|
in
|
|
(foldl' parseToken { acc = [ ]; dotted = false; inList = false; depthReduction = 0; } tokens).acc;
|
|
|
|
parseQuotes = tokens:
|
|
let
|
|
parseToken = state: token':
|
|
let
|
|
token =
|
|
if isList token'.value then
|
|
token' // {
|
|
value = (foldl' parseToken { acc = [ ]; quotes = [ ]; } token'.value).acc;
|
|
}
|
|
else
|
|
token';
|
|
in
|
|
if elem token.type [ "quote" "expand" "slice" "backquote" "function" "record" "byteCode" ] then
|
|
state // {
|
|
quotes = [ token ] ++ state.quotes;
|
|
}
|
|
else if state.quotes != [ ] then
|
|
let
|
|
quote = value: token:
|
|
token // {
|
|
inherit value;
|
|
};
|
|
quotedValue = foldl' quote token state.quotes;
|
|
in
|
|
state // {
|
|
acc = state.acc ++ [ quotedValue ];
|
|
quotes = [ ];
|
|
}
|
|
else
|
|
state // {
|
|
acc = state.acc ++ [ token ];
|
|
};
|
|
in
|
|
(foldl' parseToken { acc = [ ]; quotes = [ ]; } tokens).acc;
|
|
in
|
|
parseQuotes (parseDots (parseCollections (parseValues tokens)));
|
|
|
|
parseElisp = elisp:
|
|
parseElisp' (tokenizeElisp elisp);
|
|
|
|
fromElisp' = ast:
|
|
let
|
|
readObject = object:
|
|
if isList object.value then
|
|
map readObject object.value
|
|
else if object.type == "quote" then
|
|
[ "quote" (readObject object.value) ]
|
|
else if object.type == "backquote" then
|
|
[ "`" (readObject object.value) ]
|
|
else if object.type == "expand" then
|
|
[ "," (readObject object.value) ]
|
|
else if object.type == "slice" then
|
|
[ ",@" (readObject object.value) ]
|
|
else if object.type == "function" then
|
|
[ "#'" (readObject object.value) ]
|
|
else if object.type == "byteCode" then
|
|
[ "#" ] ++ (readObject object.value)
|
|
else if object.type == "record" then
|
|
[ "#s" ] ++ (readObject object.value)
|
|
else
|
|
object.value;
|
|
in
|
|
map readObject ast;
|
|
|
|
fromElisp = elisp:
|
|
fromElisp' (parseElisp elisp);
|
|
|
|
# Parse an Org mode babel text and return a list of all code blocks
|
|
# with metadata.
|
|
#
|
|
# The general operation is similar to tokenizeElisp', so check its
|
|
# documentation for a more in-depth description.
|
|
#
|
|
# As in tokenizeElisp', the string read from is split into a list of
|
|
# its constituent characters, which is then folded over. Each
|
|
# character is then used to determine whether we should try to run a
|
|
# match for a `#+begin_src` header or `#+end_src` footer, starting
|
|
# at the position of the aforementioned character. These matches
|
|
# should only be attempted if the current character is `#` and the
|
|
# line has nothing but whitespace before it (noted by
|
|
# `state.leadingWhitespace`).
|
|
#
|
|
# When an appropriate match for a header has been found, its
|
|
# arguments are further parsed and the result is put into the code
|
|
# block's `flags` attribute. The subsequent characters are added to
|
|
# the code block's `body` attribute, until a footer is successfully
|
|
# matched and the block is added to the list of parsed blocks,
|
|
# `state.acc`.
|
|
parseOrgModeBabel = text:
|
|
let
|
|
matchBeginCodeBlock = mkMatcher "(#[+][bB][eE][gG][iI][nN]_[sS][rR][cC])([[:space:]]+).*" orgModeBabelCodeBlockHeaderMaxLength;
|
|
matchHeader = mkMatcher "(#[+][hH][eE][aA][dD][eE][rR][sS]?:)([[:space:]]+).*" orgModeBabelCodeBlockHeaderMaxLength;
|
|
matchEndCodeBlock = mkMatcher "(#[+][eE][nN][dD]_[sS][rR][cC][^\n]*).*" orgModeBabelCodeBlockHeaderMaxLength;
|
|
|
|
matchBeginCodeBlockLang = match "([[:blank:]]*)([[:alnum:]][[:alnum:]-]*).*";
|
|
matchBeginCodeBlockFlags = mkMatcher "([^\n]*[\n]).*" orgModeBabelCodeBlockHeaderMaxLength;
|
|
|
|
parseToken = state: char:
|
|
let
|
|
rest = substring state.pos orgModeBabelCodeBlockHeaderMaxLength text;
|
|
beginCodeBlock = matchBeginCodeBlock rest;
|
|
header = matchHeader rest;
|
|
endCodeBlock = matchEndCodeBlock rest;
|
|
language = matchBeginCodeBlockLang rest;
|
|
flags = matchBeginCodeBlockFlags rest;
|
|
|
|
force = expr: seq state.pos (seq state.line expr);
|
|
in
|
|
if state.skip > 0 then
|
|
state // force {
|
|
pos = state.pos + 1;
|
|
skip = state.skip - 1;
|
|
line = if char == "\n" then state.line + 1 else state.line;
|
|
leadingWhitespace = char == "\n" || (state.leadingWhitespace && elem char [ " " "\t" "\r" ]);
|
|
}
|
|
else if char == "#" && state.leadingWhitespace && !state.readBody && beginCodeBlock != null then
|
|
state // {
|
|
pos = state.pos + 1;
|
|
skip = (stringLength beginCodeBlock) - 1;
|
|
leadingWhitespace = false;
|
|
readLanguage = true;
|
|
}
|
|
else if char == "#" && state.leadingWhitespace && !state.readBody && header != null then
|
|
state // {
|
|
pos = state.pos + 1;
|
|
skip = (stringLength header) - 1;
|
|
leadingWhitespace = false;
|
|
readFlags = true;
|
|
}
|
|
else if state.readLanguage then
|
|
if language != null then
|
|
state // {
|
|
block = state.block // {
|
|
language = elemAt language 1;
|
|
};
|
|
pos = state.pos + 1;
|
|
skip = (foldl' (total: string: total + (stringLength string)) 0 language) - 1;
|
|
leadingWhitespace = false;
|
|
readLanguage = false;
|
|
readFlags = true;
|
|
readBody = true;
|
|
}
|
|
else throw "Language missing or invalid for code block on line ${toString state.line}!"
|
|
else if state.readFlags then
|
|
if flags != null then
|
|
let
|
|
parseFlag = state: item:
|
|
let
|
|
prefix = if isString item then substring 0 1 item else null;
|
|
in
|
|
if elem prefix [ ":" "-" "+" ] then
|
|
state // {
|
|
acc = state.acc // { ${item} = true; };
|
|
flag = item;
|
|
}
|
|
else if state.flag != null then
|
|
state // {
|
|
acc = state.acc // { ${state.flag} = item; };
|
|
flag = null;
|
|
}
|
|
else
|
|
state;
|
|
in
|
|
state // {
|
|
block = state.block // {
|
|
flags =
|
|
(foldl'
|
|
parseFlag
|
|
{
|
|
acc = state.block.flags;
|
|
flag = null;
|
|
inherit (state) line;
|
|
}
|
|
(fromElisp flags)).acc;
|
|
startLineNumber = state.line + 1;
|
|
};
|
|
pos = state.pos + 1;
|
|
skip = (stringLength flags) - 1;
|
|
line = if char == "\n" then state.line + 1 else state.line;
|
|
leadingWhitespace = char == "\n";
|
|
readFlags = false;
|
|
}
|
|
else throw "Arguments malformed for code block on line ${toString state.line}!"
|
|
else if char == "#" && state.leadingWhitespace && endCodeBlock != null then
|
|
state // {
|
|
acc = state.acc ++ [ state.block ];
|
|
block = {
|
|
language = null;
|
|
body = "";
|
|
flags = { };
|
|
};
|
|
pos = state.pos + 1;
|
|
skip = (stringLength endCodeBlock) - 1;
|
|
leadingWhitespace = false;
|
|
readBody = false;
|
|
}
|
|
else if state.readBody then
|
|
let
|
|
mod = state.pos / 100;
|
|
newState = {
|
|
block = state.block // {
|
|
body = state.block.body + char;
|
|
};
|
|
inherit mod;
|
|
pos = state.pos + 1;
|
|
line = if char == "\n" then state.line + 1 else state.line;
|
|
leadingWhitespace = char == "\n" || (state.leadingWhitespace && elem char [ " " "\t" "\r" ]);
|
|
};
|
|
in
|
|
if mod > state.mod then
|
|
state // seq state.block.body (force newState)
|
|
else
|
|
state // newState
|
|
else
|
|
state // force {
|
|
pos = state.pos + 1;
|
|
line = if char == "\n" then state.line + 1 else state.line;
|
|
leadingWhitespace = char == "\n" || (state.leadingWhitespace && elem char [ " " "\t" "\r" ]);
|
|
};
|
|
in
|
|
(foldl'
|
|
parseToken
|
|
{
|
|
acc = [ ];
|
|
mod = 0;
|
|
pos = 0;
|
|
skip = 0;
|
|
line = 1;
|
|
block = {
|
|
language = null;
|
|
body = "";
|
|
flags = { };
|
|
};
|
|
leadingWhitespace = true;
|
|
readLanguage = false;
|
|
readFlags = false;
|
|
readBody = false;
|
|
}
|
|
(stringToCharacters text)).acc;
|
|
|
|
# Run tokenizeElisp' on all Elisp code blocks (with `:tangle yes`
|
|
# set) from an Org mode babel text. If the block doesn't have a
|
|
# `tangle` attribute, it's determined by `defaultArgs`.
|
|
tokenizeOrgModeBabelElisp' = defaultArgs: text:
|
|
let
|
|
codeBlocks =
|
|
filter
|
|
(block:
|
|
let
|
|
tangle = toLower (block.flags.":tangle" or defaultArgs.":tangle" or "no");
|
|
language = toLower block.language;
|
|
in
|
|
elem language [ "elisp" "emacs-lisp" ]
|
|
&& elem tangle [ "yes" ''"yes"'' ])
|
|
(parseOrgModeBabel text);
|
|
in
|
|
foldl'
|
|
(result: codeBlock:
|
|
result ++ (tokenizeElisp' {
|
|
elisp = codeBlock.body;
|
|
inherit (codeBlock) startLineNumber;
|
|
})
|
|
)
|
|
[ ]
|
|
codeBlocks;
|
|
|
|
tokenizeOrgModeBabelElisp =
|
|
tokenizeOrgModeBabelElisp' {
|
|
":tangle" = "no";
|
|
};
|
|
|
|
parseOrgModeBabelElisp' = defaultArgs: text:
|
|
parseElisp' (tokenizeOrgModeBabelElisp' defaultArgs text);
|
|
|
|
parseOrgModeBabelElisp = text:
|
|
parseElisp' (tokenizeOrgModeBabelElisp text);
|
|
|
|
fromOrgModeBabelElisp' = defaultArgs: text:
|
|
fromElisp' (parseOrgModeBabelElisp' defaultArgs text);
|
|
|
|
fromOrgModeBabelElisp = text:
|
|
fromElisp' (parseOrgModeBabelElisp text);
|
|
|
|
in
|
|
{
|
|
inherit tokenizeElisp parseElisp fromElisp;
|
|
inherit tokenizeElisp' parseElisp' fromElisp';
|
|
inherit tokenizeOrgModeBabelElisp parseOrgModeBabelElisp fromOrgModeBabelElisp;
|
|
inherit tokenizeOrgModeBabelElisp' parseOrgModeBabelElisp' fromOrgModeBabelElisp';
|
|
}
|