never executed always true always false
1 {-# LANGUAGE LambdaCase #-}
2 {-# LANGUAGE RecordWildCards #-}
3
4 {-|
5 Description : Brainfuck compilation to IR
6 Copyright : (c) Sebastian Galkin, 2018
7 License : GPL-3
8
9 In this module we:
10
11 - Convert 'Text' into a Brainfuck intermediate representation (IR) consisting of lists of 'Op's.
12 - Provide optimization rules to speed up IR execution.
13 - Parse compiler command line options
14 -}
15 module HBF.Compiler
16 ( module HBF.Compiler
17 -- * Reexport from "BFP.Parser"
18 , BFP.ParseError
19 ) where
20
21 import Control.Monad (when)
22 import Control.Monad.Trans.State.Lazy
23 import qualified Data.Binary as B
24 import Data.ByteString.Lazy (ByteString)
25 import Data.Coerce (coerce)
26 import Data.Foldable (traverse_)
27 import Data.Functor.Identity (Identity)
28 import Data.Maybe (fromMaybe)
29 import Data.Semigroup (Semigroup (..), (<>))
30 import Data.Text.Lazy (Text)
31 import qualified Data.Text.Lazy.IO as TIO
32 import Data.Tuple (swap)
33 import Options.Applicative
34 import System.Environment (getArgs)
35 import System.FilePath ((-<.>))
36 import qualified Text.Parsec as Parsec
37 import Text.Parsec.Pos (initialPos)
38
39 import qualified HBF.Parser as BFP
40 import HBF.Types
41
42 -- * Compilation
43 -- | Encode the compiled file into the given path.
44 saveCompilerOutput :: Program Optimized -> FilePath -> IO ()
45 saveCompilerOutput = flip B.encodeFile . instructions
46
47 -- | Use the given 'CompilerOptions' to parse, compile and optimize the text representation of a
48 -- Brainfuck program into the IR. 'cOptsSource' and 'cOptsOut' in the compiler options are ignored.
49 inMemoryCompile ::
50 CompilerOptions
51 -> Text
52 -> Either BFP.ParseError (Program Optimized, CompilationSummary)
53 inMemoryCompile opts code =
54 (\p -> (p, summarizeCompilation p)) . optimize opts <$> BFP.parseProgram code
55
56 -- | Compilation summary for the user. It contains overview information and
57 -- statistics about the compilation result.
58 newtype CompilationSummary = CompilationSummary
59 { compNumInstructions :: Int
60 } deriving (Show)
61
62 -- | Summarize a compiled program creating the 'CompilationSummary'
63 summarizeCompilation :: Program Optimized -> CompilationSummary
64 summarizeCompilation = CompilationSummary . length . instructions
65
66 -- | Use 'CompilerOptions' to read, compile, optimize, and save a program from/to the filesystem.
67 -- Input and output files are provided by 'cOptsSource' and 'cOptsOut'.
68 compile :: CompilerOptions -> IO (Either BFP.ParseError CompilationSummary)
69 compile opts@CompilerOptions {..} = do
70 when cOptsVerbose $ do
71 putStrLn "Compiler options:"
72 print opts
73 compileResult <- inMemoryCompile opts <$> TIO.readFile cOptsSource
74 either
75 (return . Left)
76 (\p -> save p >> (return . Right . snd) p)
77 compileResult
78 where
79 outPath = fromMaybe (cOptsSource -<.> "bfc") cOptsOut
80 save (program, _) = saveCompilerOutput program outPath
81
82 -- | Apply optimizations to the 'Unoptimized' program turning. The optimizations that
83 -- will be available are the ones specified by the 'CompilerOptions' given.
84 optimize :: CompilerOptions -> Program Unoptimized -> Program Optimized
85 optimize CompilerOptions {..} p = foldl (flip ($)) base optimizations
86 where
87 base = toIR p
88 opt condition f =
89 if condition
90 then f
91 else id
92 optimizations =
93 [ opt cOptsClearLoopOptimization clearOpt
94 , opt cOptsMulOptimization mulOpt
95 , opt cOptsScanOptimization scanOpt
96 , opt cOptsOffsetInstructionsOptimization offsetInstructionOpt
97 , opt cOptsFusionOptimization fusionOpt
98 ]
99
100 -- | Given a parsed program, turn it into an optimized one, but with the null optimization.
101 -- Effectively this is only a type change.
102 toIR :: Program Unoptimized -> Program Optimized
103 toIR = coerce
104
105 -- * Optimization
106 -- | Helper type to apply the Fuse optimization using a 'Monoid'.
107 newtype FusedProgram = Fused
108 { unfused :: Program Optimized
109 } deriving (Show)
110
111 -- | This 'Semigroup' for 'FusedProgram' does all the fusion optimization work.
112 -- When two contiguous optimizations can be fused into one, '<>' will reduce the
113 -- size of the list in the 'FusedProgram'.
114 --
115 -- Examples of fusable operations:
116 --
117 -- - (Inc a offset) (Inc b offset) -> (Inc (a+b) offset)
118 -- - (Move 3 offset) (Move (-3) offset) -> NoOp
119 -- - (Clear offset) (Clear offset) -> Clear offset
120 -- - (Scan Up offset) (Scan _ offset') -> Scan Up offset
121 instance Semigroup FusedProgram where
122 Fused (Program p1) <> Fused (Program p2) = Fused $ Program $ fuse p1 p2
123 where
124 fuse :: [Op] -> [Op] -> [Op]
125 fuse [] ops = ops
126 fuse ops [] = ops
127 fuse [op1] (op2:more) = join op1 op2 ++ more
128 fuse (op1:more) ops2 = op1 : fuse more ops2
129 join :: Op -> Op -> [Op]
130 join (Inc a n) (Inc b m)
131 | n == m = ifNotZero (flip Inc n) $ a + b
132 join (Move a) (Move b) = ifNotZero Move $ a + b
133 join (In a n) (In b m)
134 | n == m = ifNotZero (flip In n) $ a + b
135 join (Out a n) (Out b m)
136 | n == m = ifNotZero (flip Out n) $ a + b
137 join (Clear n) (Clear m)
138 | n == m = [Clear n]
139 -- once a scan is found, another one won't move the pointer
140 join (Scan Up o1) (Scan _ o2)
141 | o1 == o2 = [Scan Up o1]
142 join (Scan Down o1) (Scan _ o2)
143 | o1 == o2 = [Scan Down o1]
144 join a b = [a, b]
145 ifNotZero f n = [f n | n /= 0]
146
147 -- | Use the 'Semigroup' instance and an empty program as 'mempty'.
148 instance Monoid FusedProgram where
149 mempty = Fused mempty
150 mappend = (<>)
151
152 -- | Apply the fusion optimization using the 'FusedProgram' 'Monoid' instance.
153 --
154 -- The fusion optimization consist of turning multiple instructions into one. For example
155 -- if the original Brainfuck code contains '++++', this would be parsed as
156 --
157 -- @
158 --'Program' ['Inc' 1 0, 'Inc' 1 0, 'Inc' 1 0, 'Inc' 1 0]
159 -- @
160 --
161 -- but it would be fused to a single IR instruction: @Inc 4 0@.
162 --
163 -- >>> fusionOpt $ Program [Inc 1 0, Inc 1 0, Inc 1 0, Inc 1 0]
164 -- [Inc 4 0]
165 --
166 -- Similarly, other instructions,
167 -- like 'Move', 'In', 'Out', 'Clear' and 'Scan' can be fused as long as the offset at which they
168 -- must be applied is the same.
169 --
170 -- Non fusable operation remain unchanged:
171 --
172 -- >>> fusionOpt $ Program [Inc 1 0, Inc 1 1]
173 -- [Inc 1 0,Inc 1 1]
174 fusionOpt :: Program Optimized -> Program Optimized
175 fusionOpt = unfused . foldMap (Fused . Program . optimizeIn) . instructions
176 where
177 optimizeIn (Loop as) = [Loop inner | not (null inner)]
178 where
179 inner = instructions $ fusionOpt $ Program as
180 optimizeIn other = [other]
181
182 -- | Helper function used to implement optimizations
183 -- Iterate over all 'Program' instructions searching for 'Loop's. For each 'Loop'
184 -- apply 'f'. If 'f' returns a list of new operations, replace the original loop with
185 -- the new instructions. If 'f' returns 'Nothing', process recursively the loop instructions.
186 liftLoop :: ([Op] -> Maybe [Op]) -> Program o -> Program o
187 liftLoop f = Program . (>>= g) . instructions
188 where
189 g :: Op -> [Op]
190 g (Loop ops) =
191 fromMaybe ((: []) . Loop . instructions . liftLoop f $ Program ops) $
192 f ops
193 g other = [other]
194
195 -- | Basic optimization that turns the loop @[-]@ into a single instruction 'Clear'.
196 -- Useful because clearing a memory position is a pretty common operation in Brainfuck and
197 -- very expensive if treated as a loop.
198 --
199 -- >>> :set -XOverloadedStrings
200 -- >>> Right (res, _) = inMemoryCompile defaultCompilerOptions "[-]"
201 -- >>> res
202 -- [Clear 0]
203 clearOpt :: Program Optimized -> Program Optimized
204 clearOpt = liftLoop onLoops
205 where
206 onLoops :: [Op] -> Maybe [Op]
207 onLoops [Inc (-1) 0] = Just [Clear 0]
208 onLoops _ = Nothing
209
210 -- | Copy and multiply optimization. A very common usage of loops is to copy the value of a memory
211 -- position to a different: @[->>+<<]@ this will move the contents of the current memory position
212 -- to places to the right, also clearing the original position to zero. If we change the number of @+@
213 -- operations we get multiplication, if we have several groups of @++..@ operations we get multiple copies.
214 -- In the general case, for example:
215 --
216 -- >>> :set -XOverloadedStrings
217 -- >>> Right (res, _) = inMemoryCompile defaultCompilerOptions "[->+>++>++++<<<]"
218 -- >>> res
219 -- [Mul 1 0 1,Mul 2 0 2,Mul 4 0 3,Clear 0]
220 --
221 -- The original Brainfuck copies the current position one place to the right, doubles
222 -- the current position two places to the right, and quadruples the current position three places to the right;
223 -- finally zeroing the current position. With the mul optimization in this function, all that loop would be
224 -- replaced by 4 instructions.
225 mulOpt :: Program Optimized -> Program Optimized
226 mulOpt = liftLoop onLoops
227 where
228 onLoops :: [Op] -> Maybe [Op]
229 onLoops ops = makeOp <$> eitherToMaybe (Parsec.parse mulP "" ops)
230 where
231 makeOp :: [(MulFactor, MemOffset)] -> [Op]
232 makeOp = (++ [Clear 0]) . snd . foldl it (0, [])
233 where
234 it (totalOff, res) (fact, off) =
235 (totalOff + off, res ++ [Mul fact 0 (off + totalOff)]) -- todo very inefficient foldr
236
237 -- | Implement the scan optimization. Another common operation in Brainfuck is to search for the first zero
238 -- in the neighboring memory, either to the right or to the left @[>]@ or @[<]@. These loops can be replaced
239 -- for a more optimal search, represented as a single @'Scan' 'Up'@ or @'Scan' 'Down'@ instruction.
240 --
241 -- >>> scanOpt $ Program [Loop [Move 1]]
242 -- [Scan Up 0]
243 scanOpt :: Program Optimized -> Program Optimized
244 scanOpt = liftLoop onLoops
245 where
246 onLoops :: [Op] -> Maybe [Op]
247 onLoops [Move 1] = Just [Scan Up 0]
248 onLoops [Move (-1)] = Just [Scan Down 0]
249 onLoops _ = Nothing
250
251 -- | Helper datastructure to implement a stateful transformation in 'offsetInstructionOpt'.
252 data OffsetState = OffSt
253 { stOptimized :: [Op] -- ^ The optimized program so far
254 , stBatch :: [Op] -- ^ The current batch of instructions being optimized (between loops)
255 , stOffset :: MemOffset -- ^ The current offset since the last loop
256 } deriving (Show)
257
258 -- | Start state for 'offsetInstructionOpt'.
259 emptyState :: OffsetState
260 emptyState = OffSt [] [] 0
261
262 -- | Implement the offset instruction optimization. This is probably the most complex
263 -- optimization implemented in the library.
264 --
265 -- In streams of instructions between loops, there is no need to keep updating the current position
266 -- if we can keep track of where the different operations should be applied. This is a trade-off
267 -- of time (not updating the pointer) by space (keeping track of the offset in every operation).
268 -- For example the following unoptimized code
269 --
270 --
271 -- >>> offsetInstructionOpt $ Program [Loop [], Move 1, Inc 1 0, Move 2, Clear 0, Mul 2 0 1, Loop []]
272 -- [Loop [],Inc 1 1,Clear 3,Mul 2 3 1,Move 3,Loop []]
273 --
274 -- And the optimization eliminated one 'Move' instruction. In general, for larger programs the gain
275 -- will be more noticeable.
276 --
277 -- An important detail to take into account is that 'Scan' operations break the stream of operations
278 -- that can be optimized together, and turn the accumulated offset back to zero:
279 --
280 -- >>> offsetInstructionOpt $ Program [Loop [], Move 1, Inc 1 0, Scan Up 0, Inc 0 2, Loop []]
281 -- [Loop [],Inc 1 1,Scan Up 1,Inc 0 2,Loop []]
282 offsetInstructionOpt :: Program Optimized -> Program Optimized
283 offsetInstructionOpt -- We implement this as a stateful computation for code clarity
284 =
285 Program .
286 stOptimized .
287 (`execState` emptyState) .
288 (*> finishLastBatch) . traverse_ processOp . instructions
289 where
290 processOp :: Op -> State OffsetState ()
291 processOp (Loop l) = do
292 let newLoop = Loop (instructions $ offsetInstructionOpt (Program l))
293 finishBatch
294 modify $ \s@OffSt {..} -> s {stOptimized = newLoop : stOptimized}
295 processOp (Move n) = get >>= \s -> put s {stOffset = stOffset s + n}
296 processOp (Inc n off) = add off (Inc n)
297 processOp (In n off) = add off (In n)
298 processOp (Out n off) = add off (Out n)
299 processOp (Clear off) = add off Clear
300 processOp (Mul factor from to) = add from (\o -> Mul factor o to)
301 processOp (Scan d off) = do
302 OffSt {..} <- get
303 put
304 OffSt
305 { stOffset = 0
306 , stOptimized = stOptimized
307 , stBatch = Scan d (off + stOffset) : stBatch
308 }
309 add :: MemOffset -> (MemOffset -> Op) -> State OffsetState ()
310 add off op =
311 get >>= \s@OffSt {..} -> put s {stBatch = op (off + stOffset) : stBatch}
312 finishBatch :: State OffsetState ()
313 finishBatch = do
314 s@OffSt {..} <- get
315 let batch =
316 if stOffset /= 0
317 then Move stOffset : stBatch
318 else stBatch
319 put s {stBatch = [], stOffset = 0, stOptimized = batch ++ stOptimized}
320 finishLastBatch :: State OffsetState ()
321 finishLastBatch = do
322 finishBatch
323 modify $ \s@OffSt {..} -> s {stOptimized = reverse stOptimized}
324
325 -- * Loading Compiled Code
326 -- | Load a compiled program from 'saveCompilerOutput' output.
327 load :: ByteString -> Program Optimized
328 load = B.decode
329
330 -- | Load a compiled program saved with 'saveCompilerOutput'.
331 loadFile :: FilePath -> IO (Program Optimized)
332 loadFile = B.decodeFile
333
334 -- * Compiler Flags
335 -- | Command line flags to the Brainfuck compiler
336 data CompilerOptions = CompilerOptions
337 { cOptsOut :: Maybe FilePath -- ^ Where to put the compiled output, if 'Nothing' use the input basename with bfc extension
338 , cOptsFusionOptimization :: Bool -- ^ Enable fusion optimization
339 , cOptsClearLoopOptimization :: Bool -- ^ Enable clear loop optimization
340 , cOptsMulOptimization :: Bool -- ^ Enable mul loop optimization
341 , cOptsScanOptimization :: Bool -- ^ Enable scan loop optimization
342 , cOptsOffsetInstructionsOptimization :: Bool -- ^ Enable offset instructions optimization
343 , cOptsVerbose :: Bool -- ^ Output more debugging information
344 , cOptsSource :: FilePath -- ^ Input source to the compiler, this should be Brainfuck code
345 } deriving (Show)
346
347 optionsP :: Parser CompilerOptions
348 optionsP =
349 (\output disableAll fusion clear mul scan offset verbose source ->
350 CompilerOptions
351 { cOptsOut = output
352 , cOptsFusionOptimization = not disableAll || fusion
353 , cOptsClearLoopOptimization = not disableAll || clear
354 , cOptsMulOptimization = not disableAll || mul
355 , cOptsScanOptimization = not disableAll || scan
356 , cOptsOffsetInstructionsOptimization = not disableAll || offset
357 , cOptsVerbose = verbose
358 , cOptsSource = source
359 }) <$>
360 optional
361 (option
362 str
363 (long "output" <> short 'o' <> metavar "OUT" <>
364 help "Compiled output path")) <*>
365 switch
366 (long "disable-all-optimizations" <> short 'd' <>
367 help "Disable all optimizations") <*>
368 switch
369 (long "fusion" <>
370 help
371 "Reenable fusion optimization (turn multiple + or > into a single operation)") <*>
372 switch
373 (long "clear" <>
374 help "Reenable clear loop optimization (turn [-] into a single operation)") <*>
375 switch
376 (long "mul" <>
377 help
378 "Reenable mul loop optimization (turn [->++>+++<<] into [Mul(1, 2) Mul(2,3)] Clear operations)") <*>
379 switch
380 (long "scan" <>
381 help "Reenable scan loop optimization (turn [>] into ScanR operation)") <*>
382 switch
383 (long "offset" <>
384 help
385 "Reenable offset instructions optimization (turn >>+>->> into Inc 1 2, Inc (-1) 1, Move 1, Move 1, Move 1, Move 1, Move 1, operation)") <*>
386 switch
387 (long "verbose" <> short 'v' <> help "Output more debugging information") <*>
388 argument str (metavar "SRC" <> help "Input source code file")
389
390 options :: ParserInfo CompilerOptions
391 options =
392 info
393 (optionsP <**> helper)
394 (fullDesc <> progDesc "Compile Brainfuck code in SRC file" <>
395 header "An optimizing Brainfuck compiler and evaluator")
396
397 -- | Default compiler options: all optimizations, not verbose, no input or output files.
398 defaultCompilerOptions :: CompilerOptions
399 defaultCompilerOptions =
400 CompilerOptions
401 { cOptsOut = Nothing
402 , cOptsFusionOptimization = True
403 , cOptsClearLoopOptimization = True
404 , cOptsMulOptimization = True
405 , cOptsScanOptimization = True
406 , cOptsOffsetInstructionsOptimization = True
407 , cOptsVerbose = False
408 , cOptsSource = ""
409 }
410
411 -- | Compiler options: all optimizations off.
412 noOptimizationCompilerOptions :: CompilerOptions
413 noOptimizationCompilerOptions =
414 CompilerOptions
415 { cOptsOut = Nothing
416 , cOptsFusionOptimization = False
417 , cOptsClearLoopOptimization = False
418 , cOptsMulOptimization = False
419 , cOptsScanOptimization = False
420 , cOptsOffsetInstructionsOptimization = False
421 , cOptsVerbose = False
422 , cOptsSource = ""
423 }
424
425 -- | Parse a list of command line arguments
426 parsePure :: [String] -> ParserResult CompilerOptions
427 parsePure = execParserPure defaultPrefs options
428
429 -- | Parse a list of command line arguments printing errors to the stderr
430 unsafeParse :: [String] -> IO CompilerOptions
431 unsafeParse = handleParseResult . parsePure
432
433 -- | Parse command line arguments printing errors to the stderr
434 parse :: IO CompilerOptions
435 parse = getArgs >>= unsafeParse
436
437 ----------------------- implementation details ----------------------
438 -- * Implementation Detail: Parsing Lists of Instructions
439 -- | This parser is used to implement the mul optimization. See 'mulOpt'.
440 type ProgramParser a = Parsec.ParsecT [Op] () Identity a
441
442 -- | Parse successfully if the token satisfies the predicate.
443 satisfy' :: Show t => (t -> Bool) -> Parsec.ParsecT [t] () Identity t
444 satisfy' predicate = Parsec.token showTok posFromTok testTok
445 where
446 showTok t = show t
447 posFromTok _ = initialPos ""
448 testTok t =
449 if predicate t
450 then Just t
451 else Nothing
452
453 -- | Parse movement to the right (\>), returning the offset value.
454 --
455 -- >>> Parsec.parse mrightP "" [Move 3]
456 -- Right 3
457 --
458 -- >>> Data.Either.isLeft $ Parsec.parse mrightP "" [Move (-1)]
459 -- True
460 mrightP :: ProgramParser MemOffset
461 mrightP =
462 satisfy' isRight <&> \case
463 Move n -> n
464 _ -> undefined
465
466 -- | Parsemovement to the left (\<), returning the offset value.
467 --
468 -- >>> Parsec.parse mleftP "" [Move (-3)]
469 -- Right 3
470 --
471 -- >>> Data.Either.isLeft $ Parsec.parse mleftP "" [Move 1]
472 -- True
473 mleftP :: ProgramParser MemOffset
474 mleftP =
475 satisfy' isLeft <&> \case
476 Move n -> (negate n)
477 _ -> undefined
478
479 -- | Parse increment, returning total increment.
480 --
481 -- >>> Parsec.parse plusP "" [Inc 3 0]
482 -- Right 3
483 --
484 -- >>> Data.Either.isLeft $ Parsec.parse plusP "" [Inc (-2) 0]
485 -- True
486 plusP :: ProgramParser Int
487 plusP =
488 satisfy' isPlus <&> \case
489 Inc n 0 -> n
490 _ -> undefined
491
492 -- | Parse decrement, returning total decrement.
493 --
494 -- >>> Parsec.parse minusP "" [Inc (-3) 0]
495 -- Right 3
496 --
497 -- >>> Data.Either.isLeft $ Parsec.parse minusP "" [Inc 2 0]
498 -- True
499 minusP :: ProgramParser Int
500 minusP =
501 satisfy' isMinus <&> \case
502 Inc n 0 -> (negate n)
503 _ -> undefined
504
505 -- | Sum the result of a parser applied repeatedly
506 --
507 -- >>> Parsec.parse (summedP plusP) "" [Inc 3 0, Inc 1 0, Inc (-4) 0]
508 -- Right 4
509 summedP :: Num n => ProgramParser n -> ProgramParser n
510 summedP = fmap sum . Parsec.many1
511
512 -- | Full multiple copy/multiply operation parser. Returns the set of factors and relative, incremental offsets.
513 --
514 -- >>> Parsec.parse mulP "" [Inc (-1) 0, Move 1, Inc 2 0, Move 3, Inc 1 0, Move (-4)]
515 -- Right [(2,1),(1,3)]
516 mulP :: ProgramParser [(MulFactor, MemOffset)]
517 mulP = do
518 _ <- minusP
519 copies <- Parsec.many1 shiftFactorP
520 let totalShift = sum $ map fst copies
521 back <- summedP mleftP
522 Parsec.eof
523 if back == coerce totalShift
524 then return (fmap swap copies)
525 else Parsec.unexpected "number of left returns to close the loop"
526 where
527 shiftFactorP = (,) <$> summedP mrightP <*> fmap MulFactor (summedP plusP)
528
529 -- | Is the instruction a right movement?
530 isRight :: Op -> Bool
531 isRight (Move n)
532 | n > 0 = True
533 isRight _ = False
534
535 -- | Is the instruction a left movement?
536 isLeft :: Op -> Bool
537 isLeft (Move n)
538 | n < 0 = True
539 isLeft _ = False
540
541 -- | Is the instruction an increment?
542 isPlus :: Op -> Bool
543 isPlus (Inc n 0)
544 | n > 0 = True
545 isPlus _ = False
546
547 -- | Is the instruction a decrement?
548 isMinus :: Op -> Bool
549 isMinus (Inc n 0)
550 | n < 0 = True
551 isMinus _ = False