REBOL [
    Title:   "Rebol Code colorizer"
    Date:    20-Jan-2009/3:05:02+1:00
    Name:    "Rebol Code colorizer"
    Version: 0.9.5
    File:    %code-colorizer.r
    Author:  "David 'Oldes' Oliva"
    Email:   oliva.david@seznam.cz
    Home:    http://box.lebeda.ws/~hmm/
    Owner:   none
    Rights:  none
    Needs:   none
    Tabs:    none
    encoding: 'cp1250
    Usage:   [
		code-colorizer/remove-parens?: off
		code-colorizer/footer-final: {
		
		}
		
		colorize/save/title
		    %code-colorizer.r
		    %code-colorizer.html
		    "Rebol Code Colorizer"
	]
    Purpose: {To convert Rebol script into html with colorized code. Using string based parsing.}
    Comment: {
    	To change colors, download this CSS style: http://box.lebeda.ws/~hmm/css/rebolcc.css
    	modify it and use it (change code-colorizer/css-file to your new version)
    }
    History: [
    	0.9.5 20-Jan-2009 {
    		- Using hash! table to get word's classes instead of parse rules
    		- Using  tags instead of 
    		- Updated 'seo-name function
    		- Fixed special char! notation like #"^(60)"
    	}
    	0.9.2 12-Mar-2008 {
    		- Fixed bug with single escape character in string ( "^^" )
    	}
    	0.9.1 8-Mar-2008 {
    		- Fixed bug with conversion of REBOL word to url
    		- Added new color class .iss for issue! datatype.
    	}
    	0.9.0 28-Sep-2007 {
    		Complete remake of the script using string based parsing (besause of recursions limits in the old code)}
    	0.0.1 29-Oct-2003 {
    		%colorize-rswf.r script inspired by Carl's %color-code.r file but was designed
    		to colorize using CSS classes instead of font tags
    	}
	]
    Language: none
    Type:     none
    Content:  none
	library: [
		level:        'intermediate
		platform:     'all
		type:         [tool]
		domain:       [html parse text-processing visualization web]
		tested-under: none
		support:      none
		license:      'public-domain
		see-also:     none
	]
;    preprocess: true
;    require: [
;    	rs-project 'seo-name
;	]
]

;### Seo-name
comment {
#### RS include: %seo-name.r
#### Title:   "seo-name"
----} 
unless value? 'seo-name [
	seo-name: func [
		"Creates SEO friendly version of string with diacritics"
		str
		/local new normal-chars trans-chars other-char pos pos2
	][
		was-type?: type? str
	    new: lowercase copy as-string str 
	    normal-chars: charset [#"A" - #"Z" #"a" - #"z" #"0" - #"9" #"_" #"."] 
	    trans-chars:  charset "뚜"
	    other-char: complement (union normal-chars trans-chars)
	    parse/all new [
		some [
		    some normal-chars 
		    | some [
			pos: [
			      ["" | "" | ""] (change pos "e") 
			    | ["" | ""]       (change pos "s") 
			    | ["" | "" | ""] (change pos "c") 
			    | ["" | ""]       (change pos "r") 
			    | ["" | ""]       (change pos "z") 
			    | ""               (change pos "y") 
			    | ""               (change pos "i") 
			    | ["" | ""]       (change pos "o") 
			    | ["" | ""]       (change pos "a") 
			    | ["" | "" | ""] (change pos "u") 
			    | ""               (change pos "d") 
			    | ""               (change pos "t") 
			    | ""               (change pos "n")
			]
		    ] 
		    | some other-char pos2: (pos2: change/part pos "-" pos2) :pos2
		]
	    ] 
	    to was-type? head new
	] 
]
comment "---- end of RS include %seo-name.r ----" 

;### Code-colorizer

code-colorizer: context [

;## Default settings
	remove-parens?: off ;removes parens from code (used to document big parsing rules)
	remove-newline-comments?: on ;removes all comments which start at newline
	index-comments?: on ;creates index from special comments
	break-on-error?: on ;stops parsing if founds invalid code

    out: str: x:   none
	output?:       true
	level-block:  
	level-paren:  
	level-string:  0
	string-type:   none
	string-buffer: make string! 10000
	index-html:    make string! 1000
	index-type:    none
	css-file:      http://box.lebeda.ws/~hmm/css/rebolcc.css
	footer-final:  none ;using this to add final note (for example counter) on HTML page
    
;## Basic charsets
;** These charsets are used in string based parse in Colorize function

	ch_word-dividers: charset " ^-^/^M{}[]()^"^^;"
	ch_newlines:      charset "^/^M"
	ch_space:         charset " ^-"
	ch_spaces:        charset " ^-^/^M"
	ch_numbers:       charset "0123456789"
	ch_binary2:       charset "01"
	ch_alpha:         charset [#"a" - #"z" #"A" - #"Z"]
	ch_hexadecimal:   charset [#"a" - #"f" #"A" - #"F" "0123456789"]
	ch_tonewline:     complement ch_newlines
	ch_word:          complement ch_word-dividers
	ch_anychar:       complement charset ""
	ch_alphanum: union ch_alpha ch_numbers
	ch_base64:   union ch_alphanum union charset "+/=" ch_spaces

;## Rules used for parsing
	rl_integer:   [some ch_numbers]
	rl_word:      [some ch_word]
	rl_binary2:   [ "2#{" any [8 [ch_binary2  any ch_spaces]] "}"]
	rl_binary32:  [  "#{" any [2 [ch_alphanum any ch_spaces]] "}"]
	rl_binary64:  ["64#{" any ch_base64 "}"]
	rl_binary: [
		  rl_binary2
		| rl_binary32
		| rl_binary64
		| ["#{" | "2#{" | "64#{"] (
			if level-string = 0 [
				print ["!!! Invalid binary --" copy/part str 20]
				print [level-string level-block level-paren]
				if break-on-error? [break]
			]
		)
	]
	rl_pair: [some ch_numbers #"x" some ch_numbers]
	rl_char: [
		{#"} ["^^(" 2 ch_hexadecimal #")" | #"^^" 1 ch_anychar | 1 ch_anychar ] {"}
	]
	
;** These are groups with words used in Rebol

	rl_comparison: [
		"<="  "<>"  "<"  "=="  "=?"  "="  ">"  ">="  "equal?"  "greater-or-equal?" 
		"greater?"  "lesser-or-equal?"  "lesser?"  "maximum-of"  "minimum-of" 
		"not-equal?"  "same?"  "sign?"  "strict-equal?"  "strict-not-equal?"
	]
	rl_context: ["alias"  "bind"  "context"  "get"  "in"  "set"  "unset"  "use"  "value?"]
	rl_control: [
		"all"  "any"  "opt"  "attempt"  "break"  "catch"  "compose"  "disarm"  "dispatch" 
		"do-events"  "does"  "either"  "else"  "exit"  "forall"  "foreach"  "for"  
		"forever"  "forskip"  "func"  "function"  "halt"  "has"  "if"  "launch"  "loop" 
		"next"  "quit"  "reduce"  "remove-each"  "repeat"  "return"  "secure"  "switch" 
		"throw"  "try"  "until"  "wait"  "while"  "do"
	]
	rl_help: [
		"?"  "??"  "about"  "comment"  "dump-face"  "dump-obj"  "help" 
		"license"  "probe"  "source"  "trace"  "usage"  "what"
	]
	rl_logic: [
		"all"  "and"  "any"  "complement"  "found?"  "not"  "or"  "random"  "xor" 
		"on"  "off"  "true"  "false"  "none"
	]
	rl_math: [
		"**"  "*"  "+"  "-"  "//"  "/"  "abs"  "absolute"  "add"  "and"  "arccosine" 
		"arcsine"  "arctangent"  "complement"  "cosine"  "divide"  "even?"  "exp" 
		"log-10"  "log-2"  "log-e"  "maximum-of"  "maximum"  "max"   "min"  "minimum" 
		"minimum-of"  "multiply"  "negate"  "negative?"  "not"  "odd?"  "or" 
		"positive?"  "power"  "random"  "remainder"  "sign?"  "sine"  "square-root" 
		"subtract"  "tangent"  "xor"  "zero?"
	]
	rl_io: [
		"ask"  "change-dir"  "clean-path"  "close"  "confirm"  "connected?" 
		"delete"  "dir?"  "dirize"  "dispatch"  "do"  "echo"  "exists?"  "get-modes" 
		"info?"  "input"  "input?"  "list-dir"  "load"  "make-dir"  "modified?" 
		"open"  "pick"  "poke"  "prin"  "print"  "query"  "read"  "read-io"  "rename" 
		"resend"  "save"  "script?"  "secure"  "send"  "set-modes"  "set-net"  "size?" 
		"split-path"  "suffix?"  "to-local-file"  "to-rebol-file"  "update"  "wait" 
		"what-dir"  "write-io"  "write" 
	]
	rl_series: [
		"alter"  "append"  "array"  "at"  "back"  "change"  "clear"  "copy"  "difference" 
		"empty?"  "exclude"  "extract"  "fifth"  "find"  "first"  "found?"  "fourth" 
		"free"  "head?"  "head"  "index?"  "insert"  "intersect"  "join"  "last"  "length?" 
		"load"  "maximum-of"  "minimum-of"  "offset?"  "parse"  "pick"  "poke"  "random" 
		"rejoin"  "remove"  "remove-each"  "repend"  "replace"  "reverse"  "second" 
		"select"  "skip"  "sort"  "switch"  "tail?"  "tail"  "third"  "union"  "unique"
	]
	rl_dataset: [
		"alter"  "charset"  "difference"  "exclude"  "extract"  "intersect"  "union"  "unique"
	]
	rl_specialstring: [
		"build-tag"  "checksum"  "clean-path"  "compress"  "debase"  "decode-cgi"  "decompress" 
		"dehex"  "detab"  "dirize"  "enbase"  "entab"  "find"  "form"  "import-email"  "lowercase" 
		"mold"  "parse-xml"  "reform"  "rejoin"  "remold"  "split-path"  "suffix?"  "trim"  "uppercase"
	]
	rl_system: [
		"browse"  "component?"  "link?"  "now"  "protect"  "protect-system"  "recycle" 
		"unprotect"  "upgrade"
	]
	rl_datatype: [
		"any-block?"  "any-function?"  "any-string?"  "any-type?"  "any-word?"  "as-pair" 
		"binary?"  "bitset?"  "block?"  "char?"  "construct"  "datatype?"  "date?"  "decimal?" 
		"dump-obj"  "email?"  "error?"  "event?"  "file?"  "function?"  "get-word?"  "hash?" 
		"image?"  "integer?"  "issue?"  "library?"  "list?"  "lit-path?"  "lit-word?"  "logic?" 
		"make"  "money?"  "native?"  "none?"  "number?"  "object?"  "op?"  "pair?"  "paren?" 
		"path?"  "port?"  "refinement?"  "routine?"  "series?"  "set-path?"  "set-word?" 
		"string?"  "struct?"  "tag?"  "time?"  "to-binary"  "to-bitset"  "to-block" 
		"to-char"  "to-date"  "to-decimal"  "to-email"  "to-file"  "to-get-word"  "to-hash" 
		"to-hex"  "to-idate"  "to-image"  "to-integer"  "to-issue"  "to-list"  "to-lit-path" 
		"to-lit-word"  "to-logic"  "to-money"  "to-pair"  "to-paren"  "to-path"  "to-refinement" 
		"to-set-path"  "to-set-word"  "to-string"  "to-tag"  "to-time"  "to-tuple"  "to-url" 
		"to-word"  "tuple?"  "type?"  "unset?"  "url?"  "word?"  "to" 
	]
	rl_view: [
		"alert"  "as-pair"  "brightness?"  "caret-to-offset"  "center-face"  "choose"  "clear-fields" 
		"do-events"  "dump-face"  "flash"  "focus"  "hide-popup"  "hide"  "in-window?"  "inform" 
		"layout"  "link?"  "load-image"  "make-face"  "offset-to-caret"  "request-color"  "request" 
		"request-date"  "request-download"  "request-file"  "request-list"  "request-pass"  "request-text" 
		"show-popup"  "show"  "size-text"  "span?"  "stylize"  "unfocus"  "unview"  "viewed?"  "view"  "within?"
	]
	word-classes: copy []
	foreach [group class] reduce [
		rl_comparison 'kw2
		rl_context    'kw3
		rl_control    'kw4
		rl_help       'kw5
		rl_logic      'kw6
		rl_math       'kw7
		rl_io         'kw8
		rl_series     'kw9
		rl_dataset    'kw10
		rl_specialstring 'kw11
		rl_system     'kw12
		rl_datatype   'kw13
		rl_view       'kw14
	][	foreach word group [repend word-classes [word class] ] ]
	word-classes: make hash! word-classes

;## escape-html
	escape-html: func[data][
		data: to string! reduce data
		foreach [from to] [ "&" "&" "<" "<" ">" ">"][
			replace/all data from to
		]
		data
	]
;## emit
	emit: func [data /class cl /html] [
;print ["EMIT:" mold data cl (mold copy/part str 5)]
		case [
			level-string > 0 [
				append string-buffer data
			]
			output? [
	       		repend out either class [
	       			[
	       				{}
	       				escape-html data
	       				""
	   				]
				][
					either html [data][	escape-html data ]
				]
	       	]
		]
	]

;## add-index-comment
	add-index-comment: func[x /local st n][
		parse/all x [
			[
				  "###" (st: 'co2)
				| "##"  (st: 'co3)
				| "**"  (st: 'co4)
				| "*"   (st: 'co5)
				| "-"   (st: 'co6)
			] copy x some ch_tonewline (
				if st = 'co3 [
					;use only content to paren
					parse/all x [copy x to "(" to end]
				]
				
				case [
					st = 'co2 [
						
						append index-html rejoin [
							case [
								none? index-type ["
    "] ;index-type <> 'co2 ["
^/"] all [ not empty? index-html #"," = last index-html ][ remove back tail index-html] true [""] ] {
  • } x {} ] append out rejoin [{}] index-type: 'co2 ] st = 'co3 [ append index-html rejoin [ ;either index-type = 'co2 ["^/
      ^/"][""] ;{^-
    1. } x {} {^/} x {,} ] append out rejoin [{}] index-type: 'co3 ] ] emit/class join ";" x st ) ] ] ;## colorize set 'colorize func[source /save outfile /title ttl /local source-file text x tmp][ text: either any [file? source url? source][ source-file: source read/binary source ][ source ] out: make string! 3 * length? text level-block: level-paren: level-string: 0 string-type: index-type: none clear string-buffer clear index-html loop 1 [ ;<-- to be able break parsing parse/all detab text [ any [ str: ;(print [">>>" mold copy/part str 10]) #"^^" [ #"^^" (emit "^^^^") | #"(" some ch_hexadecimal #")" x: ( emit/class copy/part str x 'ch ) | #"{" ( case [ level-string = 0 [ emit #"^^" string-type: #"{" level-string: 1 emit #"{" ] true [ emit "^^{" ] ] ) | #"^"" ( case [ level-string = 0 [ emit #"^^" string-type: #"^"" level-string: 1 emit {"} ] true [ emit {^^"} ] ] ) | (emit #"^^") ] | copy x rl_char (emit/class x 'ch ) | {"} ( either level-string = 0 [ string-type: #"^"" level-string: level-string + 1 emit {"} ][ emit {"} if string-type = #"^"" [ level-string: 0 string-type: none emit/class string-buffer 'st0 clear string-buffer ] ] ) | copy x rl_binary (emit/class x 'bi0) | copy x rl_word ( case [ #":" = last x [emit/class x 'sw] #"!" = last x [emit/class x 'dt] parse/case x ["REBOL"][ emit/html {REBOL} ] true [ ;probe x either tmp: select word-classes x [ emit/class x tmp ][ parse x [ rl_pair (emit/class x 't1) | some ch_numbers (emit/class x 'nu0) | #"#" to end (emit/class x 'iss) | [#"%" | "http://" | "ftp://" | "https://"] to end (emit/class x 'fl) | #"'" to end (emit/class x 'lw) | (emit x) ] ] ] ] ) | #"[" ( level-block: level-block + 1 either level-string > 0 [ emit #"[" ][ emit/class #"[" 'br0 ] ) | #"]" ( level-block: level-block - 1 either level-string > 0 [ emit #"]" ][ emit/class #"]" 'br0 ] ) | #"(" ( if remove-parens? [ output?: off ] level-paren: level-paren + 1 either level-string > 0 [ emit #"(" ][ emit/class #"(" 'br0 ] ) | #")" ( either level-string > 0 [ emit #")" ][ emit/class #")" 'br0 ] if 0 = (level-paren: level-paren - 1) [output?: on] ) | #"{" ( either level-string = 0 [ emit/class #"{" 'br1 string-type: #"{" ][ emit #"{" ] if string-type = #"{" [ level-string: level-string + 1 ] ) | #"}" ( either string-type = #"{" [ level-string: level-string - 1 either level-string = 0 [ emit/class string-buffer 'st0 emit/class #"}" 'br1 string-type: none clear string-buffer ][ emit #"}" ] ][ emit #"}" ] ) | #";" copy x [any ch_space any ch_tonewline] new: ( if none? x [x: ""] either level-string = 0 [ case [ all [ index-comments? add-index-comment x ] none all [remove-newline-comments? (find ch_newlines first back str)][ ;remove this comment from output with the newline as well parse/all new [some ch_newlines new: to end] ] true [ emit/class join ";" x 'co1 ] ] ][ emit #";" new: next str ] ) :new | copy x some ch_newlines ( either string-type = #"^"" [ print ["!!! Invalid string --" mold copy/part string-buffer 20] if break-on-error? [break] ][ emit x ] ) | copy x some ch_spaces (emit x) () ] ( if level-string > 0 [print ["!!! Invalid string!" level-string mold string-type] ] if break-on-error? [break] ) ] ] if not empty? index-html [append index-html "
    "] if save [ write/binary outfile rejoin [ {} {} {} {} any [ttl "a Rebol code"] {} {^/} {} {
    } either title [rejoin ["

    " ttl "

    "]][""] either none? source-file [""][ rejoin [ {Source: } last split-path source-file { modified: } modified? source-file {} ] ] either remove-parens? ["
    Parens were removed from the source!
    "][""] {
    ^/} index-html {
    }
    	    		out
    	    		{
    ^/} {} any [footer-final ""] {} ] ] out ] ] ;print colorize/save %test-code.txt %test.html ;colorize/save %code-colorizer.r %test.html ;code-colorizer/remove-parens?: off ;code-colorizer/footer-final: { ; ;} ;colorize/save/title ; %code-colorizer.r ; %code-colorizer.html ; "Rebol Code Colorizer" ;colorize/save/title ; to-rebol-file "I:\rebol\rs\projects-rswf\rswf\new\swf-tag-rules_enczes.rb" ; %rswf-main-rules-full-code.html ; "Rebol/Flash Dialect (RSWF) main rules" ;### Test code comment [ ;some code to test if it works {str{nasted} and escaped ^{} "^^" "^(1f)" multilined-string: { some text on more lines with code inside: x: sine 1 + 2 } x: sine 1 + 2 ;pair datatype: 320x240 ;char! datatype: #"A" ;with escape: #"^-" = tab ;tuple! datatype: red: 255.0.0 ;tag!: [ 'hello ] ;valid word with escape char word^s "string escaped^" char " ;test to image! #{} #{1 2} 64#{Eg==} 2#{00000000} ;issue #FF0000 ;red table: [ q0: "# # L" q0 "1 1 L" q0 "+ 1 R" q1 q1: "1 1 R" q1 "# # L" q2 q2: "1 # L" q3 q3: "1 1 L" q3 "+ 1 R" q1 "# # R" q4 q4: "1 1 R" q4 "# # R" q5 q5: "# # L" stop ] ]