REBOL [
Title: "GeoRSS converter"
Purpose: "Converts GeoRSS xml to GPX/KML/OziExplorer formats"
Author: "pijoter"
Date: 7-Oct-2009/21:04:06+2:00
File: %georss.r
Home: http://rowery.olsztyn.pl/rebol
License: "GNU General Public License (Version II)"
Library: [
level: 'intermediate
platform: 'all
type: [tool]
domain: [file-handling web]
tested-under: [
view 2.7.6 on [WinXP Linux]
]
support: none
license: 'GPL
]
Tabs: 3
]
dt: context [
to-epoch: func [dt [date!]] [
;; epoch to czas gmt
any [
attempt [to-integer (difference dt 1970-01-01/00:00:00)]
(dt - 1970-01-01/00:00:00) * 86400
]
]
from-epoch: func [value [integer!] /zone tz [time!] /local date time dt] [
value: to-time value
date: 1970-01-01 + (round/down value / 24:00:00)
time: value // 24:00:00
dt: to-date rejoin [date "/" time]
dt/zone: any [(if value? zone [tz]) 0:00]
dt + dt/zone
]
normalize: func [dt [date!] /date /time /local pad d t s] [
pad: func [val n] [head insert/dup val: form val #"0" (n - length? val)]
dt: rejoin [
(pad dt/year 4) #"-" (pad dt/month 2) #"-" (pad dt/day 2)
#"/" to-itime any [dt/time 0:00]
]
any [
if date [copy/part dt 10]
if time [copy/part (skip dt 11) 8]
dt
]
]
to-stamp: func [dt [date!] /date /time] [
dt: any [
if date [self/normalize/date dt]
if time [self/normalize/time dt]
self/normalize dt
]
remove-each ch dt [found? find "-/:" ch]
]
to-gmt: func [dt [date!]] [
any [
zero? dt/zone
attempt [
dt: dt - dt/zone
dt/zone: 0:00
]
]
dt
]
to-iso: func [dt [date!]] [
dt: self/to-gmt dt
append (replace (self/normalize dt) "/" "T") "Z"
]
]
lang: context [
local-encoding: 'iso-8859-1
standards: [
; tabela standardow zamiany znakow (DUZE/male)
windows-1250 [165 198 202 163 209 211 140 143 175 185 230 234 179 241 243 156 159 191]
iso-8859-2 [161 198 202 163 209 211 166 172 175 177 230 234 179 241 243 182 188 191]
utf-8 [
; pl
260 262 280 321 323 211 346 377 379
261 263 281 322 324 243 347 378 380
; de
196 203 207 214 220 223
228 235 239 246 252 223
; sk
193 196 268 270 201 205 313 317 327 211 212 340 352 356 218 221 381
223 228 269 271 233 237 314 318 328 243 244 341 353 357 250 253 382
; cz
193 268 270 201 282 205 327 211 344 352 356 218 366 221 381
225 269 271 233 283 237 328 243 345 353 357 250 367 253 382
]
iso-8859-1 [
; pl
065 067 069 076 078 079 083 090 090
097 099 101 108 110 111 115 122 122
; de
065 069 073 079 085 083
097 101 105 111 117 115
; sk
065 065 067 068 069 073 076 076 078 079 079 082 083 084 085 089 090
097 097 099 100 101 105 108 108 110 111 111 114 115 116 117 121 122
; cz
065 067 068 069 069 073 078 079 082 083 084 085 085 089 090
097 099 100 101 101 105 110 111 114 115 116 117 117 121 122
]
]
local-charset: does [select self/standards self/local-encoding]
check: func [
"Sprawdza standard znakow danych rss/xml; Zwraca word! nazwy standardu"
text [string! binary!] "rss/xml do sprawdzenia" /local encoding] [
encoding: none
parse/all detab to-string text [
to "} to end
]
to-word any [encoding 'utf-8]
]
to-ascii: func [
"Zamienia polskie znaki na ASCII; Zwraca string! po konwersji"
text [string! binary!] "tekst do konwersji"
encoding [string! word! none!] "standard zrodlowy" /local text-charset] [
text-charset: any [
select self/standards (to-word encoding)
self/standards/utf-8
]
to-string self/iconv text text-charset self/standards/iso-8859-1
]
to-local-charset: func [
"Zmienia standard polskich znakow; Zwraca string! po konwersji"
text [string! binary!] "tekst do konwersji"
encoding [string! word! none!] "standard zrodlowy" /local text-charset] [
text-charset: any [
select self/standards (to-word encoding)
self/standards/utf-8
]
if (self/local-encoding = 'utf-8) [text: self/clean text]
to-string self/iconv text text-charset self/local-charset
]
clean: func [
"Czysci tekst ze znaku #352 (2 oktety)"
text [string! binary!] "tekst do konwersji" /local here c i j] [
parse/all text [
any [
here: skip (
c: first here
if (c > 127) [
; UTF-8
; znaki < 128 sa przepuszczane bez zmian
i: 0
either all [(c > 191) (c < 224)] [
; dwa okrety
i: ((to-integer c) and 31) * to-integer (power 2 6)
i: i or (to-integer (second here) and 63)
][
; trzy oktety
i: ((to-integer c) and 15) * to-integer (power 2 12)
i: i or ((to-integer (second here) and 63) * to-integer (power 2 6))
i: i or (to-integer (third here) and 63)
]
; znak #352 powoduje problemy przy wczytywaniu pliku do programow zarzadzajacych GPX
; najlepiej zamienic go na ASCII
if i = 352 [
remove/part here 2
insert here any [
if none? j: attempt [index? find self/standards/utf-8 i] [#"."]
to-char self/standards/iso-8859-1/:j
]
]
]
) :here
skip
]
]
head here
]
unicode?: func [text-charset [block!]] [same? text-charset self/standards/utf-8]
ascii?: func [text-charset [block!]] [same? text-charset self/standards/iso-8859-1]
iconv: func [
"Konwertuje polskie znaki w tekscie; Zwraca string! po konwersji"
text [string! binary!] "tekst do konwersji"
inp [block!] "tablica konwersji (wejsciowa)"
out [block!] "tablica konwersji (wyjsciowa)" /local here unicode c i j] [
all [
any [
(same? inp out)
(self/unicode? out) ;; unikod nie moze byc standardem docelowym
(self/ascii? inp) ;; ascii nie moze byc zrodlowym
]
return text
]
unicode: unicode? inp
parse/all text [
any [
here: skip (
c: first here
either not unicode [
if c > 127 [
; znaki narodowe maja kod >= 127
any [
none? i: attempt [index? find inp (to-integer c)]
change here (to-char out/:i)
]
]
][
if (c > 127) [
; UTF-8
; znaki < 128 sa przepuszczane bez zmian
either all [(c > 191) (c < 224)] [
; dwa okrety
i: ((to-integer c) and 31) * to-integer (power 2 6)
i: i or (to-integer (second here) and 63)
remove/part here 2
][
; trzy oktety
i: ((to-integer c) and 15) * to-integer (power 2 12)
i: i or ((to-integer (second here) and 63) * to-integer (power 2 6))
i: i or (to-integer (third here) and 63)
remove/part here 3
]
insert here any [
if none? j: attempt [index? find inp i] [#"."]
to-char (out/:j)
]
]
]
) :here
skip
]
]
head here
]
]
html: context [
tokens: [
"lt" {<} "gt" {>} "amp" {&} "nbsp" { } "apos" {'}
"quot" {"} "raquo" {-} "ldquo" {"} "rdquo" {"} "rsquo" {'}
]
escape: func [
"Zamienia encje HTML na tekst; Zwraca string! po konwersji"
text [string!] "tekst do konwersji" /local here there entity] [
entity: complement charset { :;<>}
parse/all text [
any [
here:
end break
| "&"
[
"#" copy item to ";" skip there: (
remove/part here there
attempt [insert here form to-char to-integer item]
)
:here
| copy item some entity ";" there: (
remove/part here there
any [
none? (code: select tokens item)
insert here code
]
)
:here
]
| [""] there: (remove/part here there) :here
| skip
]
]
head here
]
strip-tags: func [
"Usuwa znaczniki HTML z tekstu; Zwraca string! po konwersji"
text [string!] "tekst do konwersji"
/allow tags [block! tag!] "znaczniki ignorowane"
/local allow-tags page] [
contains?: func [tags [block!] tag [tag!]] [found? attempt [find tags to-tag first (parse (to-string tag) none)]]
allow-tags: make block! []
if tags [append allow-tags tags]
page: load/markup (self/escape (trim/lines text))
comment {
replace/all text {
} LF
page: load/markup (self/escape (trim text))
}
remove-each tag page [
all [
tag? tag
not contains? allow-tags tag
]
]
form page
]
]
rss: context [
rss: copy [] ; miejsce na wynikowa tablice informacji
ctx: copy [] ; kontekst znalezionego znacznika
round-location: func [value [string! number!]] [
any [
number? value
value: to-decimal value
]
round/to value 0.000001
]
round-alt: func [value [string! number!]] [
any [
number? value
value: to-decimal value
]
round/to value 0.01
]
emit-text: func [tag [word!] text [string! none!]] [
text: any [text {}]
repend self/ctx [tag (html/strip-tags/allow text [ ])]
any [
select self/ctx 'encoding
repend self/ctx ['encoding (form lang/local-encoding)]
]
]
emit-decimal: func [tag [word!] value [number! none!]] [
value: any [value 0.0]
repend self/ctx [tag value]
]
emit-date: func [tag [word!] date [string! none!]] [
repend self/ctx [
tag any [
attempt [to-date (skip date 5)]
now
]
]
]
emit-point: func [point [string! none!] /local lat lon] [
point: html/strip-tags any [point {0.0 0.0}]
set [lat lon] parse point none
self/emit-decimal 'lat (self/round-location lat)
self/emit-decimal 'lon (self/round-location lon)
]
emit-poslist: func [poslist [string! none!] /local blk lat lon] [
track: html/strip-tags any [poslist {0.0 0.0}]
blk: make block! 100
repend self/ctx ['track (parse poslist none)]
]
emit-alt: func [alt [string! number! none!]] [
alt: any [attempt [to-decimal alt] 0]
self/emit-decimal 'alt (self/round-alt alt)
]
parts: [
["