No warranty; this is not to be considered the official position of my employer or of any international body.
2011-01-26
Reverse-engineered RelaxNG schema for GAW XML Catalogue
legacy, but I don't mean it is bad or inferior. Rather, I'm interested in
the fact that an operating network already exists. Why doesn't the WIS
make use of it to implement the catalogue efficiently? So I got a sample
of XML data from WDCGG and analysed its structure.
# RELAX NG (compact syntax) grammar for the WDCGG station catalogue XML.
# Document shape: wdcgg > station* > parameter* > (contributor+, contact_person*).
grammar {
start =
element wdcgg {
# One entry per observing station; a catalogue may contain none.
element station {
element station_name { xsd:token { maxLength = "64" } },
# Station identifier: nine characters shaped as 3 word characters, 3 digits,
# 1 word character, 2 digits; surrounding whitespace is tolerated by the pattern.
element id {
xsd:Name { length = "9" pattern = "\s*\w\w\w\d\d\d\w\d\d\s*" }
},
element latitude {
xsd:float { maxInclusive = "90" minInclusive = "-90" }
},
element longitude {
xsd:float { maxInclusive = "180" minInclusive = "-180" }
},
# NOTE(review): the 0..5079 bounds presumably come from the sample data
# rather than from a specification -- confirm before relying on them.
element altitude {
xsd:float { maxInclusive = "5079" minInclusive = "0" }
},
element wmo_region { WMORegion },
element gaw_category { GawCategory },
element country { xsd:token },
element organization { xsd:token },
element description { xsd:string },
# Zero or more observed parameters per station.
element parameter {
element parameter_name {
xsd:NMTOKEN { maxLength = "7" pattern = "\s*[0-9A-Za-z]+\s*" }
},
element status_of_report { xsd:string },
element start_of_data { DateOrEmpty },
element end_of_data { DateOrEmpty },
element last_update { DateOrEmpty },
# At least one contributor is required for each parameter.
element contributor {
element organization { xsd:token },
element country { xsd:token }
}+,
# Contact persons are optional and may repeat.
element contact_person {
element name { xsd:token },
element organization { xsd:token },
element phone { xsd:token },
element fax { xsd:token },
element email { xsd:token }
}*
}*
}*
}
# Either an empty element or a date written as YYYY-MM-DD.
DateOrEmpty =
empty
| xsd:date { pattern = "\s*[0-9]{4}-[0-9]{2}-[0-9]{2}\s*" }
# Closed list of the literal region strings accepted in wmo_region.
WMORegion =
"REGION I (Africa)"
| "REGION II (Asia)"
| "REGION III (South America)"
| "REGION IV (North and Central America)"
| "REGION V (South-West Pacific)"
| "REGION VI (Europe)"
| "ANTARCTICA"
# Closed list of category strings; the element may also be empty.
GawCategory =
empty
| "Regional"
| "Global"
| "Contributing"
| "Non-GAW (international)"
}
2010-12-07
reading draft 0.1 of v1.2 WMO Core Metadata Profile
My comments are buzzed and are visible at http://www.google.com/profiles/toyoda.eizi.
2010-10-30
Forward proxy of HTTPS by Apache HTTPD
both HTTPS and HTTP protocols.
Okay it is well documented in
<http://httpd.apache.org/docs/2.2/en/mod/mod_proxy.html>.
One thing I had to find out by trial and error is that an HTTPS connection
is implemented with the CONNECT method of HTTP and is not represented
directly in the <Proxy> directive.
Instead, we have to match a URI of the form "proxy:host:443".
# Plain HTTP: proxied requests are matched by the origin server's URL.
# Only {CLIENT-IP} may use the proxy for it.
<Proxy http://{ORIGIN-SERVER}/*>
Order deny,allow
Allow from {CLIENT-IP}
</Proxy>
# HTTPS: the client tunnels with the CONNECT method, so the section has to
# match the "proxy:host:443" form instead of an https:// URL.
<Proxy proxy:{ORIGIN-SERVER}:443>
Order deny,allow
Allow from {CLIENT-IP}
</Proxy>
2010-10-21
ctags for XML Schema
require 'uri'
require 'rubygems'
require 'xml'
# Generates a vi "tags" file listing the named top-level components of XML
# Schema documents, following xs:import and xs:include references.
#
# Command line: [-oFILE] file.xsd ...
class App
  # Namespace the root element of every processed document must belong to.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # argv:: command-line arguments; the array is copied, so the caller's
  #        array is left untouched while options are consumed.
  def initialize(argv)
    @argv = argv.dup
    @cache = {}        # URI string => true once that document has been read
    @names = {}        # component name => path of the file that defines it
    @outfnam = 'tags'  # output file name, overridden by -oFILE
    getopts
  end

  # Prints +str+ to stderr, but only when $VERBOSE is set.
  def wputs(str)
    $stderr.puts str if $VERBOSE
  end

  # Prints +str+ to stderr unconditionally.
  def eputs(str)
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ == +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes the leading arguments that start with "-".
  # Only -oFILE is recognised; any other option is dropped silently.
  def getopts
    while /^-/ === @argv.first
      case @argv.shift
      when /^-o(.*)/ then @outfnam = $1
      end
    end
  end

  # Prints the usage message to stdout.
  def help
    puts <<EOF
vi tags generator
usage: ruby #$0 file.xsd ...
EOF
  end

  # Reads the schema at +uri+ (a file URI), records its named top-level
  # components in @names, then recurses into imported/included schemas.
  #
  # uri::  URI of the schema; uri.path is the file that gets opened.
  # disp:: optional label used instead of the URI in the "skipping" message.
  #
  # Raises RuntimeError when the root element is not in the XSD namespace.
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      wputs "skipping #{disp || uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    # Mark as seen before recursing so that circular imports terminate.
    @cache[uri.to_s] = true
    children = []
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS)
    nodes.each { |node| children.push node['schemaLocation'].to_s }
    nodes = nil
    nodes = doc.find('/xs:schema/xs:*/@name', 'xs' => XSD_NS)
    nodes.each do |node|
      name = node.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      # The definition seen last wins.
      @names[name] = uri.path
    end
    # Drop the references before recursing (kept from the original code;
    # presumably so the document can be collected early).
    nodes = nil
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Processes one schema file named on the command line.
  # The name is made absolute first, so a relative file name refers to the
  # current working directory rather than to the filesystem root.
  def run1(filename)
    uri = URI('file://' + File.expand_path(filename))
    get1(uri)
  end

  # Processes every remaining command-line argument. Returns self.
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names, sorted, to @outfnam in tags-file format:
  # name, defining file and a search pattern, separated by tabs.
  def output
    File.open(@outfnam, 'w') do |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each do |name|
        # The pattern looks for the name between quote characters.
        # NOTE(review): non-word characters are replaced by "."; confirm that
        # the editor treats "." as a wildcard in tag search patterns.
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      end
    end
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file. Returns self.
  def close
    output
    self
  end
end
# Entry point: parse ARGV, scan every schema named there, then write the tags file.
App.new(ARGV).run.close
It is really powerful when editing XML files in vi.
====
require 'uri'
require 'rubygems'
require 'xml'
# Generates a vi "tags" file listing the named top-level components of XML
# Schema documents, following xs:import and xs:include references.
#
# Command line: [-oFILE] file.xsd ...
class App
  # Namespace the root element of every processed document must belong to.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # argv:: command-line arguments; the array is copied, so the caller's
  #        array is left untouched while options are consumed.
  def initialize(argv)
    @argv = argv.dup
    @cache = {}        # URI string => true once that document has been read
    @names = {}        # component name => path of the file that defines it
    @outfnam = 'tags'  # output file name, overridden by -oFILE
    getopts
  end

  # Prints +str+ to stderr, but only when $VERBOSE is set.
  def wputs(str)
    $stderr.puts str if $VERBOSE
  end

  # Prints +str+ to stderr unconditionally.
  def eputs(str)
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ == +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes the leading arguments that start with "-".
  # Only -oFILE is recognised; any other option is dropped silently.
  def getopts
    while /^-/ === @argv.first
      case @argv.shift
      when /^-o(.*)/ then @outfnam = $1
      end
    end
  end

  # Prints the usage message to stdout.
  def help
    puts <<EOF
vi tags generator
usage: ruby #$0 file.xsd ...
EOF
  end

  # Reads the schema at +uri+ (a file URI), records its named top-level
  # components in @names, then recurses into imported/included schemas.
  #
  # uri::  URI of the schema; uri.path is the file that gets opened.
  # disp:: optional label used instead of the URI in the "skipping" message.
  #
  # Raises RuntimeError when the root element is not in the XSD namespace.
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      wputs "skipping #{disp || uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    # Mark as seen before recursing so that circular imports terminate.
    @cache[uri.to_s] = true
    children = []
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS)
    nodes.each { |node| children.push node['schemaLocation'].to_s }
    nodes = nil
    nodes = doc.find('/xs:schema/xs:*/@name', 'xs' => XSD_NS)
    nodes.each do |node|
      name = node.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      # The definition seen last wins.
      @names[name] = uri.path
    end
    # Drop the references before recursing (kept from the original code;
    # presumably so the document can be collected early).
    nodes = nil
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Processes one schema file named on the command line.
  # The name is made absolute first, so a relative file name refers to the
  # current working directory rather than to the filesystem root.
  def run1(filename)
    uri = URI('file://' + File.expand_path(filename))
    get1(uri)
  end

  # Processes every remaining command-line argument. Returns self.
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names, sorted, to @outfnam in tags-file format:
  # name, defining file and a search pattern, separated by tabs.
  def output
    File.open(@outfnam, 'w') do |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each do |name|
        # The pattern looks for the name between quote characters.
        # NOTE(review): non-word characters are replaced by "."; confirm that
        # the editor treats "." as a wildcard in tag search patterns.
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      end
    end
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file. Returns self.
  def close
    output
    self
  end
end
# Entry point: parse ARGV, scan every schema named there, then write the tags file.
App.new(ARGV).run.close
Caveat - xlink:href in ISO 19139 Geographic Metadata
many times, so I came up with using xlink:href.
[definition]
<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...
</gmd:CI_Citation>
</gmd:authority>
[quotation]
<gmd:authority>
<gmd:CI_Citation xlink:href="#url.authority"/>
</gmd:authority>
But it causes a validation error saying that xlink:href is not allowed in CI_Citation.
It took a while, but I finally found the reason by reading the XML schema.
The xlink:href attribute must be attached to the parent of the omitted
element, i.e. the element that would contain it if xlink:href were not used.
So the following validates.
<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...
<gmd:authority xlink:href="#url.authority"/>
2010-10-20
A libxml-ruby script to download XSD recursively
require 'net/http'
require 'rubygems'
require 'xml'
# Downloads XML Schema documents over HTTP(S) and, recursively, every schema
# they import or include, saving each one under <host>/<path> relative to
# the current directory.
class App
  # Namespace the root element of every downloaded document must belong to.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # argv:: list of schema URIs to download.
  def initialize(argv)
    @argv = argv
    @cache = {}   # URI string => true once that document has been saved
    @htconn = {}  # :shp => [scheme, host, port], :conn => started Net::HTTP
  end

  # Prints the usage message to stdout.
  # NOTE(review): the -pNUM option described here is not handled anywhere in
  # this class; every argument is treated as a URI.
  def help
    puts <<EOF
XSD downloader following includes and imports
usage: ruby #$0 [-pNUM] uri ...
  -pNUM  number of directory structure (including hostname) to be stripped
EOF
  end

  # Returns the local file name for +uri+: host and path joined together.
  def outfnam(uri)
    File.join(*[uri.host, uri.path].compact)
  end

  # Closes the kept-alive connection, if there is one, and forgets it.
  # Safe to call repeatedly and when no connection was ever opened.
  def close
    conn = @htconn.delete(:conn)
    @htconn.delete(:shp)
    conn.finish if conn && conn.started?
  end

  # Yields a started Net::HTTP connection for +uri+'s scheme, host and port,
  # reusing the previous one when all three are unchanged.
  def getconn(uri)
    shp = [uri.scheme, uri.host, uri.port]
    if @htconn[:conn] && @htconn[:shp] == shp
      puts 'reusing connection'
    else
      close
      puts "connecting #{shp.join(' ')}"
      conn = Net::HTTP.new(uri.host, uri.port)
      # Without this an https:// URI is spoken to in plain HTTP.
      conn.use_ssl = true if uri.scheme == 'https'
      conn.start
      # Remember the connection only once it has really been opened, so a
      # failed connect is retried rather than "reused".
      @htconn[:shp] = shp
      @htconn[:conn] = conn
    end
    yield @htconn[:conn]
  end

  # Raises RuntimeError unless +test+ == +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Creates +dirname+ and any missing parent directories.
  # Returns nil when the directory already exists.
  # Raises Errno::ENOTDIR when the name exists but is not a directory.
  def mkdir_p(dirname)
    return nil if File.directory?(dirname)
    raise Errno::ENOTDIR, "not a directory: (#{dirname})" if
      File.exist?(dirname)
    puts "mkdir #{dirname}"
    mkdir_p(File.dirname(dirname))
    Dir.mkdir(dirname)
  end

  # Writes +content+ to +filename+ in binary mode, creating parent
  # directories as needed.
  def savefile(filename, content)
    puts "saving #{filename}"
    mkdir_p File.dirname(filename)
    File.open(filename, 'wb') { |fp| fp.write(content) }
  end

  # Downloads the schema at +uri+, saves it, then recurses into the schemas
  # it imports or includes. URIs already downloaded are skipped.
  #
  # uri::  URI of the schema to fetch.
  # disp:: optional label used instead of the URI in the "skipping" message.
  #
  # Raises RuntimeError on a non-200 response or when the root element is
  # not in the XSD namespace.
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      puts "skipping #{disp || uri}"
      return
    end
    ofn = outfnam(uri)
    # request_uri keeps the query string and yields "/" for an empty path.
    target = uri.respond_to?(:request_uri) ? uri.request_uri : uri.path
    buf = nil
    getconn(uri) do |conn|
      resp = conn.get(target)
      raise "#{resp.code} #{resp.message}" unless /^200/ === resp.code
      buf = resp.body
    end
    savefile(ofn, buf)
    # Mark as done before recursing so that circular imports terminate.
    @cache[uri.to_s] = true
    doc = XML::Document.string(buf)
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS)
    nodes.each do |node|
      child = node['schemaLocation'].to_s
      # Relative locations are resolved against the current document's URI.
      get1(uri + child, child)
    end
    nodes = nil
  end

  # Downloads the schema named by one command-line argument.
  def run1(arg)
    uri = URI(arg)
    get1(uri)
  end

  # Downloads every schema named on the command line. Returns self.
  def run
    @argv.each { |arg| run1 arg }
    self
  end
end
# Entry point: download every URI given on the command line, then close the HTTP connection.
App.new(ARGV).run.close
2010-10-13
How to create processing instruction when writing XML using libxml-ruby
apply a stylesheet simply inserts PI.
require 'libxslt'
# When a stylesheet reference is configured in @xslt, run @xdoc through a
# small XSLT that emits an xml-stylesheet processing instruction followed
# by an unchanged copy of the document.
# NOTE(review): @xslt is interpolated into the stylesheet text as-is;
# presumably it never contains markup characters -- confirm with the caller.
if @xslt
filter = <<-END_OF_XSLT
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<xsl:processing-instruction name="xml-stylesheet"
>type="text/xsl" href="#{@xslt}"</xsl:processing-instruction>
<xsl:copy-of select="."/>
</xsl:template>
</xsl:stylesheet>
END_OF_XSLT
# Compile the generated stylesheet and replace @xdoc with the transformed result.
stylesheet = LibXSLT::XSLT::Stylesheet.new(XML::Document.string(filter))
@xdoc = stylesheet.apply(@xdoc)
end