No warranty; this is not to be considered the official position of my employer or of any international body.
2010-12-07
reading draft 0.1 of v1.2 WMO Core Metadata Profile
My comments are posted on Google Buzz and are visible at http://www.google.com/profiles/toyoda.eizi.
2010-10-30
Forward proxy of HTTPS by Apache HTTPD
both HTTPS and HTTP protocols.
Okay it is well documented in
<http://httpd.apache.org/docs/2.2/en/mod/mod_proxy.html>.
One thing I had to find by trial and error is that an HTTPS connection is
implemented by the CONNECT method of HTTP and is not represented directly
in the <Proxy> directive.
Instead, we have to match a URI of the form "proxy:host:443".
# for HTTP
# With "Order deny,allow" a request that matches neither a Deny nor an Allow
# directive is permitted, so "Deny from all" is needed to restrict the proxy
# to the listed client; without it the proxy is open to everyone.
<Proxy http://{ORIGIN-SERVER}/*>
Order deny,allow
Deny from all
Allow from {CLIENT-IP}
</Proxy>
# for HTTPS (CONNECT requests are matched in the form "proxy:host:port")
<Proxy proxy:{ORIGIN-SERVER}:443>
Order deny,allow
Deny from all
Allow from {CLIENT-IP}
</Proxy>
2010-10-21
ctags for XML Schema
require 'uri'
require 'rubygems'
require 'xml'
# Builds a vi-style tags file from the named top-level components of XML
# Schema (XSD) files, following xs:import and xs:include references.
#
# Usage: App.new(ARGV).run.close
class App
  # Namespace URI that the root element of every processed document must use.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # @param argv [Array<String>] command-line arguments; leading "-" options
  #   are consumed, the remaining entries are XSD file names.
  def initialize(argv)
    @argv = argv.dup
    @cache = {}   # URI string => true for every document already parsed
    @names = {}   # component name => path of the file that defines it
    @outfnam = 'tags'
    getopts
  end

  # Prints a message to stderr only when Ruby runs in verbose mode (-w / -v).
  def wputs(str)
    $stderr.puts str if $VERBOSE
  end

  # Prints a message to stderr unconditionally.
  def eputs(str)
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes leading options from @argv. Recognized: -oFILE or "-o FILE"
  # (tags file name). Other options are dropped silently. A bare "-o" with
  # nothing after it keeps the default name instead of setting an empty one.
  def getopts
    while /^-/ === @argv.first
      opt = @argv.shift
      case opt
      when /\A-o(.*)/m
        name = Regexp.last_match(1)
        name = @argv.shift if name.empty?
        @outfnam = name unless name.nil? || name.empty?
      end
    end
  end

  # Prints the usage message to stdout.
  def help
    puts <<EOF
vi tags generator
usage: ruby #$0 file.xsd ...
EOF
  end

  # Parses the schema at +uri+ (a file URI), records every named top-level
  # component in @names, then recurses into imported and included schemas.
  #
  # @param uri [URI] location of the schema; only its path is opened
  # @param disp [String, nil] label used in the "skipping" message
  # @raise [RuntimeError] when the root element is not in the XSD namespace
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      wputs "skipping #{disp || uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    @cache[uri.to_s] = true
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)

    children = []
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS)
    nodes.each { |node| children << node['schemaLocation'].to_s }

    nodes = doc.find('/xs:schema/xs:*/@name', 'xs' => XSD_NS)
    nodes.each do |node|
      name = node.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      @names[name] = uri.path
    end

    # Drop the references before recursing (presumably so the parsed
    # document can be reclaimed while child schemas are processed).
    nodes = nil
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Processes one schema file given by name. The name is expanded to an
  # absolute path first, so that a relative name such as "file.xsd" is read
  # from the current directory and not from the filesystem root.
  def run1(filename)
    abs = File.expand_path(filename)
    get1(URI('file:///' + abs.sub(%r{\A/+}, '')))
  end

  # Processes every file named on the command line.
  # @return [App] self, to allow chaining
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names, sorted, to the tags file.
  def output
    File.open(@outfnam, 'w') do |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each do |name|
        # Search pattern: the name surrounded by a single or double quote.
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      end
    end
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file.
  # @return [App] self
  def close
    output
    self
  end
end
# Script entry point: process every file named on the command line, then
# write the tags file.
App.new(ARGV).run.close
This is really powerful when editing XML files in vi.
====
require 'uri'
require 'rubygems'
require 'xml'
# Builds a vi-style tags file from the named top-level components of XML
# Schema (XSD) files, following xs:import and xs:include references.
#
# Usage: App.new(ARGV).run.close
class App
  # Namespace URI that the root element of every processed document must use.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # @param argv [Array<String>] command-line arguments; leading "-" options
  #   are consumed, the remaining entries are XSD file names.
  def initialize(argv)
    @argv = argv.dup
    @cache = {}   # URI string => true for every document already parsed
    @names = {}   # component name => path of the file that defines it
    @outfnam = 'tags'
    getopts
  end

  # Prints a message to stderr only when Ruby runs in verbose mode (-w / -v).
  def wputs(str)
    $stderr.puts str if $VERBOSE
  end

  # Prints a message to stderr unconditionally.
  def eputs(str)
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes leading options from @argv. Recognized: -oFILE or "-o FILE"
  # (tags file name). Other options are dropped silently. A bare "-o" with
  # nothing after it keeps the default name instead of setting an empty one.
  def getopts
    while /^-/ === @argv.first
      opt = @argv.shift
      case opt
      when /\A-o(.*)/m
        name = Regexp.last_match(1)
        name = @argv.shift if name.empty?
        @outfnam = name unless name.nil? || name.empty?
      end
    end
  end

  # Prints the usage message to stdout.
  def help
    puts <<EOF
vi tags generator
usage: ruby #$0 file.xsd ...
EOF
  end

  # Parses the schema at +uri+ (a file URI), records every named top-level
  # component in @names, then recurses into imported and included schemas.
  #
  # @param uri [URI] location of the schema; only its path is opened
  # @param disp [String, nil] label used in the "skipping" message
  # @raise [RuntimeError] when the root element is not in the XSD namespace
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      wputs "skipping #{disp || uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    @cache[uri.to_s] = true
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)

    children = []
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS)
    nodes.each { |node| children << node['schemaLocation'].to_s }

    nodes = doc.find('/xs:schema/xs:*/@name', 'xs' => XSD_NS)
    nodes.each do |node|
      name = node.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      @names[name] = uri.path
    end

    # Drop the references before recursing (presumably so the parsed
    # document can be reclaimed while child schemas are processed).
    nodes = nil
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Processes one schema file given by name. The name is expanded to an
  # absolute path first, so that a relative name such as "file.xsd" is read
  # from the current directory and not from the filesystem root.
  def run1(filename)
    abs = File.expand_path(filename)
    get1(URI('file:///' + abs.sub(%r{\A/+}, '')))
  end

  # Processes every file named on the command line.
  # @return [App] self, to allow chaining
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names, sorted, to the tags file.
  def output
    File.open(@outfnam, 'w') do |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each do |name|
        # Search pattern: the name surrounded by a single or double quote.
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      end
    end
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file.
  # @return [App] self
  def close
    output
    self
  end
end
# Script entry point: process every file named on the command line, then
# write the tags file.
App.new(ARGV).run.close
Caveat - xlink:href in ISO 19139 Geographic Metadata
I needed to refer to the same element many times, so I came up with the idea of using xlink:href.
[definition]
<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...
</gmd:CI_Citation>
</gmd:authority>
[quotation]
<gmd:authority>
<gmd:CI_Citation xlink:href="#url.authority">
</gmd:authority>
But it causes a validation error saying that xlink:href is not allowed in CI_Citation.
It took a while, but I finally found the reason by reading the XML schema.
The xlink:href attribute must be attached to the parent of the omitted
element, that is, to the element that would contain it if xlink:href were not used.
So the following validates.
<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...
<gmd:authority xlink:href="#url.authority"/>
2010-10-20
A libxml-ruby script to download XSD recursively
require 'net/http'
require 'rubygems'
require 'xml'
# Downloads XML Schema (XSD) documents over HTTP(S) and recursively fetches
# every schema they import or include, saving each one under a local path
# derived from its host name and URL path.
#
# Usage: App.new(ARGV).run.close
class App
  # Namespace URI that the root element of every downloaded document must use.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # @param argv [Array<String>] command-line arguments: optional leading
  #   options (see #help) followed by the URIs to download.
  def initialize(argv)
    @argv = argv.dup
    @cache = {}    # URI string => true for every document already fetched
    @htconn = {}   # :shp => [scheme, host, port], :conn => open Net::HTTP
    @strip = 0     # number of leading path components dropped by #outfnam
    getopts
  end

  # Consumes leading options from @argv. Recognized: -pNUM, the option that
  # #help documents. Anything else starting with "-" is reported and dropped.
  def getopts
    while /^-/ === @argv.first
      opt = @argv.shift
      case opt
      when /\A-p(\d+)\z/ then @strip = Regexp.last_match(1).to_i
      else $stderr.puts "ignoring unknown option #{opt}"
      end
    end
  end

  # Prints the usage message to stdout.
  def help
    puts <<EOF
XSD downloader following includes and imports
usage: ruby #$0 [-pNUM] uri ...
-pNUM number of directory structure (including hostname) to be stripped
EOF
  end

  # Maps a URI to the relative local file name it is saved under: host name
  # followed by the path components, minus the first @strip components.
  # Empty, "." and ".." components are discarded so that a downloaded
  # schemaLocation cannot point outside the output directory. When stripping
  # would remove everything, the last component is kept.
  #
  # @param uri [URI]
  # @return [String]
  def outfnam(uri)
    parts = [uri.host, *uri.path.to_s.split('/')].compact
    parts = parts.reject { |part| part.empty? || part == '.' || part == '..' }
    kept = parts.drop(@strip)
    kept = parts.last(1) if kept.empty?
    File.join(*kept)
  end

  # Finishes the cached HTTP connection, if any. Safe to call repeatedly.
  def close
    conn = @htconn.delete(:conn)
    @htconn.delete(:shp)
    conn.finish if conn && conn.started?
  end

  # Yields a started Net::HTTP connection for +uri+, reusing the cached one
  # when scheme, host and port are unchanged.
  def getconn(uri)
    shp = [uri.scheme, uri.host, uri.port]
    if @htconn[:conn] && @htconn[:shp] == shp
      puts 'reusing connection'
    else
      close
      puts "connecting #{shp.join(' ')}"
      conn = Net::HTTP.new(uri.host, uri.port)
      conn.use_ssl = (uri.scheme == 'https')
      conn.start
      # Record the connection only after it started successfully, so a failed
      # attempt never leaves an unusable connection to be "reused".
      @htconn[:shp] = shp
      @htconn[:conn] = conn
    end
    yield @htconn[:conn]
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal(test, right)
    raise "#{test} != #{right}" unless test == right
  end

  # Creates +dirname+ and any missing parent directories.
  #
  # @raise [Errno::ENOTDIR] when +dirname+ exists but is not a directory
  def mkdir_p(dirname)
    return nil if File.directory?(dirname)
    raise Errno::ENOTDIR, "not a directory: (#{dirname})" if File.exist?(dirname)
    puts "mkdir #{dirname}"
    mkdir_p(File.dirname(dirname))
    Dir.mkdir(dirname)
  end

  # Writes +content+ to +filename+ in binary mode, creating parent
  # directories as needed.
  def savefile(filename, content)
    puts "saving #{filename}"
    mkdir_p File.dirname(filename)
    File.open(filename, 'wb') { |fp| fp.write(content) }
  end

  # Downloads the schema at +uri+, saves it, and recurses into every
  # xs:import / xs:include it contains. Already fetched URIs are skipped.
  #
  # @param uri [URI] absolute http or https URI
  # @param disp [String, nil] label used in the "skipping" message
  # @raise [RuntimeError] on a non-200 response or a non-XSD root namespace
  def get1(uri, disp = nil)
    if @cache[uri.to_s]
      puts "skipping #{disp || uri}"
      return
    end
    ofn = outfnam(uri)
    buf = nil
    getconn(uri) do |conn|
      # request_uri keeps the query string and is "/" for an empty path.
      resp = conn.get(uri.request_uri)
      raise "#{resp.code} #{resp.message}" unless /^200/ === resp.code
      buf = resp.body
    end
    savefile(ofn, buf)
    @cache[uri.to_s] = true
    doc = XML::Document.string(buf)
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs' => XSD_NS)
    nodes.each do |node|
      child = node['schemaLocation'].to_s
      get1(uri + child, child)
    end
    nodes = nil
  end

  # Downloads the schema named by one command-line argument.
  def run1(arg)
    get1(URI(arg))
  end

  # Downloads every URI given on the command line.
  # @return [App] self, to allow chaining
  def run
    @argv.each { |arg| run1 arg }
    self
  end
end
# Script entry point: download every URI named on the command line, then
# finish the HTTP connection.
App.new(ARGV).run.close
2010-10-13
How to create processing instruction when writing XML using libxml-ruby
One way is to apply a stylesheet that simply inserts the PI.
require 'libxslt'
# When @xslt is set, run @xdoc through a small XSLT that emits an
# xml-stylesheet processing instruction pointing at @xslt and then copies
# the whole input document; the result replaces @xdoc.
# NOTE(review): @xslt is interpolated into the stylesheet text unescaped;
# presumably it is a trusted URL without XML special characters (confirm).
if @xslt
filter = <<-END_OF_XSLT
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<xsl:processing-instruction name="xml-stylesheet"
>type="text/xsl" href="#{@xslt}"</xsl:processing-instruction>
<xsl:copy-of select="."/>
</xsl:template>
</xsl:stylesheet>
END_OF_XSLT
# Compile the stylesheet from the string above and apply it to the document.
stylesheet = LibXSLT::XSLT::Stylesheet.new(XML::Document.string(filter))
@xdoc = stylesheet.apply(@xdoc)
end
2010-09-16
XML 文書内に散らばる同名の要素を集めて数えたりする XSLT
<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:in="http://example.com/hack"
xmlns="http://example.com/hack2"
>
<xsl:output method="xml" indent="yes"/>
<!-- Entry point: wraps the result in <root> and passes a whitespace-separated
     list of |key=value|...| entries to kvlistParser. Only the uri and ln keys
     are read by the templates in this stylesheet. -->
<xsl:template match="/">
<root>
<xsl:call-template name="kvlistParser">
<xsl:with-param name="kvlist">
|uri=http://example.com/hack|ln=node1|param=string|
|uri=http://example.com/hack|ln=node2|param=float|
|uri=http://example.com/hack|ln=node3|param=date|
</xsl:with-param>
</xsl:call-template>
</root>
</xsl:template>
<!-- Splits the whitespace-separated list in $kvlist and calls collectElems
     once per entry, in order (head entry first, then recursion on the rest).
     The split test uses the normalized string, so a single entry surrounded
     by whitespace is handled as one entry; an empty list produces nothing. -->
<xsl:template name="kvlistParser">
<xsl:param name="kvlist"/>
<xsl:variable name="nkvl" select="normalize-space($kvlist)"/>
<xsl:choose>
<xsl:when test="contains($nkvl, ' ')">
<xsl:call-template name="collectElems">
<xsl:with-param name="keyval" select="substring-before($nkvl, ' ')"/>
</xsl:call-template>
<xsl:call-template name="kvlistParser">
<xsl:with-param name="kvlist" select="substring-after($nkvl, ' ')"/>
</xsl:call-template>
</xsl:when>
<xsl:when test="$nkvl != ''">
<xsl:call-template name="collectElems">
<xsl:with-param name="keyval" select="$nkvl"/>
</xsl:call-template>
</xsl:when>
</xsl:choose>
</xsl:template>
<!-- Emits one <class> element for a single |key=value|...| entry: the uri and
     ln values are extracted with keyvalGet, then every element of the source
     document whose namespace URI and local name match them is counted and
     its string value is written as a <leaf>. -->
<xsl:template name="collectElems">
<xsl:param name="keyval"/>
<xsl:variable name="uri">
<xsl:call-template name="keyvalGet">
<xsl:with-param name="keyval" select="$keyval"/>
<xsl:with-param name="key" select="'uri'"/>
</xsl:call-template>
</xsl:variable>
<xsl:variable name="tagn">
<xsl:call-template name="keyvalGet">
<xsl:with-param name="keyval" select="$keyval"/>
<xsl:with-param name="key" select="'ln'"/>
</xsl:call-template>
</xsl:variable>
<class debug="{$keyval}">
<tagname namespace="{$uri}">
<xsl:value-of select="$tagn"/>
</tagname>
<count><xsl:value-of select="count(//*[local-name()=$tagn][namespace-uri()=$uri])"/></count>
<xsl:for-each select="//*[local-name()=$tagn][namespace-uri()=$uri]">
<leaf><xsl:value-of select="."/></leaf>
</xsl:for-each>
</class>
</xsl:template>
<!-- Returns the value stored under $key in $keyval: the text between the
     first "|key=" and the next "|". The result is empty when the key is
     absent or when no "|" follows the value. -->
<xsl:template name="keyvalGet">
<xsl:param name="keyval"/>
<xsl:param name="key"/>
<xsl:value-of
select="substring-before(
substring-after($keyval, concat('|', $key, '=')), '|')"
/>
</xsl:template>
</xsl:stylesheet>
=== input ===
<?xml version="1.0"?>
<weirdPrefix:root
xmlns:weirdPrefix="http://example.com/hack">
<weirdPrefix:node2>val2a</weirdPrefix:node2>
<weirdPrefix:node1>val1a</weirdPrefix:node1>
<weirdPrefix:node2>val2b</weirdPrefix:node2>
<weirdPrefix:node1>val1b</weirdPrefix:node1>
<weirdPrefix:node1>val1c</weirdPrefix:node1>
<weirdPrefix:node2>val2c</weirdPrefix:node2>
</weirdPrefix:root>
=== output ===
<?xml version="1.0"?>
<root xmlns="http://example.com/hack2" xmlns:in="http://example.com/hack">
<class debug="|uri=http://example.com/hack|ln=node1|param=string|">
<tagname namespace="http://example.com/hack">node1</tagname>
<count>3</count>
<leaf>val1a</leaf>
<leaf>val1b</leaf>
<leaf>val1c</leaf>
</class>
<class debug="|uri=http://example.com/hack|ln=node2|param=float|">
<tagname namespace="http://example.com/hack">node2</tagname>
<count>3</count>
<leaf>val2a</leaf>
<leaf>val2b</leaf>
<leaf>val2c</leaf>
</class>
<class debug="|uri=http://example.com/hack|ln=node3|param=date|">
<tagname namespace="http://example.com/hack">node3</tagname>
<count>0</count>
</class>
</root>