2010-10-30

Forward proxy of HTTPS by Apache HTTPD

I wanted to set up a forward proxy server for a web site that uses
both the HTTPS and HTTP protocols.
The basics are well documented in
<http://httpd.apache.org/docs/2.2/en/mod/mod_proxy.html>.

One thing I had to find out by trial and error is that an HTTPS connection is
implemented with the CONNECT method of HTTP, so it cannot be matched by its
URL in a <Proxy> directive.
Instead we have to match a URI of the form "proxy:host:443".

# for HTTP
# "Order deny,allow" permits every client by default, so the explicit
# "Deny from all" is what actually restricts the proxy to {CLIENT-IP}.
<Proxy http://{ORIGIN-SERVER}/*>
Order deny,allow
Deny from all
Allow from {CLIENT-IP}
</Proxy>

# for HTTPS
# CONNECT requests are matched as "proxy:host:port", not as an https:// URL.
<Proxy proxy:{ORIGIN-SERVER}:443>
Order deny,allow
Deny from all
Allow from {CLIENT-IP}
</Proxy>

2010-10-21

ctags for XML Schema

require 'uri'
require 'rubygems'
require 'xml'

# Builds a vi-style tags file out of the named top-level components found in
# XML Schema (XSD) files, following xs:import / xs:include references.
class App

  # Namespace URI that the root element of every processed document must use.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # argv:: command line arguments (options first, then XSD file names).
  #        The array is copied, so the caller's object is not modified.
  def initialize argv
    @argv = argv.dup
    @cache = {}        # URI string => true for each schema already parsed
    @names = {}        # component name => path of the file that defines it
    @outfnam = 'tags'  # output file name, overridable with -o
    getopts
  end

  # Writes +str+ to standard error, but only when $VERBOSE is set.
  def wputs str
    $stderr.puts str if $VERBOSE
  end

  # Writes +str+ to standard error unconditionally.
  def eputs str
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal test, right
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes leading "-..." arguments from @argv.
  #
  # -oFILE / -o FILE:: name of the tags file to write
  # -h / --help::      print the usage text and exit
  #
  # Unknown options are still skipped, but are now reported.
  # Raises ArgumentError when -o is given without a file name.
  def getopts
    while /^-/ === @argv.first
      case opt = @argv.shift
      when /^-o(.*)/
        name = $1.empty? ? @argv.shift.to_s : $1
        raise ArgumentError, 'option -o requires a file name' if name.empty?
        @outfnam = name
      when '-h', '--help'
        help
        exit
      else
        eputs "ignoring unknown option #{opt}"
      end
    end
  end

  # Prints the usage text to standard output.
  def help
    puts <<EOF
vi tags generator
usage: ruby #$0 file.xsd ...
EOF
  end

  # Parses the schema at +uri+ (only its path is used to open the file),
  # records every /xs:schema/xs:*/@name in @names and then recurses into
  # the imported / included schemas.
  #
  # uri::  URI of the schema
  # disp:: text shown instead of the URI in the "skipping" message
  #
  # Raises RuntimeError when the root element is not in the XSD namespace.
  def get1 uri, disp = nil
    if @cache[uri.to_s]
      wputs "skipping #{disp or uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    @cache[uri.to_s] = true
    # A root element without any namespace is reported through the same
    # assertion instead of failing with NoMethodError on nil.
    root_ns = doc.root.namespaces.namespace
    assert_equal(root_ns && root_ns.href, XSD_NS)
    children = []
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs'=>XSD_NS)
    nodes.each { |node|
      children.push node['schemaLocation'].to_s
    }
    nodes = doc.find('/xs:schema/xs:*/@name', 'xs'=>XSD_NS)
    nodes.each { |node|
      name = node.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      @names[name] = uri.path
    }
    # Drop the references before recursing; presumably so the parsed
    # document can be collected while the children are processed.
    nodes = nil
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Returns a file: URI for +filename+. The name is made absolute first, so a
  # relative name is resolved against the current directory rather than
  # against the filesystem root.
  def file_uri filename
    abs = File.expand_path(filename)
    # Paths that do not start with "/" (e.g. drive-letter paths) get one
    # prepended so that they end up in the path component of the URI.
    abs = '/' + abs unless abs[0, 1] == '/'
    URI('file://' + abs)
  end

  # Processes one XSD file given on the command line.
  def run1 filename
    get1(file_uri(filename))
  end

  # Processes every remaining command line argument. Returns self.
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names to @outfnam, sorted by name, one
  # "name<TAB>file<TAB>search pattern" line per entry.
  def output
    File.open(@outfnam, 'w') { |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each { |name|
        # Non-word characters of the name become "." in the search pattern.
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      }
    }
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file. Returns self.
  def close
    output
    self
  end

end

# Script entry point: read every XSD named on the command line, then write
# the tags file.
App.new(ARGV).run.close
It is really powerful when editing XML files in vi.
====
require 'uri'
require 'rubygems'
require 'xml'

# Builds a vi-style tags file out of the named top-level components found in
# XML Schema (XSD) files, following xs:import / xs:include references.
class App

  # Namespace URI that the root element of every processed document must use.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # argv:: command line arguments (options first, then XSD file names).
  #        The array is copied, so the caller's object is not modified.
  def initialize argv
    @argv = argv.dup
    @cache = {}        # URI string => true for each schema already parsed
    @names = {}        # component name => path of the file that defines it
    @outfnam = 'tags'  # output file name, overridable with -o
    getopts
  end

  # Writes +str+ to standard error, but only when $VERBOSE is set.
  def wputs str
    $stderr.puts str if $VERBOSE
  end

  # Writes +str+ to standard error unconditionally.
  def eputs str
    $stderr.puts str
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal test, right
    raise "#{test} != #{right}" unless test == right
  end

  # Consumes leading "-..." arguments from @argv.
  #
  # -oFILE / -o FILE:: name of the tags file to write
  # -h / --help::      print the usage text and exit
  #
  # Unknown options are still skipped, but are now reported.
  # Raises ArgumentError when -o is given without a file name.
  def getopts
    while /^-/ === @argv.first
      case opt = @argv.shift
      when /^-o(.*)/
        name = $1.empty? ? @argv.shift.to_s : $1
        raise ArgumentError, 'option -o requires a file name' if name.empty?
        @outfnam = name
      when '-h', '--help'
        help
        exit
      else
        eputs "ignoring unknown option #{opt}"
      end
    end
  end

  # Prints the usage text to standard output.
  def help
    puts <<EOF
vi tags generator
usage: ruby #$0 file.xsd ...
EOF
  end

  # Parses the schema at +uri+ (only its path is used to open the file),
  # records every /xs:schema/xs:*/@name in @names and then recurses into
  # the imported / included schemas.
  #
  # uri::  URI of the schema
  # disp:: text shown instead of the URI in the "skipping" message
  #
  # Raises RuntimeError when the root element is not in the XSD namespace.
  def get1 uri, disp = nil
    if @cache[uri.to_s]
      wputs "skipping #{disp or uri}"
      return
    end
    doc = XML::Document.file(uri.path)
    @cache[uri.to_s] = true
    # A root element without any namespace is reported through the same
    # assertion instead of failing with NoMethodError on nil.
    root_ns = doc.root.namespaces.namespace
    assert_equal(root_ns && root_ns.href, XSD_NS)
    children = []
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs'=>XSD_NS)
    nodes.each { |node|
      children.push node['schemaLocation'].to_s
    }
    nodes = doc.find('/xs:schema/xs:*/@name', 'xs'=>XSD_NS)
    nodes.each { |node|
      name = node.value
      if @names[name]
        eputs "duplicated #{name} in #{uri.path} and #{@names[name]}"
      end
      @names[name] = uri.path
    }
    # Drop the references before recursing; presumably so the parsed
    # document can be collected while the children are processed.
    nodes = nil
    doc = nil
    children.each { |child| get1(uri + child, child) }
  end

  # Returns a file: URI for +filename+. The name is made absolute first, so a
  # relative name is resolved against the current directory rather than
  # against the filesystem root.
  def file_uri filename
    abs = File.expand_path(filename)
    # Paths that do not start with "/" (e.g. drive-letter paths) get one
    # prepended so that they end up in the path component of the URI.
    abs = '/' + abs unless abs[0, 1] == '/'
    URI('file://' + abs)
  end

  # Processes one XSD file given on the command line.
  def run1 filename
    get1(file_uri(filename))
  end

  # Processes every remaining command line argument. Returns self.
  def run
    @argv.each { |filename| run1 filename }
    self
  end

  # Writes the collected names to @outfnam, sorted by name, one
  # "name<TAB>file<TAB>search pattern" line per entry.
  def output
    File.open(@outfnam, 'w') { |fp|
      now = Time.now.utc.strftime('%Y-%m-%dT%H:%M:%SZ')
      fp.puts "!_TAG_FILE_SORTED\t1\tsort=case-sensitive date=#{now}"
      @names.keys.sort.each { |name|
        # Non-word characters of the name become "." in the search pattern.
        query = '/\["\']' + name.gsub(/\W/, '.') + '\["\']/'
        fp.puts [name, @names[name], query].join("\t")
      }
    }
    eputs "saved to #{@outfnam}"
  end

  # Writes the tags file. Returns self.
  def close
    output
    self
  end

end

# Script entry point: read every XSD named on the command line, then write
# the tags file.
App.new(ARGV).run.close

Caveat - xlink:href in ISO 19139 Geographic Metadata

I wanted to reuse the same instance of gmd:authority/gmd:CI_Citation
many times, so I came up with the idea of using xlink:href.

[definition]
<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...
</gmd:CI_Citation>
</gmd:authority>

[quotation]
<gmd:authority>
<gmd:CI_Citation xlink:href="#url.authority">
</gmd:authority>

But it causes a validation error, saying that xlink:href is not allowed in CI_Citation.
It took a while, but I finally found the reason by reading the XML schema.

The xlink:href attribute must be attached to the parent of the omitted
element, i.e. the element that would contain it if xlink:href were not used.
So the following validates.

<gmd:authority>
<gmd:CI_Citation id="url.authority">
<gmd:title>...

<gmd:authority xlink:href="#url.authority"/>

2010-10-20

A libxml-ruby script to download XSD recursively

require 'uri'
require 'net/http'
require 'rubygems'
require 'xml'

# Downloads XML Schema (XSD) documents over HTTP(S) and recursively fetches
# every schema they reference through xs:import / xs:include. Each document
# is saved under a relative path derived from its host name and URI path.
class App

  # Namespace URI that the root element of every downloaded document must use.
  XSD_NS = 'http://www.w3.org/2001/XMLSchema'

  # argv:: command line arguments (options first, then URIs).
  #        The array is copied, so the caller's object is not modified.
  def initialize argv
    @argv = argv.dup
    @cache = {}    # URI string => true for each document already saved
    @htconn = {}   # :shp => [scheme, host, port], :conn => open Net::HTTP
    @strip = 0     # leading path components to drop from output names (-pNUM)
    getopts
  end

  # Consumes leading "-..." arguments from @argv.
  #
  # -pNUM::       number of leading components (host name included) to strip
  #               from the names of the saved files
  # -h / --help:: print the usage text and exit
  #
  # Unknown options are reported on standard error and skipped.
  def getopts
    while /^-/ === @argv.first
      case opt = @argv.shift
      when /^-p(\d+)$/ then @strip = $1.to_i
      when '-h', '--help'
        help
        exit
      else
        $stderr.puts "ignoring unknown option #{opt}"
      end
    end
  end

  # Prints the usage text to standard output.
  def help
    puts <<EOF
XSD downloader following includes and imports
usage: ruby #$0 [-pNUM] uri ...
-pNUM number of directory structure (including hostname) to be stripped
EOF
  end

  # Returns the relative file name under which +uri+ is saved: host name
  # joined with the URI path, minus the first @strip components. The last
  # component is always kept.
  def outfnam uri
    full = File.join(*[uri.host, uri.path].compact)
    return full if @strip <= 0
    parts = full.split('/').reject { |part| part.empty? }
    return full if parts.empty?
    File.join(*parts[[@strip, parts.size - 1].min..-1])
  end

  # Closes the cached HTTP connection, if one is open.
  def close
    conn = @htconn[:conn]
    conn.finish if conn && conn.started?
  end

  # Yields a started Net::HTTP connection for +uri+, reusing the cached one
  # when scheme, host and port are unchanged.
  def getconn uri
    shp = [uri.scheme, uri.host, uri.port]
    if @htconn[:shp] == shp && @htconn[:conn]
      puts 'reusing connection'
    else
      close
      # Forget the old connection first and record the new one only after
      # start succeeded, so a failed connect never leaves stale state behind.
      @htconn.clear
      puts "connecting #{shp.join(' ')}"
      conn = Net::HTTP.new(uri.host, uri.port)
      conn.use_ssl = true if uri.scheme == 'https'
      conn.start
      @htconn[:shp] = shp
      @htconn[:conn] = conn
    end
    yield @htconn[:conn]
  end

  # Raises RuntimeError unless +test+ equals +right+.
  def assert_equal test, right
    raise "#{test} != #{right}" unless test == right
  end

  # Creates +dirname+ and any missing parent directories.
  # Returns nil when the directory already exists.
  # Raises Errno::ENOTDIR when +dirname+ exists but is not a directory.
  def mkdir_p dirname
    return nil if File.directory?(dirname)
    raise Errno::ENOTDIR, "not a directory: (#{dirname})" if
      File.exist?(dirname)
    puts "mkdir #{dirname}"
    mkdir_p(File.dirname(dirname))
    Dir.mkdir(dirname)
  end

  # Writes +content+ to +filename+ in binary mode, creating parent
  # directories as needed.
  def savefile filename, content
    puts "saving #{filename}"
    mkdir_p File.dirname(filename)
    File.open(filename, 'wb') { |fp| fp.write(content) }
  end

  # Downloads +uri+, saves it, and recurses into every schema it imports or
  # includes. A URI that was already saved is skipped.
  #
  # uri::  URI of the schema
  # disp:: text shown instead of the URI in the "skipping" message
  #
  # Raises RuntimeError on a non-200 response or when the root element is
  # not in the XSD namespace.
  def get1 uri, disp = nil
    if @cache[uri.to_s]
      puts "skipping #{disp or uri}"
      return
    end
    ofn = outfnam(uri)
    buf = nil
    getconn(uri) { |conn|
      # request_uri keeps the query string and is "/" for an empty path.
      resp = conn.get(uri.request_uri)
      raise "#{resp.code} #{resp.message}" unless /^200/ === resp.code
      buf = resp.body
    }
    savefile(ofn, buf)
    @cache[uri.to_s] = true
    doc = XML::Document.string(buf)
    assert_equal(doc.root.namespaces.namespace.href, XSD_NS)
    nodes = doc.find('/xs:schema/xs:import|/xs:schema/xs:include', 'xs'=>XSD_NS)
    nodes.each { |node|
      child = node['schemaLocation'].to_s
      get1(uri + child, child)
    }
    nodes = nil
  end

  # Downloads the schema named by one command line argument.
  # Raises ArgumentError when +arg+ is not an http or https URI.
  def run1 arg
    uri = URI(arg)
    raise ArgumentError, "not an http(s) URI: #{arg}" unless uri.is_a?(URI::HTTP)
    get1(uri)
  end

  # Processes every remaining command line argument. Returns self.
  def run
    @argv.each { |arg| run1 arg }
    self
  end

end

# Script entry point: download every URI given on the command line, then
# close the HTTP connection that is still open.
App.new(ARGV).run.close

2010-10-13

How to create processing instruction when writing XML using libxml-ruby

Actually I didn't find a way. So I had to use libxslt-ruby and
apply a stylesheet that simply inserts the PI.

require 'libxslt'
# When @xslt is set, run @xdoc through a one-template stylesheet that emits an
# xml-stylesheet processing instruction whose href is @xslt, followed by a
# copy of the whole input document. @xdoc is replaced by the result.
# NOTE(review): @xslt is interpolated into the stylesheet source unescaped;
# presumably it never contains XML-special characters -- confirm.
if @xslt
filter = <<-END_OF_XSLT
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="/">
<xsl:processing-instruction name="xml-stylesheet"
>type="text/xsl" href="#{@xslt}"</xsl:processing-instruction>
<xsl:copy-of select="."/>
</xsl:template>
</xsl:stylesheet>
END_OF_XSLT
stylesheet = LibXSLT::XSLT::Stylesheet.new(XML::Document.string(filter))
@xdoc = stylesheet.apply(@xdoc)
end

2010-09-16

XML 文書内に散らばる同名の要素を集めて数えたりする XSLT

=== XSLT ===
<?xml version="1.0"?>
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:in="http://example.com/hack"
  xmlns="http://example.com/hack2"
  >

  <!--
    Collects and counts the elements of the input document that match a
    list of (namespace URI, local name) pairs. Each pair is written as
    "|uri=NAMESPACE|ln=LOCALNAME|...|"; pairs are separated by white space.
  -->

  <xsl:output method="xml" indent="yes"/>

  <!-- Entry point: one <class> element is produced per list entry. -->
  <xsl:template match="/">
    <root>
      <xsl:call-template name="kvlistParser">
        <xsl:with-param name="kvlist">
          |uri=http://example.com/hack|ln=node1|param=string|
          |uri=http://example.com/hack|ln=node2|param=float|
          |uri=http://example.com/hack|ln=node3|param=date|
        </xsl:with-param>
      </xsl:call-template>
    </root>
  </xsl:template>

  <!--
    Splits the white-space separated $kvlist and calls collectElems once per
    entry, recursing on the remainder of the list.
  -->
  <xsl:template name="kvlistParser">
    <xsl:param name="kvlist"/>
    <xsl:variable name="nkvl" select="normalize-space($kvlist)"/>
    <xsl:choose>
      <!-- An empty list produces nothing. -->
      <xsl:when test="$nkvl = ''"/>
      <!--
        The test has to look at the normalized list: the raw parameter may
        contain only leading/trailing white space, in which case splitting
        $nkvl at a space would yield empty entries.
      -->
      <xsl:when test="contains($nkvl, ' ')">
        <xsl:call-template name="collectElems">
          <xsl:with-param name="keyval" select="substring-before($nkvl, ' ')"/>
        </xsl:call-template>
        <xsl:call-template name="kvlistParser">
          <xsl:with-param name="kvlist" select="substring-after($nkvl, ' ')"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <xsl:call-template name="collectElems">
          <xsl:with-param name="keyval" select="$nkvl"/>
        </xsl:call-template>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

  <!--
    Emits one <class> element for the entry $keyval: the tag name and its
    namespace, the number of matching elements anywhere in the input, and
    one <leaf> with the string value of each match.
  -->
  <xsl:template name="collectElems">
    <xsl:param name="keyval"/>
    <xsl:variable name="uri">
      <xsl:call-template name="keyvalGet">
        <xsl:with-param name="keyval" select="$keyval"/>
        <xsl:with-param name="key" select="'uri'"/>
      </xsl:call-template>
    </xsl:variable>
    <xsl:variable name="tagn">
      <xsl:call-template name="keyvalGet">
        <xsl:with-param name="keyval" select="$keyval"/>
        <xsl:with-param name="key" select="'ln'"/>
      </xsl:call-template>
    </xsl:variable>
    <!-- Select the matching elements once and use the set twice. -->
    <xsl:variable name="hits"
      select="//*[local-name()=$tagn][namespace-uri()=$uri]"/>
    <class debug="{$keyval}">
      <tagname namespace="{$uri}">
        <xsl:value-of select="$tagn"/>
      </tagname>
      <count><xsl:value-of select="count($hits)"/></count>
      <xsl:for-each select="$hits">
        <leaf><xsl:value-of select="."/></leaf>
      </xsl:for-each>
    </class>
  </xsl:template>

  <!--
    Returns the value stored under $key in $keyval, i.e. the text between
    "|KEY=" and the next "|". The result is empty when the key is absent.
  -->
  <xsl:template name="keyvalGet">
    <xsl:param name="keyval"/>
    <xsl:param name="key"/>
    <xsl:value-of
      select="substring-before(
      substring-after($keyval, concat('|', $key, '=')), '|')"
      />
  </xsl:template>

</xsl:stylesheet>
=== input ===
<?xml version="1.0"?>
<weirdPrefix:root
  xmlns:weirdPrefix="http://example.com/hack">
  <weirdPrefix:node2>val2a</weirdPrefix:node2>
  <weirdPrefix:node1>val1a</weirdPrefix:node1>
  <weirdPrefix:node2>val2b</weirdPrefix:node2>
  <weirdPrefix:node1>val1b</weirdPrefix:node1>
  <weirdPrefix:node1>val1c</weirdPrefix:node1>
  <weirdPrefix:node2>val2c</weirdPrefix:node2>
</weirdPrefix:root>
=== output ===
<?xml version="1.0"?>
<root xmlns="http://example.com/hack2" xmlns:in="http://example.com/hack">
  <class debug="|uri=http://example.com/hack|ln=node1|param=string|">
    <tagname namespace="http://example.com/hack">node1</tagname>
    <count>3</count>
    <leaf>val1a</leaf>
    <leaf>val1b</leaf>
    <leaf>val1c</leaf>
  </class>
  <class debug="|uri=http://example.com/hack|ln=node2|param=float|">
    <tagname namespace="http://example.com/hack">node2</tagname>
    <count>3</count>
    <leaf>val2a</leaf>
    <leaf>val2b</leaf>
    <leaf>val2c</leaf>
  </class>
  <class debug="|uri=http://example.com/hack|ln=node3|param=date|">
    <tagname namespace="http://example.com/hack">node3</tagname>
    <count>0</count>
  </class>
</root>

2010-08-06

MD_ProjectionParameters

The ISO standard for projection parameters is given in MD_ProjectionParameters of ISO 19115.


Unfortunately it is not included in the WMO Core Profile v1.1 of metadata.

But at least we can safely say that if we are going to define something, we have to respect it.