Class: SwissMatch::Location::DataFiles

Inherits:
Object
  • Object
show all
Defined in:
lib/swissmatch/location/datafiles.rb

Overview

TODO:

The current handling of the URLs is not clean. I don't know yet how the URLs will change over iterations.

SwissMatch::Location::DataFiles

Deals with retrieving and updating the files provided by the Swiss postal service, and loading the data from them.

Constant Summary collapse

LanguageCodes =

Used to convert numerical language codes to symbols

# Looked up by numerical language code (the array index); index 0 yields nil.
[nil, :de, :fr, :it, :rt]
AllCantons =

The data of all cantons

# One Canton per row. Column order appears to be: license tag, name, German,
# French, Italian and Romansh name (NOTE(review): confirm against
# Canton#initialize). The trailing rows (FL, DE, IT) are not Swiss cantons.
Cantons.new([
  Canton.new("AG", "Aargau",                    "Aargau",                   "Argovie",                      "Argovia",                  "Argovia"),
  Canton.new("AI", "Appenzell Innerrhoden",     "Appenzell Innerrhoden",    "Appenzell Rhodes-Intérieures", "Appenzello Interno",       "Appenzell Dadens"),
  Canton.new("AR", "Appenzell Ausserrhoden",    "Appenzell Ausserrhoden",   "Appenzell Rhodes-Extérieures", "Appenzello Esterno",       "Appenzell Dadora"),
  Canton.new("BE", "Bern",                      "Bern",                     "Berne",                        "Berna",                    "Berna"),
  Canton.new("BL", "Basel-Landschaft",          "Basel-Landschaft",         "Bâle-Campagne",                "Basilea Campagna",         "Basilea-Champagna"),
  Canton.new("BS", "Basel-Stadt",               "Basel-Stadt",              "Bâle-Ville",                   "Basilea Città",            "Basilea-Citad"),
  # The German column previously carried the French spelling "Fribourg".
  Canton.new("FR", "Freiburg",                  "Freiburg",                 "Fribourg",                     "Friburgo",                 "Friburg"),
  Canton.new("GE", "Genève",                    "Genf",                     "Genève",                       "Ginevra",                  "Genevra"),
  Canton.new("GL", "Glarus",                    "Glarus",                   "Glaris",                       "Glarona",                  "Glaruna"),
  Canton.new("GR", "Graubünden",                "Graubünden",               "Grisons",                      "Grigioni",                 "Grischun"),
  Canton.new("JU", "Jura",                      "Jura",                     "Jura",                         "Giura",                    "Giura"),
  Canton.new("LU", "Luzern",                    "Luzern",                   "Lucerne",                      "Lucerna",                  "Lucerna"),
  Canton.new("NE", "Neuchâtel",                 "Neuenburg",                "Neuchâtel",                    "Neuchâtel",                "Neuchâtel"),
  Canton.new("NW", "Nidwalden",                 "Nidwalden",                "Nidwald",                      "Nidvaldo",                 "Sutsilvania"),
  Canton.new("OW", "Obwalden",                  "Obwalden",                 "Obwald",                       "Obvaldo",                  "Sursilvania"),
  Canton.new("SG", "St. Gallen",                "St. Gallen",               "Saint-Gall",                   "San Gallo",                "Son Gagl"),
  Canton.new("SH", "Schaffhausen",              "Schaffhausen",             "Schaffhouse",                  "Sciaffusa",                "Schaffusa"),
  Canton.new("SO", "Solothurn",                 "Solothurn",                "Soleure",                      "Soletta",                  "Soloturn"),
  Canton.new("SZ", "Schwyz",                    "Schwyz",                   "Schwytz",                      "Svitto",                   "Sviz"),
  Canton.new("TG", "Thurgau",                   "Thurgau",                  "Thurgovie",                    "Turgovia",                 "Turgovia"),
  Canton.new("TI", "Ticino",                    "Tessin",                   "Tessin",                       "Ticino",                   "Tessin"),
  Canton.new("UR", "Uri",                       "Uri",                      "Uri",                          "Uri",                      "Uri"),
  Canton.new("VD", "Vaud",                      "Waadt",                    "Vaud",                         "Vaud",                     "Vad"),
  Canton.new("VS", "Valais",                    "Wallis",                   "Valais",                       "Vallese",                  "Vallais"),
  Canton.new("ZG", "Zug",                       "Zug",                      "Zoug",                         "Zugo",                     "Zug"),
  Canton.new("ZH", "Zürich",                    "Zürich",                   "Zurich",                       "Zurigo",                   "Turitg"),
  Canton.new("FL", "Fürstentum Liechtenstein",  "Fürstentum Liechtenstein", "Liechtenstein",                "Liechtenstein",            "Liechtenstein"),
  Canton.new("DE", "Deutschland",               "Deutschland",              "Allemagne",                    "Germania",                 "Germania"),
  Canton.new("IT", "Italien",                   "Italien",                  "Italie",                       "Italia",                   "Italia"),
])

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data_directory = nil) ⇒ DataFiles

Returns a new instance of DataFiles.

Parameters:

  • data_directory (nil, String) (defaults to: nil)

    The directory in which the post mat[ch] files reside


105
106
107
108
109
110
111
112
113
114
115
# File 'lib/swissmatch/location/datafiles.rb', line 105

# Creates an instance with an empty error list, not yet loaded, and decides
# which directory holds the data files.
#
# @param [nil, String] data_directory
#   The directory in which the post mat[ch] files reside. When not given,
#   the SWISSMATCH_DATA environment variable is used, and if that is not
#   set either, the expanded path of ~/.swissmatch.
def initialize(data_directory=nil)
  reset_errors!
  @loaded         = false
  @data_directory = data_directory || ENV['SWISSMATCH_DATA'] || File.expand_path('~/.swissmatch')
end

Instance Attribute Details

#cantonsSwissMatch::Cantons (readonly)

Returns The loaded swiss cantons.

Returns:


89
90
91
# File 'lib/swissmatch/location/datafiles.rb', line 89

# @return [SwissMatch::Cantons] the loaded swiss cantons (nil before any load)
def cantons; @cantons; end

#communitiesSwissMatch::Communities (readonly)

Returns The loaded swiss communities.

Returns:


95
96
97
# File 'lib/swissmatch/location/datafiles.rb', line 95

# @return [SwissMatch::Communities] the loaded swiss communities (nil before any load)
def communities; @communities; end

#data_directoryObject

The directory in which the post mat[ch] files reside


86
87
88
# File 'lib/swissmatch/location/datafiles.rb', line 86

# @return the directory in which the post mat[ch] files reside
def data_directory; @data_directory; end

#dateDate (readonly)

Returns The date from when the data from the swiss post master data file starts to be valid.

Returns:

  • (Date)

    The date from when the data from the swiss post master data file starts to be valid


79
80
81
# File 'lib/swissmatch/location/datafiles.rb', line 79

# @return [Date]
#   the date from when the data from the swiss post master data file starts
#   to be valid (nil before any load)
def date; @date; end

#districtsSwissMatch::Districts (readonly)

Returns The loaded swiss districts.

Returns:


92
93
94
# File 'lib/swissmatch/location/datafiles.rb', line 92

# @return [SwissMatch::Districts] the loaded swiss districts (nil before any load)
def districts; @districts; end

#errorsArray<LoadError> (readonly)

Returns Errors that occurred while loading the data.

Returns:

  • (Array<LoadError>)

    Errors that occurred while loading the data


101
102
103
# File 'lib/swissmatch/location/datafiles.rb', line 101

# @return [Array<LoadError>] errors that occurred while loading the data
def errors; @errors; end

#random_codeInteger (readonly)

Returns The random code from the swiss post master data file.

Returns:

  • (Integer)

    The random code from the swiss post master data file


83
84
85
# File 'lib/swissmatch/location/datafiles.rb', line 83

# @return [Integer]
#   the random code from the swiss post master data file (nil before any load)
def random_code; @random_code; end

#zip_codesSwissMatch::ZipCodes (readonly)

Returns The loaded swiss zip codes.

Returns:


98
99
100
# File 'lib/swissmatch/location/datafiles.rb', line 98

# @return [SwissMatch::ZipCodes] the loaded swiss zip codes (nil before any load)
def zip_codes; @zip_codes; end

Class Method Details

.emptyObject


69
70
71
72
73
74
# File 'lib/swissmatch/location/datafiles.rb', line 69

# Creates a new instance (default arguments) and invokes #load_empty! on it.
#
# @return [DataFiles] the freshly created instance
def self.empty
  new.tap(&:load_empty!)
end

Instance Method Details

#latest_binary_fileObject


124
125
126
# File 'lib/swissmatch/location/datafiles.rb', line 124

# Picks the binary location file to load from the data directory.
#
# Candidates are all files matching "locations_*.binary" directly inside
# @data_directory. The lexicographically greatest path is chosen explicitly,
# so the result depends neither on the order in which the file system lists
# the entries nor on an Enumerable#last extension (core Ruby has none).
#
# @return [String, nil] the chosen path, or nil if no file matches
def latest_binary_file
  Dir.glob("#{@data_directory}/locations_*.binary").max
end

#load!(file = nil) ⇒ self

Loads the data into this DataFiles instance

Returns:

  • (self)

    Returns self.


144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/swissmatch/location/datafiles.rb', line 144

# Loads the data of a binary location file into this DataFiles instance.
#
# The file is read as a whole. It starts with a 34 byte header (all values
# big-endian): date and random code as 32 bit integers, five 16 bit record
# counts, then four 32 bit byte sizes for the sections that follow. The
# sections hold, in this order, all 8 bit columns, all 16 bit columns, all
# 32 bit columns and all text columns (UTF-8, fields separated by "\x1f").
# Within a section the columns are stored one after another.
#
# Calling this on an already loaded instance does nothing.
#
# @param [nil, String] file
#   Path of the binary file. Defaults to the value of #latest_binary_file.
#
# @return [self]
#   Returns self.
#
# @raise [LoadError]
#   If no file could be determined or the file is not readable.
def load!(file=nil)
  return self if @loaded

  file ||= latest_binary_file

  unless file && File.readable?(file)
    source = ENV['SWISSMATCH_DATA'] ? 'SWISSMATCH_DATA' : '~/.swissmatch'
    raise LoadError.new("File #{file.inspect} not found or not readable (used #{source}, data_directory=#{@data_directory}) - see https://github.com/apeiros/swissmatch-location#installation", nil)
  end

  # Header: 2 x 32 bit + 5 x 16 bit = 18 bytes, followed by 4 x 32 bit = 16 bytes.
  data = File.read(file, encoding: Encoding::BINARY)
  date, random_code, zip1_count, zip2_count, com1_count, com2_count, district_count = *data[0,18].unpack("NNn*")
  int1_size, int2_size, int4_size, text_size = *data[18,16].unpack("N*")

  # Decode each section into one flat array; `offset` walks the byte positions.
  offset    = 34
  int1_cols = data[offset, int1_size].unpack("C*")
  int2_cols = data[offset+=int1_size, int2_size].unpack("n*")
  int4_cols = data[offset+=int2_size, int4_size].unpack("N*")
  text_cols = data[offset+=int4_size, text_size].force_encoding(Encoding::UTF_8).split("\x1f")

  # Slice the flat arrays into columns. From here on `offset` counts values,
  # and is advanced by the length of the previously taken column.
  offset                    = 0
  zip1_type                 = int1_cols[offset, zip1_count]
  zip1_addon                = int1_cols[offset += zip1_count, zip1_count]
  zip1_language             = int1_cols[offset += zip1_count, zip1_count]
  zip1_language_alternative = int1_cols[offset += zip1_count, zip1_count]
  zip2_region               = int1_cols[offset += zip1_count, zip2_count]
  zip2_type                 = int1_cols[offset += zip2_count, zip2_count]
  zip2_lang                 = int1_cols[offset += zip2_count, zip2_count]
  com2_PLZZ                 = int1_cols[offset += zip2_count, com2_count]

  offset                        = 0
  zip1_onrp                     = int2_cols[offset, zip1_count]
  zip1_code                     = int2_cols[offset += zip1_count, zip1_count]
  zip1_delivery_by              = int2_cols[offset += zip1_count, zip1_count]
  zip1_largest_community_number = int2_cols[offset += zip1_count, zip1_count]
  zip2_onrp                     = int2_cols[offset += zip1_count, zip2_count]
  com1_bfsnr                    = int2_cols[offset += zip2_count, com1_count]
  com1_agglomeration            = int2_cols[offset += com1_count, com1_count]
  com2_GDENR                    = int2_cols[offset += com1_count, com2_count]
  com2_PLZ4                     = int2_cols[offset += com2_count, com2_count]
  district_GDEBZNR              = int2_cols[offset += com2_count, district_count]

  # The 32 bit section consists of this single column.
  zip1_valid_from = int4_cols

  offset           = 0
  zip1_name_short  = text_cols[offset, zip1_count]
  zip1_name        = text_cols[offset += zip1_count, zip1_count]
  zip1_canton      = text_cols[offset += zip1_count, zip1_count]
  zip2_short       = text_cols[offset += zip1_count, zip2_count]
  zip2_name        = text_cols[offset += zip2_count, zip2_count]
  com1_name        = text_cols[offset += zip2_count, com1_count]
  com1_canton      = text_cols[offset += com1_count, com1_count]
  district_GDEKT   = text_cols[offset += com1_count, district_count]
  district_GDEBZNA = text_cols[offset += district_count, district_count]

  # Turn the columns into rows; the field order of each row is what the
  # load_* methods destructure.
  zip1     = [
    zip1_onrp, zip1_type, zip1_canton, zip1_code, zip1_addon,
    zip1_delivery_by, zip1_language, zip1_language_alternative,
    zip1_name_short, zip1_name, zip1_largest_community_number,
    zip1_valid_from
  ].transpose
  zip2     = [zip2_onrp, zip2_region, zip2_type, zip2_lang, zip2_short, zip2_name].transpose
  com1     = [com1_bfsnr, com1_name, com1_canton, com1_agglomeration].transpose
  com2     = [com2_PLZ4, com2_PLZZ, com2_GDENR].transpose
  district = [district_GDEKT, district_GDEBZNR, district_GDEBZNA].transpose

  # Communities must be loaded before the zip codes, which look them up.
  @date        = Date.jd(date) # the header stores a julian day number
  @random_code = random_code
  @cantons     = AllCantons
  @districts   = load_districts(district)
  @communities = load_communities(com1)
  @zip_codes   = load_zipcodes(zip1, zip2, com2)

  # Only flag the instance as loaded once everything above succeeded, so a
  # failed load can be retried.
  @loaded = true

  self
end

#load_communities(data) ⇒ SwissMatch::Communities

Returns An instance of SwissMatch::Communities containing all communities defined by the files known to this DataFiles instance.

Returns:

  • (SwissMatch::Communities)

    An instance of SwissMatch::Communities containing all communities defined by the files known to this DataFiles instance.


232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/swissmatch/location/datafiles.rb', line 232

# Builds the communities from the decoded community rows.
#
# Each row is [bfsnr, name, canton, agglomeration], where canton is a license
# tag resolved through @cantons and agglomeration is one of:
# * the row's own bfsnr: the community is created with :self
# * 0:                   the community is created with nil
# * another bfsnr:       the community references that other community
#
# Rows of the last kind are created in a second pass, after all communities
# they may point to exist.
#
# @param [Array<Array>] data
#   The community rows as described above.
#
# @return [SwissMatch::Communities]
#   An instance of SwissMatch::Communities containing all communities defined
#   by the files known to this DataFiles instance.
#
# @raise [RuntimeError]
#   If a row references a community that does not exist (or is not complete
#   yet when the row is processed).
def load_communities(data)
  temporary = []
  complete  = {}
  data.each do |bfsnr, name, canton, agglomeration|
    canton = @cantons.by_license_tag(canton)
    if agglomeration == bfsnr
      complete[bfsnr] = Community.new(bfsnr, name, canton, :self)
    elsif agglomeration.zero?
      complete[bfsnr] = Community.new(bfsnr, name, canton, nil)
    else
      temporary << [bfsnr, name, canton, agglomeration]
    end
  end
  temporary.each do |bfsnr, name, canton, agglomeration|
    community = complete[agglomeration]
    # Check the looked-up community: the number itself is always truthy here.
    raise "Incomplete community referenced by #{bfsnr}: #{agglomeration}" unless community
    complete[bfsnr] = Community.new(bfsnr, name, canton, community)
  end

  Communities.new(complete.values)
end

#load_districts(data) ⇒ Object


223
224
225
226
227
# File 'lib/swissmatch/location/datafiles.rb', line 223

# Builds the districts from the decoded district rows.
#
# The fields of every row are passed on to District.new as leading arguments,
# followed by a new, empty SwissMatch::Communities instance per district.
#
# @param [Array<Array>] data
#   One array of District constructor arguments per district.
#
# @return [Districts] the collection wrapping all created districts
def load_districts(data)
  districts = data.map do |fields|
    District.new(*fields, SwissMatch::Communities.new([]))
  end

  Districts.new(districts)
end

#load_empty!Object


128
129
130
131
132
133
134
135
136
137
138
# File 'lib/swissmatch/location/datafiles.rb', line 128

# Puts this instance into the loaded state without reading any file: year 0
# as date, 0 as random code, the built-in cantons and empty collections for
# districts, communities and zip codes.
#
# Does nothing if the instance is already loaded.
#
# @return [ZipCodes, nil]
#   The empty zip code collection that was assigned, or nil if the instance
#   was already loaded.
def load_empty!
  unless @loaded
    @loaded      = true
    @date        = Date.new(0)
    @random_code = 0
    @cantons     = AllCantons
    @districts   = Districts.new([])
    @communities = Communities.new([])
    @zip_codes   = ZipCodes.new([])
  end
end

#load_zipcodes(zip1_data, zip2_data, com2_data) ⇒ SwissMatch::ZipCodes

TODO: load all files, not just the most recent.
TODO: calculate valid_until dates.

Returns:

  • (SwissMatch::ZipCodes)

    An instance of SwissMatch::ZipCodes containing all zip codes defined by the files known to this DataFiles instance.


260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/swissmatch/location/datafiles.rb', line 260

# Builds the zip code collection from the decoded rows.
#
# Works in phases: every zip1 row is first turned into a plain array of
# ZipCode constructor arguments, the zip2 rows then append additional names
# to those arrays, and only afterwards are the arrays converted into ZipCode
# objects. Entries that deliver themselves are converted first so that the
# remaining entries can reference them through delivery_by.
#
# Requires @communities to be loaded; problems with delivery_by references
# are appended to @errors.
#
# @param [Array<Array>] zip1_data
#   One row per zip code: onrp, type, canton, code, addon, delivery_by, lang,
#   lang_alt, name_short, name, largest_community_number, valid_from.
# @param [Array<Array>] zip2_data
#   Additional names: onrp, rn, type, lang, short, name.
# @param [Array<Array>] com2_data
#   Rows whose last element is a community number and whose leading elements
#   form the key that is matched against [code, addon] of a zip code.
#
# @return [SwissMatch::ZipCodes]
#   An instance of SwissMatch::ZipCodes containing all zip codes defined by
#   the files known to this DataFiles instance.
#
# @raise [RuntimeError]
#   If a delivery_by value references an onrp that does not exist at all.
def load_zipcodes(zip1_data, zip2_data, com2_data)
  community_mapping = Hash.new { |h,k| h[k] = [] }
  self_delivered    = []
  others            = []
  temporary         = {} # onrp => argument array, later replaced by the ZipCode

  # Group the community numbers by their key (all row elements but the last).
  com2_data.each do |*key, value|
    community_mapping[key] << value
  end

  zip1_data.each do |onrp, type, canton, code, addon, delivery_by, lang, lang_alt, name_short, name, largest_community_number, valid_from|
    # 0 means no delivering zip code, the own onrp means self delivery,
    # anything else is the onrp of another entry (resolved further below).
    delivery_by               = case delivery_by when 0 then nil; when onrp then :self; else delivery_by; end
    language                  = LanguageCodes[lang]
    language_alternative      = LanguageCodes[lang_alt]
    name_short                = Name.new(name_short, language)
    name                      = Name.new(name, language)

    # compact, because some communities already no longer exist, so by_community_numbers can
    # contain nils which must be removed
    community_numbers         = (community_mapping[[code, addon]] | [largest_community_number]).sort
    communities               = Communities.new(@communities.by_community_numbers(*community_numbers).compact)

    # Positional arguments for ZipCode.new. The indices matter: 5 and 7 to 9
    # are appended to by the zip2 rows, 14 is rewritten when resolving
    # delivery_by below.
    data                      = [
      onrp,                              # ordering_number
      type,                              # type
      code,
      addon,
      name,                              # name (official)
      [name],                            # names (official + alternative)
      name_short,                        # name_short (official)
      [name_short],                      # names_short (official + alternative)
      [],                                # PLZ2 type 3 short names (additional region names)
      [],                                # PLZ2 type 3 names (additional region names)
      cantons.by_license_tag(canton),    # canton
      language,
      language_alternative,
      false,                             # sortfile_member TODO: remove
      delivery_by,                       # delivery_by
      communities.by_community_number(largest_community_number),  # community, looked up by largest_community_number
      communities,
      Date.jd(valid_from) # valid_from
    ]
    # The same array object is stored in `temporary` and in one of the two
    # lists, so later in-place changes are visible through both.
    temporary[onrp] = data
    if :self == delivery_by then
      self_delivered << data
    else
      others << data
    end
  end

  # NOTE(review): an onrp without a matching zip1 row leaves `entry` nil and
  # fails with NoMethodError for types 2 and 3 - confirm the data guarantees
  # a match.
  zip2_data.each do |onrp, rn, type, lang, short, name|
    onrp      = onrp # no-op self-assignment
    lang_code = lang
    language  = LanguageCodes[lang_code]
    entry     = temporary[onrp]
    if type == 2
      entry[5] << Name.new(name, language, rn)
      entry[7] << Name.new(short, language, rn)
    elsif type == 3
      # NOTE(review): per the slot comments above, index 8 is for short names
      # and index 9 for names, yet the name goes to 8 and the short name to 9
      # here - confirm which one is intended.
      entry[8] << Name.new(name, language, rn)
      entry[9] << Name.new(short, language, rn)
    end
  end

  # Self-delivered entries have no dependencies; convert them first.
  self_delivered.each do |row|
    temporary[row[0]] = ZipCode.new(*row)
  end
  others.each do |row|
    if row[14] then
      raise "Delivery not found:\n#{row.inspect}" unless tmp = temporary[row[14]]
      if tmp.kind_of?(Array) then
        # The referenced entry is still an unconverted argument array, i.e.
        # it is not self-delivered and has not been processed yet. Record
        # the problem and drop the reference.
        @errors << LoadError.new("Invalid reference: onrp #{row.at(0)} delivery by #{row.at(14)}", row)
        row[14] = nil
      else
        row[14] = tmp
      end
    end
    temporary[row[0]] = ZipCode.new(*row)
  end

  ZipCodes.new(temporary.values)
end

#reset_errors!self

Resets the list of errors that were encountered during load

Returns:

  • (self)

119
120
121
122
# File 'lib/swissmatch/location/datafiles.rb', line 119

# Replaces the list of errors encountered during load with a new, empty list.
#
# @return [self]
def reset_errors!
  tap { @errors = [] }
end