load_op.go 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. // Copyright 2015 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import (
  16. "fmt"
  17. "golang.org/x/net/context"
  18. bq "google.golang.org/api/bigquery/v2"
  19. )
  20. type loadOption interface {
  21. customizeLoad(conf *bq.JobConfigurationLoad, projectID string)
  22. }
  23. // DestinationSchema returns an Option that specifies the schema to use when loading data into a new table.
  24. // A DestinationSchema Option must be supplied when loading data from Google Cloud Storage into a non-existent table.
  25. // Caveat: DestinationSchema is not required if the data being loaded is a datastore backup.
  26. // schema must not be nil.
  27. func DestinationSchema(schema Schema) Option { return destSchema{Schema: schema} }
  28. type destSchema struct {
  29. Schema
  30. }
  31. func (opt destSchema) implementsOption() {}
  32. func (opt destSchema) customizeLoad(conf *bq.JobConfigurationLoad, projectID string) {
  33. conf.Schema = opt.asTableSchema()
  34. }
  35. // MaxBadRecords returns an Option that sets the maximum number of bad records that will be ignored.
  36. // If this maximum is exceeded, the operation will be unsuccessful.
  37. func MaxBadRecords(n int64) Option { return maxBadRecords(n) }
  38. type maxBadRecords int64
  39. func (opt maxBadRecords) implementsOption() {}
  40. func (opt maxBadRecords) customizeLoad(conf *bq.JobConfigurationLoad, projectID string) {
  41. conf.MaxBadRecords = int64(opt)
  42. }
  43. // AllowJaggedRows returns an Option that causes missing trailing optional columns to be tolerated in CSV data. Missing values are treated as nulls.
  44. func AllowJaggedRows() Option { return allowJaggedRows{} }
  45. type allowJaggedRows struct{}
  46. func (opt allowJaggedRows) implementsOption() {}
  47. func (opt allowJaggedRows) customizeLoad(conf *bq.JobConfigurationLoad, projectID string) {
  48. conf.AllowJaggedRows = true
  49. }
  50. // AllowQuotedNewlines returns an Option that allows quoted data sections containing newlines in CSV data.
  51. func AllowQuotedNewlines() Option { return allowQuotedNewlines{} }
  52. type allowQuotedNewlines struct{}
  53. func (opt allowQuotedNewlines) implementsOption() {}
  54. func (opt allowQuotedNewlines) customizeLoad(conf *bq.JobConfigurationLoad, projectID string) {
  55. conf.AllowQuotedNewlines = true
  56. }
  57. // IgnoreUnknownValues returns an Option that causes values not matching the schema to be tolerated.
  58. // Unknown values are ignored. For CSV this ignores extra values at the end of a line.
  59. // For JSON this ignores named values that do not match any column name.
  60. // If this Option is not used, records containing unknown values are treated as bad records.
  61. // The MaxBadRecords Option can be used to customize how bad records are handled.
  62. func IgnoreUnknownValues() Option { return ignoreUnknownValues{} }
  63. type ignoreUnknownValues struct{}
  64. func (opt ignoreUnknownValues) implementsOption() {}
  65. func (opt ignoreUnknownValues) customizeLoad(conf *bq.JobConfigurationLoad, projectID string) {
  66. conf.IgnoreUnknownValues = true
  67. }
  68. func (c *Client) load(ctx context.Context, dst *Table, src *GCSReference, options []Option) (*Job, error) {
  69. job, options := initJobProto(c.projectID, options)
  70. payload := &bq.JobConfigurationLoad{}
  71. dst.customizeLoadDst(payload, c.projectID)
  72. src.customizeLoadSrc(payload, c.projectID)
  73. for _, opt := range options {
  74. o, ok := opt.(loadOption)
  75. if !ok {
  76. return nil, fmt.Errorf("option (%#v) not applicable to dst/src pair: dst: %T ; src: %T", opt, dst, src)
  77. }
  78. o.customizeLoad(payload, c.projectID)
  79. }
  80. job.Configuration = &bq.JobConfiguration{
  81. Load: payload,
  82. }
  83. return c.service.insertJob(ctx, job, c.projectID)
  84. }