gcs.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. // Copyright 2015 Google Inc. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package bigquery
  15. import bq "google.golang.org/api/bigquery/v2"
// GCSReference is a reference to one or more Google Cloud Storage objects, which together constitute
// an input or output to a BigQuery operation.
//
// Some fields are only consulted when loading data into BigQuery and others
// only when exporting data from it; the individual field comments say which.
type GCSReference struct {
	// uris holds the Google Cloud Storage URIs this reference points at.
	// It is set by NewGCSReference.
	uris []string

	// FieldDelimiter is the separator for fields in a CSV file, used when loading or exporting data.
	// The default is ",".
	FieldDelimiter string

	// SkipLeadingRows is the number of rows at the top of a CSV file that BigQuery will skip when loading the data.
	SkipLeadingRows int64

	// SourceFormat is the format of the GCS data to be loaded into BigQuery.
	// Allowed values are: CSV, JSON, DatastoreBackup. The default is CSV.
	SourceFormat DataFormat

	// Encoding is the character encoding of the data.
	// Only used when loading data.
	Encoding Encoding

	// Quote is the value used to quote data sections in a CSV file.
	// The default quotation character is the double quote ("), which is used if both Quote and ForceZeroQuote are unset.
	// To specify that no character should be interpreted as a quotation character, set ForceZeroQuote to true.
	// Only used when loading data.
	Quote string

	// ForceZeroQuote, when true, causes an empty quote value to be sent
	// regardless of what Quote contains.
	// Only used when loading data.
	ForceZeroQuote bool

	// DestinationFormat is the format to use when writing exported files.
	// Allowed values are: CSV, Avro, JSON. The default is CSV.
	// CSV is not supported for tables with nested or repeated fields.
	DestinationFormat DataFormat

	// Compression is the type of compression to apply to exported files.
	// Only used when writing data. Default is None.
	Compression Compression
}
// implementsSource is an intentionally empty marker method.
// NOTE(review): presumably satisfies a Source interface declared elsewhere in the package — confirm.
func (gcs *GCSReference) implementsSource() {}
// implementsDestination is an intentionally empty marker method.
// NOTE(review): presumably satisfies a Destination interface declared elsewhere in the package — confirm.
func (gcs *GCSReference) implementsDestination() {}
  45. // NewGCSReference constructs a reference to one or more Google Cloud Storage objects, which together constitute a data source or destination.
  46. // In the simple case, a single URI in the form gs://bucket/object may refer to a single GCS object.
  47. // Data may also be split into mutiple files, if multiple URIs or URIs containing wildcards are provided.
  48. // Each URI may contain one '*' wildcard character, which (if present) must come after the bucket name.
  49. // For more information about the treatment of wildcards and multiple URIs,
  50. // see https://cloud.google.com/bigquery/exporting-data-from-bigquery#exportingmultiple
  51. func (c *Client) NewGCSReference(uri ...string) *GCSReference {
  52. return &GCSReference{uris: uri}
  53. }
// DataFormat describes the format of data that is loaded into BigQuery from
// Google Cloud Storage, or written to Google Cloud Storage when exporting.
type DataFormat string

// Values for DataFormat. Which of them are accepted depends on the direction
// of the operation; see GCSReference.SourceFormat and
// GCSReference.DestinationFormat.
const (
	CSV             DataFormat = "CSV"
	Avro            DataFormat = "AVRO"
	JSON            DataFormat = "NEWLINE_DELIMITED_JSON"
	DatastoreBackup DataFormat = "DATASTORE_BACKUP"
)
// Encoding specifies the character encoding of data to be loaded into BigQuery.
// See https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.load.encoding
// for more details about how this is used.
type Encoding string

// Values for Encoding, for use in GCSReference.Encoding.
const (
	UTF_8      Encoding = "UTF-8"
	ISO_8859_1 Encoding = "ISO-8859-1"
)
// Compression is the type of compression to apply when writing data to Google Cloud Storage.
type Compression string

// Values for Compression, for use in GCSReference.Compression.
const (
	None Compression = "NONE"
	Gzip Compression = "GZIP"
)
  75. func (gcs *GCSReference) customizeLoadSrc(conf *bq.JobConfigurationLoad, projectID string) {
  76. conf.SourceUris = gcs.uris
  77. conf.SkipLeadingRows = gcs.SkipLeadingRows
  78. conf.SourceFormat = string(gcs.SourceFormat)
  79. conf.Encoding = string(gcs.Encoding)
  80. conf.FieldDelimiter = gcs.FieldDelimiter
  81. if gcs.ForceZeroQuote {
  82. quote := ""
  83. conf.Quote = &quote
  84. } else if gcs.Quote != "" {
  85. conf.Quote = &gcs.Quote
  86. }
  87. }
  88. func (gcs *GCSReference) customizeExtractDst(conf *bq.JobConfigurationExtract, projectID string) {
  89. conf.DestinationUris = gcs.uris
  90. conf.Compression = string(gcs.Compression)
  91. conf.DestinationFormat = string(gcs.DestinationFormat)
  92. conf.FieldDelimiter = gcs.FieldDelimiter
  93. }