search.go 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109
  1. // Copyright 2012 Google Inc. All rights reserved.
  2. // Use of this source code is governed by the Apache 2.0
  3. // license that can be found in the LICENSE file.
  4. package search // import "google.golang.org/appengine/search"
  5. // TODO: let Put specify the document language: "en", "fr", etc. Also: order_id?? storage??
  6. // TODO: Index.GetAll (or Iterator.GetAll)?
  7. // TODO: struct <-> protobuf tests.
  8. // TODO: enforce Python's MIN_NUMBER_VALUE and MIN_DATE (which would disallow a zero
  9. // time.Time)? _MAXIMUM_STRING_LENGTH?
  10. import (
  11. "errors"
  12. "fmt"
  13. "math"
  14. "reflect"
  15. "regexp"
  16. "strconv"
  17. "strings"
  18. "time"
  19. "unicode/utf8"
  20. "github.com/golang/protobuf/proto"
  21. "golang.org/x/net/context"
  22. "google.golang.org/appengine"
  23. "google.golang.org/appengine/internal"
  24. pb "google.golang.org/appengine/internal/search"
  25. )
  26. var (
  27. // ErrInvalidDocumentType is returned when methods like Put, Get or Next
  28. // are passed a dst or src argument of invalid type.
  29. ErrInvalidDocumentType = errors.New("search: invalid document type")
  30. // ErrNoSuchDocument is returned when no document was found for a given ID.
  31. ErrNoSuchDocument = errors.New("search: no such document")
  32. )
  33. // Atom is a document field whose contents are indexed as a single indivisible
  34. // string.
  35. type Atom string
  36. // HTML is a document field whose contents are indexed as HTML. Only text nodes
  37. // are indexed: "foo<b>bar" will be treated as "foobar".
  38. type HTML string
  39. // validIndexNameOrDocID is the Go equivalent of Python's
  40. // _ValidateVisiblePrintableAsciiNotReserved.
  41. func validIndexNameOrDocID(s string) bool {
  42. if strings.HasPrefix(s, "!") {
  43. return false
  44. }
  45. for _, c := range s {
  46. if c < 0x21 || 0x7f <= c {
  47. return false
  48. }
  49. }
  50. return true
  51. }
  52. var (
  53. fieldNameRE = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9_]*$`)
  54. languageRE = regexp.MustCompile(`^[a-z]{2}$`)
  55. )
  56. // validFieldName is the Go equivalent of Python's _CheckFieldName. It checks
  57. // the validity of both field and facet names.
  58. func validFieldName(s string) bool {
  59. return len(s) <= 500 && fieldNameRE.MatchString(s)
  60. }
  61. // validDocRank checks that the ranks is in the range [0, 2^31).
  62. func validDocRank(r int) bool {
  63. return 0 <= r && r <= (1<<31-1)
  64. }
  65. // validLanguage checks that a language looks like ISO 639-1.
  66. func validLanguage(s string) bool {
  67. return languageRE.MatchString(s)
  68. }
  69. // validFloat checks that f is in the range [-2147483647, 2147483647].
  70. func validFloat(f float64) bool {
  71. return -(1<<31-1) <= f && f <= (1<<31-1)
  72. }
  73. // Index is an index of documents.
  74. type Index struct {
  75. spec pb.IndexSpec
  76. }
  77. // orderIDEpoch forms the basis for populating OrderId on documents.
  78. var orderIDEpoch = time.Date(2011, 1, 1, 0, 0, 0, 0, time.UTC)
  79. // Open opens the index with the given name. The index is created if it does
  80. // not already exist.
  81. //
  82. // The name is a human-readable ASCII string. It must contain no whitespace
  83. // characters and not start with "!".
  84. func Open(name string) (*Index, error) {
  85. if !validIndexNameOrDocID(name) {
  86. return nil, fmt.Errorf("search: invalid index name %q", name)
  87. }
  88. return &Index{
  89. spec: pb.IndexSpec{
  90. Name: &name,
  91. },
  92. }, nil
  93. }
  94. // Put saves src to the index. If id is empty, a new ID is allocated by the
  95. // service and returned. If id is not empty, any existing index entry for that
  96. // ID is replaced.
  97. //
  98. // The ID is a human-readable ASCII string. It must contain no whitespace
  99. // characters and not start with "!".
  100. //
  101. // src must be a non-nil struct pointer or implement the FieldLoadSaver
  102. // interface.
  103. func (x *Index) Put(c context.Context, id string, src interface{}) (string, error) {
  104. d, err := saveDoc(src)
  105. if err != nil {
  106. return "", err
  107. }
  108. if id != "" {
  109. if !validIndexNameOrDocID(id) {
  110. return "", fmt.Errorf("search: invalid ID %q", id)
  111. }
  112. d.Id = proto.String(id)
  113. }
  114. req := &pb.IndexDocumentRequest{
  115. Params: &pb.IndexDocumentParams{
  116. Document: []*pb.Document{d},
  117. IndexSpec: &x.spec,
  118. },
  119. }
  120. res := &pb.IndexDocumentResponse{}
  121. if err := internal.Call(c, "search", "IndexDocument", req, res); err != nil {
  122. return "", err
  123. }
  124. if len(res.Status) > 0 {
  125. if s := res.Status[0]; s.GetCode() != pb.SearchServiceError_OK {
  126. return "", fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
  127. }
  128. }
  129. if len(res.Status) != 1 || len(res.DocId) != 1 {
  130. return "", fmt.Errorf("search: internal error: wrong number of results (%d Statuses, %d DocIDs)",
  131. len(res.Status), len(res.DocId))
  132. }
  133. return res.DocId[0], nil
  134. }
  135. // Get loads the document with the given ID into dst.
  136. //
  137. // The ID is a human-readable ASCII string. It must be non-empty, contain no
  138. // whitespace characters and not start with "!".
  139. //
  140. // dst must be a non-nil struct pointer or implement the FieldLoadSaver
  141. // interface.
  142. //
  143. // ErrFieldMismatch is returned when a field is to be loaded into a different
  144. // type than the one it was stored from, or when a field is missing or
  145. // unexported in the destination struct. ErrFieldMismatch is only returned if
  146. // dst is a struct pointer. It is up to the callee to decide whether this error
  147. // is fatal, recoverable, or ignorable.
  148. func (x *Index) Get(c context.Context, id string, dst interface{}) error {
  149. if id == "" || !validIndexNameOrDocID(id) {
  150. return fmt.Errorf("search: invalid ID %q", id)
  151. }
  152. req := &pb.ListDocumentsRequest{
  153. Params: &pb.ListDocumentsParams{
  154. IndexSpec: &x.spec,
  155. StartDocId: proto.String(id),
  156. Limit: proto.Int32(1),
  157. },
  158. }
  159. res := &pb.ListDocumentsResponse{}
  160. if err := internal.Call(c, "search", "ListDocuments", req, res); err != nil {
  161. return err
  162. }
  163. if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
  164. return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
  165. }
  166. if len(res.Document) != 1 || res.Document[0].GetId() != id {
  167. return ErrNoSuchDocument
  168. }
  169. return loadDoc(dst, res.Document[0], nil)
  170. }
  171. // Delete deletes a document from the index.
  172. func (x *Index) Delete(c context.Context, id string) error {
  173. req := &pb.DeleteDocumentRequest{
  174. Params: &pb.DeleteDocumentParams{
  175. DocId: []string{id},
  176. IndexSpec: &x.spec,
  177. },
  178. }
  179. res := &pb.DeleteDocumentResponse{}
  180. if err := internal.Call(c, "search", "DeleteDocument", req, res); err != nil {
  181. return err
  182. }
  183. if len(res.Status) != 1 {
  184. return fmt.Errorf("search: internal error: wrong number of results (%d)", len(res.Status))
  185. }
  186. if s := res.Status[0]; s.GetCode() != pb.SearchServiceError_OK {
  187. return fmt.Errorf("search: %s: %s", s.GetCode(), s.GetErrorDetail())
  188. }
  189. return nil
  190. }
  191. // List lists all of the documents in an index. The documents are returned in
  192. // increasing ID order.
  193. func (x *Index) List(c context.Context, opts *ListOptions) *Iterator {
  194. t := &Iterator{
  195. c: c,
  196. index: x,
  197. count: -1,
  198. listInclusive: true,
  199. more: moreList,
  200. }
  201. if opts != nil {
  202. t.listStartID = opts.StartID
  203. t.limit = opts.Limit
  204. t.idsOnly = opts.IDsOnly
  205. }
  206. return t
  207. }
  208. func moreList(t *Iterator) error {
  209. req := &pb.ListDocumentsRequest{
  210. Params: &pb.ListDocumentsParams{
  211. IndexSpec: &t.index.spec,
  212. },
  213. }
  214. if t.listStartID != "" {
  215. req.Params.StartDocId = &t.listStartID
  216. req.Params.IncludeStartDoc = &t.listInclusive
  217. }
  218. if t.limit > 0 {
  219. req.Params.Limit = proto.Int32(int32(t.limit))
  220. }
  221. if t.idsOnly {
  222. req.Params.KeysOnly = &t.idsOnly
  223. }
  224. res := &pb.ListDocumentsResponse{}
  225. if err := internal.Call(t.c, "search", "ListDocuments", req, res); err != nil {
  226. return err
  227. }
  228. if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
  229. return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
  230. }
  231. t.listRes = res.Document
  232. t.listStartID, t.listInclusive, t.more = "", false, nil
  233. if len(res.Document) != 0 && t.limit <= 0 {
  234. if id := res.Document[len(res.Document)-1].GetId(); id != "" {
  235. t.listStartID, t.more = id, moreList
  236. }
  237. }
  238. return nil
  239. }
  240. // ListOptions are the options for listing documents in an index. Passing a nil
  241. // *ListOptions is equivalent to using the default values.
  242. type ListOptions struct {
  243. // StartID is the inclusive lower bound for the ID of the returned
  244. // documents. The zero value means all documents will be returned.
  245. StartID string
  246. // Limit is the maximum number of documents to return. The zero value
  247. // indicates no limit.
  248. Limit int
  249. // IDsOnly indicates that only document IDs should be returned for the list
  250. // operation; no document fields are populated.
  251. IDsOnly bool
  252. }
  253. // Search searches the index for the given query.
  254. func (x *Index) Search(c context.Context, query string, opts *SearchOptions) *Iterator {
  255. t := &Iterator{
  256. c: c,
  257. index: x,
  258. searchQuery: query,
  259. more: moreSearch,
  260. }
  261. if opts != nil {
  262. if opts.Cursor != "" {
  263. if opts.Offset != 0 {
  264. return errIter("at most one of Cursor and Offset may be specified")
  265. }
  266. t.searchCursor = proto.String(string(opts.Cursor))
  267. }
  268. t.limit = opts.Limit
  269. t.fields = opts.Fields
  270. t.idsOnly = opts.IDsOnly
  271. t.sort = opts.Sort
  272. t.exprs = opts.Expressions
  273. t.refinements = opts.Refinements
  274. t.facetOpts = opts.Facets
  275. t.searchOffset = opts.Offset
  276. }
  277. return t
  278. }
  279. func moreSearch(t *Iterator) error {
  280. // We use per-result (rather than single/per-page) cursors since this
  281. // lets us return a Cursor for every iterator document. The two cursor
  282. // types are largely interchangeable: a page cursor is the same as the
  283. // last per-result cursor in a given search response.
  284. req := &pb.SearchRequest{
  285. Params: &pb.SearchParams{
  286. IndexSpec: &t.index.spec,
  287. Query: &t.searchQuery,
  288. Cursor: t.searchCursor,
  289. CursorType: pb.SearchParams_PER_RESULT.Enum(),
  290. FieldSpec: &pb.FieldSpec{
  291. Name: t.fields,
  292. },
  293. },
  294. }
  295. if t.limit > 0 {
  296. req.Params.Limit = proto.Int32(int32(t.limit))
  297. }
  298. if t.searchOffset > 0 {
  299. req.Params.Offset = proto.Int32(int32(t.searchOffset))
  300. t.searchOffset = 0
  301. }
  302. if t.idsOnly {
  303. req.Params.KeysOnly = &t.idsOnly
  304. }
  305. if t.sort != nil {
  306. if err := sortToProto(t.sort, req.Params); err != nil {
  307. return err
  308. }
  309. }
  310. if t.refinements != nil {
  311. if err := refinementsToProto(t.refinements, req.Params); err != nil {
  312. return err
  313. }
  314. }
  315. for _, e := range t.exprs {
  316. req.Params.FieldSpec.Expression = append(req.Params.FieldSpec.Expression, &pb.FieldSpec_Expression{
  317. Name: proto.String(e.Name),
  318. Expression: proto.String(e.Expr),
  319. })
  320. }
  321. for _, f := range t.facetOpts {
  322. if err := f.setParams(req.Params); err != nil {
  323. return fmt.Errorf("bad FacetSearchOption: %v", err)
  324. }
  325. }
  326. // Don't repeat facet search.
  327. t.facetOpts = nil
  328. res := &pb.SearchResponse{}
  329. if err := internal.Call(t.c, "search", "Search", req, res); err != nil {
  330. return err
  331. }
  332. if res.Status == nil || res.Status.GetCode() != pb.SearchServiceError_OK {
  333. return fmt.Errorf("search: %s: %s", res.Status.GetCode(), res.Status.GetErrorDetail())
  334. }
  335. t.searchRes = res.Result
  336. if len(res.FacetResult) > 0 {
  337. t.facetRes = res.FacetResult
  338. }
  339. t.count = int(*res.MatchedCount)
  340. if t.limit > 0 {
  341. t.more = nil
  342. } else {
  343. t.more = moreSearch
  344. }
  345. return nil
  346. }
  347. // SearchOptions are the options for searching an index. Passing a nil
  348. // *SearchOptions is equivalent to using the default values.
  349. type SearchOptions struct {
  350. // Limit is the maximum number of documents to return. The zero value
  351. // indicates no limit.
  352. Limit int
  353. // IDsOnly indicates that only document IDs should be returned for the search
  354. // operation; no document fields are populated.
  355. IDsOnly bool
  356. // Sort controls the ordering of search results.
  357. Sort *SortOptions
  358. // Fields specifies which document fields to include in the results. If omitted,
  359. // all document fields are returned. No more than 100 fields may be specified.
  360. Fields []string
  361. // Expressions specifies additional computed fields to add to each returned
  362. // document.
  363. Expressions []FieldExpression
  364. // Facets controls what facet information is returned for these search results.
  365. // If no options are specified, no facet results will be returned.
  366. Facets []FacetSearchOption
  367. // Refinements filters the returned documents by requiring them to contain facets
  368. // with specific values. Refinements are applied in conjunction for facets with
  369. // different names, and in disjunction otherwise.
  370. Refinements []Facet
  371. // Cursor causes the results to commence with the first document after
  372. // the document associated with the cursor.
  373. Cursor Cursor
  374. // Offset specifies the number of documents to skip over before returning results.
  375. // When specified, Cursor must be nil.
  376. Offset int
  377. }
  378. // Cursor represents an iterator's position.
  379. //
  380. // The string value of a cursor is web-safe. It can be saved and restored
  381. // for later use.
  382. type Cursor string
  383. // FieldExpression defines a custom expression to evaluate for each result.
  384. type FieldExpression struct {
  385. // Name is the name to use for the computed field.
  386. Name string
  387. // Expr is evaluated to provide a custom content snippet for each document.
  388. // See https://cloud.google.com/appengine/docs/go/search/options for
  389. // the supported expression syntax.
  390. Expr string
  391. }
  392. // FacetSearchOption controls what facet information is returned in search results.
  393. type FacetSearchOption interface {
  394. setParams(*pb.SearchParams) error
  395. }
  396. // AutoFacetDiscovery returns a FacetSearchOption which enables automatic facet
  397. // discovery for the search. Automatic facet discovery looks for the facets
  398. // which appear the most often in the aggregate in the matched documents.
  399. //
  400. // The maximum number of facets returned is controlled by facetLimit, and the
  401. // maximum number of values per facet by facetLimit. A limit of zero indicates
  402. // a default limit should be used.
  403. func AutoFacetDiscovery(facetLimit, valueLimit int) FacetSearchOption {
  404. return &autoFacetOpt{facetLimit, valueLimit}
  405. }
  406. type autoFacetOpt struct {
  407. facetLimit, valueLimit int
  408. }
  409. const defaultAutoFacetLimit = 10 // As per python runtime search.py.
  410. func (o *autoFacetOpt) setParams(params *pb.SearchParams) error {
  411. lim := int32(o.facetLimit)
  412. if lim == 0 {
  413. lim = defaultAutoFacetLimit
  414. }
  415. params.AutoDiscoverFacetCount = &lim
  416. if o.valueLimit > 0 {
  417. params.FacetAutoDetectParam = &pb.FacetAutoDetectParam{
  418. ValueLimit: proto.Int32(int32(o.valueLimit)),
  419. }
  420. }
  421. return nil
  422. }
  423. // FacetDiscovery returns a FacetSearchOption which selects a facet to be
  424. // returned with the search results. By default, the most frequently
  425. // occurring values for that facet will be returned. However, you can also
  426. // specify a list of particular Atoms or specific Ranges to return.
  427. func FacetDiscovery(name string, value ...interface{}) FacetSearchOption {
  428. return &facetOpt{name, value}
  429. }
  430. type facetOpt struct {
  431. name string
  432. values []interface{}
  433. }
  434. func (o *facetOpt) setParams(params *pb.SearchParams) error {
  435. req := &pb.FacetRequest{Name: &o.name}
  436. params.IncludeFacet = append(params.IncludeFacet, req)
  437. if len(o.values) == 0 {
  438. return nil
  439. }
  440. vtype := reflect.TypeOf(o.values[0])
  441. reqParam := &pb.FacetRequestParam{}
  442. for _, v := range o.values {
  443. if reflect.TypeOf(v) != vtype {
  444. return errors.New("values must all be Atom, or must all be Range")
  445. }
  446. switch v := v.(type) {
  447. case Atom:
  448. reqParam.ValueConstraint = append(reqParam.ValueConstraint, string(v))
  449. case Range:
  450. rng, err := rangeToProto(v)
  451. if err != nil {
  452. return fmt.Errorf("invalid range: %v", err)
  453. }
  454. reqParam.Range = append(reqParam.Range, rng)
  455. default:
  456. return fmt.Errorf("unsupported value type %T", v)
  457. }
  458. }
  459. req.Params = reqParam
  460. return nil
  461. }
  462. // FacetDocumentDepth returns a FacetSearchOption which controls the number of
  463. // documents to be evaluated with preparing facet results.
  464. func FacetDocumentDepth(depth int) FacetSearchOption {
  465. return facetDepthOpt(depth)
  466. }
  467. type facetDepthOpt int
  468. func (o facetDepthOpt) setParams(params *pb.SearchParams) error {
  469. params.FacetDepth = proto.Int32(int32(o))
  470. return nil
  471. }
  472. // FacetResult represents the number of times a particular facet and value
  473. // appeared in the documents matching a search request.
  474. type FacetResult struct {
  475. Facet
  476. // Count is the number of times this specific facet and value appeared in the
  477. // matching documents.
  478. Count int
  479. }
  480. // Range represents a numeric range with inclusive start and exclusive end.
  481. // Start may be specified as math.Inf(-1) to indicate there is no minimum
  482. // value, and End may similarly be specified as math.Inf(1); at least one of
  483. // Start or End must be a finite number.
  484. type Range struct {
  485. Start, End float64
  486. }
  487. var (
  488. negInf = math.Inf(-1)
  489. posInf = math.Inf(1)
  490. )
  491. // AtLeast returns a Range matching any value greater than, or equal to, min.
  492. func AtLeast(min float64) Range {
  493. return Range{Start: min, End: posInf}
  494. }
  495. // LessThan returns a Range matching any value less than max.
  496. func LessThan(max float64) Range {
  497. return Range{Start: negInf, End: max}
  498. }
  499. // SortOptions control the ordering and scoring of search results.
  500. type SortOptions struct {
  501. // Expressions is a slice of expressions representing a multi-dimensional
  502. // sort.
  503. Expressions []SortExpression
  504. // Scorer, when specified, will cause the documents to be scored according to
  505. // search term frequency.
  506. Scorer Scorer
  507. // Limit is the maximum number of objects to score and/or sort. Limit cannot
  508. // be more than 10,000. The zero value indicates a default limit.
  509. Limit int
  510. }
  511. // SortExpression defines a single dimension for sorting a document.
  512. type SortExpression struct {
  513. // Expr is evaluated to provide a sorting value for each document.
  514. // See https://cloud.google.com/appengine/docs/go/search/options for
  515. // the supported expression syntax.
  516. Expr string
  517. // Reverse causes the documents to be sorted in ascending order.
  518. Reverse bool
  519. // The default value to use when no field is present or the expresion
  520. // cannot be calculated for a document. For text sorts, Default must
  521. // be of type string; for numeric sorts, float64.
  522. Default interface{}
  523. }
  524. // A Scorer defines how a document is scored.
  525. type Scorer interface {
  526. toProto(*pb.ScorerSpec)
  527. }
  528. type enumScorer struct {
  529. enum pb.ScorerSpec_Scorer
  530. }
  531. func (e enumScorer) toProto(spec *pb.ScorerSpec) {
  532. spec.Scorer = e.enum.Enum()
  533. }
  534. var (
  535. // MatchScorer assigns a score based on term frequency in a document.
  536. MatchScorer Scorer = enumScorer{pb.ScorerSpec_MATCH_SCORER}
  537. // RescoringMatchScorer assigns a score based on the quality of the query
  538. // match. It is similar to a MatchScorer but uses a more complex scoring
  539. // algorithm based on match term frequency and other factors like field type.
  540. // Please be aware that this algorithm is continually refined and can change
  541. // over time without notice. This means that the ordering of search results
  542. // that use this scorer can also change without notice.
  543. RescoringMatchScorer Scorer = enumScorer{pb.ScorerSpec_RESCORING_MATCH_SCORER}
  544. )
  545. func sortToProto(sort *SortOptions, params *pb.SearchParams) error {
  546. for _, e := range sort.Expressions {
  547. spec := &pb.SortSpec{
  548. SortExpression: proto.String(e.Expr),
  549. }
  550. if e.Reverse {
  551. spec.SortDescending = proto.Bool(false)
  552. }
  553. if e.Default != nil {
  554. switch d := e.Default.(type) {
  555. case float64:
  556. spec.DefaultValueNumeric = &d
  557. case string:
  558. spec.DefaultValueText = &d
  559. default:
  560. return fmt.Errorf("search: invalid Default type %T for expression %q", d, e.Expr)
  561. }
  562. }
  563. params.SortSpec = append(params.SortSpec, spec)
  564. }
  565. spec := &pb.ScorerSpec{}
  566. if sort.Limit > 0 {
  567. spec.Limit = proto.Int32(int32(sort.Limit))
  568. params.ScorerSpec = spec
  569. }
  570. if sort.Scorer != nil {
  571. sort.Scorer.toProto(spec)
  572. params.ScorerSpec = spec
  573. }
  574. return nil
  575. }
  576. func refinementsToProto(refinements []Facet, params *pb.SearchParams) error {
  577. for _, r := range refinements {
  578. ref := &pb.FacetRefinement{
  579. Name: proto.String(r.Name),
  580. }
  581. switch v := r.Value.(type) {
  582. case Atom:
  583. ref.Value = proto.String(string(v))
  584. case Range:
  585. rng, err := rangeToProto(v)
  586. if err != nil {
  587. return fmt.Errorf("search: refinement for facet %q: %v", r.Name, err)
  588. }
  589. // Unfortunately there are two identical messages for identify Facet ranges.
  590. ref.Range = &pb.FacetRefinement_Range{Start: rng.Start, End: rng.End}
  591. default:
  592. return fmt.Errorf("search: unsupported refinement for facet %q of type %T", r.Name, v)
  593. }
  594. params.FacetRefinement = append(params.FacetRefinement, ref)
  595. }
  596. return nil
  597. }
  598. func rangeToProto(r Range) (*pb.FacetRange, error) {
  599. rng := &pb.FacetRange{}
  600. if r.Start != negInf {
  601. if !validFloat(r.Start) {
  602. return nil, errors.New("invalid value for Start")
  603. }
  604. rng.Start = proto.String(strconv.FormatFloat(r.Start, 'e', -1, 64))
  605. } else if r.End == posInf {
  606. return nil, errors.New("either Start or End must be finite")
  607. }
  608. if r.End != posInf {
  609. if !validFloat(r.End) {
  610. return nil, errors.New("invalid value for End")
  611. }
  612. rng.End = proto.String(strconv.FormatFloat(r.End, 'e', -1, 64))
  613. }
  614. return rng, nil
  615. }
  616. func protoToRange(rng *pb.FacetRefinement_Range) Range {
  617. r := Range{Start: negInf, End: posInf}
  618. if x, err := strconv.ParseFloat(rng.GetStart(), 64); err != nil {
  619. r.Start = x
  620. }
  621. if x, err := strconv.ParseFloat(rng.GetEnd(), 64); err != nil {
  622. r.End = x
  623. }
  624. return r
  625. }
  626. // Iterator is the result of searching an index for a query or listing an
  627. // index.
  628. type Iterator struct {
  629. c context.Context
  630. index *Index
  631. err error
  632. listRes []*pb.Document
  633. listStartID string
  634. listInclusive bool
  635. searchRes []*pb.SearchResult
  636. facetRes []*pb.FacetResult
  637. searchQuery string
  638. searchCursor *string
  639. searchOffset int
  640. sort *SortOptions
  641. fields []string
  642. exprs []FieldExpression
  643. refinements []Facet
  644. facetOpts []FacetSearchOption
  645. more func(*Iterator) error
  646. count int
  647. limit int // items left to return; 0 for unlimited.
  648. idsOnly bool
  649. }
  650. // errIter returns an iterator that only returns the given error.
  651. func errIter(err string) *Iterator {
  652. return &Iterator{
  653. err: errors.New(err),
  654. }
  655. }
  656. // Done is returned when a query iteration has completed.
  657. var Done = errors.New("search: query has no more results")
  658. // Count returns an approximation of the number of documents matched by the
  659. // query. It is only valid to call for iterators returned by Search.
  660. func (t *Iterator) Count() int { return t.count }
  661. // fetchMore retrieves more results, if there are no errors or pending results.
  662. func (t *Iterator) fetchMore() {
  663. if t.err == nil && len(t.listRes)+len(t.searchRes) == 0 && t.more != nil {
  664. t.err = t.more(t)
  665. }
  666. }
  667. // Next returns the ID of the next result. When there are no more results,
  668. // Done is returned as the error.
  669. //
  670. // dst must be a non-nil struct pointer, implement the FieldLoadSaver
  671. // interface, or be a nil interface value. If a non-nil dst is provided, it
  672. // will be filled with the indexed fields. dst is ignored if this iterator was
  673. // created with an IDsOnly option.
  674. func (t *Iterator) Next(dst interface{}) (string, error) {
  675. t.fetchMore()
  676. if t.err != nil {
  677. return "", t.err
  678. }
  679. var doc *pb.Document
  680. var exprs []*pb.Field
  681. switch {
  682. case len(t.listRes) != 0:
  683. doc = t.listRes[0]
  684. t.listRes = t.listRes[1:]
  685. case len(t.searchRes) != 0:
  686. doc = t.searchRes[0].Document
  687. exprs = t.searchRes[0].Expression
  688. t.searchCursor = t.searchRes[0].Cursor
  689. t.searchRes = t.searchRes[1:]
  690. default:
  691. return "", Done
  692. }
  693. if doc == nil {
  694. return "", errors.New("search: internal error: no document returned")
  695. }
  696. if !t.idsOnly && dst != nil {
  697. if err := loadDoc(dst, doc, exprs); err != nil {
  698. return "", err
  699. }
  700. }
  701. return doc.GetId(), nil
  702. }
  703. // Cursor returns the cursor associated with the current document (that is,
  704. // the document most recently returned by a call to Next).
  705. //
  706. // Passing this cursor in a future call to Search will cause those results
  707. // to commence with the first document after the current document.
  708. func (t *Iterator) Cursor() Cursor {
  709. if t.searchCursor == nil {
  710. return ""
  711. }
  712. return Cursor(*t.searchCursor)
  713. }
  714. // Facets returns the facets found within the search results, if any facets
  715. // were requested in the SearchOptions.
  716. func (t *Iterator) Facets() ([][]FacetResult, error) {
  717. t.fetchMore()
  718. if t.err != nil && t.err != Done {
  719. return nil, t.err
  720. }
  721. var facets [][]FacetResult
  722. for _, f := range t.facetRes {
  723. fres := make([]FacetResult, 0, len(f.Value))
  724. for _, v := range f.Value {
  725. ref := v.Refinement
  726. facet := FacetResult{
  727. Facet: Facet{Name: ref.GetName()},
  728. Count: int(v.GetCount()),
  729. }
  730. if ref.Value != nil {
  731. facet.Value = Atom(*ref.Value)
  732. } else {
  733. facet.Value = protoToRange(ref.Range)
  734. }
  735. fres = append(fres, facet)
  736. }
  737. facets = append(facets, fres)
  738. }
  739. return facets, nil
  740. }
  741. // saveDoc converts from a struct pointer or
  742. // FieldLoadSaver/FieldMetadataLoadSaver to the Document protobuf.
  743. func saveDoc(src interface{}) (*pb.Document, error) {
  744. var err error
  745. var fields []Field
  746. var meta *DocumentMetadata
  747. switch x := src.(type) {
  748. case FieldLoadSaver:
  749. fields, meta, err = x.Save()
  750. default:
  751. fields, err = SaveStruct(src)
  752. }
  753. if err != nil {
  754. return nil, err
  755. }
  756. fieldsProto, err := fieldsToProto(fields)
  757. if err != nil {
  758. return nil, err
  759. }
  760. d := &pb.Document{
  761. Field: fieldsProto,
  762. OrderId: proto.Int32(int32(time.Since(orderIDEpoch).Seconds())),
  763. }
  764. if meta != nil {
  765. if meta.Rank != 0 {
  766. if !validDocRank(meta.Rank) {
  767. return nil, fmt.Errorf("search: invalid rank %d, must be [0, 2^31)", meta.Rank)
  768. }
  769. *d.OrderId = int32(meta.Rank)
  770. }
  771. if len(meta.Facets) > 0 {
  772. facets, err := facetsToProto(meta.Facets)
  773. if err != nil {
  774. return nil, err
  775. }
  776. d.Facet = facets
  777. }
  778. }
  779. return d, nil
  780. }
  781. func fieldsToProto(src []Field) ([]*pb.Field, error) {
  782. // Maps to catch duplicate time or numeric fields.
  783. timeFields, numericFields := make(map[string]bool), make(map[string]bool)
  784. dst := make([]*pb.Field, 0, len(src))
  785. for _, f := range src {
  786. if !validFieldName(f.Name) {
  787. return nil, fmt.Errorf("search: invalid field name %q", f.Name)
  788. }
  789. fieldValue := &pb.FieldValue{}
  790. switch x := f.Value.(type) {
  791. case string:
  792. fieldValue.Type = pb.FieldValue_TEXT.Enum()
  793. fieldValue.StringValue = proto.String(x)
  794. case Atom:
  795. fieldValue.Type = pb.FieldValue_ATOM.Enum()
  796. fieldValue.StringValue = proto.String(string(x))
  797. case HTML:
  798. fieldValue.Type = pb.FieldValue_HTML.Enum()
  799. fieldValue.StringValue = proto.String(string(x))
  800. case time.Time:
  801. if timeFields[f.Name] {
  802. return nil, fmt.Errorf("search: duplicate time field %q", f.Name)
  803. }
  804. timeFields[f.Name] = true
  805. fieldValue.Type = pb.FieldValue_DATE.Enum()
  806. fieldValue.StringValue = proto.String(strconv.FormatInt(x.UnixNano()/1e6, 10))
  807. case float64:
  808. if numericFields[f.Name] {
  809. return nil, fmt.Errorf("search: duplicate numeric field %q", f.Name)
  810. }
  811. if !validFloat(x) {
  812. return nil, fmt.Errorf("search: numeric field %q with invalid value %f", f.Name, x)
  813. }
  814. numericFields[f.Name] = true
  815. fieldValue.Type = pb.FieldValue_NUMBER.Enum()
  816. fieldValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
  817. case appengine.GeoPoint:
  818. if !x.Valid() {
  819. return nil, fmt.Errorf(
  820. "search: GeoPoint field %q with invalid value %v",
  821. f.Name, x)
  822. }
  823. fieldValue.Type = pb.FieldValue_GEO.Enum()
  824. fieldValue.Geo = &pb.FieldValue_Geo{
  825. Lat: proto.Float64(x.Lat),
  826. Lng: proto.Float64(x.Lng),
  827. }
  828. default:
  829. return nil, fmt.Errorf("search: unsupported field type: %v", reflect.TypeOf(f.Value))
  830. }
  831. if f.Language != "" {
  832. switch f.Value.(type) {
  833. case string, HTML:
  834. if !validLanguage(f.Language) {
  835. return nil, fmt.Errorf("search: invalid language for field %q: %q", f.Name, f.Language)
  836. }
  837. fieldValue.Language = proto.String(f.Language)
  838. default:
  839. return nil, fmt.Errorf("search: setting language not supported for field %q of type %T", f.Name, f.Value)
  840. }
  841. }
  842. if p := fieldValue.StringValue; p != nil && !utf8.ValidString(*p) {
  843. return nil, fmt.Errorf("search: %q field is invalid UTF-8: %q", f.Name, *p)
  844. }
  845. dst = append(dst, &pb.Field{
  846. Name: proto.String(f.Name),
  847. Value: fieldValue,
  848. })
  849. }
  850. return dst, nil
  851. }
  852. func facetsToProto(src []Facet) ([]*pb.Facet, error) {
  853. dst := make([]*pb.Facet, 0, len(src))
  854. for _, f := range src {
  855. if !validFieldName(f.Name) {
  856. return nil, fmt.Errorf("search: invalid facet name %q", f.Name)
  857. }
  858. facetValue := &pb.FacetValue{}
  859. switch x := f.Value.(type) {
  860. case Atom:
  861. if !utf8.ValidString(string(x)) {
  862. return nil, fmt.Errorf("search: %q facet is invalid UTF-8: %q", f.Name, x)
  863. }
  864. facetValue.Type = pb.FacetValue_ATOM.Enum()
  865. facetValue.StringValue = proto.String(string(x))
  866. case float64:
  867. if !validFloat(x) {
  868. return nil, fmt.Errorf("search: numeric facet %q with invalid value %f", f.Name, x)
  869. }
  870. facetValue.Type = pb.FacetValue_NUMBER.Enum()
  871. facetValue.StringValue = proto.String(strconv.FormatFloat(x, 'e', -1, 64))
  872. default:
  873. return nil, fmt.Errorf("search: unsupported facet type: %v", reflect.TypeOf(f.Value))
  874. }
  875. dst = append(dst, &pb.Facet{
  876. Name: proto.String(f.Name),
  877. Value: facetValue,
  878. })
  879. }
  880. return dst, nil
  881. }
  882. // loadDoc converts from protobufs to a struct pointer or
  883. // FieldLoadSaver/FieldMetadataLoadSaver. The src param provides the document's
  884. // stored fields and facets, and any document metadata. An additional slice of
  885. // fields, exprs, may optionally be provided to contain any derived expressions
  886. // requested by the developer.
  887. func loadDoc(dst interface{}, src *pb.Document, exprs []*pb.Field) (err error) {
  888. fields, err := protoToFields(src.Field)
  889. if err != nil {
  890. return err
  891. }
  892. facets, err := protoToFacets(src.Facet)
  893. if err != nil {
  894. return err
  895. }
  896. if len(exprs) > 0 {
  897. exprFields, err := protoToFields(exprs)
  898. if err != nil {
  899. return err
  900. }
  901. // Mark each field as derived.
  902. for i := range exprFields {
  903. exprFields[i].Derived = true
  904. }
  905. fields = append(fields, exprFields...)
  906. }
  907. meta := &DocumentMetadata{
  908. Rank: int(src.GetOrderId()),
  909. Facets: facets,
  910. }
  911. switch x := dst.(type) {
  912. case FieldLoadSaver:
  913. return x.Load(fields, meta)
  914. default:
  915. return loadStructWithMeta(dst, fields, meta)
  916. }
  917. }
  918. func protoToFields(fields []*pb.Field) ([]Field, error) {
  919. dst := make([]Field, 0, len(fields))
  920. for _, field := range fields {
  921. fieldValue := field.GetValue()
  922. f := Field{
  923. Name: field.GetName(),
  924. }
  925. switch fieldValue.GetType() {
  926. case pb.FieldValue_TEXT:
  927. f.Value = fieldValue.GetStringValue()
  928. f.Language = fieldValue.GetLanguage()
  929. case pb.FieldValue_ATOM:
  930. f.Value = Atom(fieldValue.GetStringValue())
  931. case pb.FieldValue_HTML:
  932. f.Value = HTML(fieldValue.GetStringValue())
  933. f.Language = fieldValue.GetLanguage()
  934. case pb.FieldValue_DATE:
  935. sv := fieldValue.GetStringValue()
  936. millis, err := strconv.ParseInt(sv, 10, 64)
  937. if err != nil {
  938. return nil, fmt.Errorf("search: internal error: bad time.Time encoding %q: %v", sv, err)
  939. }
  940. f.Value = time.Unix(0, millis*1e6)
  941. case pb.FieldValue_NUMBER:
  942. sv := fieldValue.GetStringValue()
  943. x, err := strconv.ParseFloat(sv, 64)
  944. if err != nil {
  945. return nil, err
  946. }
  947. f.Value = x
  948. case pb.FieldValue_GEO:
  949. geoValue := fieldValue.GetGeo()
  950. geoPoint := appengine.GeoPoint{geoValue.GetLat(), geoValue.GetLng()}
  951. if !geoPoint.Valid() {
  952. return nil, fmt.Errorf("search: internal error: invalid GeoPoint encoding: %v", geoPoint)
  953. }
  954. f.Value = geoPoint
  955. default:
  956. return nil, fmt.Errorf("search: internal error: unknown data type %s", fieldValue.GetType())
  957. }
  958. dst = append(dst, f)
  959. }
  960. return dst, nil
  961. }
  962. func protoToFacets(facets []*pb.Facet) ([]Facet, error) {
  963. if len(facets) == 0 {
  964. return nil, nil
  965. }
  966. dst := make([]Facet, 0, len(facets))
  967. for _, facet := range facets {
  968. facetValue := facet.GetValue()
  969. f := Facet{
  970. Name: facet.GetName(),
  971. }
  972. switch facetValue.GetType() {
  973. case pb.FacetValue_ATOM:
  974. f.Value = Atom(facetValue.GetStringValue())
  975. case pb.FacetValue_NUMBER:
  976. sv := facetValue.GetStringValue()
  977. x, err := strconv.ParseFloat(sv, 64)
  978. if err != nil {
  979. return nil, err
  980. }
  981. f.Value = x
  982. default:
  983. return nil, fmt.Errorf("search: internal error: unknown data type %s", facetValue.GetType())
  984. }
  985. dst = append(dst, f)
  986. }
  987. return dst, nil
  988. }
  989. func namespaceMod(m proto.Message, namespace string) {
  990. set := func(s **string) {
  991. if *s == nil {
  992. *s = &namespace
  993. }
  994. }
  995. switch m := m.(type) {
  996. case *pb.IndexDocumentRequest:
  997. set(&m.Params.IndexSpec.Namespace)
  998. case *pb.ListDocumentsRequest:
  999. set(&m.Params.IndexSpec.Namespace)
  1000. case *pb.DeleteDocumentRequest:
  1001. set(&m.Params.IndexSpec.Namespace)
  1002. case *pb.SearchRequest:
  1003. set(&m.Params.IndexSpec.Namespace)
  1004. }
  1005. }
  1006. func init() {
  1007. internal.RegisterErrorCodeMap("search", pb.SearchServiceError_ErrorCode_name)
  1008. internal.NamespaceMods["search"] = namespaceMod
  1009. }