	Update bleve dependency to latest master revision (#6100)
* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2
* remove unused pkg from dep file
* change bleve from master to recent revision
Authored by Lunny Xiao, committed by techknowlogick
parent 11e316654e
commit a380cfd8e0
Gopkg.lock: 36 lines changed (generated)

@@ -40,14 +40,6 @@
   revision = "1a28a7fa985680f9f4e1644c0a857ec359a444b0"
   version = "v0.4.7"

-[[projects]]
-  branch = "master"
-  digest = "1:93367b6d47a8ccc7d14f9f493ccf103ccf5afb698559ff8e8f1999427ce27ace"
-  name = "github.com/Smerity/govarint"
-  packages = ["."]
-  pruneopts = "NUT"
-  revision = "7265e41f48f15fd61751e16da866af3c704bb3ab"
-
 [[projects]]
   branch = "master"
   digest = "1:d290f4b25abbf574f80f60c8a5603ddada784f13f436b91a9a927bc7ce5a0146"
@@ -98,7 +90,8 @@
   revision = "3a771d992973f24aa725d07868b467d1ddfceafb"

 [[projects]]
-  digest = "1:c10f35be6200b09e26da267ca80f837315093ecaba27e7a223071380efb9dd32"
+  branch = "master"
+  digest = "1:b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2"
   name = "github.com/blevesearch/bleve"
   packages = [
     ".",
@@ -121,7 +114,6 @@
     "index/scorch",
     "index/scorch/mergeplan",
     "index/scorch/segment",
-    "index/scorch/segment/mem",
     "index/scorch/segment/zap",
     "index/store",
     "index/store/boltdb",
@@ -141,9 +133,10 @@
     "search/query",
     "search/scorer",
     "search/searcher",
+    "size",
   ]
   pruneopts = "NUT"
-  revision = "c74e08f039e56cef576e4336382b2a2d12d9e026"
+  revision = "05d86ea8f6e30456949f612cf68cf4a27ce8c9c5"

 [[projects]]
   branch = "master"
@@ -160,14 +153,6 @@
   pruneopts = "NUT"
   revision = "db70c57796cc8c310613541dfade3dce627d09c7"

-[[projects]]
-  digest = "1:c7e0968c05659f3973148cd5c5387d6ee960a6ae1b2eaaec0b1d435d806458bb"
-  name = "github.com/boltdb/bolt"
-  packages = ["."]
-  pruneopts = "NUT"
-  revision = "ccd680d8c1a0179ac3d68f692b01e1a1589cbfc7"
-  source = "github.com/go-gitea/bolt"
-
 [[projects]]
   digest = "1:7c96cf7bf7f52af67f7a8222185813b9b665f5172ec2ac5f7d49ed96e5fcf3e5"
   name = "github.com/boombuler/barcode"
@@ -217,15 +202,16 @@

 [[projects]]
   branch = "master"
-  digest = "1:82e1ad11d777f7bff9a1fc678a8a534a318f85e5026a8a4d6f4a94a6b0678bb6"
+  digest = "1:6a658ac7d23204dc743c7155557c45273747d78e05ae0579742bd6b744bce215"
   name = "github.com/couchbase/vellum"
   packages = [
     ".",
+    "levenshtein2",
     "regexp",
     "utf8",
   ]
   pruneopts = "NUT"
-  revision = "eb6ae3743b3f300f2136f83ca78c08cc071edbd4"
+  revision = "e91b68ff3efe3cc11723aa25dd315cbc9276cd65"

 [[projects]]
   branch = "master"
@@ -287,6 +273,14 @@
   revision = "1615341f118ae12f353cc8a983f35b584342c9b3"
   version = "v1.12.0"

+[[projects]]
+  digest = "1:ae8eea1a24ae43a46c2e96631b6303fcc4210ca0ac9d643e4da965029d1b511d"
+  name = "github.com/etcd-io/bbolt"
+  packages = ["."]
+  pruneopts = "NUT"
+  revision = "63597a96ec0ad9e6d43c3fc81e809909e0237461"
+  version = "v1.3.2"
+
 [[projects]]
   digest = "1:8603f74d35c93b37c615a02ba297be2cf2efc9ff6f1ff2b458a903990b568e48"
   name = "github.com/ethantkoenig/rupture"

Gopkg.toml

@@ -15,10 +15,8 @@ ignored = ["google.golang.org/appengine*"]
   name = "code.gitea.io/sdk"

 [[constraint]]
-#  branch = "master"
-  revision = "c74e08f039e56cef576e4336382b2a2d12d9e026"
+  revision = "05d86ea8f6e30456949f612cf68cf4a27ce8c9c5"
   name = "github.com/blevesearch/bleve"
-#Not targetting v0.7.0 since standard where use only just after this tag

 [[constraint]]
   revision = "12dd70caea0268ac0d6c2707d0611ef601e7c64e"
@@ -108,11 +106,6 @@ ignored = ["google.golang.org/appengine*"]
   name = "gopkg.in/testfixtures.v2"
   version = "2.0.0"

-[[override]]
-  name = "github.com/boltdb/bolt"
-  revision = "ccd680d8c1a0179ac3d68f692b01e1a1589cbfc7"
-  source = "github.com/go-gitea/bolt"
-
 [[override]]
   branch = "master"
   name = "golang.org/x/oauth2"

vendor/github.com/Smerity/govarint/LICENSE: 22 lines changed (generated, vendored; file removed)

@@ -1,22 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Stephen Merity
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-

vendor/github.com/Smerity/govarint/govarint.go: 229 lines changed (generated, vendored; file removed)

@@ -1,229 +0,0 @@
-package govarint
-
-import "encoding/binary"
-import "io"
-
-type U32VarintEncoder interface {
-	PutU32(x uint32) int
-	Close()
-}
-
-type U32VarintDecoder interface {
-	GetU32() (uint32, error)
-}
-
-///
-
-type U64VarintEncoder interface {
-	PutU64(x uint64) int
-	Close()
-}
-
-type U64VarintDecoder interface {
-	GetU64() (uint64, error)
-}
-
-///
-
-type U32GroupVarintEncoder struct {
-	w     io.Writer
-	index int
-	store [4]uint32
-	temp  [17]byte
-}
-
-func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder { return &U32GroupVarintEncoder{w: w} }
-
-func (b *U32GroupVarintEncoder) Flush() (int, error) {
-	// TODO: Is it more efficient to have a tailored version that's called only in Close()?
-	// If index is zero, there are no integers to flush
-	if b.index == 0 {
-		return 0, nil
-	}
-	// In the case we're flushing (the group isn't of size four), the non-values should be zero
-	// This ensures the unused entries are all zero in the sizeByte
-	for i := b.index; i < 4; i++ {
-		b.store[i] = 0
-	}
-	length := 1
-	// We need to reset the size byte to zero as we only bitwise OR into it, we don't overwrite it
-	b.temp[0] = 0
-	for i, x := range b.store {
-		size := byte(0)
-		shifts := []byte{24, 16, 8, 0}
-		for _, shift := range shifts {
-			// Always writes at least one byte -- the first one (shift = 0)
-			// Will write more bytes until the rest of the integer is all zeroes
-			if (x>>shift) != 0 || shift == 0 {
-				size += 1
-				b.temp[length] = byte(x >> shift)
-				length += 1
-			}
-		}
-		// We store the size in two of the eight bits in the first byte (sizeByte)
-		// 0 means there is one byte in total, hence why we subtract one from size
-		b.temp[0] |= (size - 1) << (uint8(3-i) * 2)
-	}
-	// If we're flushing without a full group of four, remove the unused bytes we computed
-	// This enables us to realize it's a partial group on decoding thanks to EOF
-	if b.index != 4 {
-		length -= 4 - b.index
-	}
-	_, err := b.w.Write(b.temp[:length])
-	return length, err
-}
-
-func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) {
-	bytesWritten := 0
-	b.store[b.index] = x
-	b.index += 1
-	if b.index == 4 {
-		n, err := b.Flush()
-		if err != nil {
-			return n, err
-		}
-		bytesWritten += n
-		b.index = 0
-	}
-	return bytesWritten, nil
-}
-
-func (b *U32GroupVarintEncoder) Close() {
-	// On Close, we flush any remaining values that might not have been in a full group
-	b.Flush()
-}
-
-///
-
-type U32GroupVarintDecoder struct {
-	r        io.ByteReader
-	group    [4]uint32
-	pos      int
-	finished bool
-	capacity int
-}
-
-func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder {
-	return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4}
-}
-
-func (b *U32GroupVarintDecoder) getGroup() error {
-	// We should always receive a sizeByte if there are more values to read
-	sizeByte, err := b.r.ReadByte()
-	if err != nil {
-		return err
-	}
-	// Calculate the size of the four incoming 32 bit integers
-	// 0b00 means 1 byte to read, 0b01 = 2, etc
-	b.group[0] = uint32((sizeByte >> 6) & 3)
-	b.group[1] = uint32((sizeByte >> 4) & 3)
-	b.group[2] = uint32((sizeByte >> 2) & 3)
-	b.group[3] = uint32(sizeByte & 3)
-	//
-	for index, size := range b.group {
-		b.group[index] = 0
-		// Any error that occurs in earlier byte reads should be repeated at the end one
-		// Hence we only catch and report the final ReadByte's error
-		var err error
-		switch size {
-		case 0:
-			var x byte
-			x, err = b.r.ReadByte()
-			b.group[index] = uint32(x)
-		case 1:
-			var x, y byte
-			x, _ = b.r.ReadByte()
-			y, err = b.r.ReadByte()
-			b.group[index] = uint32(x)<<8 | uint32(y)
-		case 2:
-			var x, y, z byte
-			x, _ = b.r.ReadByte()
-			y, _ = b.r.ReadByte()
-			z, err = b.r.ReadByte()
-			b.group[index] = uint32(x)<<16 | uint32(y)<<8 | uint32(z)
-		case 3:
-			var x, y, z, zz byte
-			x, _ = b.r.ReadByte()
-			y, _ = b.r.ReadByte()
-			z, _ = b.r.ReadByte()
-			zz, err = b.r.ReadByte()
-			b.group[index] = uint32(x)<<24 | uint32(y)<<16 | uint32(z)<<8 | uint32(zz)
-		}
-		if err != nil {
-			if err == io.EOF {
-				// If we hit EOF here, we have found a partial group
-				// We've return any valid entries we have read and return EOF once we run out
-				b.capacity = index
-				b.finished = true
-				break
-			} else {
-				return err
-			}
-		}
-	}
-	// Reset the pos pointer to the beginning of the read values
-	b.pos = 0
-	return nil
-}
-
-func (b *U32GroupVarintDecoder) GetU32() (uint32, error) {
-	// Check if we have any more values to give out - if not, let's get them
-	if b.pos == b.capacity {
-		// If finished is set, there is nothing else to do
-		if b.finished {
-			return 0, io.EOF
-		}
-		err := b.getGroup()
-		if err != nil {
-			return 0, err
-		}
-	}
-	// Increment pointer and return the value stored at that point
-	b.pos += 1
-	return b.group[b.pos-1], nil
-}
-
-///
-
-type Base128Encoder struct {
-	w        io.Writer
-	tmpBytes []byte
-}
-
-func NewU32Base128Encoder(w io.Writer) *Base128Encoder {
-	return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen32)}
-}
-func NewU64Base128Encoder(w io.Writer) *Base128Encoder {
-	return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
-}
-
-func (b *Base128Encoder) PutU32(x uint32) (int, error) {
-	writtenBytes := binary.PutUvarint(b.tmpBytes, uint64(x))
-	return b.w.Write(b.tmpBytes[:writtenBytes])
-}
-
-func (b *Base128Encoder) PutU64(x uint64) (int, error) {
-	writtenBytes := binary.PutUvarint(b.tmpBytes, x)
-	return b.w.Write(b.tmpBytes[:writtenBytes])
-}
-
-func (b *Base128Encoder) Close() {
-}
-
-///
-
-type Base128Decoder struct {
-	r io.ByteReader
-}
-
-func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
-func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
-
-func (b *Base128Decoder) GetU32() (uint32, error) {
-	v, err := binary.ReadUvarint(b.r)
-	return uint32(v), err
-}
-
-func (b *Base128Decoder) GetU64() (uint64, error) {
-	return binary.ReadUvarint(b.r)
-}

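Aside: the govarint package removed above implemented group-varint and base-128 (LEB128-style) varint coding for uint32/uint64 streams. A minimal round-trip sketch against the removed API shown above; the example values and the use of bytes.Buffer are illustrative assumptions, not code from this commit:

package main

import (
	"bytes"
	"fmt"

	"github.com/Smerity/govarint"
)

func main() {
	// Encode a few integers with the base-128 encoder.
	var buf bytes.Buffer
	enc := govarint.NewU64Base128Encoder(&buf)
	for _, v := range []uint64{1, 300, 70000} {
		enc.PutU64(v)
	}
	enc.Close()

	// Decode them back in order; bytes.Buffer satisfies io.ByteReader.
	dec := govarint.NewU64Base128Decoder(&buf)
	for i := 0; i < 3; i++ {
		v, err := dec.GetU64()
		if err != nil {
			panic(err)
		}
		fmt.Println(v)
	}
}
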
vendor/github.com/blevesearch/bleve/analysis/freq.go: 41 lines changed (generated, vendored)

@@ -14,6 +14,22 @@

 package analysis

+import (
+	"reflect"
+
+	"github.com/blevesearch/bleve/size"
+)
+
+var reflectStaticSizeTokenLocation int
+var reflectStaticSizeTokenFreq int
+
+func init() {
+	var tl TokenLocation
+	reflectStaticSizeTokenLocation = int(reflect.TypeOf(tl).Size())
+	var tf TokenFreq
+	reflectStaticSizeTokenFreq = int(reflect.TypeOf(tf).Size())
+}
+
 // TokenLocation represents one occurrence of a term at a particular location in
 // a field. Start, End and Position have the same meaning as in analysis.Token.
 // Field and ArrayPositions identify the field value in the source document.
@@ -26,6 +42,12 @@ type TokenLocation struct {
 	Position       int
 }

+func (tl *TokenLocation) Size() int {
+	rv := reflectStaticSizeTokenLocation
+	rv += len(tl.ArrayPositions) * size.SizeOfUint64
+	return rv
+}
+
 // TokenFreq represents all the occurrences of a term in all fields of a
 // document.
 type TokenFreq struct {
@@ -34,6 +56,15 @@ type TokenFreq struct {
 	frequency int
 }

+func (tf *TokenFreq) Size() int {
+	rv := reflectStaticSizeTokenFreq
+	rv += len(tf.Term)
+	for _, loc := range tf.Locations {
+		rv += loc.Size()
+	}
+	return rv
+}
+
 func (tf *TokenFreq) Frequency() int {
 	return tf.frequency
 }
@@ -42,6 +73,16 @@ func (tf *TokenFreq) Frequency() int {
 // fields.
 type TokenFrequencies map[string]*TokenFreq

+func (tfs TokenFrequencies) Size() int {
+	rv := size.SizeOfMap
+	rv += len(tfs) * (size.SizeOfString + size.SizeOfPtr)
+	for k, v := range tfs {
+		rv += len(k)
+		rv += v.Size()
+	}
+	return rv
+}
+
 func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) {
 	// walk the new token frequencies
 	for tfk, tf := range other {

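The Size() plumbing added above follows one pattern that repeats through the rest of this commit: the fixed part of a struct's footprint is measured once with reflection in init(), and each Size() method then adds only the variable-length payloads. A standalone sketch of the same idea; the Token type here is hypothetical, not bleve's:

package main

import (
	"fmt"
	"reflect"
)

// Token is a hypothetical struct standing in for bleve's types.
type Token struct {
	Term      []byte
	Positions []uint64
}

// Computed once at startup: the size of the struct header itself,
// excluding anything the slices point at.
var reflectStaticSizeToken = int(reflect.TypeOf(Token{}).Size())

// Size adds the variable-length payloads to the static size.
func (t *Token) Size() int {
	return reflectStaticSizeToken +
		len(t.Term) +
		len(t.Positions)*8 // 8 = size of a uint64 element
}

func main() {
	t := Token{Term: []byte("gitea"), Positions: []uint64{1, 7}}
	fmt.Println(t.Size())
}
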
vendor/github.com/blevesearch/bleve/analysis/token/camelcase/parser.go: 8 lines changed (generated, vendored)

@@ -46,11 +46,11 @@ type Parser struct {
 	index     int
 }

-func NewParser(len, position, index int) *Parser {
+func NewParser(length, position, index int) *Parser {
 	return &Parser{
-		bufferLen: len,
-		buffer:    make([]rune, 0, len),
-		tokens:    make([]*analysis.Token, 0, len),
+		bufferLen: length,
+		buffer:    make([]rune, 0, length),
+		tokens:    make([]*analysis.Token, 0, length),
 		position:  position,
 		index:     index,
 	}

vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go: 2 lines changed (generated, vendored)

@@ -21,7 +21,7 @@ import (

 const Name = "unique"

-// UniqueTermFilter retains only the tokens which mark the first occurence of
+// UniqueTermFilter retains only the tokens which mark the first occurrence of
 // a term. Tokens whose term appears in a preceding token are dropped.
 type UniqueTermFilter struct{}

vendor/github.com/blevesearch/bleve/document/document.go: 29 lines changed (generated, vendored)

@@ -14,7 +14,19 @@

 package document

-import "fmt"
+import (
+	"fmt"
+	"reflect"
+
+	"github.com/blevesearch/bleve/size"
+)
+
+var reflectStaticSizeDocument int
+
+func init() {
+	var d Document
+	reflectStaticSizeDocument = int(reflect.TypeOf(d).Size())
+}

 type Document struct {
 	ID              string  `json:"id"`
@@ -30,6 +42,21 @@ func NewDocument(id string) *Document {
 	}
 }

+func (d *Document) Size() int {
+	sizeInBytes := reflectStaticSizeDocument + size.SizeOfPtr +
+		len(d.ID)
+
+	for _, entry := range d.Fields {
+		sizeInBytes += entry.Size()
+	}
+
+	for _, entry := range d.CompositeFields {
+		sizeInBytes += entry.Size()
+	}
+
+	return sizeInBytes
+}
+
 func (d *Document) AddField(f Field) *Document {
 	switch f := f.(type) {
 	case *CompositeField:

vendor/github.com/blevesearch/bleve/document/field.go: 2 lines changed (generated, vendored)

@@ -36,4 +36,6 @@ type Field interface {
 	// that this field represents - this is a common metric for tracking
 	// the rate of indexing
 	NumPlainTextBytes() uint64
+
+	Size() int
 }

vendor/github.com/blevesearch/bleve/document/field_boolean.go: 16 lines changed (generated, vendored)

@@ -16,10 +16,19 @@ package document

 import (
 	"fmt"
+	"reflect"
+
 	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/size"
 )

+var reflectStaticSizeBooleanField int
+
+func init() {
+	var f BooleanField
+	reflectStaticSizeBooleanField = int(reflect.TypeOf(f).Size())
+}
+
 const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues

 type BooleanField struct {
@@ -30,6 +39,13 @@ type BooleanField struct {
 	numPlainTextBytes uint64
 }

+func (b *BooleanField) Size() int {
+	return reflectStaticSizeBooleanField + size.SizeOfPtr +
+		len(b.name) +
+		len(b.arrayPositions)*size.SizeOfUint64 +
+		len(b.value)
+}
+
 func (b *BooleanField) Name() string {
 	return b.name
 }

vendor/github.com/blevesearch/bleve/document/field_composite.go: 25 lines changed (generated, vendored)

@@ -15,9 +15,19 @@
 package document

 import (
+	"reflect"
+
 	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/size"
 )

+var reflectStaticSizeCompositeField int
+
+func init() {
+	var cf CompositeField
+	reflectStaticSizeCompositeField = int(reflect.TypeOf(cf).Size())
+}
+
 const DefaultCompositeIndexingOptions = IndexField

 type CompositeField struct {
@@ -54,6 +64,21 @@ func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, incl
 	return rv
 }

+func (c *CompositeField) Size() int {
+	sizeInBytes := reflectStaticSizeCompositeField + size.SizeOfPtr +
+		len(c.name)
+
+	for k, _ := range c.includedFields {
+		sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
+	}
+
+	for k, _ := range c.excludedFields {
+		sizeInBytes += size.SizeOfString + len(k) + size.SizeOfBool
+	}
+
+	return sizeInBytes
+}
+
 func (c *CompositeField) Name() string {
 	return c.name
 }

vendor/github.com/blevesearch/bleve/document/field_datetime.go: 15 lines changed (generated, vendored)

@@ -17,12 +17,21 @@ package document
 import (
 	"fmt"
 	"math"
+	"reflect"
 	"time"

 	"github.com/blevesearch/bleve/analysis"
 	"github.com/blevesearch/bleve/numeric"
+	"github.com/blevesearch/bleve/size"
 )

+var reflectStaticSizeDateTimeField int
+
+func init() {
+	var f DateTimeField
+	reflectStaticSizeDateTimeField = int(reflect.TypeOf(f).Size())
+}
+
 const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues
 const DefaultDateTimePrecisionStep uint = 4

@@ -37,6 +46,12 @@ type DateTimeField struct {
 	numPlainTextBytes uint64
 }

+func (n *DateTimeField) Size() int {
+	return reflectStaticSizeDateTimeField + size.SizeOfPtr +
+		len(n.name) +
+		len(n.arrayPositions)*size.SizeOfUint64
+}
+
 func (n *DateTimeField) Name() string {
 	return n.name
 }

vendor/github.com/blevesearch/bleve/document/field_geopoint.go: 15 lines changed (generated, vendored)

@@ -16,12 +16,21 @@ package document

 import (
 	"fmt"
+	"reflect"
+
 	"github.com/blevesearch/bleve/analysis"
 	"github.com/blevesearch/bleve/geo"
 	"github.com/blevesearch/bleve/numeric"
+	"github.com/blevesearch/bleve/size"
 )

+var reflectStaticSizeGeoPointField int
+
+func init() {
+	var f GeoPointField
+	reflectStaticSizeGeoPointField = int(reflect.TypeOf(f).Size())
+}
+
 var GeoPrecisionStep uint = 9

 type GeoPointField struct {
@@ -32,6 +41,12 @@ type GeoPointField struct {
 	numPlainTextBytes uint64
 }

+func (n *GeoPointField) Size() int {
+	return reflectStaticSizeGeoPointField + size.SizeOfPtr +
+		len(n.name) +
+		len(n.arrayPositions)*size.SizeOfUint64
+}
+
 func (n *GeoPointField) Name() string {
 	return n.name
 }

vendor/github.com/blevesearch/bleve/document/field_numeric.go: 15 lines changed (generated, vendored)

@@ -16,11 +16,20 @@ package document

 import (
 	"fmt"
+	"reflect"
+
 	"github.com/blevesearch/bleve/analysis"
 	"github.com/blevesearch/bleve/numeric"
+	"github.com/blevesearch/bleve/size"
 )

+var reflectStaticSizeNumericField int
+
+func init() {
+	var f NumericField
+	reflectStaticSizeNumericField = int(reflect.TypeOf(f).Size())
+}
+
 const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues

 const DefaultPrecisionStep uint = 4
@@ -33,6 +42,12 @@ type NumericField struct {
 	numPlainTextBytes uint64
 }

+func (n *NumericField) Size() int {
+	return reflectStaticSizeNumericField + size.SizeOfPtr +
+		len(n.name) +
+		len(n.arrayPositions)*size.SizeOfPtr
+}
+
 func (n *NumericField) Name() string {
 	return n.name
 }

vendor/github.com/blevesearch/bleve/document/field_text.go: 16 lines changed (generated, vendored)

@@ -16,10 +16,19 @@ package document

 import (
 	"fmt"
+	"reflect"
+
 	"github.com/blevesearch/bleve/analysis"
+	"github.com/blevesearch/bleve/size"
 )

+var reflectStaticSizeTextField int
+
+func init() {
+	var f TextField
+	reflectStaticSizeTextField = int(reflect.TypeOf(f).Size())
+}
+
 const DefaultTextIndexingOptions = IndexField | DocValues

 type TextField struct {
@@ -31,6 +40,13 @@ type TextField struct {
 	numPlainTextBytes uint64
 }

+func (t *TextField) Size() int {
+	return reflectStaticSizeTextField + size.SizeOfPtr +
+		len(t.name) +
+		len(t.arrayPositions)*size.SizeOfUint64 +
+		len(t.value)
+}
+
 func (t *TextField) Name() string {
 	return t.name
 }

vendor/github.com/blevesearch/bleve/geo/geohash.go: 174 lines changed (generated, vendored; new file)

@@ -0,0 +1,174 @@
+// The code here was obtained from:
+//   https://github.com/mmcloughlin/geohash
+
+// The MIT License (MIT)
+// Copyright (c) 2015 Michael McLoughlin
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+
+package geo
+
+import (
+	"math"
+)
+
+// encoding encapsulates an encoding defined by a given base32 alphabet.
+type encoding struct {
+	enc string
+	dec [256]byte
+}
+
+// newEncoding constructs a new encoding defined by the given alphabet,
+// which must be a 32-byte string.
+func newEncoding(encoder string) *encoding {
+	e := new(encoding)
+	e.enc = encoder
+	for i := 0; i < len(e.dec); i++ {
+		e.dec[i] = 0xff
+	}
+	for i := 0; i < len(encoder); i++ {
+		e.dec[encoder[i]] = byte(i)
+	}
+	return e
+}
+
+// Decode string into bits of a 64-bit word. The string s may be at most 12
+// characters.
+func (e *encoding) decode(s string) uint64 {
+	x := uint64(0)
+	for i := 0; i < len(s); i++ {
+		x = (x << 5) | uint64(e.dec[s[i]])
+	}
+	return x
+}
+
+// Encode bits of 64-bit word into a string.
+func (e *encoding) encode(x uint64) string {
+	b := [12]byte{}
+	for i := 0; i < 12; i++ {
+		b[11-i] = e.enc[x&0x1f]
+		x >>= 5
+	}
+	return string(b[:])
+}
+
+// Base32Encoding with the Geohash alphabet.
+var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
+
+// BoundingBox returns the region encoded by the given string geohash.
+func geoBoundingBox(hash string) geoBox {
+	bits := uint(5 * len(hash))
+	inthash := base32encoding.decode(hash)
+	return geoBoundingBoxIntWithPrecision(inthash, bits)
+}
+
+// Box represents a rectangle in latitude/longitude space.
+type geoBox struct {
+	minLat float64
+	maxLat float64
+	minLng float64
+	maxLng float64
+}
+
+// Round returns a point inside the box, making an effort to round to minimal
+// precision.
+func (b geoBox) round() (lat, lng float64) {
+	x := maxDecimalPower(b.maxLat - b.minLat)
+	lat = math.Ceil(b.minLat/x) * x
+	x = maxDecimalPower(b.maxLng - b.minLng)
+	lng = math.Ceil(b.minLng/x) * x
+	return
+}
+
+// precalculated for performance
+var exp232 = math.Exp2(32)
+
+// errorWithPrecision returns the error range in latitude and longitude for in
+// integer geohash with bits of precision.
+func errorWithPrecision(bits uint) (latErr, lngErr float64) {
+	b := int(bits)
+	latBits := b / 2
+	lngBits := b - latBits
+	latErr = math.Ldexp(180.0, -latBits)
+	lngErr = math.Ldexp(360.0, -lngBits)
+	return
+}
+
+// minDecimalPlaces returns the minimum number of decimal places such that
+// there must exist an number with that many places within any range of width
+// r. This is intended for returning minimal precision coordinates inside a
+// box.
+func maxDecimalPower(r float64) float64 {
+	m := int(math.Floor(math.Log10(r)))
+	return math.Pow10(m)
+}
+
+// Encode the position of x within the range -r to +r as a 32-bit integer.
+func encodeRange(x, r float64) uint32 {
+	p := (x + r) / (2 * r)
+	return uint32(p * exp232)
+}
+
+// Decode the 32-bit range encoding X back to a value in the range -r to +r.
+func decodeRange(X uint32, r float64) float64 {
+	p := float64(X) / exp232
+	x := 2*r*p - r
+	return x
+}
+
+// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
+// ignored, and may take any value.
+func squash(X uint64) uint32 {
+	X &= 0x5555555555555555
+	X = (X | (X >> 1)) & 0x3333333333333333
+	X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
+	X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
+	X = (X | (X >> 8)) & 0x0000ffff0000ffff
+	X = (X | (X >> 16)) & 0x00000000ffffffff
+	return uint32(X)
+}
+
+// Deinterleave the bits of X into 32-bit words containing the even and odd
+// bitlevels of X, respectively.
+func deinterleave(X uint64) (uint32, uint32) {
+	return squash(X), squash(X >> 1)
+}
+
+// BoundingBoxIntWithPrecision returns the region encoded by the integer
+// geohash with the specified precision.
+func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
+	fullHash := hash << (64 - bits)
+	latInt, lngInt := deinterleave(fullHash)
+	lat := decodeRange(latInt, 90)
+	lng := decodeRange(lngInt, 180)
+	latErr, lngErr := errorWithPrecision(bits)
+	return geoBox{
+		minLat: lat,
+		maxLat: lat + latErr,
+		minLng: lng,
+		maxLng: lng + lngErr,
+	}
+}
+
+// ----------------------------------------------------------------------
+
+// Decode the string geohash to a (lat, lng) point.
+func GeoHashDecode(hash string) (lat, lng float64) {
+	box := geoBoundingBox(hash)
+	return box.round()
+}

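GeoHashDecode is the one exported entry point of the new file: it decodes a base32 geohash into a (lat, lng) pair rounded to the precision the hash supports. A usage sketch; the hash below is the common textbook example for a point in northern Denmark, chosen purely for illustration:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/geo"
)

func main() {
	// Decode a geohash; longer hashes yield more decimal precision.
	lat, lng := geo.GeoHashDecode("u4pruydqqvj")
	fmt.Printf("lat=%f lng=%f\n", lat, lng)
}
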
vendor/github.com/blevesearch/bleve/geo/parse.go: 43 lines changed (generated, vendored)

@@ -16,6 +16,7 @@ package geo

 import (
 	"reflect"
+	"strconv"
 	"strings"
 )

@@ -24,6 +25,8 @@ import (
 // Container:
 // slice length 2 (GeoJSON)
 //  first element lon, second element lat
+// string (coordinates separated by comma, or a geohash)
+//  first element lat, second element lon
 // map[string]interface{}
 //  exact keys lat and lon or lng
 // struct
@@ -36,10 +39,14 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 	var foundLon, foundLat bool

 	thingVal := reflect.ValueOf(thing)
+	if !thingVal.IsValid() {
+		return lon, lat, false
+	}
+
 	thingTyp := thingVal.Type()

 	// is it a slice
-	if thingVal.IsValid() && thingVal.Kind() == reflect.Slice {
+	if thingVal.Kind() == reflect.Slice {
 		// must be length 2
 		if thingVal.Len() == 2 {
 			first := thingVal.Index(0)
@@ -55,6 +62,35 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 		}
 	}

+	// is it a string
+	if thingVal.Kind() == reflect.String {
+		geoStr := thingVal.Interface().(string)
+		if strings.Contains(geoStr, ",") {
+			// geo point with coordinates split by comma
+			points := strings.Split(geoStr, ",")
+			for i, point := range points {
+				// trim any leading or trailing white spaces
+				points[i] = strings.TrimSpace(point)
+			}
+			if len(points) == 2 {
+				var err error
+				lat, err = strconv.ParseFloat(points[0], 64)
+				if err == nil {
+					foundLat = true
+				}
+				lon, err = strconv.ParseFloat(points[1], 64)
+				if err == nil {
+					foundLon = true
+				}
+			}
+		} else {
+			// geohash
+			lat, lon = GeoHashDecode(geoStr)
+			foundLat = true
+			foundLon = true
+		}
+	}
+
 	// is it a map
 	if l, ok := thing.(map[string]interface{}); ok {
 		if lval, ok := l["lon"]; ok {
@@ -68,7 +104,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 	}

 	// now try reflection on struct fields
-	if thingVal.IsValid() && thingVal.Kind() == reflect.Struct {
+	if thingVal.Kind() == reflect.Struct {
 		for i := 0; i < thingVal.NumField(); i++ {
 			fieldName := thingTyp.Field(i).Name
 			if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
@@ -113,6 +149,9 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
 // extract numeric value (if possible) and returns a float64
 func extractNumericVal(v interface{}) (float64, bool) {
 	val := reflect.ValueOf(v)
+	if !val.IsValid() {
+		return 0, false
+	}
 	typ := val.Type()
 	switch typ.Kind() {
 	case reflect.Float32, reflect.Float64:

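With this change ExtractGeoPoint accepts two new string shapes besides slices, maps, and structs: a comma-separated "lat, lon" pair and a bare geohash. A sketch of both; the inputs are illustrative, and the printed success flag assumes the function's usual semantics of returning true once both latitude and longitude were found:

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/geo"
)

func main() {
	// Comma-separated form: latitude first, longitude second.
	lon, lat, ok := geo.ExtractGeoPoint("57.64911, 10.40744")
	fmt.Println(lon, lat, ok)

	// Bare geohash form: decoded through the new GeoHashDecode helper.
	lon, lat, ok = geo.ExtractGeoPoint("u4pruydqqvj")
	fmt.Println(lon, lat, ok)
}
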
							
								
								
									
										35
									
								
								vendor/github.com/blevesearch/bleve/index.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										35
									
								
								vendor/github.com/blevesearch/bleve/index.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -21,6 +21,7 @@ import ( | |||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/store" | 	"github.com/blevesearch/bleve/index/store" | ||||||
| 	"github.com/blevesearch/bleve/mapping" | 	"github.com/blevesearch/bleve/mapping" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // A Batch groups together multiple Index and Delete | // A Batch groups together multiple Index and Delete | ||||||
| @@ -32,6 +33,9 @@ import ( | |||||||
| type Batch struct { | type Batch struct { | ||||||
| 	index    Index | 	index    Index | ||||||
| 	internal *index.Batch | 	internal *index.Batch | ||||||
|  |  | ||||||
|  | 	lastDocSize uint64 | ||||||
|  | 	totalSize   uint64 | ||||||
| } | } | ||||||
|  |  | ||||||
| // Index adds the specified index operation to the | // Index adds the specified index operation to the | ||||||
| @@ -47,9 +51,22 @@ func (b *Batch) Index(id string, data interface{}) error { | |||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	b.internal.Update(doc) | 	b.internal.Update(doc) | ||||||
|  |  | ||||||
|  | 	b.lastDocSize = uint64(doc.Size() + | ||||||
|  | 		len(id) + size.SizeOfString) // overhead from internal | ||||||
|  | 	b.totalSize += b.lastDocSize | ||||||
|  |  | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (b *Batch) LastDocSize() uint64 { | ||||||
|  | 	return b.lastDocSize | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (b *Batch) TotalDocsSize() uint64 { | ||||||
|  | 	return b.totalSize | ||||||
|  | } | ||||||
|  |  | ||||||
| // IndexAdvanced adds the specified index operation to the | // IndexAdvanced adds the specified index operation to the | ||||||
| // batch which skips the mapping.  NOTE: the bleve Index is not updated | // batch which skips the mapping.  NOTE: the bleve Index is not updated | ||||||
| // until the batch is executed. | // until the batch is executed. | ||||||
| @@ -102,6 +119,24 @@ func (b *Batch) Reset() { | |||||||
| 	b.internal.Reset() | 	b.internal.Reset() | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (b *Batch) Merge(o *Batch) { | ||||||
|  | 	if o != nil && o.internal != nil { | ||||||
|  | 		b.internal.Merge(o.internal) | ||||||
|  | 		if o.LastDocSize() > 0 { | ||||||
|  | 			b.lastDocSize = o.LastDocSize() | ||||||
|  | 		} | ||||||
|  | 		b.totalSize = uint64(b.internal.TotalDocSize()) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (b *Batch) SetPersistedCallback(f index.BatchCallback) { | ||||||
|  | 	b.internal.SetPersistedCallback(f) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (b *Batch) PersistedCallback() index.BatchCallback { | ||||||
|  | 	return b.internal.PersistedCallback() | ||||||
|  | } | ||||||
|  |  | ||||||
| // An Index implements all the indexing and searching | // An Index implements all the indexing and searching | ||||||
| // capabilities of bleve.  An Index can be created | // capabilities of bleve.  An Index can be created | ||||||
| // using the New() and Open() methods. | // using the New() and Open() methods. | ||||||
|   | |||||||
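The new accounting gives callers a way to bound batch memory. A minimal, hypothetical sketch of how an application might use the added TotalDocsSize method to flush a batch once it grows past a threshold; the index handle, document map, and 10 MB cap are illustrative and not part of this diff:

```go
package main

import "github.com/blevesearch/bleve"

// indexAll flushes the batch whenever its estimated in-memory size
// (tracked by the new lastDocSize/totalSize fields) crosses a cap.
func indexAll(idx bleve.Index, docs map[string]interface{}) error {
	const maxBatchBytes = 10 << 20 // assumed 10 MB cap, tune as needed

	batch := idx.NewBatch()
	for id, doc := range docs {
		if err := batch.Index(id, doc); err != nil {
			return err
		}
		if batch.TotalDocsSize() > maxBatchBytes {
			if err := idx.Batch(batch); err != nil {
				return err
			}
			batch.Reset()
		}
	}
	return idx.Batch(batch) // flush the remainder
}
```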

vendor/github.com/blevesearch/bleve/index/analysis.go (19 changes, generated, vendored)
@@ -15,10 +15,20 @@
 package index
 
 import (
+	"reflect"
+
 	"github.com/blevesearch/bleve/analysis"
 	"github.com/blevesearch/bleve/document"
+	"github.com/blevesearch/bleve/size"
 )
 
+var reflectStaticSizeAnalysisResult int
+
+func init() {
+	var ar AnalysisResult
+	reflectStaticSizeAnalysisResult = int(reflect.TypeOf(ar).Size())
+}
+
 type IndexRow interface {
 	KeySize() int
 	KeyTo([]byte) (int, error)
@@ -39,6 +49,15 @@ type AnalysisResult struct {
 	Length   []int
 }
 
+func (a *AnalysisResult) Size() int {
+	rv := reflectStaticSizeAnalysisResult
+	for _, analyzedI := range a.Analyzed {
+		rv += analyzedI.Size()
+	}
+	rv += len(a.Length) * size.SizeOfInt
+	return rv
+}
+
 type AnalysisWork struct {
 	i  Index
 	d  *document.Document
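The init/Size pattern above computes the fixed struct overhead once via reflection and adds the variable-length members per call. A standalone sketch of the same idea under made-up names (the point type is not from bleve):

```go
package main

import (
	"fmt"
	"reflect"
)

type point struct {
	X, Y  float64
	Label string
}

// Computed once at startup, like reflectStaticSizeAnalysisResult above.
var staticPointSize = int(reflect.TypeOf(point{}).Size())

// approxSize adds the heap-resident tail (the string's bytes) to the
// flat struct size; the string header is already in staticPointSize.
func (p *point) approxSize() int {
	return staticPointSize + len(p.Label)
}

func main() {
	p := point{X: 1, Y: 2, Label: "origin"}
	fmt.Println(p.approxSize()) // flat struct size + 6 label bytes
}
```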

vendor/github.com/blevesearch/bleve/index/index.go (120 changes, generated, vendored)
@@ -18,11 +18,23 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
+	"reflect"
 
 	"github.com/blevesearch/bleve/document"
 	"github.com/blevesearch/bleve/index/store"
+	"github.com/blevesearch/bleve/size"
 )
 
+var reflectStaticSizeTermFieldDoc int
+var reflectStaticSizeTermFieldVector int
+
+func init() {
+	var tfd TermFieldDoc
+	reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size())
+	var tfv TermFieldVector
+	reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size())
+}
+
 var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
 
 type Index interface {
@@ -68,6 +80,8 @@ type IndexReader interface {
 	Document(id string) (*document.Document, error)
 	DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
 
+	DocValueReader(fields []string) (DocValueReader, error)
+
 	Fields() ([]string, error)
 
 	GetInternal(key []byte) ([]byte, error)
@@ -84,6 +98,29 @@ type IndexReader interface {
 	Close() error
 }
 
+// The Regexp interface defines the subset of the regexp.Regexp API
+// methods that are used by bleve indexes, allowing callers to pass in
+// alternate implementations.
+type Regexp interface {
+	FindStringIndex(s string) (loc []int)
+
+	LiteralPrefix() (prefix string, complete bool)
+
+	String() string
+}
+
+type IndexReaderRegexp interface {
+	FieldDictRegexp(field string, regex string) (FieldDict, error)
+}
+
+type IndexReaderFuzzy interface {
+	FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
+}
+
+type IndexReaderOnly interface {
+	FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
+}
+
 // FieldTerms contains the terms used by a document, keyed by field
 type FieldTerms map[string][]string
 
@@ -115,6 +152,11 @@ type TermFieldVector struct {
 	End            uint64
 }
 
+func (tfv *TermFieldVector) Size() int {
+	return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
+		len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64
+}
+
 // IndexInternalID is an opaque document identifier interal to the index impl
 type IndexInternalID []byte
 
@@ -134,14 +176,27 @@ type TermFieldDoc struct {
 	Vectors []*TermFieldVector
 }
 
+func (tfd *TermFieldDoc) Size() int {
+	sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr +
+		len(tfd.Term) + len(tfd.ID)
+
+	for _, entry := range tfd.Vectors {
+		sizeInBytes += entry.Size()
+	}
+
+	return sizeInBytes
+}
+
 // Reset allows an already allocated TermFieldDoc to be reused
 func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
 	// remember the []byte used for the ID
 	id := tfd.ID
+	vectors := tfd.Vectors
 	// idiom to copy over from empty TermFieldDoc (0 allocations)
 	*tfd = TermFieldDoc{}
 	// reuse the []byte already allocated (and reset len to 0)
 	tfd.ID = id[:0]
+	tfd.Vectors = vectors[:0]
 	return tfd
 }
 
@@ -161,6 +216,8 @@ type TermFieldReader interface {
 	// Count returns the number of documents contains the term in this field.
 	Count() uint64
 	Close() error
+
+	Size() int
 }
 
 type DictEntry struct {
@@ -185,12 +242,18 @@ type DocIDReader interface {
 	// will start there instead. If ID is greater than or equal to the end of
 	// the range, Next() call will return io.EOF.
 	Advance(ID IndexInternalID) (IndexInternalID, error)
+
+	Size() int
+
 	Close() error
 }
 
+type BatchCallback func(error)
+
 type Batch struct {
 	IndexOps          map[string]*document.Document
 	InternalOps       map[string][]byte
+	persistedCallback BatchCallback
 }
 
 func NewBatch() *Batch {
@@ -216,6 +279,14 @@ func (b *Batch) DeleteInternal(key []byte) {
 	b.InternalOps[string(key)] = nil
 }
 
+func (b *Batch) SetPersistedCallback(f BatchCallback) {
+	b.persistedCallback = f
+}
+
+func (b *Batch) PersistedCallback() BatchCallback {
+	return b.persistedCallback
+}
+
 func (b *Batch) String() string {
 	rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
 	for k, v := range b.IndexOps {
@@ -238,4 +309,53 @@ func (b *Batch) String() string {
 func (b *Batch) Reset() {
 	b.IndexOps = make(map[string]*document.Document)
 	b.InternalOps = make(map[string][]byte)
+	b.persistedCallback = nil
+}
+
+func (b *Batch) Merge(o *Batch) {
+	for k, v := range o.IndexOps {
+		b.IndexOps[k] = v
+	}
+	for k, v := range o.InternalOps {
+		b.InternalOps[k] = v
+	}
+}
+
+func (b *Batch) TotalDocSize() int {
+	var s int
+	for k, v := range b.IndexOps {
+		if v != nil {
+			s += v.Size() + size.SizeOfString
+		}
+		s += len(k)
+	}
+	return s
+}
+
+// Optimizable represents an optional interface that implementable by
+// optimizable resources (e.g., TermFieldReaders, Searchers).  These
+// optimizable resources are provided the same OptimizableContext
+// instance, so that they can coordinate via dynamic interface
+// casting.
+type Optimizable interface {
+	Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
+}
+
+// Represents a result of optimization -- see the Finish() method.
+type Optimized interface{}
+
+type OptimizableContext interface {
+	// Once all the optimzable resources have been provided the same
+	// OptimizableContext instance, the optimization preparations are
+	// finished or completed via the Finish() method.
+	//
+	// Depending on the optimization being performed, the Finish()
+	// method might return a non-nil Optimized instance.  For example,
+	// the Optimized instance might represent an optimized
+	// TermFieldReader instance.
+	Finish() (Optimized, error)
+}
+
+type DocValueReader interface {
+	VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error
 }
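The new index.Regexp interface deliberately names a subset of the standard library's *regexp.Regexp method set, so existing compiled regexes satisfy it without adapters. A quick structural sketch; the local Regexp mirror below is for illustration only:

```go
package main

import (
	"fmt"
	"regexp"
)

// Mirror of the three methods declared by bleve's index.Regexp.
type Regexp interface {
	FindStringIndex(s string) (loc []int)
	LiteralPrefix() (prefix string, complete bool)
	String() string
}

func main() {
	// *regexp.Regexp implements all three methods, so it can be
	// handed to any API that accepts the narrower interface.
	var r Regexp = regexp.MustCompile(`ab+c`)
	fmt.Println(r.FindStringIndex("xxabbbc"), r.String())
}
```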

vendor/github.com/blevesearch/bleve/index/scorch/introducer.go (268 changes, generated, vendored)
@@ -19,7 +19,9 @@ import (
 	"sync/atomic"
 
 	"github.com/RoaringBitmap/roaring"
+	"github.com/blevesearch/bleve/index"
 	"github.com/blevesearch/bleve/index/scorch/segment"
+	"github.com/blevesearch/bleve/index/scorch/segment/zap"
 )
 
 type segmentIntroduction struct {
@@ -31,6 +33,12 @@ type segmentIntroduction struct {
 
 	applied           chan error
 	persisted         chan error
+	persistedCallback index.BatchCallback
+}
+
+type persistIntroduction struct {
+	persisted map[uint64]segment.Segment
+	applied   notificationChan
 }
 
 type epochWatcher struct {
@@ -48,6 +56,8 @@ func (s *Scorch) mainLoop() {
 	var epochWatchers []*epochWatcher
 OUTER:
 	for {
+		atomic.AddUint64(&s.stats.TotIntroduceLoop, 1)
+
 		select {
 		case <-s.closeCh:
 			break OUTER
@@ -64,6 +74,9 @@ OUTER:
 				continue OUTER
 			}
 
+		case persist := <-s.persists:
+			s.introducePersist(persist)
+
 		case revertTo := <-s.revertToSnapshots:
 			err := s.revertToSnapshot(revertTo)
 			if err != nil {
@@ -92,32 +105,38 @@ OUTER:
 }
 
 func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
-	// acquire lock
-	s.rootLock.Lock()
+	atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1)
+	defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1)
 
-	nsegs := len(s.root.segment)
+	s.rootLock.RLock()
+	root := s.root
+	root.AddRef()
+	s.rootLock.RUnlock()
+
+	defer func() { _ = root.DecRef() }()
+
+	nsegs := len(root.segment)
 
 	// prepare new index snapshot
 	newSnapshot := &IndexSnapshot{
 		parent:   s,
 		segment:  make([]*SegmentSnapshot, 0, nsegs+1),
 		offsets:  make([]uint64, 0, nsegs+1),
-		internal: make(map[string][]byte, len(s.root.internal)),
-		epoch:    s.nextSnapshotEpoch,
+		internal: make(map[string][]byte, len(root.internal)),
 		refs:     1,
+		creator:  "introduceSegment",
 	}
-	s.nextSnapshotEpoch++
 
 	// iterate through current segments
 	var running uint64
-	for i := range s.root.segment {
+	var docsToPersistCount, memSegments, fileSegments uint64
+	for i := range root.segment {
 		// see if optimistic work included this segment
-		delta, ok := next.obsoletes[s.root.segment[i].id]
+		delta, ok := next.obsoletes[root.segment[i].id]
 		if !ok {
 			var err error
-			delta, err = s.root.segment[i].segment.DocNumbers(next.ids)
+			delta, err = root.segment[i].segment.DocNumbers(next.ids)
 			if err != nil {
-				s.rootLock.Unlock()
 				next.applied <- fmt.Errorf("error computing doc numbers: %v", err)
 				close(next.applied)
 				_ = newSnapshot.DecRef()
@@ -126,43 +145,60 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
 		}
 
 		newss := &SegmentSnapshot{
-			id:         s.root.segment[i].id,
-			segment:    s.root.segment[i].segment,
-			cachedDocs: s.root.segment[i].cachedDocs,
+			id:         root.segment[i].id,
+			segment:    root.segment[i].segment,
+			cachedDocs: root.segment[i].cachedDocs,
+			creator:    root.segment[i].creator,
 		}
 
 		// apply new obsoletions
-		if s.root.segment[i].deleted == nil {
+		if root.segment[i].deleted == nil {
 			newss.deleted = delta
 		} else {
-			newss.deleted = roaring.Or(s.root.segment[i].deleted, delta)
+			newss.deleted = roaring.Or(root.segment[i].deleted, delta)
+		}
+		if newss.deleted.IsEmpty() {
+			newss.deleted = nil
 		}
 
 		// check for live size before copying
 		if newss.LiveSize() > 0 {
 			newSnapshot.segment = append(newSnapshot.segment, newss)
-			s.root.segment[i].segment.AddRef()
+			root.segment[i].segment.AddRef()
 			newSnapshot.offsets = append(newSnapshot.offsets, running)
-			running += s.root.segment[i].Count()
+			running += newss.segment.Count()
+		}
+
+		if isMemorySegment(root.segment[i]) {
+			docsToPersistCount += root.segment[i].Count()
+			memSegments++
+		} else {
+			fileSegments++
 		}
 	}
 
+	atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+	atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+
 	// append new segment, if any, to end of the new index snapshot
 	if next.data != nil {
 		newSegmentSnapshot := &SegmentSnapshot{
 			id:         next.id,
 			segment:    next.data, // take ownership of next.data's ref-count
 			cachedDocs: &cachedDocs{cache: nil},
+			creator:    "introduceSegment",
 		}
 		newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
 		newSnapshot.offsets = append(newSnapshot.offsets, running)
 
 		// increment numItemsIntroduced which tracks the number of items
 		// queued for persistence.
-		atomic.AddUint64(&s.stats.numItemsIntroduced, newSegmentSnapshot.Count())
+		atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count())
+		atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1)
 	}
 	// copy old values
-	for key, oldVal := range s.root.internal {
+	for key, oldVal := range root.internal {
 		newSnapshot.internal[key] = oldVal
 	}
 	// set new values and apply deletes
@@ -173,12 +209,21 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
 			delete(newSnapshot.internal, key)
 		}
 	}
+
+	newSnapshot.updateSize()
+	s.rootLock.Lock()
 	if next.persisted != nil {
 		s.rootPersisted = append(s.rootPersisted, next.persisted)
 	}
+	if next.persistedCallback != nil {
+		s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
+	}
 	// swap in new index snapshot
+	newSnapshot.epoch = s.nextSnapshotEpoch
+	s.nextSnapshotEpoch++
 	rootPrev := s.root
 	s.root = newSnapshot
+	atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
 	// release lock
 	s.rootLock.Unlock()
 
@@ -191,42 +236,113 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
 	return nil
 }
 
-func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
-	// acquire lock
+func (s *Scorch) introducePersist(persist *persistIntroduction) {
+	atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1)
+	defer atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1)
+
 	s.rootLock.Lock()
+	root := s.root
+	root.AddRef()
+	nextSnapshotEpoch := s.nextSnapshotEpoch
+	s.nextSnapshotEpoch++
+	s.rootLock.Unlock()
 
-	// prepare new index snapshot
-	currSize := len(s.root.segment)
-	newSize := currSize + 1 - len(nextMerge.old)
+	defer func() { _ = root.DecRef() }()
 
-	// empty segments deletion
-	if nextMerge.new == nil {
-		newSize--
+	newIndexSnapshot := &IndexSnapshot{
+		parent:   s,
+		epoch:    nextSnapshotEpoch,
+		segment:  make([]*SegmentSnapshot, len(root.segment)),
+		offsets:  make([]uint64, len(root.offsets)),
+		internal: make(map[string][]byte, len(root.internal)),
+		refs:     1,
+		creator:  "introducePersist",
 	}
 
+	var docsToPersistCount, memSegments, fileSegments uint64
+	for i, segmentSnapshot := range root.segment {
+		// see if this segment has been replaced
+		if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
+			newSegmentSnapshot := &SegmentSnapshot{
+				id:         segmentSnapshot.id,
+				segment:    replacement,
+				deleted:    segmentSnapshot.deleted,
+				cachedDocs: segmentSnapshot.cachedDocs,
+				creator:    "introducePersist",
+			}
+			newIndexSnapshot.segment[i] = newSegmentSnapshot
			delete(persist.persisted, segmentSnapshot.id)
+
+			// update items persisted incase of a new segment snapshot
+			atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
+			atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
+			fileSegments++
+		} else {
+			newIndexSnapshot.segment[i] = root.segment[i]
+			newIndexSnapshot.segment[i].segment.AddRef()
+
+			if isMemorySegment(root.segment[i]) {
+				docsToPersistCount += root.segment[i].Count()
+				memSegments++
+			} else {
+				fileSegments++
+			}
+		}
+		newIndexSnapshot.offsets[i] = root.offsets[i]
+	}
+
+	for k, v := range root.internal {
+		newIndexSnapshot.internal[k] = v
+	}
+
+	atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+	atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+	newIndexSnapshot.updateSize()
+	s.rootLock.Lock()
+	rootPrev := s.root
+	s.root = newIndexSnapshot
+	atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
+	s.rootLock.Unlock()
+
+	if rootPrev != nil {
+		_ = rootPrev.DecRef()
+	}
+
+	close(persist.applied)
+}
+
+func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
+	atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
+	defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)
+
+	s.rootLock.RLock()
+	root := s.root
+	root.AddRef()
+	s.rootLock.RUnlock()
+
+	defer func() { _ = root.DecRef() }()
+
 	newSnapshot := &IndexSnapshot{
 		parent:   s,
-		segment:  make([]*SegmentSnapshot, 0, newSize),
-		offsets:  make([]uint64, 0, newSize),
-		internal: s.root.internal,
-		epoch:    s.nextSnapshotEpoch,
+		internal: root.internal,
 		refs:     1,
+		creator:  "introduceMerge",
 	}
-	s.nextSnapshotEpoch++
 
 	// iterate through current segments
 	newSegmentDeleted := roaring.NewBitmap()
-	var running uint64
-	for i := range s.root.segment {
-		segmentID := s.root.segment[i].id
+	var running, docsToPersistCount, memSegments, fileSegments uint64
+	for i := range root.segment {
+		segmentID := root.segment[i].id
 		if segSnapAtMerge, ok := nextMerge.old[segmentID]; ok {
 			// this segment is going away, see if anything else was deleted since we started the merge
-			if segSnapAtMerge != nil && s.root.segment[i].deleted != nil {
+			if segSnapAtMerge != nil && root.segment[i].deleted != nil {
 				// assume all these deletes are new
-				deletedSince := s.root.segment[i].deleted
+				deletedSince := root.segment[i].deleted
 				// if we already knew about some of them, remove
 				if segSnapAtMerge.deleted != nil {
-					deletedSince = roaring.AndNot(s.root.segment[i].deleted, segSnapAtMerge.deleted)
+					deletedSince = roaring.AndNot(root.segment[i].deleted, segSnapAtMerge.deleted)
 				}
 				deletedSinceItr := deletedSince.Iterator()
 				for deletedSinceItr.HasNext() {
@@ -240,18 +356,25 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 			// segments left behind in old map after processing
 			// the root segments would be the obsolete segment set
 			delete(nextMerge.old, segmentID)
-
-		} else if s.root.segment[i].LiveSize() > 0 {
+		} else if root.segment[i].LiveSize() > 0 {
 			// this segment is staying
 			newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
-				id:         s.root.segment[i].id,
-				segment:    s.root.segment[i].segment,
-				deleted:    s.root.segment[i].deleted,
-				cachedDocs: s.root.segment[i].cachedDocs,
+				id:         root.segment[i].id,
+				segment:    root.segment[i].segment,
+				deleted:    root.segment[i].deleted,
+				cachedDocs: root.segment[i].cachedDocs,
+				creator:    root.segment[i].creator,
 			})
-			s.root.segment[i].segment.AddRef()
+			root.segment[i].segment.AddRef()
 			newSnapshot.offsets = append(newSnapshot.offsets, running)
-			running += s.root.segment[i].Count()
+			running += root.segment[i].segment.Count()
+
+			if isMemorySegment(root.segment[i]) {
+				docsToPersistCount += root.segment[i].Count()
+				memSegments++
+			} else {
+				fileSegments++
+			}
 		}
 	}
 
@@ -269,6 +392,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 			}
 		}
 	}
+
 	// In case where all the docs in the newly merged segment getting
 	// deleted by the time we reach here, can skip the introduction.
 	if nextMerge.new != nil &&
@@ -279,15 +403,35 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 			segment:    nextMerge.new, // take ownership for nextMerge.new's ref-count
 			deleted:    newSegmentDeleted,
 			cachedDocs: &cachedDocs{cache: nil},
+			creator:    "introduceMerge",
 		})
 		newSnapshot.offsets = append(newSnapshot.offsets, running)
+		atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, 1)
+
+		switch nextMerge.new.(type) {
+		case *zap.SegmentBase:
+			docsToPersistCount += nextMerge.new.Count() - newSegmentDeleted.GetCardinality()
+			memSegments++
+		case *zap.Segment:
+			fileSegments++
 		}
+	}
+
+	atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+	atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+
 	newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
 
-	// swap in new segment
+	newSnapshot.updateSize()
+
+	s.rootLock.Lock()
+	// swap in new index snapshot
+	newSnapshot.epoch = s.nextSnapshotEpoch
+	s.nextSnapshotEpoch++
 	rootPrev := s.root
 	s.root = newSnapshot
+	atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
 	// release lock
 	s.rootLock.Unlock()
 
@@ -301,6 +445,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
 }
 
 func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
+	atomic.AddUint64(&s.stats.TotIntroduceRevertBeg, 1)
+	defer atomic.AddUint64(&s.stats.TotIntroduceRevertEnd, 1)
+
 	if revertTo.snapshot == nil {
 		err := fmt.Errorf("Cannot revert to a nil snapshot")
 		revertTo.applied <- err
@@ -318,9 +465,11 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
 		internal: revertTo.snapshot.internal,
 		epoch:    s.nextSnapshotEpoch,
 		refs:     1,
+		creator:  "revertToSnapshot",
 	}
 	s.nextSnapshotEpoch++
 
+	var docsToPersistCount, memSegments, fileSegments uint64
 	// iterate through segments
 	for i, segmentSnapshot := range revertTo.snapshot.segment {
 		newSnapshot.segment[i] = &SegmentSnapshot{
@@ -328,21 +477,37 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
 			segment:    segmentSnapshot.segment,
 			deleted:    segmentSnapshot.deleted,
 			cachedDocs: segmentSnapshot.cachedDocs,
+			creator:    segmentSnapshot.creator,
 		}
 		newSnapshot.segment[i].segment.AddRef()
 
 		// remove segment from ineligibleForRemoval map
 		filename := zapFileName(segmentSnapshot.id)
 		delete(s.ineligibleForRemoval, filename)
+
+		if isMemorySegment(segmentSnapshot) {
+			docsToPersistCount += segmentSnapshot.Count()
+			memSegments++
+		} else {
+			fileSegments++
 		}
+	}
+
+	atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
+	atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
+	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
+
 	if revertTo.persisted != nil {
 		s.rootPersisted = append(s.rootPersisted, revertTo.persisted)
 	}
 
+	newSnapshot.updateSize()
+
 	// swap in new snapshot
 	rootPrev := s.root
 	s.root = newSnapshot
+
+	atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
 	// release lock
 	s.rootLock.Unlock()
 
@@ -354,3 +519,12 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
 
 	return nil
 }
+
+func isMemorySegment(s *SegmentSnapshot) bool {
+	switch s.segment.(type) {
+	case *zap.SegmentBase:
+		return true
+	default:
+		return false
+	}
+}
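The rewritten introducer functions all follow the same copy-on-write discipline: pin the current root with a reference under the read lock, build the replacement snapshot with no lock held, then take the write lock only for the pointer swap. A generic, non-bleve sketch of that pattern (all names below are invented for illustration):

```go
package main

import (
	"sync"
	"sync/atomic"
)

type snapshot struct{ refs int64 }

func (s *snapshot) AddRef() { atomic.AddInt64(&s.refs, 1) }
func (s *snapshot) DecRef() { atomic.AddInt64(&s.refs, -1) }

type store struct {
	mu   sync.RWMutex
	root *snapshot
}

// swap mirrors introduceSegment's shape: pin the old root, build the
// new snapshot lock-free, then hold the write lock only for the exchange.
func (st *store) swap(build func(old *snapshot) *snapshot) {
	st.mu.RLock()
	old := st.root
	old.AddRef() // keep the old root alive while copying from it
	st.mu.RUnlock()
	defer old.DecRef()

	next := build(old) // expensive construction, no lock held

	st.mu.Lock()
	prev := st.root
	st.root = next // brief critical section: just the swap
	st.mu.Unlock()

	prev.DecRef() // drop the root's own reference to the old snapshot
}

func main() {
	st := &store{root: &snapshot{refs: 1}}
	st.swap(func(old *snapshot) *snapshot { return &snapshot{refs: 1} })
}
```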

vendor/github.com/blevesearch/bleve/index/scorch/merge.go (146 changes, generated, vendored)
@@ -15,9 +15,7 @@
 package scorch
 
 import (
-	"bytes"
 	"encoding/json"
-
 	"fmt"
 	"os"
 	"sync/atomic"
@@ -40,16 +38,20 @@ func (s *Scorch) mergerLoop() {
 
 OUTER:
 	for {
+		atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1)
+
 		select {
 		case <-s.closeCh:
 			break OUTER
 
 		default:
 			// check to see if there is a new snapshot to persist
-			s.rootLock.RLock()
+			s.rootLock.Lock()
 			ourSnapshot := s.root
 			ourSnapshot.AddRef()
-			s.rootLock.RUnlock()
+			atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
+			atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
+			s.rootLock.Unlock()
 
 			if ourSnapshot.epoch != lastEpochMergePlanned {
 				startTime := time.Now()
@@ -57,12 +59,21 @@ OUTER:
 				// lets get started
 				err := s.planMergeAtSnapshot(ourSnapshot, mergePlannerOptions)
 				if err != nil {
+					atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
+					if err == segment.ErrClosed {
+						// index has been closed
+						_ = ourSnapshot.DecRef()
+						break OUTER
+					}
 					s.fireAsyncError(fmt.Errorf("merging err: %v", err))
 					_ = ourSnapshot.DecRef()
+					atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
 					continue OUTER
 				}
 				lastEpochMergePlanned = ourSnapshot.epoch
 
+				atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch)
+
 				s.fireEvent(EventKindMergerProgress, time.Since(startTime))
 			}
 			_ = ourSnapshot.DecRef()
@@ -88,7 +99,10 @@ OUTER:
 			case <-ew.notifyCh:
 			}
 		}
+
+		atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1)
 	}
+
 	s.asyncTasks.Done()
 }
 
@@ -105,6 +119,11 @@ func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
 		if err != nil {
 			return &mergePlannerOptions, err
 		}
+
+		err = mergeplan.ValidateMergePlannerOptions(&mergePlannerOptions)
+		if err != nil {
+			return nil, err
+		}
 	}
 	return &mergePlannerOptions, nil
 }
@@ -119,32 +138,45 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
 		}
 	}
 
+	atomic.AddUint64(&s.stats.TotFileMergePlan, 1)
+
 	// give this list to the planner
 	resultMergePlan, err := mergeplan.Plan(onlyZapSnapshots, options)
 	if err != nil {
+		atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
 		return fmt.Errorf("merge planning err: %v", err)
 	}
 	if resultMergePlan == nil {
 		// nothing to do
+		atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
 		return nil
 	}
 
+	atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
+
+	atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
+
 	// process tasks in serial for now
 	var notifications []chan *IndexSnapshot
 	for _, task := range resultMergePlan.Tasks {
 		if len(task.Segments) == 0 {
+			atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
 			continue
 		}
 
+		atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))
+
 		oldMap := make(map[uint64]*SegmentSnapshot)
 		newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
 		segmentsToMerge := make([]*zap.Segment, 0, len(task.Segments))
 		docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
 
 		for _, planSegment := range task.Segments {
 			if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
 				oldMap[segSnapshot.id] = segSnapshot
 				if zapSeg, ok := segSnapshot.segment.(*zap.Segment); ok {
 					if segSnapshot.LiveSize() == 0 {
+						atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
 						oldMap[segSnapshot.id] = nil
 					} else {
 						segmentsToMerge = append(segmentsToMerge, zapSeg)
@@ -155,32 +187,53 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
 		}
 
 		var oldNewDocNums map[uint64][]uint64
-		var segment segment.Segment
+		var seg segment.Segment
 		if len(segmentsToMerge) > 0 {
 			filename := zapFileName(newSegmentID)
 			s.markIneligibleForRemoval(filename)
 			path := s.path + string(os.PathSeparator) + filename
-			newDocNums, err := zap.Merge(segmentsToMerge, docsToDrop, path, 1024)
+
+			fileMergeZapStartTime := time.Now()
+
+			atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
+			newDocNums, _, err := zap.Merge(segmentsToMerge, docsToDrop, path,
+				DefaultChunkFactor, s.closeCh, s)
+			atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)
+
+			fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
+			atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
+			if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
+				atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
+			}
+
 			if err != nil {
 				s.unmarkIneligibleForRemoval(filename)
+				atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
+				if err == segment.ErrClosed {
+					return err
+				}
 				return fmt.Errorf("merging failed: %v", err)
 			}
-			segment, err = zap.Open(path)
+
+			seg, err = zap.Open(path)
 			if err != nil {
 				s.unmarkIneligibleForRemoval(filename)
+				atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
 				return err
 			}
 			oldNewDocNums = make(map[uint64][]uint64)
 			for i, segNewDocNums := range newDocNums {
 				oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
 			}
+
+			atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge)))
 		}
 
 		sm := &segmentMerge{
 			id:            newSegmentID,
 			old:           oldMap,
 			oldNewDocNums: oldNewDocNums,
-			new:           segment,
+			new:           seg,
 			notify:        make(chan *IndexSnapshot, 1),
 		}
 		notifications = append(notifications, sm.notify)
@@ -188,21 +241,28 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
 		// give it to the introducer
 		select {
 		case <-s.closeCh:
-			_ = segment.Close()
-			return nil
+			_ = seg.Close()
+			return segment.ErrClosed
 		case s.merges <- sm:
+			atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
 		}
+
+		atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)
 	}
 
 	for _, notification := range notifications {
 		select {
 		case <-s.closeCh:
-			return nil
+			atomic.AddUint64(&s.stats.TotFileMergeIntroductionsSkipped, 1)
+			return segment.ErrClosed
 		case newSnapshot := <-notification:
+			atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
 			if newSnapshot != nil {
 				_ = newSnapshot.DecRef()
 			}
 		}
 	}
+
 	return nil
 }
 
@@ -219,44 +279,48 @@ type segmentMerge struct {
 // into the root
 func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
 	sbs []*zap.SegmentBase, sbsDrops []*roaring.Bitmap, sbsIndexes []int,
-	chunkFactor uint32) (uint64, *IndexSnapshot, uint64, error) {
-	var br bytes.Buffer
-
-	cr := zap.NewCountHashWriter(&br)
-
-	newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset,
-		docValueOffset, dictLocs, fieldsInv, fieldsMap, err :=
-		zap.MergeToWriter(sbs, sbsDrops, chunkFactor, cr)
-	if err != nil {
-		return 0, nil, 0, err
-	}
-
-	sb, err := zap.InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor,
-		fieldsMap, fieldsInv, numDocs, storedIndexOffset, fieldsIndexOffset,
-		docValueOffset, dictLocs)
-	if err != nil {
-		return 0, nil, 0, err
-	}
+	chunkFactor uint32) (*IndexSnapshot, uint64, error) {
+	atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)
+
+	memMergeZapStartTime := time.Now()
+
+	atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)
 
 	newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
 
 	filename := zapFileName(newSegmentID)
 	path := s.path + string(os.PathSeparator) + filename
-	err = zap.PersistSegmentBase(sb, path)
-	if err != nil {
-		return 0, nil, 0, err
+
+	newDocNums, _, err :=
+		zap.MergeSegmentBases(sbs, sbsDrops, path, chunkFactor, s.closeCh, s)
+
+	atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)
+
+	memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
+	atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
+	if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
+		atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
 	}
 
-	segment, err := zap.Open(path)
 	if err != nil {
-		return 0, nil, 0, err
+		atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
+		return nil, 0, err
 	}
 
+	seg, err := zap.Open(path)
+	if err != nil {
+		atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
+		return nil, 0, err
+	}
+
+	// update persisted stats
+	atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
+	atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
+
 	sm := &segmentMerge{
 		id:            newSegmentID,
 		old:           make(map[uint64]*SegmentSnapshot),
 		oldNewDocNums: make(map[uint64][]uint64),
-		new:           segment,
+		new:           seg,
 		notify:        make(chan *IndexSnapshot, 1),
 	}
 
@@ -268,15 +332,21 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
 
 	select { // send to introducer
 	case <-s.closeCh:
-		_ = segment.DecRef()
-		return 0, nil, 0, nil // TODO: return ErrInterruptedClosed?
+		_ = seg.DecRef()
+		return nil, 0, segment.ErrClosed
 	case s.merges <- sm:
 	}
 
 	select { // wait for introduction to complete
 	case <-s.closeCh:
-		return 0, nil, 0, nil // TODO: return ErrInterruptedClosed?
+		return nil, 0, segment.ErrClosed
 	case newSnapshot := <-sm.notify:
-		return numDocs, newSnapshot, newSegmentID, nil
+		atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(len(sbs)))
+		atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
+		return newSnapshot, newSegmentID, nil
 	}
 }
 
+func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
+	atomic.AddUint64(&s.stats.TotFileMergeWrittenBytes, bytesWritten)
+}
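One detail worth noting in these stats updates: the high-water marks (MaxFileMergeZapTime, MaxMemMergeZapTime) are maintained with a plain load-then-store, which is fine while a single merger goroutine writes them. With multiple concurrent writers a CAS loop would be the safe variant; a small sketch of that alternative (not bleve code):

```go
package main

import "sync/atomic"

// updateMax raises *max to v if v is larger, remaining correct even
// under concurrent writers; a single-writer loop like bleve's merger
// can get away with the simpler load-then-store seen in the diff.
func updateMax(max *uint64, v uint64) {
	for {
		cur := atomic.LoadUint64(max)
		if v <= cur || atomic.CompareAndSwapUint64(max, cur, v) {
			return
		}
	}
}

func main() {
	var m uint64
	updateMax(&m, 42)
}
```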
							
								
								
									
										23
									
								
								vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										23
									
								
								vendor/github.com/blevesearch/bleve/index/scorch/mergeplan/merge_plan.go
									
									
									
										generated
									
									
										vendored
									
									
								
							| @@ -18,6 +18,7 @@ | |||||||
| package mergeplan | package mergeplan | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"errors" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"math" | 	"math" | ||||||
| 	"sort" | 	"sort" | ||||||
| @@ -115,7 +116,15 @@ func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 { | |||||||
| 	return o.FloorSegmentSize | 	return o.FloorSegmentSize | ||||||
| } | } | ||||||
|  |  | ||||||
| // Suggested default options. | // MaxSegmentSizeLimit represents the maximum size of a segment, | ||||||
|  | // this limit comes with hit-1 optimisation/max encoding limit uint31. | ||||||
|  | const MaxSegmentSizeLimit = 1<<31 - 1 | ||||||
|  |  | ||||||
|  | // ErrMaxSegmentSizeTooLarge is returned when the size of the segment | ||||||
|  | // exceeds the MaxSegmentSizeLimit | ||||||
|  | var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit") | ||||||
|  |  | ||||||
|  | // DefaultMergePlanOptions suggests the default options. | ||||||
| var DefaultMergePlanOptions = MergePlanOptions{ | var DefaultMergePlanOptions = MergePlanOptions{ | ||||||
| 	MaxSegmentsPerTier:   10, | 	MaxSegmentsPerTier:   10, | ||||||
| 	MaxSegmentSize:       5000000, | 	MaxSegmentSize:       5000000, | ||||||
| @@ -208,14 +217,14 @@ func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) { | |||||||
| 			if len(roster) > 0 { | 			if len(roster) > 0 { | ||||||
| 				rosterScore := scoreSegments(roster, o) | 				rosterScore := scoreSegments(roster, o) | ||||||
|  |  | ||||||
| 				if len(bestRoster) <= 0 || rosterScore < bestRosterScore { | 				if len(bestRoster) == 0 || rosterScore < bestRosterScore { | ||||||
| 					bestRoster = roster | 					bestRoster = roster | ||||||
| 					bestRosterScore = rosterScore | 					bestRosterScore = rosterScore | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		if len(bestRoster) <= 0 { | 		if len(bestRoster) == 0 { | ||||||
| 			return rv, nil | 			return rv, nil | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| @@ -367,3 +376,11 @@ func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) | |||||||
|  |  | ||||||
| 	return strings.Join(rv, "\n") | 	return strings.Join(rv, "\n") | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // ValidateMergePlannerOptions validates the merge planner options | ||||||
|  | func ValidateMergePlannerOptions(options *MergePlanOptions) error { | ||||||
|  | 	if options.MaxSegmentSize > MaxSegmentSizeLimit { | ||||||
|  | 		return ErrMaxSegmentSizeTooLarge | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|   | |||||||
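ValidateMergePlannerOptions gives callers a way to reject an over-large MaxSegmentSize before any merge work begins. A short usage sketch against the identifiers exported above; the setup around it is hypothetical:

```go
package main

import (
	"fmt"

	"github.com/blevesearch/bleve/index/scorch/mergeplan"
)

func main() {
	opts := mergeplan.DefaultMergePlanOptions
	opts.MaxSegmentSize = 1 << 32 // deliberately above the uint31 ceiling

	// ValidateMergePlannerOptions returns ErrMaxSegmentSizeTooLarge when
	// MaxSegmentSize exceeds MaxSegmentSizeLimit (1<<31 - 1).
	if err := mergeplan.ValidateMergePlannerOptions(&opts); err != nil {
		fmt.Println(err) // MaxSegmentSize exceeds the size limit
	}
}
```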
vendor/github.com/blevesearch/bleve/index/scorch/optimize.go: 420 changes (generated, vendored, new file)
							| @@ -0,0 +1,420 @@ | |||||||
|  | //  Copyright (c) 2018 Couchbase, Inc. | ||||||
|  | // | ||||||
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | // you may not use this file except in compliance with the License. | ||||||
|  | // You may obtain a copy of the License at | ||||||
|  | // | ||||||
|  | // 		http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  | // | ||||||
|  | // Unless required by applicable law or agreed to in writing, software | ||||||
|  | // distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | // See the License for the specific language governing permissions and | ||||||
|  | // limitations under the License. | ||||||
|  |  | ||||||
|  | package scorch | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"fmt" | ||||||
|  |  | ||||||
|  | 	"github.com/RoaringBitmap/roaring" | ||||||
|  |  | ||||||
|  | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
|  | 	"github.com/blevesearch/bleve/index/scorch/segment/zap" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | var OptimizeConjunction = true | ||||||
|  | var OptimizeConjunctionUnadorned = true | ||||||
|  | var OptimizeDisjunctionUnadorned = true | ||||||
|  |  | ||||||
|  | func (s *IndexSnapshotTermFieldReader) Optimize(kind string, | ||||||
|  | 	octx index.OptimizableContext) (index.OptimizableContext, error) { | ||||||
|  | 	if OptimizeConjunction && kind == "conjunction" { | ||||||
|  | 		return s.optimizeConjunction(octx) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if OptimizeConjunctionUnadorned && kind == "conjunction:unadorned" { | ||||||
|  | 		return s.optimizeConjunctionUnadorned(octx) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if OptimizeDisjunctionUnadorned && kind == "disjunction:unadorned" { | ||||||
|  | 		return s.optimizeDisjunctionUnadorned(octx) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return octx, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256) | ||||||
|  |  | ||||||
|  | // ---------------------------------------------------------------- | ||||||
|  |  | ||||||
|  | func (s *IndexSnapshotTermFieldReader) optimizeConjunction( | ||||||
|  | 	octx index.OptimizableContext) (index.OptimizableContext, error) { | ||||||
|  | 	if octx == nil { | ||||||
|  | 		octx = &OptimizeTFRConjunction{snapshot: s.snapshot} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	o, ok := octx.(*OptimizeTFRConjunction) | ||||||
|  | 	if !ok { | ||||||
|  | 		return octx, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if o.snapshot != s.snapshot { | ||||||
|  | 		return nil, fmt.Errorf("tried to optimize conjunction across different snapshots") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	o.tfrs = append(o.tfrs, s) | ||||||
|  |  | ||||||
|  | 	return o, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type OptimizeTFRConjunction struct { | ||||||
|  | 	snapshot *IndexSnapshot | ||||||
|  |  | ||||||
|  | 	tfrs []*IndexSnapshotTermFieldReader | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) { | ||||||
|  | 	if len(o.tfrs) <= 1 { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for i := range o.snapshot.segment { | ||||||
|  | 		itr0, ok := o.tfrs[0].iterators[i].(*zap.PostingsIterator) | ||||||
|  | 		if !ok || itr0.ActualBM == nil { | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		itr1, ok := o.tfrs[1].iterators[i].(*zap.PostingsIterator) | ||||||
|  | 		if !ok || itr1.ActualBM == nil { | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		bm := roaring.And(itr0.ActualBM, itr1.ActualBM) | ||||||
|  |  | ||||||
|  | 		for _, tfr := range o.tfrs[2:] { | ||||||
|  | 			itr, ok := tfr.iterators[i].(*zap.PostingsIterator) | ||||||
|  | 			if !ok || itr.ActualBM == nil { | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			bm.And(itr.ActualBM) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// in this conjunction optimization, the postings iterators | ||||||
|  | 		// will all share the same AND'ed together actual bitmap.  The | ||||||
|  | 		// regular conjunction searcher machinery will still be used, | ||||||
|  | 		// but the underlying bitmap will be smaller. | ||||||
|  | 		for _, tfr := range o.tfrs { | ||||||
|  | 			itr, ok := tfr.iterators[i].(*zap.PostingsIterator) | ||||||
|  | 			if ok && itr.ActualBM != nil { | ||||||
|  | 				itr.ActualBM = bm | ||||||
|  | 				itr.Actual = bm.Iterator() | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return nil, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ---------------------------------------------------------------- | ||||||
|  |  | ||||||
|  | // An "unadorned" conjunction optimization is appropriate when | ||||||
|  | // additional or subsidiary information like freq-norm's and | ||||||
|  | // term-vectors are not required, and instead only the internal-id's | ||||||
|  | // are needed. | ||||||
|  | func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned( | ||||||
|  | 	octx index.OptimizableContext) (index.OptimizableContext, error) { | ||||||
|  | 	if octx == nil { | ||||||
|  | 		octx = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	o, ok := octx.(*OptimizeTFRConjunctionUnadorned) | ||||||
|  | 	if !ok { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if o.snapshot != s.snapshot { | ||||||
|  | 		return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	o.tfrs = append(o.tfrs, s) | ||||||
|  |  | ||||||
|  | 	return o, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type OptimizeTFRConjunctionUnadorned struct { | ||||||
|  | 	snapshot *IndexSnapshot | ||||||
|  |  | ||||||
|  | 	tfrs []*IndexSnapshotTermFieldReader | ||||||
|  | } | ||||||
|  |  | ||||||
|  | var OptimizeTFRConjunctionUnadornedTerm = []byte("<conjunction:unadorned>") | ||||||
|  | var OptimizeTFRConjunctionUnadornedField = "*" | ||||||
|  |  | ||||||
|  | // Finish of an unadorned conjunction optimization will compute a | ||||||
|  | // termFieldReader with an "actual" bitmap that represents the | ||||||
|  | // constituent bitmaps AND'ed together.  This termFieldReader cannot | ||||||
|  | // provide any freq-norm or termVector associated information. | ||||||
|  | func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) { | ||||||
|  | 	if len(o.tfrs) <= 1 { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// We use an artificial term and field because the optimized | ||||||
|  | 	// termFieldReader can represent multiple terms and fields. | ||||||
|  | 	oTFR := &IndexSnapshotTermFieldReader{ | ||||||
|  | 		term:               OptimizeTFRConjunctionUnadornedTerm, | ||||||
|  | 		field:              OptimizeTFRConjunctionUnadornedField, | ||||||
|  | 		snapshot:           o.snapshot, | ||||||
|  | 		iterators:          make([]segment.PostingsIterator, len(o.snapshot.segment)), | ||||||
|  | 		segmentOffset:      0, | ||||||
|  | 		includeFreq:        false, | ||||||
|  | 		includeNorm:        false, | ||||||
|  | 		includeTermVectors: false, | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var actualBMs []*roaring.Bitmap // Collected from regular posting lists. | ||||||
|  |  | ||||||
|  | OUTER: | ||||||
|  | 	for i := range o.snapshot.segment { | ||||||
|  | 		actualBMs = actualBMs[:0] | ||||||
|  |  | ||||||
|  | 		var docNum1HitLast uint64 | ||||||
|  | 		var docNum1HitLastOk bool | ||||||
|  |  | ||||||
|  | 		for _, tfr := range o.tfrs { | ||||||
|  | 			if _, ok := tfr.iterators[i].(*segment.EmptyPostingsIterator); ok { | ||||||
|  | 				// An empty postings iterator means the entire AND is empty. | ||||||
|  | 				oTFR.iterators[i] = segment.AnEmptyPostingsIterator | ||||||
|  | 				continue OUTER | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			itr, ok := tfr.iterators[i].(*zap.PostingsIterator) | ||||||
|  | 			if !ok { | ||||||
|  | 				// We optimize zap postings iterators only. | ||||||
|  | 				return nil, nil | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			// If the postings iterator is "1-hit" optimized, then we | ||||||
|  | 			// can perform several optimizations up-front here. | ||||||
|  | 			docNum1Hit, ok := itr.DocNum1Hit() | ||||||
|  | 			if ok { | ||||||
|  | 				if docNum1Hit == zap.DocNum1HitFinished { | ||||||
|  | 					// An empty docNum here means the entire AND is empty. | ||||||
|  | 					oTFR.iterators[i] = segment.AnEmptyPostingsIterator | ||||||
|  | 					continue OUTER | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				if docNum1HitLastOk && docNum1HitLast != docNum1Hit { | ||||||
|  | 					// The docNum1Hit doesn't match the previous | ||||||
|  | 					// docNum1HitLast, so the entire AND is empty. | ||||||
|  | 					oTFR.iterators[i] = segment.AnEmptyPostingsIterator | ||||||
|  | 					continue OUTER | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				docNum1HitLast = docNum1Hit | ||||||
|  | 				docNum1HitLastOk = true | ||||||
|  |  | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if itr.ActualBM == nil { | ||||||
|  | 				// An empty actual bitmap means the entire AND is empty. | ||||||
|  | 				oTFR.iterators[i] = segment.AnEmptyPostingsIterator | ||||||
|  | 				continue OUTER | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			// Collect the actual bitmap for more processing later. | ||||||
|  | 			actualBMs = append(actualBMs, itr.ActualBM) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if docNum1HitLastOk { | ||||||
|  | 			// We reach here if all the 1-hit optimized posting | ||||||
|  | 			// iterators had the same 1-hit docNum, so we can check if | ||||||
|  | 			// our collected actual bitmaps also have that docNum. | ||||||
|  | 			for _, bm := range actualBMs { | ||||||
|  | 				if !bm.Contains(uint32(docNum1HitLast)) { | ||||||
|  | 					// The docNum1Hit isn't in one of our actual | ||||||
|  | 					// bitmaps, so the entire AND is empty. | ||||||
|  | 					oTFR.iterators[i] = segment.AnEmptyPostingsIterator | ||||||
|  | 					continue OUTER | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			// The actual bitmaps and docNum1Hits all contain or have | ||||||
|  | 			// the same 1-hit docNum, so that's our AND'ed result. | ||||||
|  | 			oTFR.iterators[i], err = zap.PostingsIteratorFrom1Hit( | ||||||
|  | 				docNum1HitLast, zap.NormBits1Hit, false, false) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, nil | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			continue OUTER | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if len(actualBMs) == 0 { | ||||||
|  | 			// If we've collected no actual bitmaps at this point, | ||||||
|  | 			// then the entire AND is empty. | ||||||
|  | 			oTFR.iterators[i] = segment.AnEmptyPostingsIterator | ||||||
|  | 			continue OUTER | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if len(actualBMs) == 1 { | ||||||
|  | 			// If we've only 1 actual bitmap, then that's our result. | ||||||
|  | 			oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap( | ||||||
|  | 				actualBMs[0], false, false) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, nil | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			continue OUTER | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// Else, AND together our collected bitmaps as our result. | ||||||
|  | 		bm := roaring.And(actualBMs[0], actualBMs[1]) | ||||||
|  |  | ||||||
|  | 		for _, actualBM := range actualBMs[2:] { | ||||||
|  | 			bm.And(actualBM) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap( | ||||||
|  | 			bm, false, false) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, nil | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return oTFR, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // ---------------------------------------------------------------- | ||||||
|  |  | ||||||
|  | // An "unadorned" disjunction optimization is appropriate when | ||||||
|  | // additional or subsidiary information like freq-norm's and | ||||||
|  | // term-vectors are not required, and instead only the internal-id's | ||||||
|  | // are needed. | ||||||
|  | func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned( | ||||||
|  | 	octx index.OptimizableContext) (index.OptimizableContext, error) { | ||||||
|  | 	if octx == nil { | ||||||
|  | 		octx = &OptimizeTFRDisjunctionUnadorned{snapshot: s.snapshot} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	o, ok := octx.(*OptimizeTFRDisjunctionUnadorned) | ||||||
|  | 	if !ok { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if o.snapshot != s.snapshot { | ||||||
|  | 		return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	o.tfrs = append(o.tfrs, s) | ||||||
|  |  | ||||||
|  | 	return o, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type OptimizeTFRDisjunctionUnadorned struct { | ||||||
|  | 	snapshot *IndexSnapshot | ||||||
|  |  | ||||||
|  | 	tfrs []*IndexSnapshotTermFieldReader | ||||||
|  | } | ||||||
|  |  | ||||||
|  | var OptimizeTFRDisjunctionUnadornedTerm = []byte("<disjunction:unadorned>") | ||||||
|  | var OptimizeTFRDisjunctionUnadornedField = "*" | ||||||
|  |  | ||||||
|  | // Finish of an unadorned disjunction optimization will compute a | ||||||
|  | // termFieldReader with an "actual" bitmap that represents the | ||||||
|  | // constituent bitmaps OR'ed together.  This termFieldReader cannot | ||||||
|  | // provide any freq-norm or termVector associated information. | ||||||
|  | func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) { | ||||||
|  | 	if len(o.tfrs) <= 1 { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for i := range o.snapshot.segment { | ||||||
|  | 		var cMax uint64 | ||||||
|  |  | ||||||
|  | 		for _, tfr := range o.tfrs { | ||||||
|  | 			itr, ok := tfr.iterators[i].(*zap.PostingsIterator) | ||||||
|  | 			if !ok { | ||||||
|  | 				return nil, nil | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if itr.ActualBM != nil { | ||||||
|  | 				c := itr.ActualBM.GetCardinality() | ||||||
|  | 				if cMax < c { | ||||||
|  | 					cMax = c | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// Heuristic to skip the optimization if all the constituent | ||||||
|  | 		// bitmaps are too small, where the processing & resource | ||||||
|  | 		// overhead to create the OR'ed bitmap outweighs the benefit. | ||||||
|  | 		if cMax < OptimizeDisjunctionUnadornedMinChildCardinality { | ||||||
|  | 			return nil, nil | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// We use an artificial term and field because the optimized | ||||||
|  | 	// termFieldReader can represent multiple terms and fields. | ||||||
|  | 	oTFR := &IndexSnapshotTermFieldReader{ | ||||||
|  | 		term:               OptimizeTFRDisjunctionUnadornedTerm, | ||||||
|  | 		field:              OptimizeTFRDisjunctionUnadornedField, | ||||||
|  | 		snapshot:           o.snapshot, | ||||||
|  | 		iterators:          make([]segment.PostingsIterator, len(o.snapshot.segment)), | ||||||
|  | 		segmentOffset:      0, | ||||||
|  | 		includeFreq:        false, | ||||||
|  | 		includeNorm:        false, | ||||||
|  | 		includeTermVectors: false, | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var docNums []uint32            // Collected docNum's from 1-hit posting lists. | ||||||
|  | 	var actualBMs []*roaring.Bitmap // Collected from regular posting lists. | ||||||
|  |  | ||||||
|  | 	for i := range o.snapshot.segment { | ||||||
|  | 		docNums = docNums[:0] | ||||||
|  | 		actualBMs = actualBMs[:0] | ||||||
|  |  | ||||||
|  | 		for _, tfr := range o.tfrs { | ||||||
|  | 			itr, ok := tfr.iterators[i].(*zap.PostingsIterator) | ||||||
|  | 			if !ok { | ||||||
|  | 				return nil, nil | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			docNum, ok := itr.DocNum1Hit() | ||||||
|  | 			if ok { | ||||||
|  | 				docNums = append(docNums, uint32(docNum)) | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if itr.ActualBM != nil { | ||||||
|  | 				actualBMs = append(actualBMs, itr.ActualBM) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		var bm *roaring.Bitmap | ||||||
|  | 		if len(actualBMs) > 2 { | ||||||
|  | 			bm = roaring.HeapOr(actualBMs...) | ||||||
|  | 		} else if len(actualBMs) == 2 { | ||||||
|  | 			bm = roaring.Or(actualBMs[0], actualBMs[1]) | ||||||
|  | 		} else if len(actualBMs) == 1 { | ||||||
|  | 			bm = actualBMs[0].Clone() | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if bm == nil { | ||||||
|  | 			bm = roaring.New() | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		bm.AddMany(docNums) | ||||||
|  |  | ||||||
|  | 		oTFR.iterators[i], err = zap.PostingsIteratorFromBitmap(bm, false, false) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, nil | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return oTFR, nil | ||||||
|  | } | ||||||
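Both Finish methods above delegate the actual set algebra to the roaring bitmap library: the conjunction paths AND the per-segment postings bitmaps together, while the unadorned disjunction ORs them, preferring HeapOr once more than two operands are involved. A standalone sketch of that core, using only roaring calls the file already relies on:

```go
package main

import (
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	a := roaring.BitmapOf(1, 2, 3, 10)
	b := roaring.BitmapOf(2, 3, 50)
	c := roaring.BitmapOf(3, 10, 50)

	// Conjunction: AND the first two bitmaps, then fold in the rest,
	// as OptimizeTFRConjunction.Finish does across its term field readers.
	and := roaring.And(a, b)
	and.And(c)
	fmt.Println(and.ToArray()) // [3]

	// Disjunction: HeapOr is preferred once there are more than two
	// bitmaps, mirroring OptimizeTFRDisjunctionUnadorned.Finish.
	or := roaring.HeapOr(a, b, c)
	fmt.Println(or.GetCardinality()) // 5
}
```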
vendor/github.com/blevesearch/bleve/index/scorch/persister.go: 275 changes (generated, vendored)
							| @@ -16,9 +16,12 @@ package scorch | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bytes" | 	"bytes" | ||||||
|  | 	"encoding/binary" | ||||||
|  | 	"encoding/json" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"io/ioutil" | 	"io/ioutil" | ||||||
| 	"log" | 	"log" | ||||||
|  | 	"math" | ||||||
| 	"os" | 	"os" | ||||||
| 	"path/filepath" | 	"path/filepath" | ||||||
| 	"strconv" | 	"strconv" | ||||||
| @@ -27,23 +30,57 @@ import ( | |||||||
| 	"time" | 	"time" | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
|  | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment/zap" | 	"github.com/blevesearch/bleve/index/scorch/segment/zap" | ||||||
| 	"github.com/boltdb/bolt" | 	bolt "github.com/etcd-io/bbolt" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| var DefaultChunkFactor uint32 = 1024 | var DefaultChunkFactor uint32 = 1024 | ||||||
|  |  | ||||||
| // Arbitrary number, need to make it configurable. | // DefaultPersisterNapTimeMSec is kept at zero as this helps in direct | ||||||
| // Lower values like 10/making persister really slow | // persistence of segments with the default safe batch option. | ||||||
| // doesn't work well as it is creating more files to | // If the default safe batch option results in a high number of | ||||||
| // persist for in next persist iteration and spikes the # FDs. | // files on disk, then users may initialise this configuration parameter | ||||||
| // Ideal value should let persister also proceed at | // with higher values so that the persister will nap a bit within its | ||||||
| // an optimum pace so that the merger can skip | // work loop to favour better in-memory merging of segments, resulting | ||||||
| // many intermediate snapshots. | // in fewer segment files on disk. But that may come with an indexing | ||||||
| // This needs to be based on empirical data. | // performance overhead. | ||||||
| // TODO - may need to revisit this approach/value. | // Unsafe batch users are advised to override this with a higher value | ||||||
| var epochDistance = uint64(5) | // for better performance, especially with high data density. | ||||||
|  | var DefaultPersisterNapTimeMSec int = 0 // ms | ||||||
|  |  | ||||||
|  | // DefaultPersisterNapUnderNumFiles helps in controlling the pace of the | ||||||
|  | // persister. At times of slow merger progress with heavy file merging | ||||||
|  | // operations, it is better to slow the persister down to let the merger | ||||||
|  | // catch up within a range defined by this parameter. | ||||||
|  | // Fewer files on disk (as per the merge plan) help keep file handle | ||||||
|  | // usage under the limit, the disk merger faster, and the index healthier. | ||||||
|  | // It has been observed that such a loosely synced introducer-persister-merger | ||||||
|  | // trio results in better overall performance. | ||||||
|  | var DefaultPersisterNapUnderNumFiles int = 1000 | ||||||
|  |  | ||||||
|  | var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64 | ||||||
|  |  | ||||||
|  | type persisterOptions struct { | ||||||
|  | 	// PersisterNapTimeMSec controls the wait/delay injected into | ||||||
|  | 	// persistence workloop to improve the chances for | ||||||
|  | 	// a healthier and heavier in-memory merging | ||||||
|  | 	PersisterNapTimeMSec int | ||||||
|  |  | ||||||
|  | 	// If PersisterNapTimeMSec > 0 and the number of files is less than | ||||||
|  | 	// PersisterNapUnderNumFiles, then the persister will sleep | ||||||
|  | 	// PersisterNapTimeMSec amount of time to improve the chances for | ||||||
|  | 	// a healthier and heavier in-memory merging | ||||||
|  | 	PersisterNapUnderNumFiles int | ||||||
|  |  | ||||||
|  | 	// MemoryPressurePauseThreshold lets the persister have better leeway | ||||||
|  | 	// for prudently performing the in-memory merge of segments in a memory | ||||||
|  | 	// pressure situation. Here the config value is an upper threshold | ||||||
|  | 	// on the number of paused application threads. The default value is | ||||||
|  | 	// a very high number so as to always favour the merging of in-memory segments. | ||||||
|  | 	MemoryPressurePauseThreshold uint64 | ||||||
|  | } | ||||||
|  |  | ||||||
| type notificationChan chan struct{} | type notificationChan chan struct{} | ||||||
|  |  | ||||||
| @@ -53,8 +90,17 @@ func (s *Scorch) persisterLoop() { | |||||||
| 	var persistWatchers []*epochWatcher | 	var persistWatchers []*epochWatcher | ||||||
| 	var lastPersistedEpoch, lastMergedEpoch uint64 | 	var lastPersistedEpoch, lastMergedEpoch uint64 | ||||||
| 	var ew *epochWatcher | 	var ew *epochWatcher | ||||||
|  | 	po, err := s.parsePersisterOptions() | ||||||
|  | 	if err != nil { | ||||||
|  | 		s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err)) | ||||||
|  | 		s.asyncTasks.Done() | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
| OUTER: | OUTER: | ||||||
| 	for { | 	for { | ||||||
|  | 		atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1) | ||||||
|  |  | ||||||
| 		select { | 		select { | ||||||
| 		case <-s.closeCh: | 		case <-s.closeCh: | ||||||
| 			break OUTER | 			break OUTER | ||||||
| @@ -65,11 +111,13 @@ OUTER: | |||||||
| 		if ew != nil && ew.epoch > lastMergedEpoch { | 		if ew != nil && ew.epoch > lastMergedEpoch { | ||||||
| 			lastMergedEpoch = ew.epoch | 			lastMergedEpoch = ew.epoch | ||||||
| 		} | 		} | ||||||
| 		persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, |  | ||||||
| 			&lastMergedEpoch, persistWatchers) | 		lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch, | ||||||
|  | 			lastMergedEpoch, persistWatchers, po) | ||||||
|  |  | ||||||
| 		var ourSnapshot *IndexSnapshot | 		var ourSnapshot *IndexSnapshot | ||||||
| 		var ourPersisted []chan error | 		var ourPersisted []chan error | ||||||
|  | 		var ourPersistedCallbacks []index.BatchCallback | ||||||
|  |  | ||||||
| 		// check to see if there is a new snapshot to persist | 		// check to see if there is a new snapshot to persist | ||||||
| 		s.rootLock.Lock() | 		s.rootLock.Lock() | ||||||
| @@ -78,13 +126,17 @@ OUTER: | |||||||
| 			ourSnapshot.AddRef() | 			ourSnapshot.AddRef() | ||||||
| 			ourPersisted = s.rootPersisted | 			ourPersisted = s.rootPersisted | ||||||
| 			s.rootPersisted = nil | 			s.rootPersisted = nil | ||||||
|  | 			ourPersistedCallbacks = s.persistedCallbacks | ||||||
|  | 			s.persistedCallbacks = nil | ||||||
|  | 			atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size())) | ||||||
|  | 			atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch) | ||||||
| 		} | 		} | ||||||
| 		s.rootLock.Unlock() | 		s.rootLock.Unlock() | ||||||
|  |  | ||||||
| 		if ourSnapshot != nil { | 		if ourSnapshot != nil { | ||||||
| 			startTime := time.Now() | 			startTime := time.Now() | ||||||
|  |  | ||||||
| 			err := s.persistSnapshot(ourSnapshot) | 			err := s.persistSnapshot(ourSnapshot, po) | ||||||
| 			for _, ch := range ourPersisted { | 			for _, ch := range ourPersisted { | ||||||
| 				if err != nil { | 				if err != nil { | ||||||
| 					ch <- err | 					ch <- err | ||||||
| @@ -92,10 +144,22 @@ OUTER: | |||||||
| 				close(ch) | 				close(ch) | ||||||
| 			} | 			} | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
|  | 				atomic.StoreUint64(&s.iStats.persistEpoch, 0) | ||||||
|  | 				if err == segment.ErrClosed { | ||||||
|  | 					// index has been closed | ||||||
|  | 					_ = ourSnapshot.DecRef() | ||||||
|  | 					break OUTER | ||||||
|  | 				} | ||||||
| 				s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) | 				s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err)) | ||||||
| 				_ = ourSnapshot.DecRef() | 				_ = ourSnapshot.DecRef() | ||||||
|  | 				atomic.AddUint64(&s.stats.TotPersistLoopErr, 1) | ||||||
| 				continue OUTER | 				continue OUTER | ||||||
| 			} | 			} | ||||||
|  | 			for i := range ourPersistedCallbacks { | ||||||
|  | 				ourPersistedCallbacks[i](err) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch) | ||||||
|  |  | ||||||
| 			lastPersistedEpoch = ourSnapshot.epoch | 			lastPersistedEpoch = ourSnapshot.epoch | ||||||
| 			for _, ew := range persistWatchers { | 			for _, ew := range persistWatchers { | ||||||
| @@ -115,6 +179,8 @@ OUTER: | |||||||
| 			s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) | 			s.fireEvent(EventKindPersisterProgress, time.Since(startTime)) | ||||||
|  |  | ||||||
| 			if changed { | 			if changed { | ||||||
|  | 				s.removeOldData() | ||||||
|  | 				atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1) | ||||||
| 				continue OUTER | 				continue OUTER | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| @@ -133,17 +199,21 @@ OUTER: | |||||||
|  |  | ||||||
| 		s.removeOldData() // might as well cleanup while waiting | 		s.removeOldData() // might as well cleanup while waiting | ||||||
|  |  | ||||||
|  | 		atomic.AddUint64(&s.stats.TotPersistLoopWait, 1) | ||||||
|  |  | ||||||
| 		select { | 		select { | ||||||
| 		case <-s.closeCh: | 		case <-s.closeCh: | ||||||
| 			break OUTER | 			break OUTER | ||||||
| 		case <-w.notifyCh: | 		case <-w.notifyCh: | ||||||
| 			// woken up, next loop should pick up work | 			// woken up, next loop should pick up work | ||||||
| 			continue OUTER | 			atomic.AddUint64(&s.stats.TotPersistLoopWaitNotified, 1) | ||||||
| 		case ew = <-s.persisterNotifier: | 		case ew = <-s.persisterNotifier: | ||||||
| 			// if the watchers are already caught up then let them wait, | 			// if the watchers are already caught up then let them wait, | ||||||
| 			// else let them continue to do the catch up | 			// else let them continue to do the catch up | ||||||
| 			persistWatchers = append(persistWatchers, ew) | 			persistWatchers = append(persistWatchers, ew) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		atomic.AddUint64(&s.stats.TotPersistLoopEnd, 1) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -160,32 +230,88 @@ func notifyMergeWatchers(lastPersistedEpoch uint64, | |||||||
| 	return watchersNext | 	return watchersNext | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch *uint64, | func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64, | ||||||
| 	persistWatchers []*epochWatcher) []*epochWatcher { | 	persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) { | ||||||
|  |  | ||||||
| 	// first, let the watchers proceed if they lag behind | 	// first, let the watchers proceed if they lag behind | ||||||
| 	persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) | 	persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) | ||||||
|  |  | ||||||
|  | 	// Check the merger lag by counting the segment files on disk. | ||||||
|  | 	// On finding fewer files on disk, the persister takes a short nap | ||||||
|  | 	// so that sufficient in-memory segments pile up for the next | ||||||
|  | 	// memory-merge-cum-persist loop. | ||||||
|  | 	// On finding too many files on disk, the persister pauses until the merger | ||||||
|  | 	// catches up and brings the segment file count under the threshold. | ||||||
|  | 	// But if there is memory pressure, these sleep maneuvers are skipped. | ||||||
|  | 	numFilesOnDisk, _ := s.diskFileStats() | ||||||
|  | 	if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) && | ||||||
|  | 		po.PersisterNapTimeMSec > 0 && s.paused() == 0 { | ||||||
|  | 		select { | ||||||
|  | 		case <-s.closeCh: | ||||||
|  | 		case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)): | ||||||
|  | 			atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1) | ||||||
|  |  | ||||||
|  | 		case ew := <-s.persisterNotifier: | ||||||
|  | 			// unblock the merger in meantime | ||||||
|  | 			persistWatchers = append(persistWatchers, ew) | ||||||
|  | 			lastMergedEpoch = ew.epoch | ||||||
|  | 			persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) | ||||||
|  | 			atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1) | ||||||
|  | 		} | ||||||
|  | 		return lastMergedEpoch, persistWatchers | ||||||
|  | 	} | ||||||
|  |  | ||||||
| OUTER: | OUTER: | ||||||
| 	// check for slow merger and await until the merger catch up | 	for po.PersisterNapUnderNumFiles > 0 && | ||||||
| 	for lastPersistedEpoch > *lastMergedEpoch+epochDistance { | 		numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) && | ||||||
|  | 		lastMergedEpoch < lastPersistedEpoch { | ||||||
|  | 		atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1) | ||||||
|  |  | ||||||
| 		select { | 		select { | ||||||
| 		case <-s.closeCh: | 		case <-s.closeCh: | ||||||
| 			break OUTER | 			break OUTER | ||||||
| 		case ew := <-s.persisterNotifier: | 		case ew := <-s.persisterNotifier: | ||||||
| 			persistWatchers = append(persistWatchers, ew) | 			persistWatchers = append(persistWatchers, ew) | ||||||
| 			*lastMergedEpoch = ew.epoch | 			lastMergedEpoch = ew.epoch | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		atomic.AddUint64(&s.stats.TotPersisterSlowMergerResume, 1) | ||||||
|  |  | ||||||
| 		// let the watchers proceed if they lag behind | 		// let the watchers proceed if they lag behind | ||||||
| 		persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) | 		persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers) | ||||||
|  |  | ||||||
|  | 		numFilesOnDisk, _ = s.diskFileStats() | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return persistWatchers | 	return lastMergedEpoch, persistWatchers | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { | func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) { | ||||||
|  | 	po := persisterOptions{ | ||||||
|  | 		PersisterNapTimeMSec:         DefaultPersisterNapTimeMSec, | ||||||
|  | 		PersisterNapUnderNumFiles:    DefaultPersisterNapUnderNumFiles, | ||||||
|  | 		MemoryPressurePauseThreshold: DefaultMemoryPressurePauseThreshold, | ||||||
|  | 	} | ||||||
|  | 	if v, ok := s.config["scorchPersisterOptions"]; ok { | ||||||
|  | 		b, err := json.Marshal(v) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return &po, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		err = json.Unmarshal(b, &po) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return &po, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return &po, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot, | ||||||
|  | 	po *persisterOptions) error { | ||||||
|  | 	// Perform in-memory segment merging only when the memory pressure is | ||||||
|  | 	// below the configured threshold, else the persister performs the | ||||||
|  | 	// direct persistence of segments. | ||||||
|  | 	if s.paused() < po.MemoryPressurePauseThreshold { | ||||||
| 		persisted, err := s.persistSnapshotMaybeMerge(snapshot) | 		persisted, err := s.persistSnapshotMaybeMerge(snapshot) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| @@ -193,6 +319,7 @@ func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) error { | |||||||
| 		if persisted { | 		if persisted { | ||||||
| 			return nil | 			return nil | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return s.persistSnapshotDirect(snapshot) | 	return s.persistSnapshotDirect(snapshot) | ||||||
| } | } | ||||||
| @@ -224,7 +351,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( | |||||||
| 		return false, nil | 		return false, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	_, newSnapshot, newSegmentID, err := s.mergeSegmentBases( | 	newSnapshot, newSegmentID, err := s.mergeSegmentBases( | ||||||
| 		snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor) | 		snapshot, sbs, sbsDrops, sbsIndexes, DefaultChunkFactor) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return false, err | 		return false, err | ||||||
| @@ -249,6 +376,7 @@ func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot) ( | |||||||
| 		segment:  make([]*SegmentSnapshot, 0, len(snapshot.segment)), | 		segment:  make([]*SegmentSnapshot, 0, len(snapshot.segment)), | ||||||
| 		internal: snapshot.internal, | 		internal: snapshot.internal, | ||||||
| 		epoch:    snapshot.epoch, | 		epoch:    snapshot.epoch, | ||||||
|  | 		creator:  "persistSnapshotMaybeMerge", | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// copy to the equiv the segments that weren't replaced | 	// copy to the equiv the segments that weren't replaced | ||||||
| @@ -301,6 +429,22 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { | |||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	// persist meta values | ||||||
|  | 	metaBucket, err := snapshotBucket.CreateBucketIfNotExists(boltMetaDataKey) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	err = metaBucket.Put([]byte("type"), []byte(zap.Type)) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	buf := make([]byte, binary.MaxVarintLen32) | ||||||
|  | 	binary.BigEndian.PutUint32(buf, zap.Version) | ||||||
|  | 	err = metaBucket.Put([]byte("version"), buf) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// persist internal values | 	// persist internal values | ||||||
| 	internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey) | 	internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
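The new boltMetaDataKey bucket written above stores the segment type and version alongside each snapshot, so a future scorch can inspect what is on disk before loading it. A hedged sketch of reading those keys back through the bbolt API (the path and error handling are illustrative):

```go
package main

import (
	"encoding/binary"
	"fmt"

	bolt "github.com/etcd-io/bbolt"
)

func main() {
	db, err := bolt.Open("root.bolt", 0600, nil) // path is illustrative
	if err != nil {
		panic(err)
	}
	defer db.Close()

	err = db.View(func(tx *bolt.Tx) error {
		snapshots := tx.Bucket([]byte{'s'}) // boltSnapshotsBucket
		if snapshots == nil {
			return fmt.Errorf("no snapshots bucket")
		}
		c := snapshots.Cursor()
		for k, _ := c.First(); k != nil; k, _ = c.Next() {
			snap := snapshots.Bucket(k) // one bucket per snapshot epoch
			if snap == nil {
				continue
			}
			meta := snap.Bucket([]byte{'m'}) // boltMetaDataKey
			if meta == nil {
				continue // snapshot predates the metadata bucket
			}
			typ := meta.Get([]byte("type"))
			ver := binary.BigEndian.Uint32(meta.Get([]byte("version")))
			fmt.Printf("snapshot type=%s version=%d\n", typ, ver)
		}
		return nil
	})
	if err != nil {
		panic(err)
	}
}
```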
| @@ -390,44 +534,21 @@ func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot) (err error) { | |||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		s.rootLock.Lock() | 		persist := &persistIntroduction{ | ||||||
| 		newIndexSnapshot := &IndexSnapshot{ | 			persisted: newSegments, | ||||||
| 			parent:   s, | 			applied:   make(notificationChan), | ||||||
| 			epoch:    s.nextSnapshotEpoch, |  | ||||||
| 			segment:  make([]*SegmentSnapshot, len(s.root.segment)), |  | ||||||
| 			offsets:  make([]uint64, len(s.root.offsets)), |  | ||||||
| 			internal: make(map[string][]byte, len(s.root.internal)), |  | ||||||
| 			refs:     1, |  | ||||||
| 		} |  | ||||||
| 		s.nextSnapshotEpoch++ |  | ||||||
| 		for i, segmentSnapshot := range s.root.segment { |  | ||||||
| 			// see if this segment has been replaced |  | ||||||
| 			if replacement, ok := newSegments[segmentSnapshot.id]; ok { |  | ||||||
| 				newSegmentSnapshot := &SegmentSnapshot{ |  | ||||||
| 					id:         segmentSnapshot.id, |  | ||||||
| 					segment:    replacement, |  | ||||||
| 					deleted:    segmentSnapshot.deleted, |  | ||||||
| 					cachedDocs: segmentSnapshot.cachedDocs, |  | ||||||
| 				} |  | ||||||
| 				newIndexSnapshot.segment[i] = newSegmentSnapshot |  | ||||||
| 				delete(newSegments, segmentSnapshot.id) |  | ||||||
| 				// update items persisted incase of a new segment snapshot |  | ||||||
| 				atomic.AddUint64(&s.stats.numItemsPersisted, newSegmentSnapshot.Count()) |  | ||||||
| 			} else { |  | ||||||
| 				newIndexSnapshot.segment[i] = s.root.segment[i] |  | ||||||
| 				newIndexSnapshot.segment[i].segment.AddRef() |  | ||||||
| 			} |  | ||||||
| 			newIndexSnapshot.offsets[i] = s.root.offsets[i] |  | ||||||
| 		} |  | ||||||
| 		for k, v := range s.root.internal { |  | ||||||
| 			newIndexSnapshot.internal[k] = v |  | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		rootPrev := s.root | 		select { | ||||||
| 		s.root = newIndexSnapshot | 		case <-s.closeCh: | ||||||
| 		s.rootLock.Unlock() | 			return segment.ErrClosed | ||||||
| 		if rootPrev != nil { | 		case s.persists <- persist: | ||||||
| 			_ = rootPrev.DecRef() | 		} | ||||||
|  |  | ||||||
|  | 		select { | ||||||
|  | 		case <-s.closeCh: | ||||||
|  | 			return segment.ErrClosed | ||||||
|  | 		case <-persist.applied: | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -462,6 +583,7 @@ var boltSnapshotsBucket = []byte{'s'} | |||||||
| var boltPathKey = []byte{'p'} | var boltPathKey = []byte{'p'} | ||||||
| var boltDeletedKey = []byte{'d'} | var boltDeletedKey = []byte{'d'} | ||||||
| var boltInternalKey = []byte{'i'} | var boltInternalKey = []byte{'i'} | ||||||
|  | var boltMetaDataKey = []byte{'m'} | ||||||
|  |  | ||||||
| func (s *Scorch) loadFromBolt() error { | func (s *Scorch) loadFromBolt() error { | ||||||
| 	return s.rootBolt.View(func(tx *bolt.Tx) error { | 	return s.rootBolt.View(func(tx *bolt.Tx) error { | ||||||
| @@ -478,19 +600,19 @@ func (s *Scorch) loadFromBolt() error { | |||||||
| 				continue | 				continue | ||||||
| 			} | 			} | ||||||
| 			if foundRoot { | 			if foundRoot { | ||||||
| 				s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) | 				s.AddEligibleForRemoval(snapshotEpoch) | ||||||
| 				continue | 				continue | ||||||
| 			} | 			} | ||||||
| 			snapshot := snapshots.Bucket(k) | 			snapshot := snapshots.Bucket(k) | ||||||
| 			if snapshot == nil { | 			if snapshot == nil { | ||||||
| 				log.Printf("snapshot key, but bucket missing %x, continuing", k) | 				log.Printf("snapshot key, but bucket missing %x, continuing", k) | ||||||
| 				s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) | 				s.AddEligibleForRemoval(snapshotEpoch) | ||||||
| 				continue | 				continue | ||||||
| 			} | 			} | ||||||
| 			indexSnapshot, err := s.loadSnapshot(snapshot) | 			indexSnapshot, err := s.loadSnapshot(snapshot) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				log.Printf("unable to load snapshot, %v, continuing", err) | 				log.Printf("unable to load snapshot, %v, continuing", err) | ||||||
| 				s.eligibleForRemoval = append(s.eligibleForRemoval, snapshotEpoch) | 				s.AddEligibleForRemoval(snapshotEpoch) | ||||||
| 				continue | 				continue | ||||||
| 			} | 			} | ||||||
| 			indexSnapshot.epoch = snapshotEpoch | 			indexSnapshot.epoch = snapshotEpoch | ||||||
| @@ -500,13 +622,16 @@ func (s *Scorch) loadFromBolt() error { | |||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
| 			s.nextSegmentID++ | 			s.nextSegmentID++ | ||||||
| 			s.nextSnapshotEpoch = snapshotEpoch + 1 |  | ||||||
| 			s.rootLock.Lock() | 			s.rootLock.Lock() | ||||||
| 			if s.root != nil { | 			s.nextSnapshotEpoch = snapshotEpoch + 1 | ||||||
| 				_ = s.root.DecRef() | 			rootPrev := s.root | ||||||
| 			} |  | ||||||
| 			s.root = indexSnapshot | 			s.root = indexSnapshot | ||||||
| 			s.rootLock.Unlock() | 			s.rootLock.Unlock() | ||||||
|  |  | ||||||
|  | 			if rootPrev != nil { | ||||||
|  | 				_ = rootPrev.DecRef() | ||||||
|  | 			} | ||||||
|  |  | ||||||
| 			foundRoot = true | 			foundRoot = true | ||||||
| 		} | 		} | ||||||
| 		return nil | 		return nil | ||||||
| @@ -524,7 +649,7 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { | |||||||
| 		snapshotKey := segment.EncodeUvarintAscending(nil, epoch) | 		snapshotKey := segment.EncodeUvarintAscending(nil, epoch) | ||||||
| 		snapshot := snapshots.Bucket(snapshotKey) | 		snapshot := snapshots.Bucket(snapshotKey) | ||||||
| 		if snapshot == nil { | 		if snapshot == nil { | ||||||
| 			return nil | 			return fmt.Errorf("snapshot with epoch: %v - doesn't exist", epoch) | ||||||
| 		} | 		} | ||||||
| 		rv, err = s.loadSnapshot(snapshot) | 		rv, err = s.loadSnapshot(snapshot) | ||||||
| 		return err | 		return err | ||||||
| @@ -536,12 +661,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) { | |||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | ||||||
|  |  | ||||||
| 	rv := &IndexSnapshot{ | 	rv := &IndexSnapshot{ | ||||||
| 		parent:   s, | 		parent:   s, | ||||||
| 		internal: make(map[string][]byte), | 		internal: make(map[string][]byte), | ||||||
| 		refs:     1, | 		refs:     1, | ||||||
|  | 		creator:  "loadSnapshot", | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	var running uint64 | 	var running uint64 | ||||||
| 	c := snapshot.Cursor() | 	c := snapshot.Cursor() | ||||||
| 	for k, _ := c.First(); k != nil; k, _ = c.Next() { | 	for k, _ := c.First(); k != nil; k, _ = c.Next() { | ||||||
| @@ -556,7 +682,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | |||||||
| 				_ = rv.DecRef() | 				_ = rv.DecRef() | ||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 		} else { | 		} else if k[0] != boltMetaDataKey[0] { | ||||||
| 			segmentBucket := snapshot.Bucket(k) | 			segmentBucket := snapshot.Bucket(k) | ||||||
| 			if segmentBucket == nil { | 			if segmentBucket == nil { | ||||||
| 				_ = rv.DecRef() | 				_ = rv.DecRef() | ||||||
| @@ -577,6 +703,7 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) { | |||||||
| 			running += segmentSnapshot.segment.Count() | 			running += segmentSnapshot.segment.Count() | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -604,8 +731,10 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro | |||||||
| 			_ = segment.Close() | 			_ = segment.Close() | ||||||
| 			return nil, fmt.Errorf("error reading deleted bytes: %v", err) | 			return nil, fmt.Errorf("error reading deleted bytes: %v", err) | ||||||
| 		} | 		} | ||||||
|  | 		if !deletedBitmap.IsEmpty() { | ||||||
| 			rv.deleted = deletedBitmap | 			rv.deleted = deletedBitmap | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
| @@ -643,14 +772,14 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { | |||||||
| 		return 0, err | 		return 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if len(persistedEpochs) <= NumSnapshotsToKeep { | 	if len(persistedEpochs) <= s.numSnapshotsToKeep { | ||||||
| 		// we need to keep everything | 		// we need to keep everything | ||||||
| 		return 0, nil | 		return 0, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// make a map of epochs to protect from deletion | 	// make a map of epochs to protect from deletion | ||||||
| 	protectedEpochs := make(map[uint64]struct{}, NumSnapshotsToKeep) | 	protectedEpochs := make(map[uint64]struct{}, s.numSnapshotsToKeep) | ||||||
| 	for _, epoch := range persistedEpochs[0:NumSnapshotsToKeep] { | 	for _, epoch := range persistedEpochs[0:s.numSnapshotsToKeep] { | ||||||
| 		protectedEpochs[epoch] = struct{}{} | 		protectedEpochs[epoch] = struct{}{} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -668,7 +797,7 @@ func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) { | |||||||
| 	s.eligibleForRemoval = newEligible | 	s.eligibleForRemoval = newEligible | ||||||
| 	s.rootLock.Unlock() | 	s.rootLock.Unlock() | ||||||
|  |  | ||||||
| 	if len(epochsToRemove) <= 0 { | 	if len(epochsToRemove) == 0 { | ||||||
| 		return 0, nil | 		return 0, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
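parsePersisterOptions above shows how the persister knobs travel: callers place a value under the "scorchPersisterOptions" key of the index config map, and scorch marshals it to JSON and back over the defaults. A sketch of that round-trip with assumed values (the struct here simply mirrors the unexported one in persister.go):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// persisterOptions mirrors the unexported struct parsed by scorch; the
// field names must match for the JSON round-trip to pick them up.
type persisterOptions struct {
	PersisterNapTimeMSec         int
	PersisterNapUnderNumFiles    int
	MemoryPressurePauseThreshold uint64
}

func main() {
	// What a caller would place in the scorch config map under the
	// "scorchPersisterOptions" key.
	config := map[string]interface{}{
		"scorchPersisterOptions": map[string]interface{}{
			"PersisterNapTimeMSec":      2000, // nap 2s to favour in-memory merging
			"PersisterNapUnderNumFiles": 1000,
		},
	}

	// The same round-trip parsePersisterOptions performs: marshal the
	// config value, then unmarshal it over the defaults.
	po := persisterOptions{PersisterNapUnderNumFiles: 1000}
	b, _ := json.Marshal(config["scorchPersisterOptions"])
	_ = json.Unmarshal(b, &po)
	fmt.Printf("%+v\n", po)
}
```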
vendor/github.com/blevesearch/bleve/index/scorch/reader.go: 110 changes (generated, vendored, file deleted)
							| @@ -1,110 +0,0 @@ | |||||||
| //  Copyright (c) 2017 Couchbase, Inc. |  | ||||||
| // |  | ||||||
| // Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
| // you may not use this file except in compliance with the License. |  | ||||||
| // You may obtain a copy of the License at |  | ||||||
| // |  | ||||||
| // 		http://www.apache.org/licenses/LICENSE-2.0 |  | ||||||
| // |  | ||||||
| // Unless required by applicable law or agreed to in writing, software |  | ||||||
| // distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
| // See the License for the specific language governing permissions and |  | ||||||
| // limitations under the License. |  | ||||||
|  |  | ||||||
| package scorch |  | ||||||
|  |  | ||||||
| import ( |  | ||||||
| 	"github.com/blevesearch/bleve/document" |  | ||||||
| 	"github.com/blevesearch/bleve/index" |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| type Reader struct { |  | ||||||
| 	root *IndexSnapshot // Owns 1 ref-count on the index snapshot. |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) TermFieldReader(term []byte, field string, includeFreq, |  | ||||||
| 	includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { |  | ||||||
| 	return r.root.TermFieldReader(term, field, includeFreq, includeNorm, includeTermVectors) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // DocIDReader returns an iterator over all doc ids |  | ||||||
| // The caller must close returned instance to release associated resources. |  | ||||||
| func (r *Reader) DocIDReaderAll() (index.DocIDReader, error) { |  | ||||||
| 	return r.root.DocIDReaderAll() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) { |  | ||||||
| 	return r.root.DocIDReaderOnly(ids) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) FieldDict(field string) (index.FieldDict, error) { |  | ||||||
| 	return r.root.FieldDict(field) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // FieldDictRange is currently defined to include the start and end terms |  | ||||||
| func (r *Reader) FieldDictRange(field string, startTerm []byte, |  | ||||||
| 	endTerm []byte) (index.FieldDict, error) { |  | ||||||
| 	return r.root.FieldDictRange(field, startTerm, endTerm) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) FieldDictPrefix(field string, |  | ||||||
| 	termPrefix []byte) (index.FieldDict, error) { |  | ||||||
| 	return r.root.FieldDictPrefix(field, termPrefix) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) Document(id string) (*document.Document, error) { |  | ||||||
| 	return r.root.Document(id) |  | ||||||
| } |  | ||||||
| func (r *Reader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, |  | ||||||
| 	visitor index.DocumentFieldTermVisitor) error { |  | ||||||
| 	return r.root.DocumentVisitFieldTerms(id, fields, visitor) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) Fields() ([]string, error) { |  | ||||||
| 	return r.root.Fields() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) GetInternal(key []byte) ([]byte, error) { |  | ||||||
| 	return r.root.GetInternal(key) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) DocCount() (uint64, error) { |  | ||||||
| 	return r.root.DocCount() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) ExternalID(id index.IndexInternalID) (string, error) { |  | ||||||
| 	return r.root.ExternalID(id) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) InternalID(id string) (index.IndexInternalID, error) { |  | ||||||
| 	return r.root.InternalID(id) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) DumpAll() chan interface{} { |  | ||||||
| 	rv := make(chan interface{}) |  | ||||||
| 	go func() { |  | ||||||
| 		close(rv) |  | ||||||
| 	}() |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) DumpDoc(id string) chan interface{} { |  | ||||||
| 	rv := make(chan interface{}) |  | ||||||
| 	go func() { |  | ||||||
| 		close(rv) |  | ||||||
| 	}() |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) DumpFields() chan interface{} { |  | ||||||
| 	rv := make(chan interface{}) |  | ||||||
| 	go func() { |  | ||||||
| 		close(rv) |  | ||||||
| 	}() |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (r *Reader) Close() error { |  | ||||||
| 	return r.root.DecRef() |  | ||||||
| } |  | ||||||
vendor/github.com/blevesearch/bleve/index/scorch/scorch.go: 235 changes (generated, vendored)
							| @@ -17,6 +17,7 @@ package scorch | |||||||
| import ( | import ( | ||||||
| 	"encoding/json" | 	"encoding/json" | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"io/ioutil" | ||||||
| 	"os" | 	"os" | ||||||
| 	"sync" | 	"sync" | ||||||
| 	"sync/atomic" | 	"sync/atomic" | ||||||
| @@ -27,23 +28,24 @@ import ( | |||||||
| 	"github.com/blevesearch/bleve/document" | 	"github.com/blevesearch/bleve/document" | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment/mem" |  | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment/zap" | 	"github.com/blevesearch/bleve/index/scorch/segment/zap" | ||||||
| 	"github.com/blevesearch/bleve/index/store" | 	"github.com/blevesearch/bleve/index/store" | ||||||
| 	"github.com/blevesearch/bleve/registry" | 	"github.com/blevesearch/bleve/registry" | ||||||
| 	"github.com/boltdb/bolt" | 	bolt "github.com/etcd-io/bbolt" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| const Name = "scorch" | const Name = "scorch" | ||||||
|  |  | ||||||
| const Version uint8 = 1 | const Version uint8 = 2 | ||||||
|  |  | ||||||
|  | var ErrClosed = fmt.Errorf("scorch closed") | ||||||
|  |  | ||||||
| type Scorch struct { | type Scorch struct { | ||||||
| 	readOnly      bool | 	readOnly      bool | ||||||
| 	version       uint8 | 	version       uint8 | ||||||
| 	config        map[string]interface{} | 	config        map[string]interface{} | ||||||
| 	analysisQueue *index.AnalysisQueue | 	analysisQueue *index.AnalysisQueue | ||||||
| 	stats         *Stats | 	stats         Stats | ||||||
| 	nextSegmentID uint64 | 	nextSegmentID uint64 | ||||||
| 	path          string | 	path          string | ||||||
|  |  | ||||||
| @@ -52,12 +54,15 @@ type Scorch struct { | |||||||
| 	rootLock             sync.RWMutex | 	rootLock             sync.RWMutex | ||||||
| 	root                 *IndexSnapshot // holds 1 ref-count on the root | 	root                 *IndexSnapshot // holds 1 ref-count on the root | ||||||
| 	rootPersisted        []chan error   // closed when root is persisted | 	rootPersisted        []chan error   // closed when root is persisted | ||||||
|  | 	persistedCallbacks   []index.BatchCallback | ||||||
| 	nextSnapshotEpoch    uint64 | 	nextSnapshotEpoch    uint64 | ||||||
| 	eligibleForRemoval   []uint64        // Index snapshot epochs that are safe to GC. | 	eligibleForRemoval   []uint64        // Index snapshot epochs that are safe to GC. | ||||||
| 	ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. | 	ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet. | ||||||
|  |  | ||||||
|  | 	numSnapshotsToKeep int | ||||||
| 	closeCh            chan struct{} | 	closeCh            chan struct{} | ||||||
| 	introductions      chan *segmentIntroduction | 	introductions      chan *segmentIntroduction | ||||||
|  | 	persists           chan *persistIntroduction | ||||||
| 	merges             chan *segmentMerge | 	merges             chan *segmentMerge | ||||||
| 	introducerNotifier chan *epochWatcher | 	introducerNotifier chan *epochWatcher | ||||||
| 	revertToSnapshots  chan *snapshotReversion | 	revertToSnapshots  chan *snapshotReversion | ||||||
| @@ -67,6 +72,23 @@ type Scorch struct { | |||||||
|  |  | ||||||
| 	onEvent      func(event Event) | 	onEvent      func(event Event) | ||||||
| 	onAsyncError func(err error) | 	onAsyncError func(err error) | ||||||
|  |  | ||||||
|  | 	iStats internalStats | ||||||
|  |  | ||||||
|  | 	pauseLock sync.RWMutex | ||||||
|  |  | ||||||
|  | 	pauseCount uint64 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type internalStats struct { | ||||||
|  | 	persistEpoch          uint64 | ||||||
|  | 	persistSnapshotSize   uint64 | ||||||
|  | 	mergeEpoch            uint64 | ||||||
|  | 	mergeSnapshotSize     uint64 | ||||||
|  | 	newSegBufBytesAdded   uint64 | ||||||
|  | 	newSegBufBytesRemoved uint64 | ||||||
|  | 	analysisBytesAdded    uint64 | ||||||
|  | 	analysisBytesRemoved  uint64 | ||||||
| } | } | ||||||
|  |  | ||||||
| func NewScorch(storeName string, | func NewScorch(storeName string, | ||||||
| @@ -80,8 +102,7 @@ func NewScorch(storeName string, | |||||||
| 		closeCh:              make(chan struct{}), | 		closeCh:              make(chan struct{}), | ||||||
| 		ineligibleForRemoval: map[string]bool{}, | 		ineligibleForRemoval: map[string]bool{}, | ||||||
| 	} | 	} | ||||||
| 	rv.stats = &Stats{i: rv} | 	rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"} | ||||||
| 	rv.root = &IndexSnapshot{parent: rv, refs: 1} |  | ||||||
| 	ro, ok := config["read_only"].(bool) | 	ro, ok := config["read_only"].(bool) | ||||||
| 	if ok { | 	if ok { | ||||||
| 		rv.readOnly = ro | 		rv.readOnly = ro | ||||||
| @@ -101,9 +122,30 @@ func NewScorch(storeName string, | |||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *Scorch) paused() uint64 { | ||||||
|  | 	s.pauseLock.Lock() | ||||||
|  | 	pc := s.pauseCount | ||||||
|  | 	s.pauseLock.Unlock() | ||||||
|  | 	return pc | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *Scorch) incrPause() { | ||||||
|  | 	s.pauseLock.Lock() | ||||||
|  | 	s.pauseCount++ | ||||||
|  | 	s.pauseLock.Unlock() | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *Scorch) decrPause() { | ||||||
|  | 	s.pauseLock.Lock() | ||||||
|  | 	s.pauseCount-- | ||||||
|  | 	s.pauseLock.Unlock() | ||||||
|  | } | ||||||
|  |  | ||||||
| func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { | func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) { | ||||||
| 	if s.onEvent != nil { | 	if s.onEvent != nil { | ||||||
|  | 		s.incrPause() | ||||||
| 		s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) | 		s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur}) | ||||||
|  | 		s.decrPause() | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -111,6 +153,7 @@ func (s *Scorch) fireAsyncError(err error) { | |||||||
| 	if s.onAsyncError != nil { | 	if s.onAsyncError != nil { | ||||||
| 		s.onAsyncError(err) | 		s.onAsyncError(err) | ||||||
| 	} | 	} | ||||||
|  | 	atomic.AddUint64(&s.stats.TotOnErrors, 1) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Scorch) Open() error { | func (s *Scorch) Open() error { | ||||||
| @@ -172,7 +215,10 @@ func (s *Scorch) openBolt() error { | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment))) | ||||||
|  |  | ||||||
| 	s.introductions = make(chan *segmentIntroduction) | 	s.introductions = make(chan *segmentIntroduction) | ||||||
|  | 	s.persists = make(chan *persistIntroduction) | ||||||
| 	s.merges = make(chan *segmentMerge) | 	s.merges = make(chan *segmentMerge) | ||||||
| 	s.introducerNotifier = make(chan *epochWatcher, 1) | 	s.introducerNotifier = make(chan *epochWatcher, 1) | ||||||
| 	s.revertToSnapshots = make(chan *snapshotReversion) | 	s.revertToSnapshots = make(chan *snapshotReversion) | ||||||
| @@ -186,6 +232,17 @@ func (s *Scorch) openBolt() error { | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	s.numSnapshotsToKeep = NumSnapshotsToKeep | ||||||
|  | 	if v, ok := s.config["numSnapshotsToKeep"]; ok { | ||||||
|  | 		var t int | ||||||
|  | 		if t, err = parseToInteger(v); err != nil { | ||||||
|  | 			return fmt.Errorf("numSnapshotsToKeep parse err: %v", err) | ||||||
|  | 		} | ||||||
|  | 		if t > 0 { | ||||||
|  | 			s.numSnapshotsToKeep = t | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
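The numSnapshotsToKeep option above is read from the index config map; parseToInteger (added at the end of this file) accepts both int and float64 because JSON-decoded config maps deliver numbers as float64, and only values greater than zero override the default. A minimal sketch of wiring the option up, assuming the NewScorch signature from this diff; the path and worker count are hypothetical:

	analysisQueue := index.NewAnalysisQueue(4) // hypothetical worker count
	idx, err := scorch.NewScorch(scorch.Name, map[string]interface{}{
		"path":               "/tmp/example.scorch", // hypothetical location
		"numSnapshotsToKeep": 3,                     // parsed by openBolt above
	}, analysisQueue)
	if err != nil {
		log.Fatal(err)
	}
	if err = idx.Open(); err != nil { // numSnapshotsToKeep takes effect here
		log.Fatal(err)
	}
	defer func() { _ = idx.Close() }()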
| @@ -255,6 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { | |||||||
|  |  | ||||||
| 	// FIXME could sort ids list concurrently with analysis? | 	// FIXME could sort ids list concurrently with analysis? | ||||||
|  |  | ||||||
|  | 	if len(batch.IndexOps) > 0 { | ||||||
| 		go func() { | 		go func() { | ||||||
| 			for _, doc := range batch.IndexOps { | 			for _, doc := range batch.IndexOps { | ||||||
| 				if doc != nil { | 				if doc != nil { | ||||||
| @@ -264,47 +322,63 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { | |||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		}() | 		}() | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// wait for analysis result | 	// wait for analysis result | ||||||
| 	analysisResults := make([]*index.AnalysisResult, int(numUpdates)) | 	analysisResults := make([]*index.AnalysisResult, int(numUpdates)) | ||||||
| 	var itemsDeQueued uint64 | 	var itemsDeQueued uint64 | ||||||
|  | 	var totalAnalysisSize int | ||||||
| 	for itemsDeQueued < numUpdates { | 	for itemsDeQueued < numUpdates { | ||||||
| 		result := <-resultChan | 		result := <-resultChan | ||||||
|  | 		resultSize := result.Size() | ||||||
|  | 		atomic.AddUint64(&s.iStats.analysisBytesAdded, uint64(resultSize)) | ||||||
|  | 		totalAnalysisSize += resultSize | ||||||
| 		analysisResults[itemsDeQueued] = result | 		analysisResults[itemsDeQueued] = result | ||||||
| 		itemsDeQueued++ | 		itemsDeQueued++ | ||||||
| 	} | 	} | ||||||
| 	close(resultChan) | 	close(resultChan) | ||||||
|  | 	defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize)) | ||||||
|  |  | ||||||
| 	atomic.AddUint64(&s.stats.analysisTime, uint64(time.Since(start))) | 	atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start))) | ||||||
|  |  | ||||||
|  | 	indexStart := time.Now() | ||||||
|  |  | ||||||
| 	// notify handlers that we're about to introduce a segment | 	// notify handlers that we're about to introduce a segment | ||||||
| 	s.fireEvent(EventKindBatchIntroductionStart, 0) | 	s.fireEvent(EventKindBatchIntroductionStart, 0) | ||||||
|  |  | ||||||
| 	var newSegment segment.Segment | 	var newSegment segment.Segment | ||||||
|  | 	var bufBytes uint64 | ||||||
| 	if len(analysisResults) > 0 { | 	if len(analysisResults) > 0 { | ||||||
| 		newSegment, err = zap.NewSegmentBase(mem.NewFromAnalyzedDocs(analysisResults), DefaultChunkFactor) | 		newSegment, bufBytes, err = zap.AnalysisResultsToSegmentBase(analysisResults, DefaultChunkFactor) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
|  | 		atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes) | ||||||
|  | 	} else { | ||||||
|  | 		atomic.AddUint64(&s.stats.TotBatchesEmpty, 1) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err = s.prepareSegment(newSegment, ids, batch.InternalOps) | 	err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback()) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		if newSegment != nil { | 		if newSegment != nil { | ||||||
| 			_ = newSegment.Close() | 			_ = newSegment.Close() | ||||||
| 		} | 		} | ||||||
| 		atomic.AddUint64(&s.stats.errors, 1) | 		atomic.AddUint64(&s.stats.TotOnErrors, 1) | ||||||
| 	} else { | 	} else { | ||||||
| 		atomic.AddUint64(&s.stats.updates, numUpdates) | 		atomic.AddUint64(&s.stats.TotUpdates, numUpdates) | ||||||
| 		atomic.AddUint64(&s.stats.deletes, numDeletes) | 		atomic.AddUint64(&s.stats.TotDeletes, numDeletes) | ||||||
| 		atomic.AddUint64(&s.stats.batches, 1) | 		atomic.AddUint64(&s.stats.TotBatches, 1) | ||||||
| 		atomic.AddUint64(&s.stats.numPlainTextBytesIndexed, numPlainTextBytes) | 		atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	atomic.AddUint64(&s.iStats.newSegBufBytesRemoved, bufBytes) | ||||||
|  | 	atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart))) | ||||||
|  |  | ||||||
| 	return err | 	return err | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, | func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, | ||||||
| 	internalOps map[string][]byte) error { | 	internalOps map[string][]byte, persistedCallback index.BatchCallback) error { | ||||||
|  |  | ||||||
| 	// new introduction | 	// new introduction | ||||||
| 	introduction := &segmentIntroduction{ | 	introduction := &segmentIntroduction{ | ||||||
| @@ -314,6 +388,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, | |||||||
| 		obsoletes:         make(map[uint64]*roaring.Bitmap), | 		obsoletes:         make(map[uint64]*roaring.Bitmap), | ||||||
| 		internal:          internalOps, | 		internal:          internalOps, | ||||||
| 		applied:           make(chan error), | 		applied:           make(chan error), | ||||||
|  | 		persistedCallback: persistedCallback, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if !s.unsafeBatch { | 	if !s.unsafeBatch { | ||||||
| @@ -326,6 +401,8 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, | |||||||
| 	root.AddRef() | 	root.AddRef() | ||||||
| 	s.rootLock.RUnlock() | 	s.rootLock.RUnlock() | ||||||
|  |  | ||||||
|  | 	defer func() { _ = root.DecRef() }() | ||||||
|  |  | ||||||
| 	for _, seg := range root.segment { | 	for _, seg := range root.segment { | ||||||
| 		delta, err := seg.segment.DocNumbers(ids) | 		delta, err := seg.segment.DocNumbers(ids) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| @@ -334,7 +411,7 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, | |||||||
| 		introduction.obsoletes[seg.id] = delta | 		introduction.obsoletes[seg.id] = delta | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	_ = root.DecRef() | 	introStartTime := time.Now() | ||||||
|  |  | ||||||
| 	s.introductions <- introduction | 	s.introductions <- introduction | ||||||
|  |  | ||||||
| @@ -348,6 +425,12 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string, | |||||||
| 		err = <-introduction.persisted | 		err = <-introduction.persisted | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	introTime := uint64(time.Since(introStartTime)) | ||||||
|  | 	atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime) | ||||||
|  | 	if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime { | ||||||
|  | 		atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime) | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return err | 	return err | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -366,18 +449,69 @@ func (s *Scorch) DeleteInternal(key []byte) error { | |||||||
| // Reader returns a low-level accessor on the index data. Close it to | // Reader returns a low-level accessor on the index data. Close it to | ||||||
| // release associated resources. | // release associated resources. | ||||||
| func (s *Scorch) Reader() (index.IndexReader, error) { | func (s *Scorch) Reader() (index.IndexReader, error) { | ||||||
|  | 	return s.currentSnapshot(), nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *Scorch) currentSnapshot() *IndexSnapshot { | ||||||
| 	s.rootLock.RLock() | 	s.rootLock.RLock() | ||||||
| 	rv := &Reader{root: s.root} | 	rv := s.root | ||||||
| 	rv.root.AddRef() | 	if rv != nil { | ||||||
|  | 		rv.AddRef() | ||||||
|  | 	} | ||||||
| 	s.rootLock.RUnlock() | 	s.rootLock.RUnlock() | ||||||
| 	return rv, nil | 	return rv | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Scorch) Stats() json.Marshaler { | func (s *Scorch) Stats() json.Marshaler { | ||||||
| 	return s.stats | 	return &s.stats | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *Scorch) diskFileStats() (uint64, uint64) { | ||||||
|  | 	var numFilesOnDisk, numBytesUsedDisk uint64 | ||||||
|  | 	if s.path != "" { | ||||||
|  | 		finfos, err := ioutil.ReadDir(s.path) | ||||||
|  | 		if err == nil { | ||||||
|  | 			for _, finfo := range finfos { | ||||||
|  | 				if !finfo.IsDir() { | ||||||
|  | 					numBytesUsedDisk += uint64(finfo.Size()) | ||||||
|  | 					numFilesOnDisk++ | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return numFilesOnDisk, numBytesUsedDisk | ||||||
|  | } | ||||||
|  |  | ||||||
| func (s *Scorch) StatsMap() map[string]interface{} { | func (s *Scorch) StatsMap() map[string]interface{} { | ||||||
| 	m, _ := s.stats.statsMap() | 	m := s.stats.ToMap() | ||||||
|  |  | ||||||
|  | 	numFilesOnDisk, numBytesUsedDisk := s.diskFileStats() | ||||||
|  |  | ||||||
|  | 	m["CurOnDiskBytes"] = numBytesUsedDisk | ||||||
|  | 	m["CurOnDiskFiles"] = numFilesOnDisk | ||||||
|  |  | ||||||
|  | 	// TODO: consider one day removing these backwards compatible | ||||||
|  | 	// names for apps using the old names | ||||||
|  | 	m["updates"] = m["TotUpdates"] | ||||||
|  | 	m["deletes"] = m["TotDeletes"] | ||||||
|  | 	m["batches"] = m["TotBatches"] | ||||||
|  | 	m["errors"] = m["TotOnErrors"] | ||||||
|  | 	m["analysis_time"] = m["TotAnalysisTime"] | ||||||
|  | 	m["index_time"] = m["TotIndexTime"] | ||||||
|  | 	m["term_searchers_started"] = m["TotTermSearchersStarted"] | ||||||
|  | 	m["term_searchers_finished"] = m["TotTermSearchersFinished"] | ||||||
|  | 	m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] | ||||||
|  | 	m["num_items_introduced"] = m["TotIntroducedItems"] | ||||||
|  | 	m["num_items_persisted"] = m["TotPersistedItems"] | ||||||
|  | 	m["num_recs_to_persist"] = m["TotItemsToPersist"] | ||||||
|  | 	m["num_bytes_used_disk"] = m["CurOnDiskBytes"] | ||||||
|  | 	m["num_files_on_disk"] = m["CurOnDiskFiles"] | ||||||
|  | 	m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"] | ||||||
|  | 	m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"] | ||||||
|  | 	m["num_persister_nap_pause_completed"] = m["TotPersisterNapPauseCompleted"] | ||||||
|  | 	m["num_persister_nap_merger_break"] = m["TotPersisterMergerNapBreak"] | ||||||
|  | 	m["total_compaction_written_bytes"] = m["TotFileMergeWrittenBytes"] | ||||||
|  |  | ||||||
| 	return m | 	return m | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -394,7 +528,7 @@ func (s *Scorch) Analyze(d *document.Document) *index.AnalysisResult { | |||||||
| 			rv.Analyzed[i] = tokenFreqs | 			rv.Analyzed[i] = tokenFreqs | ||||||
| 			rv.Length[i] = fieldLength | 			rv.Length[i] = fieldLength | ||||||
|  |  | ||||||
| 			if len(d.CompositeFields) > 0 { | 			if len(d.CompositeFields) > 0 && field.Name() != "_id" { | ||||||
| 				// see if any of the composite fields need this | 				// see if any of the composite fields need this | ||||||
| 				for _, compositeField := range d.CompositeFields { | 				for _, compositeField := range d.CompositeFields { | ||||||
| 					compositeField.Compose(field.Name(), fieldLength, tokenFreqs) | 					compositeField.Compose(field.Name(), fieldLength, tokenFreqs) | ||||||
| @@ -418,20 +552,43 @@ func (s *Scorch) AddEligibleForRemoval(epoch uint64) { | |||||||
| 	s.rootLock.Unlock() | 	s.rootLock.Unlock() | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Scorch) MemoryUsed() uint64 { | func (s *Scorch) MemoryUsed() (memUsed uint64) { | ||||||
| 	var memUsed uint64 | 	indexSnapshot := s.currentSnapshot() | ||||||
| 	s.rootLock.RLock() | 	if indexSnapshot == nil { | ||||||
| 	if s.root != nil { | 		return | ||||||
| 		for _, segmentSnapshot := range s.root.segment { |  | ||||||
| 			memUsed += 8 /* size of id -> uint64 */ + |  | ||||||
| 				segmentSnapshot.segment.SizeInBytes() |  | ||||||
| 			if segmentSnapshot.deleted != nil { |  | ||||||
| 				memUsed += segmentSnapshot.deleted.GetSizeInBytes() |  | ||||||
| 	} | 	} | ||||||
| 			memUsed += segmentSnapshot.cachedDocs.sizeInBytes() |  | ||||||
|  | 	defer func() { | ||||||
|  | 		_ = indexSnapshot.Close() | ||||||
|  | 	}() | ||||||
|  |  | ||||||
|  | 	// Account for current root snapshot overhead | ||||||
|  | 	memUsed += uint64(indexSnapshot.Size()) | ||||||
|  |  | ||||||
|  | 	// Account for snapshot that the persister may be working on | ||||||
|  | 	persistEpoch := atomic.LoadUint64(&s.iStats.persistEpoch) | ||||||
|  | 	persistSnapshotSize := atomic.LoadUint64(&s.iStats.persistSnapshotSize) | ||||||
|  | 	if persistEpoch != 0 && indexSnapshot.epoch > persistEpoch { | ||||||
|  | 		// the snapshot that the persister is working on isn't the same as | ||||||
|  | 		// the current snapshot | ||||||
|  | 		memUsed += persistSnapshotSize | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	// Account for snapshot that the merger may be working on | ||||||
|  | 	mergeEpoch := atomic.LoadUint64(&s.iStats.mergeEpoch) | ||||||
|  | 	mergeSnapshotSize := atomic.LoadUint64(&s.iStats.mergeSnapshotSize) | ||||||
|  | 	if mergeEpoch != 0 && indexSnapshot.epoch > mergeEpoch { | ||||||
|  | 		// the snapshot that the merger is working on isn't the same as | ||||||
|  | 		// the current snapshot | ||||||
|  | 		memUsed += mergeSnapshotSize | ||||||
| 	} | 	} | ||||||
| 	s.rootLock.RUnlock() |  | ||||||
|  | 	memUsed += (atomic.LoadUint64(&s.iStats.newSegBufBytesAdded) - | ||||||
|  | 		atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved)) | ||||||
|  |  | ||||||
|  | 	memUsed += (atomic.LoadUint64(&s.iStats.analysisBytesAdded) - | ||||||
|  | 		atomic.LoadUint64(&s.iStats.analysisBytesRemoved)) | ||||||
|  |  | ||||||
| 	return memUsed | 	return memUsed | ||||||
| } | } | ||||||
|  |  | ||||||
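MemoryUsed is reworked to sum the live root snapshot's Size with the snapshot the persister or merger may still be holding, plus the in-flight analysis and new-segment buffer bytes tracked in iStats. Reading it from an application, assuming idx came from the earlier sketch; the type assertion to the concrete *scorch.Scorch is the only extra step:

	if s, ok := idx.(*scorch.Scorch); ok {
		fmt.Println("scorch memory used (bytes):", s.MemoryUsed())
	}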
| @@ -450,3 +607,15 @@ func (s *Scorch) unmarkIneligibleForRemoval(filename string) { | |||||||
| func init() { | func init() { | ||||||
| 	registry.RegisterIndexType(Name, NewScorch) | 	registry.RegisterIndexType(Name, NewScorch) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func parseToInteger(i interface{}) (int, error) { | ||||||
|  | 	switch v := i.(type) { | ||||||
|  | 	case float64: | ||||||
|  | 		return int(v), nil | ||||||
|  | 	case int: | ||||||
|  | 		return v, nil | ||||||
|  |  | ||||||
|  | 	default: | ||||||
|  | 		return 0, fmt.Errorf("expects int or float64 value") | ||||||
|  | 	} | ||||||
|  | } | ||||||
|   | |||||||
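StatsMap keeps the old snake_case keys as aliases of the new Tot*/Cur* counters, so existing dashboards reading, say, "updates" keep working while new code can switch to "TotUpdates". A quick check, again assuming idx from the sketch above:

	m := idx.StatsMap()
	fmt.Println(m["updates"] == m["TotUpdates"])          // true: alias of the same counter
	fmt.Println(m["CurOnDiskBytes"], m["CurOnDiskFiles"]) // disk usage gauges added above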
							
								
								
									
40 vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go (generated, vendored)
							| @@ -17,6 +17,7 @@ package segment | |||||||
| import ( | import ( | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/couchbase/vellum" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| type EmptySegment struct{} | type EmptySegment struct{} | ||||||
| @@ -29,6 +30,10 @@ func (e *EmptySegment) VisitDocument(num uint64, visitor DocumentFieldValueVisit | |||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (e *EmptySegment) DocID(num uint64) ([]byte, error) { | ||||||
|  | 	return nil, nil | ||||||
|  | } | ||||||
|  |  | ||||||
| func (e *EmptySegment) Count() uint64 { | func (e *EmptySegment) Count() uint64 { | ||||||
| 	return 0 | 	return 0 | ||||||
| } | } | ||||||
| @@ -46,6 +51,10 @@ func (e *EmptySegment) Close() error { | |||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (e *EmptySegment) Size() uint64 { | ||||||
|  | 	return 0 | ||||||
|  | } | ||||||
|  |  | ||||||
| func (e *EmptySegment) AddRef() { | func (e *EmptySegment) AddRef() { | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -55,8 +64,8 @@ func (e *EmptySegment) DecRef() error { | |||||||
|  |  | ||||||
| type EmptyDictionary struct{} | type EmptyDictionary struct{} | ||||||
|  |  | ||||||
| func (e *EmptyDictionary) PostingsList(term string, | func (e *EmptyDictionary) PostingsList(term []byte, | ||||||
| 	except *roaring.Bitmap) (PostingsList, error) { | 	except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) { | ||||||
| 	return &EmptyPostingsList{}, nil | 	return &EmptyPostingsList{}, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -72,18 +81,37 @@ func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator { | |||||||
| 	return &EmptyDictionaryIterator{} | 	return &EmptyDictionaryIterator{} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (e *EmptyDictionary) AutomatonIterator(a vellum.Automaton, | ||||||
|  | 	startKeyInclusive, endKeyExclusive []byte) DictionaryIterator { | ||||||
|  | 	return &EmptyDictionaryIterator{} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte, | ||||||
|  | 	includeCount bool) DictionaryIterator { | ||||||
|  | 	return &EmptyDictionaryIterator{} | ||||||
|  | } | ||||||
|  |  | ||||||
| type EmptyDictionaryIterator struct{} | type EmptyDictionaryIterator struct{} | ||||||
|  |  | ||||||
| func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { | func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) { | ||||||
| 	return nil, nil | 	return nil, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) { | ||||||
|  | 	return nil, nil | ||||||
|  | } | ||||||
|  |  | ||||||
| type EmptyPostingsList struct{} | type EmptyPostingsList struct{} | ||||||
|  |  | ||||||
| func (e *EmptyPostingsList) Iterator() PostingsIterator { | func (e *EmptyPostingsList) Iterator(includeFreq, includeNorm, includeLocations bool, | ||||||
|  | 	prealloc PostingsIterator) PostingsIterator { | ||||||
| 	return &EmptyPostingsIterator{} | 	return &EmptyPostingsIterator{} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (e *EmptyPostingsList) Size() int { | ||||||
|  | 	return 0 | ||||||
|  | } | ||||||
|  |  | ||||||
| func (e *EmptyPostingsList) Count() uint64 { | func (e *EmptyPostingsList) Count() uint64 { | ||||||
| 	return 0 | 	return 0 | ||||||
| } | } | ||||||
| @@ -93,3 +121,9 @@ type EmptyPostingsIterator struct{} | |||||||
| func (e *EmptyPostingsIterator) Next() (Posting, error) { | func (e *EmptyPostingsIterator) Next() (Posting, error) { | ||||||
| 	return nil, nil | 	return nil, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (e *EmptyPostingsIterator) Size() int { | ||||||
|  | 	return 0 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | var AnEmptyPostingsIterator = &EmptyPostingsIterator{} | ||||||
|   | |||||||
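The empty.go changes track the widened segment interfaces in this revision: PostingsList now takes the term as a []byte plus an optional preallocated value to reuse, postings lists and iterators report a Size, and dictionaries grew automaton- and set-driven iterators. A small runnable sketch against the empty implementations above; exhaustion is signalled by a (nil, nil) return from Next:

	package main

	import (
		"fmt"

		"github.com/blevesearch/bleve/index/scorch/segment"
	)

	func main() {
		var pl segment.PostingsList = &segment.EmptyPostingsList{}
		it := pl.Iterator(true, true, true, nil) // include freqs/norms/locations, no prealloc
		p, err := it.Next()                      // p == nil, err == nil on an empty list
		fmt.Println(p, err, pl.Size(), it.Size())
	}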
							
								
								
									
321 vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/build.go (generated, vendored)
							| @@ -1,321 +0,0 @@ | |||||||
| //  Copyright (c) 2017 Couchbase, Inc. |  | ||||||
| // |  | ||||||
| // Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
| // you may not use this file except in compliance with the License. |  | ||||||
| // You may obtain a copy of the License at |  | ||||||
| // |  | ||||||
| // 		http://www.apache.org/licenses/LICENSE-2.0 |  | ||||||
| // |  | ||||||
| // Unless required by applicable law or agreed to in writing, software |  | ||||||
| // distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
| // See the License for the specific language governing permissions and |  | ||||||
| // limitations under the License. |  | ||||||
|  |  | ||||||
| package mem |  | ||||||
|  |  | ||||||
| import ( |  | ||||||
| 	"math" |  | ||||||
| 	"sort" |  | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" |  | ||||||
| 	"github.com/blevesearch/bleve/analysis" |  | ||||||
| 	"github.com/blevesearch/bleve/document" |  | ||||||
| 	"github.com/blevesearch/bleve/index" |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| // NewFromAnalyzedDocs places the analyzed document mutations into a new segment |  | ||||||
| func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment { |  | ||||||
| 	s := New() |  | ||||||
|  |  | ||||||
| 	// ensure that _id field get fieldID 0 |  | ||||||
| 	s.getOrDefineField("_id") |  | ||||||
|  |  | ||||||
| 	// fill Dicts/DictKeys and preallocate memory |  | ||||||
| 	s.initializeDict(results) |  | ||||||
|  |  | ||||||
| 	// walk each doc |  | ||||||
| 	for _, result := range results { |  | ||||||
| 		s.processDocument(result) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// go back and sort the dictKeys |  | ||||||
| 	for _, dict := range s.DictKeys { |  | ||||||
| 		sort.Strings(dict) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// compute memory usage of segment |  | ||||||
| 	s.updateSizeInBytes() |  | ||||||
|  |  | ||||||
| 	// professional debugging |  | ||||||
| 	// |  | ||||||
| 	// log.Printf("fields: %v\n", s.FieldsMap) |  | ||||||
| 	// log.Printf("fieldsInv: %v\n", s.FieldsInv) |  | ||||||
| 	// log.Printf("fieldsLoc: %v\n", s.FieldsLoc) |  | ||||||
| 	// log.Printf("dicts: %v\n", s.Dicts) |  | ||||||
| 	// log.Printf("dict keys: %v\n", s.DictKeys) |  | ||||||
| 	// for i, posting := range s.Postings { |  | ||||||
| 	// 	log.Printf("posting %d: %v\n", i, posting) |  | ||||||
| 	// } |  | ||||||
| 	// for i, freq := range s.Freqs { |  | ||||||
| 	// 	log.Printf("freq %d: %v\n", i, freq) |  | ||||||
| 	// } |  | ||||||
| 	// for i, norm := range s.Norms { |  | ||||||
| 	// 	log.Printf("norm %d: %v\n", i, norm) |  | ||||||
| 	// } |  | ||||||
| 	// for i, field := range s.Locfields { |  | ||||||
| 	// 	log.Printf("field %d: %v\n", i, field) |  | ||||||
| 	// } |  | ||||||
| 	// for i, start := range s.Locstarts { |  | ||||||
| 	// 	log.Printf("start %d: %v\n", i, start) |  | ||||||
| 	// } |  | ||||||
| 	// for i, end := range s.Locends { |  | ||||||
| 	// 	log.Printf("end %d: %v\n", i, end) |  | ||||||
| 	// } |  | ||||||
| 	// for i, pos := range s.Locpos { |  | ||||||
| 	// 	log.Printf("pos %d: %v\n", i, pos) |  | ||||||
| 	// } |  | ||||||
| 	// for i, apos := range s.Locarraypos { |  | ||||||
| 	// 	log.Printf("apos %d: %v\n", i, apos) |  | ||||||
| 	// } |  | ||||||
| 	// log.Printf("stored: %v\n", s.Stored) |  | ||||||
| 	// log.Printf("stored types: %v\n", s.StoredTypes) |  | ||||||
| 	// log.Printf("stored pos: %v\n", s.StoredPos) |  | ||||||
|  |  | ||||||
| 	return s |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // fill Dicts/DictKeys and preallocate memory for postings |  | ||||||
| func (s *Segment) initializeDict(results []*index.AnalysisResult) { |  | ||||||
| 	var numPostingsLists int |  | ||||||
|  |  | ||||||
| 	numTermsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id. |  | ||||||
| 	numLocsPerPostingsList := make([]int, 0, 64)  // Keyed by postings list id. |  | ||||||
|  |  | ||||||
| 	var numTokenFrequencies int |  | ||||||
| 	var totLocs int |  | ||||||
|  |  | ||||||
| 	// initial scan for all fieldID's to sort them |  | ||||||
| 	for _, result := range results { |  | ||||||
| 		for _, field := range result.Document.CompositeFields { |  | ||||||
| 			s.getOrDefineField(field.Name()) |  | ||||||
| 		} |  | ||||||
| 		for _, field := range result.Document.Fields { |  | ||||||
| 			s.getOrDefineField(field.Name()) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	sort.Strings(s.FieldsInv[1:]) // keep _id as first field |  | ||||||
| 	s.FieldsMap = make(map[string]uint16, len(s.FieldsInv)) |  | ||||||
| 	for fieldID, fieldName := range s.FieldsInv { |  | ||||||
| 		s.FieldsMap[fieldName] = uint16(fieldID + 1) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	processField := func(fieldID uint16, tfs analysis.TokenFrequencies) { |  | ||||||
| 		for term, tf := range tfs { |  | ||||||
| 			pidPlus1, exists := s.Dicts[fieldID][term] |  | ||||||
| 			if !exists { |  | ||||||
| 				numPostingsLists++ |  | ||||||
| 				pidPlus1 = uint64(numPostingsLists) |  | ||||||
| 				s.Dicts[fieldID][term] = pidPlus1 |  | ||||||
| 				s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term) |  | ||||||
| 				numTermsPerPostingsList = append(numTermsPerPostingsList, 0) |  | ||||||
| 				numLocsPerPostingsList = append(numLocsPerPostingsList, 0) |  | ||||||
| 			} |  | ||||||
| 			pid := pidPlus1 - 1 |  | ||||||
| 			numTermsPerPostingsList[pid] += 1 |  | ||||||
| 			numLocsPerPostingsList[pid] += len(tf.Locations) |  | ||||||
| 			totLocs += len(tf.Locations) |  | ||||||
| 		} |  | ||||||
| 		numTokenFrequencies += len(tfs) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	for _, result := range results { |  | ||||||
| 		// walk each composite field |  | ||||||
| 		for _, field := range result.Document.CompositeFields { |  | ||||||
| 			fieldID := uint16(s.getOrDefineField(field.Name())) |  | ||||||
| 			_, tf := field.Analyze() |  | ||||||
| 			processField(fieldID, tf) |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// walk each field |  | ||||||
| 		for i, field := range result.Document.Fields { |  | ||||||
| 			fieldID := uint16(s.getOrDefineField(field.Name())) |  | ||||||
| 			tf := result.Analyzed[i] |  | ||||||
| 			processField(fieldID, tf) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	s.Postings = make([]*roaring.Bitmap, numPostingsLists) |  | ||||||
| 	for i := 0; i < numPostingsLists; i++ { |  | ||||||
| 		s.Postings[i] = roaring.New() |  | ||||||
| 	} |  | ||||||
| 	s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists) |  | ||||||
| 	for i := 0; i < numPostingsLists; i++ { |  | ||||||
| 		s.PostingsLocs[i] = roaring.New() |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Preallocate big, contiguous backing arrays. |  | ||||||
| 	auint64Backing := make([][]uint64, numPostingsLists*4+totLocs) // For Freqs, Locstarts, Locends, Locpos, sub-Locarraypos. |  | ||||||
| 	uint64Backing := make([]uint64, numTokenFrequencies+totLocs*3) // For sub-Freqs, sub-Locstarts, sub-Locends, sub-Locpos. |  | ||||||
| 	float32Backing := make([]float32, numTokenFrequencies)         // For sub-Norms. |  | ||||||
| 	uint16Backing := make([]uint16, totLocs)                       // For sub-Locfields. |  | ||||||
|  |  | ||||||
| 	// Point top-level slices to the backing arrays. |  | ||||||
| 	s.Freqs = auint64Backing[0:numPostingsLists] |  | ||||||
| 	auint64Backing = auint64Backing[numPostingsLists:] |  | ||||||
|  |  | ||||||
| 	s.Norms = make([][]float32, numPostingsLists) |  | ||||||
|  |  | ||||||
| 	s.Locfields = make([][]uint16, numPostingsLists) |  | ||||||
|  |  | ||||||
| 	s.Locstarts = auint64Backing[0:numPostingsLists] |  | ||||||
| 	auint64Backing = auint64Backing[numPostingsLists:] |  | ||||||
|  |  | ||||||
| 	s.Locends = auint64Backing[0:numPostingsLists] |  | ||||||
| 	auint64Backing = auint64Backing[numPostingsLists:] |  | ||||||
|  |  | ||||||
| 	s.Locpos = auint64Backing[0:numPostingsLists] |  | ||||||
| 	auint64Backing = auint64Backing[numPostingsLists:] |  | ||||||
|  |  | ||||||
| 	s.Locarraypos = make([][][]uint64, numPostingsLists) |  | ||||||
|  |  | ||||||
| 	// Point sub-slices to the backing arrays. |  | ||||||
| 	for pid, numTerms := range numTermsPerPostingsList { |  | ||||||
| 		s.Freqs[pid] = uint64Backing[0:0] |  | ||||||
| 		uint64Backing = uint64Backing[numTerms:] |  | ||||||
|  |  | ||||||
| 		s.Norms[pid] = float32Backing[0:0] |  | ||||||
| 		float32Backing = float32Backing[numTerms:] |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	for pid, numLocs := range numLocsPerPostingsList { |  | ||||||
| 		s.Locfields[pid] = uint16Backing[0:0] |  | ||||||
| 		uint16Backing = uint16Backing[numLocs:] |  | ||||||
|  |  | ||||||
| 		s.Locstarts[pid] = uint64Backing[0:0] |  | ||||||
| 		uint64Backing = uint64Backing[numLocs:] |  | ||||||
|  |  | ||||||
| 		s.Locends[pid] = uint64Backing[0:0] |  | ||||||
| 		uint64Backing = uint64Backing[numLocs:] |  | ||||||
|  |  | ||||||
| 		s.Locpos[pid] = uint64Backing[0:0] |  | ||||||
| 		uint64Backing = uint64Backing[numLocs:] |  | ||||||
|  |  | ||||||
| 		s.Locarraypos[pid] = auint64Backing[0:0] |  | ||||||
| 		auint64Backing = auint64Backing[numLocs:] |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *Segment) processDocument(result *index.AnalysisResult) { |  | ||||||
| 	// used to collate information across fields |  | ||||||
| 	docMap := make(map[uint16]analysis.TokenFrequencies, len(s.FieldsMap)) |  | ||||||
| 	fieldLens := make(map[uint16]int, len(s.FieldsMap)) |  | ||||||
|  |  | ||||||
| 	docNum := uint64(s.addDocument()) |  | ||||||
|  |  | ||||||
| 	processField := func(field uint16, name string, l int, tf analysis.TokenFrequencies) { |  | ||||||
| 		fieldLens[field] += l |  | ||||||
| 		if existingFreqs, ok := docMap[field]; ok { |  | ||||||
| 			existingFreqs.MergeAll(name, tf) |  | ||||||
| 		} else { |  | ||||||
| 			docMap[field] = tf |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) { |  | ||||||
| 		s.Stored[docNum][field] = append(s.Stored[docNum][field], val) |  | ||||||
| 		s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ) |  | ||||||
| 		s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// walk each composite field |  | ||||||
| 	for _, field := range result.Document.CompositeFields { |  | ||||||
| 		fieldID := uint16(s.getOrDefineField(field.Name())) |  | ||||||
| 		l, tf := field.Analyze() |  | ||||||
| 		processField(fieldID, field.Name(), l, tf) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// walk each field |  | ||||||
| 	for i, field := range result.Document.Fields { |  | ||||||
| 		fieldID := uint16(s.getOrDefineField(field.Name())) |  | ||||||
| 		l := result.Length[i] |  | ||||||
| 		tf := result.Analyzed[i] |  | ||||||
| 		processField(fieldID, field.Name(), l, tf) |  | ||||||
| 		if field.Options().IsStored() { |  | ||||||
| 			storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions()) |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		if field.Options().IncludeDocValues() { |  | ||||||
| 			s.DocValueFields[fieldID] = true |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// now that it's been rolled up into docMap, walk that |  | ||||||
| 	for fieldID, tokenFrequencies := range docMap { |  | ||||||
| 		for term, tokenFreq := range tokenFrequencies { |  | ||||||
| 			pid := s.Dicts[fieldID][term] - 1 |  | ||||||
| 			bs := s.Postings[pid] |  | ||||||
| 			bs.AddInt(int(docNum)) |  | ||||||
| 			s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency())) |  | ||||||
| 			s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))) |  | ||||||
| 			locationBS := s.PostingsLocs[pid] |  | ||||||
| 			if len(tokenFreq.Locations) > 0 { |  | ||||||
| 				locationBS.AddInt(int(docNum)) |  | ||||||
| 				for _, loc := range tokenFreq.Locations { |  | ||||||
| 					var locf = fieldID |  | ||||||
| 					if loc.Field != "" { |  | ||||||
| 						locf = uint16(s.getOrDefineField(loc.Field)) |  | ||||||
| 					} |  | ||||||
| 					s.Locfields[pid] = append(s.Locfields[pid], locf) |  | ||||||
| 					s.Locstarts[pid] = append(s.Locstarts[pid], uint64(loc.Start)) |  | ||||||
| 					s.Locends[pid] = append(s.Locends[pid], uint64(loc.End)) |  | ||||||
| 					s.Locpos[pid] = append(s.Locpos[pid], uint64(loc.Position)) |  | ||||||
| 					if len(loc.ArrayPositions) > 0 { |  | ||||||
| 						s.Locarraypos[pid] = append(s.Locarraypos[pid], loc.ArrayPositions) |  | ||||||
| 					} else { |  | ||||||
| 						s.Locarraypos[pid] = append(s.Locarraypos[pid], nil) |  | ||||||
| 					} |  | ||||||
| 				} |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *Segment) getOrDefineField(name string) int { |  | ||||||
| 	fieldIDPlus1, ok := s.FieldsMap[name] |  | ||||||
| 	if !ok { |  | ||||||
| 		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) |  | ||||||
| 		s.FieldsMap[name] = fieldIDPlus1 |  | ||||||
| 		s.FieldsInv = append(s.FieldsInv, name) |  | ||||||
| 		s.Dicts = append(s.Dicts, make(map[string]uint64)) |  | ||||||
| 		s.DictKeys = append(s.DictKeys, make([]string, 0)) |  | ||||||
| 	} |  | ||||||
| 	return int(fieldIDPlus1 - 1) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *Segment) addDocument() int { |  | ||||||
| 	docNum := len(s.Stored) |  | ||||||
| 	s.Stored = append(s.Stored, map[uint16][][]byte{}) |  | ||||||
| 	s.StoredTypes = append(s.StoredTypes, map[uint16][]byte{}) |  | ||||||
| 	s.StoredPos = append(s.StoredPos, map[uint16][][]uint64{}) |  | ||||||
| 	return docNum |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func encodeFieldType(f document.Field) byte { |  | ||||||
| 	fieldType := byte('x') |  | ||||||
| 	switch f.(type) { |  | ||||||
| 	case *document.TextField: |  | ||||||
| 		fieldType = 't' |  | ||||||
| 	case *document.NumericField: |  | ||||||
| 		fieldType = 'n' |  | ||||||
| 	case *document.DateTimeField: |  | ||||||
| 		fieldType = 'd' |  | ||||||
| 	case *document.BooleanField: |  | ||||||
| 		fieldType = 'b' |  | ||||||
| 	case *document.GeoPointField: |  | ||||||
| 		fieldType = 'g' |  | ||||||
| 	case *document.CompositeField: |  | ||||||
| 		fieldType = 'c' |  | ||||||
| 	} |  | ||||||
| 	return fieldType |  | ||||||
| } |  | ||||||
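With the in-memory segment builder deleted, Batch (see the scorch.go hunk above) converts analysis results straight into a zap SegmentBase and accounts for the transient buffer bytes in iStats. A hedged sketch of that call; the helper name is hypothetical and 1024 is assumed to match scorch's DefaultChunkFactor:

	// buildSegment mirrors the path Batch now takes instead of mem.NewFromAnalyzedDocs.
	func buildSegment(results []*index.AnalysisResult) (segment.Segment, uint64, error) {
		return zap.AnalysisResultsToSegmentBase(results, 1024)
	}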
							
								
								
									
103 vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/dict.go (generated, vendored)
							| @@ -1,103 +0,0 @@ | |||||||
| //  Copyright (c) 2017 Couchbase, Inc. |  | ||||||
| // |  | ||||||
| // Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
| // you may not use this file except in compliance with the License. |  | ||||||
| // You may obtain a copy of the License at |  | ||||||
| // |  | ||||||
| // 		http://www.apache.org/licenses/LICENSE-2.0 |  | ||||||
| // |  | ||||||
| // Unless required by applicable law or agreed to in writing, software |  | ||||||
| // distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
| // See the License for the specific language governing permissions and |  | ||||||
| // limitations under the License. |  | ||||||
|  |  | ||||||
| package mem |  | ||||||
|  |  | ||||||
| import ( |  | ||||||
| 	"sort" |  | ||||||
| 	"strings" |  | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" |  | ||||||
| 	"github.com/blevesearch/bleve/index" |  | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| // Dictionary is the in-memory representation of the term dictionary |  | ||||||
| type Dictionary struct { |  | ||||||
| 	segment *Segment |  | ||||||
| 	field   string |  | ||||||
| 	fieldID uint16 |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // PostingsList returns the postings list for the specified term |  | ||||||
| func (d *Dictionary) PostingsList(term string, |  | ||||||
| 	except *roaring.Bitmap) (segment.PostingsList, error) { |  | ||||||
| 	return &PostingsList{ |  | ||||||
| 		dictionary: d, |  | ||||||
| 		term:       term, |  | ||||||
| 		postingsID: d.segment.Dicts[d.fieldID][term], |  | ||||||
| 		except:     except, |  | ||||||
| 	}, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Iterator returns an iterator for this dictionary |  | ||||||
| func (d *Dictionary) Iterator() segment.DictionaryIterator { |  | ||||||
| 	return &DictionaryIterator{ |  | ||||||
| 		d: d, |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // PrefixIterator returns an iterator which only visits terms having the |  | ||||||
| // specified prefix |  | ||||||
| func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { |  | ||||||
| 	offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], prefix) |  | ||||||
| 	return &DictionaryIterator{ |  | ||||||
| 		d:      d, |  | ||||||
| 		prefix: prefix, |  | ||||||
| 		offset: offset, |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // RangeIterator returns an iterator which only visits terms between the |  | ||||||
| // start and end terms.  NOTE: bleve.index API specifies the end is inclusive. |  | ||||||
| func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator { |  | ||||||
| 	offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], start) |  | ||||||
| 	return &DictionaryIterator{ |  | ||||||
| 		d:      d, |  | ||||||
| 		offset: offset, |  | ||||||
| 		end:    end, |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // DictionaryIterator is an iterator for term dictionary |  | ||||||
| type DictionaryIterator struct { |  | ||||||
| 	d      *Dictionary |  | ||||||
| 	prefix string |  | ||||||
| 	end    string |  | ||||||
| 	offset int |  | ||||||
|  |  | ||||||
| 	dictEntry index.DictEntry // reused across Next()'s |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Next returns the next entry in the dictionary |  | ||||||
| func (d *DictionaryIterator) Next() (*index.DictEntry, error) { |  | ||||||
| 	if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 { |  | ||||||
| 		return nil, nil |  | ||||||
| 	} |  | ||||||
| 	next := d.d.segment.DictKeys[d.d.fieldID][d.offset] |  | ||||||
| 	// check prefix |  | ||||||
| 	if d.prefix != "" && !strings.HasPrefix(next, d.prefix) { |  | ||||||
| 		return nil, nil |  | ||||||
| 	} |  | ||||||
| 	// check end (bleve.index API demands inclusive end) |  | ||||||
| 	if d.end != "" && next > d.end { |  | ||||||
| 		return nil, nil |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	d.offset++ |  | ||||||
| 	postingID := d.d.segment.Dicts[d.d.fieldID][next] |  | ||||||
| 	d.dictEntry.Term = next |  | ||||||
| 	d.dictEntry.Count = d.d.segment.Postings[postingID-1].GetCardinality() |  | ||||||
| 	return &d.dictEntry, nil |  | ||||||
| } |  | ||||||
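The deleted dictionary spelled out the iterator contract the surviving implementations keep: Next yields (nil, nil) once terms stop matching the prefix or pass the inclusive range end. A sketch of the consumer loop, using EmptyDictionary from further above as a stand-in (the interface name TermDictionary is assumed here):

	var d segment.TermDictionary = &segment.EmptyDictionary{}
	itr := d.PrefixIterator("app") // hypothetical prefix
	for e, err := itr.Next(); e != nil && err == nil; e, err = itr.Next() {
		fmt.Println(e.Term, e.Count)
	}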
							
								
								
									
178 vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/posting.go (generated, vendored)
							| @@ -1,178 +0,0 @@ | |||||||
| //  Copyright (c) 2017 Couchbase, Inc. |  | ||||||
| // |  | ||||||
| // Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
| // you may not use this file except in compliance with the License. |  | ||||||
| // You may obtain a copy of the License at |  | ||||||
| // |  | ||||||
| // 		http://www.apache.org/licenses/LICENSE-2.0 |  | ||||||
| // |  | ||||||
| // Unless required by applicable law or agreed to in writing, software |  | ||||||
| // distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
| // See the License for the specific language governing permissions and |  | ||||||
| // limitations under the License. |  | ||||||
|  |  | ||||||
| package mem |  | ||||||
|  |  | ||||||
| import ( |  | ||||||
| 	"github.com/RoaringBitmap/roaring" |  | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| // PostingsList is an in-memory representation of a postings list |  | ||||||
| type PostingsList struct { |  | ||||||
| 	dictionary *Dictionary |  | ||||||
| 	term       string |  | ||||||
| 	postingsID uint64 |  | ||||||
| 	except     *roaring.Bitmap |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Count returns the number of items on this postings list |  | ||||||
| func (p *PostingsList) Count() uint64 { |  | ||||||
| 	var rv uint64 |  | ||||||
| 	if p.postingsID > 0 { |  | ||||||
| 		rv = p.dictionary.segment.Postings[p.postingsID-1].GetCardinality() |  | ||||||
| 		if p.except != nil { |  | ||||||
| 			except := p.except.GetCardinality() |  | ||||||
| 			if except > rv { |  | ||||||
| 				// avoid underflow |  | ||||||
| 				except = rv |  | ||||||
| 			} |  | ||||||
| 			rv -= except |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Iterator returns an iterator for this postings list |  | ||||||
| func (p *PostingsList) Iterator() segment.PostingsIterator { |  | ||||||
| 	rv := &PostingsIterator{ |  | ||||||
| 		postings: p, |  | ||||||
| 	} |  | ||||||
| 	if p.postingsID > 0 { |  | ||||||
| 		allbits := p.dictionary.segment.Postings[p.postingsID-1] |  | ||||||
| 		rv.locations = p.dictionary.segment.PostingsLocs[p.postingsID-1] |  | ||||||
| 		rv.all = allbits.Iterator() |  | ||||||
| 		if p.except != nil { |  | ||||||
| 			allExcept := allbits.Clone() |  | ||||||
| 			allExcept.AndNot(p.except) |  | ||||||
| 			rv.actual = allExcept.Iterator() |  | ||||||
| 		} else { |  | ||||||
| 			rv.actual = allbits.Iterator() |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // PostingsIterator provides a way to iterate through the postings list |  | ||||||
| type PostingsIterator struct { |  | ||||||
| 	postings  *PostingsList |  | ||||||
| 	all       roaring.IntIterable |  | ||||||
| 	locations *roaring.Bitmap |  | ||||||
| 	offset    int |  | ||||||
| 	locoffset int |  | ||||||
| 	actual    roaring.IntIterable |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Next returns the next posting on the postings list, or nil at the end |  | ||||||
| func (i *PostingsIterator) Next() (segment.Posting, error) { |  | ||||||
| 	if i.actual == nil || !i.actual.HasNext() { |  | ||||||
| 		return nil, nil |  | ||||||
| 	} |  | ||||||
| 	n := i.actual.Next() |  | ||||||
| 	allN := i.all.Next() |  | ||||||
|  |  | ||||||
| 	// n is the next actual hit (excluding some postings) |  | ||||||
| 	// allN is the next hit in the full postings |  | ||||||
| 	// if they don't match, adjust offsets to factor in the item we're skipping over |  | ||||||
| 	// incr the all iterator, and check again |  | ||||||
| 	for allN != n { |  | ||||||
| 		i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) |  | ||||||
| 		i.offset++ |  | ||||||
| 		allN = i.all.Next() |  | ||||||
| 	} |  | ||||||
| 	rv := &Posting{ |  | ||||||
| 		iterator:  i, |  | ||||||
| 		docNum:    uint64(n), |  | ||||||
| 		offset:    i.offset, |  | ||||||
| 		locoffset: i.locoffset, |  | ||||||
| 		hasLoc:    i.locations.Contains(n), |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset]) |  | ||||||
| 	i.offset++ |  | ||||||
| 	return rv, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Posting is a single entry in a postings list |  | ||||||
| type Posting struct { |  | ||||||
| 	iterator  *PostingsIterator |  | ||||||
| 	docNum    uint64 |  | ||||||
| 	offset    int |  | ||||||
| 	locoffset int |  | ||||||
| 	hasLoc    bool |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Number returns the document number of this posting in this segment |  | ||||||
| func (p *Posting) Number() uint64 { |  | ||||||
| 	return p.docNum |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Frequency returns the frequence of occurance of this term in this doc/field |  | ||||||
| func (p *Posting) Frequency() uint64 { |  | ||||||
| 	return p.iterator.postings.dictionary.segment.Freqs[p.iterator.postings.postingsID-1][p.offset] |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Norm returns the normalization factor for this posting |  | ||||||
| func (p *Posting) Norm() float64 { |  | ||||||
| 	return float64(p.iterator.postings.dictionary.segment.Norms[p.iterator.postings.postingsID-1][p.offset]) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Locations returns the location information for each occurrence |  | ||||||
| func (p *Posting) Locations() []segment.Location { |  | ||||||
| 	if !p.hasLoc { |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	freq := int(p.Frequency()) |  | ||||||
| 	rv := make([]segment.Location, freq) |  | ||||||
| 	for i := 0; i < freq; i++ { |  | ||||||
| 		rv[i] = &Location{ |  | ||||||
| 			p:      p, |  | ||||||
| 			offset: p.locoffset + i, |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Location represents the location of a single occurrence |  | ||||||
| type Location struct { |  | ||||||
| 	p      *Posting |  | ||||||
| 	offset int |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Field returns the name of the field (useful in composite fields to know |  | ||||||
| // which original field the value came from) |  | ||||||
| func (l *Location) Field() string { |  | ||||||
| 	return l.p.iterator.postings.dictionary.segment.FieldsInv[l.p.iterator.postings.dictionary.segment.Locfields[l.p.iterator.postings.postingsID-1][l.offset]] |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Start returns the start byte offset of this occurrence |  | ||||||
| func (l *Location) Start() uint64 { |  | ||||||
| 	return l.p.iterator.postings.dictionary.segment.Locstarts[l.p.iterator.postings.postingsID-1][l.offset] |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // End returns the end byte offset of this occurrence |  | ||||||
| func (l *Location) End() uint64 { |  | ||||||
| 	return l.p.iterator.postings.dictionary.segment.Locends[l.p.iterator.postings.postingsID-1][l.offset] |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Pos returns the 1-based phrase position of this occurrence |  | ||||||
| func (l *Location) Pos() uint64 { |  | ||||||
| 	return l.p.iterator.postings.dictionary.segment.Locpos[l.p.iterator.postings.postingsID-1][l.offset] |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // ArrayPositions returns the array position vector associated with this occurrence |  | ||||||
| func (l *Location) ArrayPositions() []uint64 { |  | ||||||
| 	return l.p.iterator.postings.dictionary.segment.Locarraypos[l.p.iterator.postings.postingsID-1][l.offset] |  | ||||||
| } |  | ||||||
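The removed posting.go shows the per-hit accessors (Number, Frequency, Norm, Locations) that segment.Posting continues to expose; only the backing storage moved to zap. The consumer loop is unchanged, given any segment.PostingsIterator it (hypothetical here):

	for p, err := it.Next(); p != nil; p, err = it.Next() {
		if err != nil {
			break // real code would surface the error
		}
		fmt.Println(p.Number(), p.Frequency(), p.Norm(), len(p.Locations()))
	}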
							
								
								
									
289 vendor/github.com/blevesearch/bleve/index/scorch/segment/mem/segment.go (generated, vendored)
							| @@ -1,289 +0,0 @@ | |||||||
| //  Copyright (c) 2017 Couchbase, Inc. |  | ||||||
| // |  | ||||||
| // Licensed under the Apache License, Version 2.0 (the "License"); |  | ||||||
| // you may not use this file except in compliance with the License. |  | ||||||
| // You may obtain a copy of the License at |  | ||||||
| // |  | ||||||
| // 		http://www.apache.org/licenses/LICENSE-2.0 |  | ||||||
| // |  | ||||||
| // Unless required by applicable law or agreed to in writing, software |  | ||||||
| // distributed under the License is distributed on an "AS IS" BASIS, |  | ||||||
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |  | ||||||
| // See the License for the specific language governing permissions and |  | ||||||
| // limitations under the License. |  | ||||||
|  |  | ||||||
| package mem |  | ||||||
|  |  | ||||||
| import ( |  | ||||||
| 	"fmt" |  | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" |  | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" |  | ||||||
| ) |  | ||||||
|  |  | ||||||
| // _id field is always guaranteed to have fieldID of 0 |  | ||||||
| const idFieldID uint16 = 0 |  | ||||||
|  |  | ||||||
| // KNOWN ISSUES |  | ||||||
| // - LIMITATION - we decide whether or not to store term vectors for a field |  | ||||||
| //                at the segment level, based on the first definition of a |  | ||||||
| //                field we see.  in normal bleve usage this is fine, all |  | ||||||
| //                instances of a field definition will be the same.  however, |  | ||||||
| //                advanced users may violate this and provide unique field |  | ||||||
| //                definitions with each document.  this segment does not |  | ||||||
| //                support this usage. |  | ||||||
|  |  | ||||||
| // TODO |  | ||||||
| // - need better testing of multiple docs, iterating freqs, locations and |  | ||||||
| //   verifying the correct results are returned |  | ||||||
|  |  | ||||||
| // Segment is an in memory implementation of scorch.Segment |  | ||||||
| type Segment struct { |  | ||||||
|  |  | ||||||
| 	// FieldsMap adds 1 to field id to avoid zero value issues |  | ||||||
| 	//  name -> field id + 1 |  | ||||||
| 	FieldsMap map[string]uint16 |  | ||||||
|  |  | ||||||
| 	// FieldsInv is the inverse of FieldsMap |  | ||||||
| 	//  field id -> name |  | ||||||
| 	FieldsInv []string |  | ||||||
|  |  | ||||||
| 	// Term dictionaries for each field |  | ||||||
| 	//  field id -> term -> postings list id + 1 |  | ||||||
| 	Dicts []map[string]uint64 |  | ||||||
|  |  | ||||||
| 	// Terms for each field, where terms are sorted ascending |  | ||||||
| 	//  field id -> []term |  | ||||||
| 	DictKeys [][]string |  | ||||||
|  |  | ||||||
| 	// Postings list |  | ||||||
| 	//  postings list id -> bitmap by docNum |  | ||||||
| 	Postings []*roaring.Bitmap |  | ||||||
|  |  | ||||||
| 	// Postings list has locations |  | ||||||
| 	PostingsLocs []*roaring.Bitmap |  | ||||||
|  |  | ||||||
| 	// Term frequencies |  | ||||||
| 	//  postings list id -> Freqs (one for each hit in bitmap) |  | ||||||
| 	Freqs [][]uint64 |  | ||||||
|  |  | ||||||
| 	// Field norms |  | ||||||
| 	//  postings list id -> Norms (one for each hit in bitmap) |  | ||||||
| 	Norms [][]float32 |  | ||||||
|  |  | ||||||
| 	// Field/start/end/pos/locarraypos |  | ||||||
| 	//  postings list id -> start/end/pos/locarraypos (one for each freq) |  | ||||||
| 	Locfields   [][]uint16 |  | ||||||
| 	Locstarts   [][]uint64 |  | ||||||
| 	Locends     [][]uint64 |  | ||||||
| 	Locpos      [][]uint64 |  | ||||||
| 	Locarraypos [][][]uint64 |  | ||||||
|  |  | ||||||
| 	// Stored field values |  | ||||||
| 	//  docNum -> field id -> slice of values (each value []byte) |  | ||||||
| 	Stored []map[uint16][][]byte |  | ||||||
|  |  | ||||||
| 	// Stored field types |  | ||||||
| 	//  docNum -> field id -> slice of types (each type byte) |  | ||||||
| 	StoredTypes []map[uint16][]byte |  | ||||||
|  |  | ||||||
| 	// Stored field array positions |  | ||||||
| 	//  docNum -> field id -> slice of array positions (each is []uint64) |  | ||||||
| 	StoredPos []map[uint16][][]uint64 |  | ||||||
|  |  | ||||||
| 	// For storing the docValue persisted fields |  | ||||||
| 	DocValueFields map[uint16]bool |  | ||||||
|  |  | ||||||
| 	// Footprint of the segment, updated when analyzed document mutations |  | ||||||
| 	// are added into the segment |  | ||||||
| 	sizeInBytes uint64 |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // New builds a new empty Segment |  | ||||||
| func New() *Segment { |  | ||||||
| 	return &Segment{ |  | ||||||
| 		FieldsMap:      map[string]uint16{}, |  | ||||||
| 		DocValueFields: map[uint16]bool{}, |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *Segment) updateSizeInBytes() { |  | ||||||
| 	var sizeInBytes uint64 |  | ||||||
|  |  | ||||||
| 	// FieldsMap, FieldsInv |  | ||||||
| 	for k, _ := range s.FieldsMap { |  | ||||||
| 		sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + |  | ||||||
| 			2 /* size of uint16 */) |  | ||||||
| 	} |  | ||||||
| 	// overhead from the data structures |  | ||||||
| 	sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) |  | ||||||
|  |  | ||||||
| 	// Dicts, DictKeys |  | ||||||
| 	for _, entry := range s.Dicts { |  | ||||||
| 		for k := range entry { |  | ||||||
| 			sizeInBytes += uint64((len(k)+int(segment.SizeOfString))*2 + |  | ||||||
| 				8 /* size of uint64 */) |  | ||||||
| 		} |  | ||||||
| 		// overhead from the data structures |  | ||||||
| 		sizeInBytes += (segment.SizeOfMap + segment.SizeOfSlice) |  | ||||||
| 	} |  | ||||||
| 	sizeInBytes += (segment.SizeOfSlice * 2) |  | ||||||
|  |  | ||||||
| 	// Postings, PostingsLocs |  | ||||||
| 	for i := 0; i < len(s.Postings); i++ { |  | ||||||
| 		sizeInBytes += (s.Postings[i].GetSizeInBytes() + segment.SizeOfPointer) + |  | ||||||
| 			(s.PostingsLocs[i].GetSizeInBytes() + segment.SizeOfPointer) |  | ||||||
| 	} |  | ||||||
| 	sizeInBytes += (segment.SizeOfSlice * 2) |  | ||||||
|  |  | ||||||
| 	// Freqs, Norms |  | ||||||
| 	for i := 0; i < len(s.Freqs); i++ { |  | ||||||
| 		sizeInBytes += uint64(len(s.Freqs[i])*8 /* size of uint64 */ + |  | ||||||
| 			len(s.Norms[i])*4 /* size of float32 */) + |  | ||||||
| 			(segment.SizeOfSlice * 2) |  | ||||||
| 	} |  | ||||||
| 	sizeInBytes += (segment.SizeOfSlice * 2) |  | ||||||
|  |  | ||||||
| 	// Location data |  | ||||||
| 	for i := 0; i < len(s.Locfields); i++ { |  | ||||||
| 		sizeInBytes += uint64(len(s.Locfields[i])*2 /* size of uint16 */ + |  | ||||||
| 			len(s.Locstarts[i])*8 /* size of uint64 */ + |  | ||||||
| 			len(s.Locends[i])*8 /* size of uint64 */ + |  | ||||||
| 			len(s.Locpos[i])*8 /* size of uint64 */) |  | ||||||
|  |  | ||||||
| 		for j := 0; j < len(s.Locarraypos[i]); j++ { |  | ||||||
| 			sizeInBytes += uint64(len(s.Locarraypos[i][j])*8 /* size of uint64 */) + |  | ||||||
| 				segment.SizeOfSlice |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		sizeInBytes += (segment.SizeOfSlice * 5) |  | ||||||
| 	} |  | ||||||
| 	sizeInBytes += (segment.SizeOfSlice * 5) |  | ||||||
|  |  | ||||||
| 	// Stored data |  | ||||||
| 	for i := 0; i < len(s.Stored); i++ { |  | ||||||
| 		for _, v := range s.Stored[i] { |  | ||||||
| 			sizeInBytes += uint64(2 /* size of uint16 */) |  | ||||||
| 			for _, arr := range v { |  | ||||||
| 				sizeInBytes += uint64(len(arr)) + segment.SizeOfSlice |  | ||||||
| 			} |  | ||||||
| 			sizeInBytes += segment.SizeOfSlice |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		for _, v := range s.StoredTypes[i] { |  | ||||||
| 			sizeInBytes += uint64(2 /* size of uint16 */ + len(v)) + segment.SizeOfSlice |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		for _, v := range s.StoredPos[i] { |  | ||||||
| 			sizeInBytes += uint64(2 /* size of uint16 */) |  | ||||||
| 			for _, arr := range v { |  | ||||||
| 				sizeInBytes += uint64(len(arr)*8 /* size of uint64 */) + |  | ||||||
| 					segment.SizeOfSlice |  | ||||||
| 			} |  | ||||||
| 			sizeInBytes += segment.SizeOfSlice |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// overhead from map(s) within Stored, StoredTypes, StoredPos |  | ||||||
| 		sizeInBytes += (segment.SizeOfMap * 3) |  | ||||||
| 	} |  | ||||||
| 	// overhead from data structures: Stored, StoredTypes, StoredPos |  | ||||||
| 	sizeInBytes += (segment.SizeOfSlice * 3) |  | ||||||
|  |  | ||||||
| 	// DocValueFields |  | ||||||
| 	sizeInBytes += uint64(len(s.DocValueFields)*3 /* size of uint16 + bool */) + |  | ||||||
| 		segment.SizeOfMap |  | ||||||
|  |  | ||||||
| 	// SizeInBytes |  | ||||||
| 	sizeInBytes += uint64(8) |  | ||||||
|  |  | ||||||
| 	s.sizeInBytes = sizeInBytes |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *Segment) SizeInBytes() uint64 { |  | ||||||
| 	return s.sizeInBytes |  | ||||||
| } |  | ||||||
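As a worked example of the accounting above, take a field named "title" under the 64-bit overhead constants from the segment package (SizeOfString = 16, a uint16 value costing 2 bytes): the FieldsMap/FieldsInv pass charges (5+16)*2 + 2 = 44 bytes, the factor of two covering the key being held once by the map and once by the inverse slice.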
|  |  | ||||||
| func (s *Segment) AddRef() { |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *Segment) DecRef() error { |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Fields returns the field names used in this segment |  | ||||||
| func (s *Segment) Fields() []string { |  | ||||||
| 	return s.FieldsInv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // VisitDocument invokes the DocumentFieldValueVisitor for each stored field |  | ||||||
| // for the specified doc number |  | ||||||
| func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { |  | ||||||
| 	// ensure document number exists |  | ||||||
| 	if int(num) > len(s.Stored)-1 { |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
| 	docFields := s.Stored[int(num)] |  | ||||||
| 	st := s.StoredTypes[int(num)] |  | ||||||
| 	sp := s.StoredPos[int(num)] |  | ||||||
| 	for field, values := range docFields { |  | ||||||
| 		for i, value := range values { |  | ||||||
| 			keepGoing := visitor(s.FieldsInv[field], st[field][i], value, sp[field][i]) |  | ||||||
| 			if !keepGoing { |  | ||||||
| 				return nil |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
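A hypothetical caller of VisitDocument (not part of the diff) that gathers every stored value of a document, keyed by field name; the value is copied defensively in case an implementation reuses its buffers:

    func storedFields(s *Segment, docNum uint64) map[string][][]byte {
        rv := map[string][][]byte{}
        _ = s.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool {
            v := append([]byte(nil), value...) // defensive copy
            rv[field] = append(rv[field], v)
            return true // keep visiting
        })
        return rv
    }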
|  |  | ||||||
| func (s *Segment) getField(name string) (int, error) { |  | ||||||
| 	fieldID, ok := s.FieldsMap[name] |  | ||||||
| 	if !ok { |  | ||||||
| 		return 0, fmt.Errorf("no field named %s", name) |  | ||||||
| 	} |  | ||||||
| 	return int(fieldID - 1), nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Dictionary returns the term dictionary for the specified field |  | ||||||
| func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) { |  | ||||||
| 	fieldID, err := s.getField(field) |  | ||||||
| 	if err != nil { |  | ||||||
| 		// no such field, return empty dictionary |  | ||||||
| 		return &segment.EmptyDictionary{}, nil |  | ||||||
| 	} |  | ||||||
| 	return &Dictionary{ |  | ||||||
| 		segment: s, |  | ||||||
| 		field:   field, |  | ||||||
| 		fieldID: uint16(fieldID), |  | ||||||
| 	}, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Count returns the number of documents in this segment |  | ||||||
| // (this has no notion of deleted docs) |  | ||||||
| func (s *Segment) Count() uint64 { |  | ||||||
| 	return uint64(len(s.Stored)) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // DocNumbers returns a bitset corresponding to the doc numbers of all the |  | ||||||
| // provided _id strings |  | ||||||
| func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) { |  | ||||||
| 	rv := roaring.New() |  | ||||||
|  |  | ||||||
| 	// guard against empty segment |  | ||||||
| 	if len(s.FieldsMap) > 0 { |  | ||||||
| 		idDictionary := s.Dicts[idFieldID] |  | ||||||
|  |  | ||||||
| 		for _, id := range ids { |  | ||||||
| 			postingID := idDictionary[id] |  | ||||||
| 			if postingID > 0 { |  | ||||||
| 				rv.Or(s.Postings[postingID-1]) |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv, nil |  | ||||||
| } |  | ||||||
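Typical usage, as a sketch (seg is a hypothetical *Segment, and the ids are assumed to have been indexed under the _id field that idFieldID names):

    deleted, err := seg.DocNumbers([]string{"doc-1", "doc-7"})
    if err == nil {
        itr := deleted.Iterator()
        for itr.HasNext() {
            fmt.Println("local doc number:", itr.Next())
        }
    }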
|  |  | ||||||
| // Close releases all resources associated with this segment |  | ||||||
| func (s *Segment) Close() error { |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
							
								
								
									
75 vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go (generated, vendored, new file)
							| @@ -0,0 +1,75 @@ | |||||||
|  | //  Copyright (c) 2018 Couchbase, Inc. | ||||||
|  | // | ||||||
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | // you may not use this file except in compliance with the License. | ||||||
|  | // You may obtain a copy of the License at | ||||||
|  | // | ||||||
|  | // 		http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  | // | ||||||
|  | // Unless required by applicable law or agreed to in writing, software | ||||||
|  | // distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | // See the License for the specific language governing permissions and | ||||||
|  | // limitations under the License. | ||||||
|  |  | ||||||
|  | package segment | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"regexp/syntax" | ||||||
|  |  | ||||||
|  | 	"github.com/couchbase/vellum/regexp" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | func ParseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) { | ||||||
|  | 	// TODO: potential optimization where syntax.Regexp supports a Simplify() API? | ||||||
|  |  | ||||||
|  | 	parsed, err := syntax.Parse(pattern, syntax.Perl) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, nil, nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	re, err := regexp.NewParsedWithLimit(pattern, parsed, regexp.DefaultLimit) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, nil, nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	prefix := LiteralPrefix(parsed) | ||||||
|  | 	if prefix != "" { | ||||||
|  | 		prefixBeg := []byte(prefix) | ||||||
|  | 		prefixEnd := IncrementBytes(prefixBeg) | ||||||
|  | 		return re, prefixBeg, prefixEnd, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return re, nil, nil, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Returns the literal prefix given the parse tree for a regexp | ||||||
|  | func LiteralPrefix(s *syntax.Regexp) string { | ||||||
|  | 	// traverse the left-most branch in the parse tree as long as the | ||||||
|  | 	// node represents a concatenation | ||||||
|  | 	for s != nil && s.Op == syntax.OpConcat { | ||||||
|  | 		if len(s.Sub) < 1 { | ||||||
|  | 			return "" | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		s = s.Sub[0] | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if s.Op == syntax.OpLiteral { | ||||||
|  | 		return string(s.Rune) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return "" // no literal prefix | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func IncrementBytes(in []byte) []byte { | ||||||
|  | 	rv := make([]byte, len(in)) | ||||||
|  | 	copy(rv, in) | ||||||
|  | 	for i := len(rv) - 1; i >= 0; i-- { | ||||||
|  | 		rv[i] = rv[i] + 1 | ||||||
|  | 		if rv[i] != 0 { | ||||||
|  | 			return rv // didn't overflow, so stop | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return nil // overflowed | ||||||
|  | } | ||||||
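Illustrative behaviour of these helpers, worked by hand rather than asserted anywhere in the diff: a pattern with a literal head yields a byte range covering exactly the terms sharing that prefix, which callers can pair with the returned automaton to bound an FST scan.

    a, beg, end, err := segment.ParseRegexp("user-[0-9]+")
    // err == nil; beg == []byte("user-"); end == []byte("user.") since '-'+1 == '.'
    _, _, _, _ = a, beg, end, err

    // IncrementBytes edge case: an all-0xff input overflows every byte and
    // returns nil, which callers treat as "no exclusive upper bound".
    segment.IncrementBytes([]byte{0xff, 0xff}) // == nil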
							
								
								
									
43 vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go (generated, vendored)
							| @@ -15,15 +15,14 @@ | |||||||
| package segment | package segment | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"fmt" | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/couchbase/vellum" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // Overhead from go data structures when deployed on a 64-bit system. | var ErrClosed = fmt.Errorf("index closed") | ||||||
| const SizeOfMap uint64 = 8 |  | ||||||
| const SizeOfPointer uint64 = 8 |  | ||||||
| const SizeOfSlice uint64 = 24 |  | ||||||
| const SizeOfString uint64 = 16 |  | ||||||
|  |  | ||||||
| // DocumentFieldValueVisitor defines a callback to be visited for each | // DocumentFieldValueVisitor defines a callback to be visited for each | ||||||
| // stored field value.  The return value determines if the visitor | // stored field value.  The return value determines if the visitor | ||||||
| @@ -34,6 +33,9 @@ type Segment interface { | |||||||
| 	Dictionary(field string) (TermDictionary, error) | 	Dictionary(field string) (TermDictionary, error) | ||||||
|  |  | ||||||
| 	VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error | 	VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error | ||||||
|  |  | ||||||
|  | 	DocID(num uint64) ([]byte, error) | ||||||
|  |  | ||||||
| 	Count() uint64 | 	Count() uint64 | ||||||
|  |  | ||||||
| 	DocNumbers([]string) (*roaring.Bitmap, error) | 	DocNumbers([]string) (*roaring.Bitmap, error) | ||||||
| @@ -42,18 +44,21 @@ type Segment interface { | |||||||
|  |  | ||||||
| 	Close() error | 	Close() error | ||||||
|  |  | ||||||
| 	SizeInBytes() uint64 | 	Size() int | ||||||
|  |  | ||||||
| 	AddRef() | 	AddRef() | ||||||
| 	DecRef() error | 	DecRef() error | ||||||
| } | } | ||||||
|  |  | ||||||
| type TermDictionary interface { | type TermDictionary interface { | ||||||
| 	PostingsList(term string, except *roaring.Bitmap) (PostingsList, error) | 	PostingsList(term []byte, except *roaring.Bitmap, prealloc PostingsList) (PostingsList, error) | ||||||
|  |  | ||||||
| 	Iterator() DictionaryIterator | 	Iterator() DictionaryIterator | ||||||
| 	PrefixIterator(prefix string) DictionaryIterator | 	PrefixIterator(prefix string) DictionaryIterator | ||||||
| 	RangeIterator(start, end string) DictionaryIterator | 	RangeIterator(start, end string) DictionaryIterator | ||||||
|  | 	AutomatonIterator(a vellum.Automaton, | ||||||
|  | 		startKeyInclusive, endKeyExclusive []byte) DictionaryIterator | ||||||
|  | 	OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator | ||||||
| } | } | ||||||
|  |  | ||||||
| type DictionaryIterator interface { | type DictionaryIterator interface { | ||||||
| @@ -61,7 +66,9 @@ type DictionaryIterator interface { | |||||||
| } | } | ||||||
|  |  | ||||||
| type PostingsList interface { | type PostingsList interface { | ||||||
| 	Iterator() PostingsIterator | 	Iterator(includeFreq, includeNorm, includeLocations bool, prealloc PostingsIterator) PostingsIterator | ||||||
|  |  | ||||||
|  | 	Size() int | ||||||
|  |  | ||||||
| 	Count() uint64 | 	Count() uint64 | ||||||
|  |  | ||||||
| @@ -77,6 +84,14 @@ type PostingsIterator interface { | |||||||
| 	// implementations may return a shared instance to reduce memory | 	// implementations may return a shared instance to reduce memory | ||||||
| 	// allocations. | 	// allocations. | ||||||
| 	Next() (Posting, error) | 	Next() (Posting, error) | ||||||
|  |  | ||||||
|  | 	// Advance will return the posting with the specified doc number | ||||||
|  | 	// or if there is no such posting, the next posting. | ||||||
|  | 	// Callers MUST NOT attempt to pass a docNum that is less than or | ||||||
|  | 	// equal to the currently visited posting doc Num. | ||||||
|  | 	Advance(docNum uint64) (Posting, error) | ||||||
|  |  | ||||||
|  | 	Size() int | ||||||
| } | } | ||||||
|  |  | ||||||
| type Posting interface { | type Posting interface { | ||||||
| @@ -86,6 +101,8 @@ type Posting interface { | |||||||
| 	Norm() float64 | 	Norm() float64 | ||||||
|  |  | ||||||
| 	Locations() []Location | 	Locations() []Location | ||||||
|  |  | ||||||
|  | 	Size() int | ||||||
| } | } | ||||||
|  |  | ||||||
| type Location interface { | type Location interface { | ||||||
| @@ -94,6 +111,7 @@ type Location interface { | |||||||
| 	End() uint64 | 	End() uint64 | ||||||
| 	Pos() uint64 | 	Pos() uint64 | ||||||
| 	ArrayPositions() []uint64 | 	ArrayPositions() []uint64 | ||||||
|  | 	Size() int | ||||||
| } | } | ||||||
|  |  | ||||||
| // DocumentFieldTermVisitable is implemented by various scorch segment | // DocumentFieldTermVisitable is implemented by various scorch segment | ||||||
| @@ -101,10 +119,17 @@ type Location interface { | |||||||
| // postings or other indexed values. | // postings or other indexed values. | ||||||
| type DocumentFieldTermVisitable interface { | type DocumentFieldTermVisitable interface { | ||||||
| 	VisitDocumentFieldTerms(localDocNum uint64, fields []string, | 	VisitDocumentFieldTerms(localDocNum uint64, fields []string, | ||||||
| 		visitor index.DocumentFieldTermVisitor) error | 		visitor index.DocumentFieldTermVisitor, optional DocVisitState) (DocVisitState, error) | ||||||
|  |  | ||||||
| 	// VisitableDocValueFields implementation should return | 	// VisitableDocValueFields implementation should return | ||||||
| 	// the list of fields which are document value persisted and | 	// the list of fields which are document value persisted and | ||||||
| 	// therefore visitable by the above VisitDocumentFieldTerms method. | 	// therefore visitable by the above VisitDocumentFieldTerms method. | ||||||
| 	VisitableDocValueFields() ([]string, error) | 	VisitableDocValueFields() ([]string, error) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | type DocVisitState interface { | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type StatsReporter interface { | ||||||
|  | 	ReportBytesWritten(bytesWritten uint64) | ||||||
|  | } | ||||||
|   | |||||||
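A minimal sketch of driving the revised interfaces end to end (hypothetical helper; nil is passed for the new prealloc arguments, though callers may recycle previously returned values; p.Frequency() is assumed from the upstream Posting interface, as the hunks shown list only Norm, Locations and the new Size):

    func termFreq(seg segment.Segment, field string, term []byte) (uint64, error) {
        dict, err := seg.Dictionary(field)
        if err != nil {
            return 0, err
        }
        pl, err := dict.PostingsList(term, nil, nil)
        if err != nil {
            return 0, err
        }
        var total uint64
        itr := pl.Iterator(true, false, false, nil) // freqs only: no norms, no locations
        for {
            p, err := itr.Next()
            if err != nil {
                return 0, err
            }
            if p == nil {
                break // iterator exhausted
            }
            total += p.Frequency()
        }
        return total, nil
    }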
							
								
								
									
542 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go (generated, vendored)
							| @@ -16,19 +16,13 @@ package zap | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bufio" | 	"bufio" | ||||||
| 	"bytes" |  | ||||||
| 	"encoding/binary" |  | ||||||
| 	"math" | 	"math" | ||||||
| 	"os" | 	"os" | ||||||
| 	"sort" |  | ||||||
|  |  | ||||||
| 	"github.com/Smerity/govarint" |  | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment/mem" |  | ||||||
| 	"github.com/couchbase/vellum" |  | ||||||
| 	"github.com/golang/snappy" |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| const version uint32 = 3 | const Version uint32 = 11 | ||||||
|  |  | ||||||
|  | const Type string = "zap" | ||||||
|  |  | ||||||
| const fieldNotUninverted = math.MaxUint64 | const fieldNotUninverted = math.MaxUint64 | ||||||
|  |  | ||||||
| @@ -82,219 +76,39 @@ func PersistSegmentBase(sb *SegmentBase, path string) error { | |||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // PersistSegment takes the in-memory segment and persists it to |  | ||||||
| // the specified path in the zap file format. |  | ||||||
| func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) error { |  | ||||||
| 	flag := os.O_RDWR | os.O_CREATE |  | ||||||
|  |  | ||||||
| 	f, err := os.OpenFile(path, flag, 0600) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	cleanup := func() { |  | ||||||
| 		_ = f.Close() |  | ||||||
| 		_ = os.Remove(path) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// buffer the output |  | ||||||
| 	br := bufio.NewWriter(f) |  | ||||||
|  |  | ||||||
| 	// wrap it for counting (tracking offsets) |  | ||||||
| 	cr := NewCountHashWriter(br) |  | ||||||
|  |  | ||||||
| 	numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, err := |  | ||||||
| 		persistBase(memSegment, cr, chunkFactor) |  | ||||||
| 	if err != nil { |  | ||||||
| 		cleanup() |  | ||||||
| 		return err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, |  | ||||||
| 		chunkFactor, cr.Sum32(), cr) |  | ||||||
| 	if err != nil { |  | ||||||
| 		cleanup() |  | ||||||
| 		return err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	err = br.Flush() |  | ||||||
| 	if err != nil { |  | ||||||
| 		cleanup() |  | ||||||
| 		return err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	err = f.Sync() |  | ||||||
| 	if err != nil { |  | ||||||
| 		cleanup() |  | ||||||
| 		return err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	err = f.Close() |  | ||||||
| 	if err != nil { |  | ||||||
| 		cleanup() |  | ||||||
| 		return err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func persistBase(memSegment *mem.Segment, cr *CountHashWriter, chunkFactor uint32) ( |  | ||||||
| 	numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, |  | ||||||
| 	dictLocs []uint64, err error) { |  | ||||||
| 	docValueOffset = uint64(fieldNotUninverted) |  | ||||||
|  |  | ||||||
| 	if len(memSegment.Stored) > 0 { |  | ||||||
| 		storedIndexOffset, err = persistStored(memSegment, cr) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, 0, 0, 0, nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		freqOffsets, locOffsets, err := persistPostingDetails(memSegment, cr, chunkFactor) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, 0, 0, 0, nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		postingsListLocs, err := persistPostingsLocs(memSegment, cr) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, 0, 0, 0, nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		postingsLocs, err := persistPostingsLists(memSegment, cr, postingsListLocs, freqOffsets, locOffsets) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, 0, 0, 0, nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		dictLocs, err = persistDictionary(memSegment, cr, postingsLocs) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, 0, 0, 0, nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		docValueOffset, err = persistFieldDocValues(memSegment, cr, chunkFactor) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, 0, 0, 0, nil, err |  | ||||||
| 		} |  | ||||||
| 	} else { |  | ||||||
| 		dictLocs = make([]uint64, len(memSegment.FieldsInv)) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	fieldsIndexOffset, err = persistFields(memSegment.FieldsInv, cr, dictLocs) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return 0, 0, 0, 0, nil, err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return uint64(len(memSegment.Stored)), storedIndexOffset, fieldsIndexOffset, docValueOffset, |  | ||||||
| 		dictLocs, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error) { |  | ||||||
| 	var curr int |  | ||||||
| 	var metaBuf bytes.Buffer |  | ||||||
| 	var data, compressed []byte |  | ||||||
|  |  | ||||||
| 	metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) |  | ||||||
|  |  | ||||||
| 	docNumOffsets := make(map[int]uint64, len(memSegment.Stored)) |  | ||||||
|  |  | ||||||
| 	for docNum, storedValues := range memSegment.Stored { |  | ||||||
| 		if docNum != 0 { |  | ||||||
| 			// reset buffer if necessary |  | ||||||
| 			curr = 0 |  | ||||||
| 			metaBuf.Reset() |  | ||||||
| 			data = data[:0] |  | ||||||
| 			compressed = compressed[:0] |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		st := memSegment.StoredTypes[docNum] |  | ||||||
| 		sp := memSegment.StoredPos[docNum] |  | ||||||
|  |  | ||||||
| 		// encode fields in order |  | ||||||
| 		for fieldID := range memSegment.FieldsInv { |  | ||||||
| 			if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok { |  | ||||||
| 				stf := st[uint16(fieldID)] |  | ||||||
| 				spf := sp[uint16(fieldID)] |  | ||||||
|  |  | ||||||
| 				var err2 error |  | ||||||
| 				curr, data, err2 = persistStoredFieldValues(fieldID, |  | ||||||
| 					storedFieldValues, stf, spf, curr, metaEncoder, data) |  | ||||||
| 				if err2 != nil { |  | ||||||
| 					return 0, err2 |  | ||||||
| 				} |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		metaEncoder.Close() |  | ||||||
| 		metaBytes := metaBuf.Bytes() |  | ||||||
|  |  | ||||||
| 		// compress the data |  | ||||||
| 		compressed = snappy.Encode(compressed, data) |  | ||||||
|  |  | ||||||
| 		// record where we're about to start writing |  | ||||||
| 		docNumOffsets[docNum] = uint64(w.Count()) |  | ||||||
|  |  | ||||||
| 		// write out the meta len and compressed data len |  | ||||||
| 		_, err := writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// now write the meta |  | ||||||
| 		_, err = w.Write(metaBytes) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, err |  | ||||||
| 		} |  | ||||||
| 		// now write the compressed data |  | ||||||
| 		_, err = w.Write(compressed) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// return value is the start of the stored index |  | ||||||
| 	rv := uint64(w.Count()) |  | ||||||
| 	// now write out the stored doc index |  | ||||||
| 	for docNum := range memSegment.Stored { |  | ||||||
| 		err := binary.Write(w, binary.BigEndian, docNumOffsets[docNum]) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return rv, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func persistStoredFieldValues(fieldID int, | func persistStoredFieldValues(fieldID int, | ||||||
| 	storedFieldValues [][]byte, stf []byte, spf [][]uint64, | 	storedFieldValues [][]byte, stf []byte, spf [][]uint64, | ||||||
| 	curr int, metaEncoder *govarint.Base128Encoder, data []byte) ( | 	curr int, metaEncode varintEncoder, data []byte) ( | ||||||
| 	int, []byte, error) { | 	int, []byte, error) { | ||||||
| 	for i := 0; i < len(storedFieldValues); i++ { | 	for i := 0; i < len(storedFieldValues); i++ { | ||||||
| 		// encode field | 		// encode field | ||||||
| 		_, err := metaEncoder.PutU64(uint64(fieldID)) | 		_, err := metaEncode(uint64(fieldID)) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return 0, nil, err | 			return 0, nil, err | ||||||
| 		} | 		} | ||||||
| 		// encode type | 		// encode type | ||||||
| 		_, err = metaEncoder.PutU64(uint64(stf[i])) | 		_, err = metaEncode(uint64(stf[i])) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return 0, nil, err | 			return 0, nil, err | ||||||
| 		} | 		} | ||||||
| 		// encode start offset | 		// encode start offset | ||||||
| 		_, err = metaEncoder.PutU64(uint64(curr)) | 		_, err = metaEncode(uint64(curr)) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return 0, nil, err | 			return 0, nil, err | ||||||
| 		} | 		} | ||||||
| 		// end len | 		// end len | ||||||
| 		_, err = metaEncoder.PutU64(uint64(len(storedFieldValues[i]))) | 		_, err = metaEncode(uint64(len(storedFieldValues[i]))) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return 0, nil, err | 			return 0, nil, err | ||||||
| 		} | 		} | ||||||
| 		// encode number of array pos | 		// encode number of array pos | ||||||
| 		_, err = metaEncoder.PutU64(uint64(len(spf[i]))) | 		_, err = metaEncode(uint64(len(spf[i]))) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return 0, nil, err | 			return 0, nil, err | ||||||
| 		} | 		} | ||||||
| 		// encode all array positions | 		// encode all array positions | ||||||
| 		for _, pos := range spf[i] { | 		for _, pos := range spf[i] { | ||||||
| 			_, err = metaEncoder.PutU64(pos) | 			_, err = metaEncode(pos) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return 0, nil, err | 				return 0, nil, err | ||||||
| 			} | 			} | ||||||
| @@ -307,337 +121,6 @@ func persistStoredFieldValues(fieldID int, | |||||||
| 	return curr, data, nil | 	return curr, data, nil | ||||||
| } | } | ||||||
|  |  | ||||||
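The persistStoredFieldValues above now takes a varintEncoder function value where the deleted code used govarint's encoder object. Judging from the call sites, the type is func(uint64) (int, error); a compatible encoder is a small closure over encoding/binary (a sketch; the name makeVarintEncoder is invented here):

    func makeVarintEncoder(w io.Writer) func(uint64) (int, error) {
        buf := make([]byte, binary.MaxVarintLen64)
        return func(v uint64) (int, error) {
            n := binary.PutUvarint(buf, v)
            return w.Write(buf[:n])
        }
    }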
| func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFactor uint32) ([]uint64, []uint64, error) { |  | ||||||
| 	var freqOffsets, locOffsets []uint64 |  | ||||||
| 	tfEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) |  | ||||||
| 	for postingID := range memSegment.Postings { |  | ||||||
| 		if postingID != 0 { |  | ||||||
| 			tfEncoder.Reset() |  | ||||||
| 		} |  | ||||||
| 		freqs := memSegment.Freqs[postingID] |  | ||||||
| 		norms := memSegment.Norms[postingID] |  | ||||||
| 		postingsListItr := memSegment.Postings[postingID].Iterator() |  | ||||||
| 		var offset int |  | ||||||
| 		for postingsListItr.HasNext() { |  | ||||||
|  |  | ||||||
| 			docNum := uint64(postingsListItr.Next()) |  | ||||||
|  |  | ||||||
| 			// put freq |  | ||||||
| 			err := tfEncoder.Add(docNum, freqs[offset]) |  | ||||||
| 			if err != nil { |  | ||||||
| 				return nil, nil, err |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			// put norm |  | ||||||
| 			norm := norms[offset] |  | ||||||
| 			normBits := math.Float32bits(norm) |  | ||||||
| 			err = tfEncoder.Add(docNum, uint64(normBits)) |  | ||||||
| 			if err != nil { |  | ||||||
| 				return nil, nil, err |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			offset++ |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// record where this postings freq info starts |  | ||||||
| 		freqOffsets = append(freqOffsets, uint64(w.Count())) |  | ||||||
|  |  | ||||||
| 		tfEncoder.Close() |  | ||||||
| 		_, err := tfEncoder.Write(w) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// now do it again for the locations |  | ||||||
| 	locEncoder := newChunkedIntCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) |  | ||||||
| 	for postingID := range memSegment.Postings { |  | ||||||
| 		if postingID != 0 { |  | ||||||
| 			locEncoder.Reset() |  | ||||||
| 		} |  | ||||||
| 		freqs := memSegment.Freqs[postingID] |  | ||||||
| 		locfields := memSegment.Locfields[postingID] |  | ||||||
| 		locpos := memSegment.Locpos[postingID] |  | ||||||
| 		locstarts := memSegment.Locstarts[postingID] |  | ||||||
| 		locends := memSegment.Locends[postingID] |  | ||||||
| 		locarraypos := memSegment.Locarraypos[postingID] |  | ||||||
| 		postingsListItr := memSegment.Postings[postingID].Iterator() |  | ||||||
| 		var offset int |  | ||||||
| 		var locOffset int |  | ||||||
| 		for postingsListItr.HasNext() { |  | ||||||
| 			docNum := uint64(postingsListItr.Next()) |  | ||||||
| 			for i := 0; i < int(freqs[offset]); i++ { |  | ||||||
| 				if len(locfields) > 0 { |  | ||||||
| 					// put field |  | ||||||
| 					err := locEncoder.Add(docNum, uint64(locfields[locOffset])) |  | ||||||
| 					if err != nil { |  | ||||||
| 						return nil, nil, err |  | ||||||
| 					} |  | ||||||
|  |  | ||||||
| 					// put pos |  | ||||||
| 					err = locEncoder.Add(docNum, locpos[locOffset]) |  | ||||||
| 					if err != nil { |  | ||||||
| 						return nil, nil, err |  | ||||||
| 					} |  | ||||||
|  |  | ||||||
| 					// put start |  | ||||||
| 					err = locEncoder.Add(docNum, locstarts[locOffset]) |  | ||||||
| 					if err != nil { |  | ||||||
| 						return nil, nil, err |  | ||||||
| 					} |  | ||||||
|  |  | ||||||
| 					// put end |  | ||||||
| 					err = locEncoder.Add(docNum, locends[locOffset]) |  | ||||||
| 					if err != nil { |  | ||||||
| 						return nil, nil, err |  | ||||||
| 					} |  | ||||||
|  |  | ||||||
| 					// put the number of array positions to follow |  | ||||||
| 					num := len(locarraypos[locOffset]) |  | ||||||
| 					err = locEncoder.Add(docNum, uint64(num)) |  | ||||||
| 					if err != nil { |  | ||||||
| 						return nil, nil, err |  | ||||||
| 					} |  | ||||||
|  |  | ||||||
| 					// put each array position |  | ||||||
| 					for _, pos := range locarraypos[locOffset] { |  | ||||||
| 						err = locEncoder.Add(docNum, pos) |  | ||||||
| 						if err != nil { |  | ||||||
| 							return nil, nil, err |  | ||||||
| 						} |  | ||||||
| 					} |  | ||||||
| 				} |  | ||||||
| 				locOffset++ |  | ||||||
| 			} |  | ||||||
| 			offset++ |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// record where this postings loc info starts |  | ||||||
| 		locOffsets = append(locOffsets, uint64(w.Count())) |  | ||||||
| 		locEncoder.Close() |  | ||||||
| 		_, err := locEncoder.Write(w) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return freqOffsets, locOffsets, nil |  | ||||||
| } |  | ||||||
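Each hit in the loop above contributes a freq/norm pair, the norm packed through math.Float32bits. A reader reverses the pairing inside each decoded chunk; a sketch, assuming r is a *bytes.Reader positioned over one chunk's uvarint stream:

    freq, _ := binary.ReadUvarint(r)
    normBits, _ := binary.ReadUvarint(r)
    norm := math.Float32frombits(uint32(normBits))
    _, _ = freq, norm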
|  |  | ||||||
| func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) { |  | ||||||
| 	rv = make([]uint64, 0, len(memSegment.PostingsLocs)) |  | ||||||
| 	var reuseBuf bytes.Buffer |  | ||||||
| 	reuseBufVarint := make([]byte, binary.MaxVarintLen64) |  | ||||||
| 	for postingID := range memSegment.PostingsLocs { |  | ||||||
| 		// record where we start this posting loc |  | ||||||
| 		rv = append(rv, uint64(w.Count())) |  | ||||||
| 		// write out the length and bitmap |  | ||||||
| 		_, err = writeRoaringWithLen(memSegment.PostingsLocs[postingID], w, &reuseBuf, reuseBufVarint) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter, |  | ||||||
| 	postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) { |  | ||||||
| 	rv = make([]uint64, 0, len(memSegment.Postings)) |  | ||||||
| 	var reuseBuf bytes.Buffer |  | ||||||
| 	reuseBufVarint := make([]byte, binary.MaxVarintLen64) |  | ||||||
| 	for postingID := range memSegment.Postings { |  | ||||||
| 		// record where we start this posting list |  | ||||||
| 		rv = append(rv, uint64(w.Count())) |  | ||||||
|  |  | ||||||
| 		// write out the term info, loc info, and loc posting list offset |  | ||||||
| 		_, err = writeUvarints(w, freqOffsets[postingID], |  | ||||||
| 			locOffsets[postingID], postingsListLocs[postingID]) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// write out the length and bitmap |  | ||||||
| 		_, err = writeRoaringWithLen(memSegment.Postings[postingID], w, &reuseBuf, reuseBufVarint) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) { |  | ||||||
| 	rv := make([]uint64, 0, len(memSegment.DictKeys)) |  | ||||||
|  |  | ||||||
| 	varintBuf := make([]byte, binary.MaxVarintLen64) |  | ||||||
|  |  | ||||||
| 	var buffer bytes.Buffer |  | ||||||
| 	for fieldID, fieldTerms := range memSegment.DictKeys { |  | ||||||
| 		if fieldID != 0 { |  | ||||||
| 			buffer.Reset() |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// start a new vellum for this field |  | ||||||
| 		builder, err := vellum.New(&buffer, nil) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		dict := memSegment.Dicts[fieldID] |  | ||||||
| 		// now walk the dictionary in order of fieldTerms (already sorted) |  | ||||||
| 		for _, fieldTerm := range fieldTerms { |  | ||||||
| 			postingID := dict[fieldTerm] - 1 |  | ||||||
| 			postingsAddr := postingsLocs[postingID] |  | ||||||
| 			err = builder.Insert([]byte(fieldTerm), postingsAddr) |  | ||||||
| 			if err != nil { |  | ||||||
| 				return nil, err |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 		err = builder.Close() |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// record where this dictionary starts |  | ||||||
| 		rv = append(rv, uint64(w.Count())) |  | ||||||
|  |  | ||||||
| 		vellumData := buffer.Bytes() |  | ||||||
|  |  | ||||||
| 		// write out the length of the vellum data |  | ||||||
| 		n := binary.PutUvarint(varintBuf, uint64(len(vellumData))) |  | ||||||
| 		_, err = w.Write(varintBuf[:n]) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// write this vellum to disk |  | ||||||
| 		_, err = w.Write(vellumData) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return rv, nil |  | ||||||
| } |  | ||||||
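A standalone sketch of the vellum pattern used above: keys must be inserted in ascending byte order, and each key carries a uint64 value, standing in here for a postings offset:

    var buf bytes.Buffer
    b, _ := vellum.New(&buf, nil)
    _ = b.Insert([]byte("apple"), 100)  // postings address for "apple"
    _ = b.Insert([]byte("banana"), 250) // keys strictly ascending
    _ = b.Close()
    fst, _ := vellum.Load(buf.Bytes())
    addr, exists, _ := fst.Get([]byte("banana")) // addr == 250, exists == true
    _, _ = addr, exists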
|  |  | ||||||
| type docIDRange []uint64 |  | ||||||
|  |  | ||||||
| func (a docIDRange) Len() int           { return len(a) } |  | ||||||
| func (a docIDRange) Swap(i, j int)      { a[i], a[j] = a[j], a[i] } |  | ||||||
| func (a docIDRange) Less(i, j int) bool { return a[i] < a[j] } |  | ||||||
|  |  | ||||||
| func persistDocValues(memSegment *mem.Segment, w *CountHashWriter, |  | ||||||
| 	chunkFactor uint32) (map[uint16]uint64, error) { |  | ||||||
| 	fieldChunkOffsets := make(map[uint16]uint64, len(memSegment.FieldsInv)) |  | ||||||
| 	fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), uint64(len(memSegment.Stored)-1)) |  | ||||||
|  |  | ||||||
| 	for fieldID := range memSegment.DocValueFields { |  | ||||||
| 		field := memSegment.FieldsInv[fieldID] |  | ||||||
| 		docTermMap := make(map[uint64][]byte) |  | ||||||
| 		dict, err := memSegment.Dictionary(field) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		dictItr := dict.Iterator() |  | ||||||
| 		next, err := dictItr.Next() |  | ||||||
| 		for err == nil && next != nil { |  | ||||||
| 			postings, err1 := dict.PostingsList(next.Term, nil) |  | ||||||
| 			if err1 != nil { |  | ||||||
| 				return nil, err1 |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			postingsItr := postings.Iterator() |  | ||||||
| 			nextPosting, err2 := postingsItr.Next() |  | ||||||
| 			for err2 == nil && nextPosting != nil { |  | ||||||
| 				docNum := nextPosting.Number() |  | ||||||
| 				docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...) |  | ||||||
| 				docTermMap[docNum] = append(docTermMap[docNum], termSeparator) |  | ||||||
| 				nextPosting, err2 = postingsItr.Next() |  | ||||||
| 			} |  | ||||||
| 			if err2 != nil { |  | ||||||
| 				return nil, err2 |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			next, err = dictItr.Next() |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 		// sort wrt to docIDs |  | ||||||
| 		var docNumbers docIDRange |  | ||||||
| 		for k := range docTermMap { |  | ||||||
| 			docNumbers = append(docNumbers, k) |  | ||||||
| 		} |  | ||||||
| 		sort.Sort(docNumbers) |  | ||||||
|  |  | ||||||
| 		for _, docNum := range docNumbers { |  | ||||||
| 			err = fdvEncoder.Add(docNum, docTermMap[docNum]) |  | ||||||
| 			if err != nil { |  | ||||||
| 				return nil, err |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		fieldChunkOffsets[fieldID] = uint64(w.Count()) |  | ||||||
| 		err = fdvEncoder.Close() |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 		// persist the doc value details for this field |  | ||||||
| 		_, err = fdvEncoder.Write(w) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 		// resetting the encoder for the next field |  | ||||||
| 		fdvEncoder.Reset() |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return fieldChunkOffsets, nil |  | ||||||
| } |  | ||||||
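A sketch of the per-document layout built above: all of a field's terms for one document are concatenated, each followed by the 0xff termSeparator, so a reader recovers them with a split; the trailing empty element comes from the final separator and is skipped by consumers:

    docBytes := []byte("red\xffgreen\xffblue\xff")
    terms := bytes.Split(docBytes, []byte{0xff})
    // terms == ["red" "green" "blue" ""]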
|  |  | ||||||
| func persistFieldDocValues(memSegment *mem.Segment, w *CountHashWriter, |  | ||||||
| 	chunkFactor uint32) (uint64, error) { |  | ||||||
| 	fieldDvOffsets, err := persistDocValues(memSegment, w, chunkFactor) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return 0, err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	fieldDocValuesOffset := uint64(w.Count()) |  | ||||||
| 	buf := make([]byte, binary.MaxVarintLen64) |  | ||||||
| 	offset := uint64(0) |  | ||||||
| 	ok := true |  | ||||||
| 	for fieldID := range memSegment.FieldsInv { |  | ||||||
| 		// if the field isn't configured for docValue, then mark |  | ||||||
| 		// the offset accordingly |  | ||||||
| 		if offset, ok = fieldDvOffsets[uint16(fieldID)]; !ok { |  | ||||||
| 			offset = fieldNotUninverted |  | ||||||
| 		} |  | ||||||
| 		n := binary.PutUvarint(buf, uint64(offset)) |  | ||||||
| 		_, err := w.Write(buf[:n]) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return 0, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return fieldDocValuesOffset, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func NewSegmentBase(memSegment *mem.Segment, chunkFactor uint32) (*SegmentBase, error) { |  | ||||||
| 	var br bytes.Buffer |  | ||||||
|  |  | ||||||
| 	cr := NewCountHashWriter(&br) |  | ||||||
|  |  | ||||||
| 	numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs, err := |  | ||||||
| 		persistBase(memSegment, cr, chunkFactor) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return InitSegmentBase(br.Bytes(), cr.Sum32(), chunkFactor, |  | ||||||
| 		memSegment.FieldsMap, memSegment.FieldsInv, numDocs, |  | ||||||
| 		storedIndexOffset, fieldsIndexOffset, docValueOffset, dictLocs) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, | func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, | ||||||
| 	fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64, | 	fieldsMap map[string]uint16, fieldsInv []string, numDocs uint64, | ||||||
| 	storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64, | 	storedIndexOffset uint64, fieldsIndexOffset uint64, docValueOffset uint64, | ||||||
| @@ -653,10 +136,11 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32, | |||||||
| 		fieldsIndexOffset: fieldsIndexOffset, | 		fieldsIndexOffset: fieldsIndexOffset, | ||||||
| 		docValueOffset:    docValueOffset, | 		docValueOffset:    docValueOffset, | ||||||
| 		dictLocs:          dictLocs, | 		dictLocs:          dictLocs, | ||||||
| 		fieldDvIterMap:    make(map[uint16]*docValueIterator), | 		fieldDvReaders:    make(map[uint16]*docValueReader), | ||||||
| 	} | 	} | ||||||
|  | 	sb.updateSize() | ||||||
|  |  | ||||||
| 	err := sb.loadDvIterators() | 	err := sb.loadDvReaders() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|   | |||||||
							
								
								
									
123 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/contentcoder.go (generated, vendored)
							| @@ -18,10 +18,18 @@ import ( | |||||||
| 	"bytes" | 	"bytes" | ||||||
| 	"encoding/binary" | 	"encoding/binary" | ||||||
| 	"io" | 	"io" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/golang/snappy" | 	"github.com/golang/snappy" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeMetaData int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var md MetaData | ||||||
|  | 	reflectStaticSizeMetaData = int(reflect.TypeOf(md).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| var termSeparator byte = 0xff | var termSeparator byte = 0xff | ||||||
| var termSeparatorSplitSlice = []byte{termSeparator} | var termSeparatorSplitSlice = []byte{termSeparator} | ||||||
|  |  | ||||||
| @@ -30,29 +38,36 @@ type chunkedContentCoder struct { | |||||||
| 	chunkSize uint64 | 	chunkSize uint64 | ||||||
| 	currChunk uint64 | 	currChunk uint64 | ||||||
| 	chunkLens []uint64 | 	chunkLens []uint64 | ||||||
|  |  | ||||||
|  | 	w                io.Writer | ||||||
|  | 	progressiveWrite bool | ||||||
|  |  | ||||||
| 	chunkMetaBuf bytes.Buffer | 	chunkMetaBuf bytes.Buffer | ||||||
| 	chunkBuf     bytes.Buffer | 	chunkBuf     bytes.Buffer | ||||||
|  |  | ||||||
| 	chunkMeta []MetaData | 	chunkMeta []MetaData | ||||||
|  |  | ||||||
|  | 	compressed []byte // temp buf for snappy compression | ||||||
| } | } | ||||||
|  |  | ||||||
| // MetaData represents the data information inside a | // MetaData represents the data information inside a | ||||||
| // chunk. | // chunk. | ||||||
| type MetaData struct { | type MetaData struct { | ||||||
| 	DocNum      uint64 // docNum of the data inside the chunk | 	DocNum      uint64 // docNum of the data inside the chunk | ||||||
| 	DocDvLoc uint64 // starting offset for a given docid | 	DocDvOffset uint64 // offset of data inside the chunk for the given docid | ||||||
| 	DocDvLen uint64 // length of data inside the chunk for the given docid |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // newChunkedContentCoder returns a new chunk content coder which | // newChunkedContentCoder returns a new chunk content coder which | ||||||
| // packs data into chunks based on the provided chunkSize | // packs data into chunks based on the provided chunkSize | ||||||
| func newChunkedContentCoder(chunkSize uint64, | func newChunkedContentCoder(chunkSize uint64, maxDocNum uint64, | ||||||
| 	maxDocNum uint64) *chunkedContentCoder { | 	w io.Writer, progressiveWrite bool) *chunkedContentCoder { | ||||||
| 	total := maxDocNum/chunkSize + 1 | 	total := maxDocNum/chunkSize + 1 | ||||||
| 	rv := &chunkedContentCoder{ | 	rv := &chunkedContentCoder{ | ||||||
| 		chunkSize:        chunkSize, | 		chunkSize:        chunkSize, | ||||||
| 		chunkLens:        make([]uint64, total), | 		chunkLens:        make([]uint64, total), | ||||||
| 		chunkMeta:        make([]MetaData, 0, total), | 		chunkMeta:        make([]MetaData, 0, total), | ||||||
|  | 		w:                w, | ||||||
|  | 		progressiveWrite: progressiveWrite, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return rv | 	return rv | ||||||
| @@ -88,7 +103,7 @@ func (c *chunkedContentCoder) flushContents() error { | |||||||
|  |  | ||||||
| 	// write out the metaData slice | 	// write out the metaData slice | ||||||
| 	for _, meta := range c.chunkMeta { | 	for _, meta := range c.chunkMeta { | ||||||
| 		_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvLoc, meta.DocDvLen) | 		_, err := writeUvarints(&c.chunkMetaBuf, meta.DocNum, meta.DocDvOffset) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| @@ -98,10 +113,19 @@ func (c *chunkedContentCoder) flushContents() error { | |||||||
| 	metaData := c.chunkMetaBuf.Bytes() | 	metaData := c.chunkMetaBuf.Bytes() | ||||||
| 	c.final = append(c.final, c.chunkMetaBuf.Bytes()...) | 	c.final = append(c.final, c.chunkMetaBuf.Bytes()...) | ||||||
| 	// write the compressed data to the final data | 	// write the compressed data to the final data | ||||||
| 	compressedData := snappy.Encode(nil, c.chunkBuf.Bytes()) | 	c.compressed = snappy.Encode(c.compressed[:cap(c.compressed)], c.chunkBuf.Bytes()) | ||||||
| 	c.final = append(c.final, compressedData...) | 	c.final = append(c.final, c.compressed...) | ||||||
|  |  | ||||||
|  | 	c.chunkLens[c.currChunk] = uint64(len(c.compressed) + len(metaData)) | ||||||
|  |  | ||||||
|  | 	if c.progressiveWrite { | ||||||
|  | 		_, err := c.w.Write(c.final) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 		c.final = c.final[:0] | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	c.chunkLens[c.currChunk] = uint64(len(compressedData) + len(metaData)) |  | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -122,7 +146,7 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { | |||||||
| 		c.currChunk = chunk | 		c.currChunk = chunk | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// mark the starting offset for this doc | 	// get the starting offset for this doc | ||||||
| 	dvOffset := c.chunkBuf.Len() | 	dvOffset := c.chunkBuf.Len() | ||||||
| 	dvSize, err := c.chunkBuf.Write(vals) | 	dvSize, err := c.chunkBuf.Write(vals) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| @@ -131,37 +155,76 @@ func (c *chunkedContentCoder) Add(docNum uint64, vals []byte) error { | |||||||
|  |  | ||||||
| 	c.chunkMeta = append(c.chunkMeta, MetaData{ | 	c.chunkMeta = append(c.chunkMeta, MetaData{ | ||||||
| 		DocNum:      docNum, | 		DocNum:      docNum, | ||||||
| 		DocDvLoc: uint64(dvOffset), | 		DocDvOffset: uint64(dvOffset + dvSize), | ||||||
| 		DocDvLen: uint64(dvSize), |  | ||||||
| 	}) | 	}) | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // Write commits all the encoded chunked contents to the provided writer. | // Write commits all the encoded chunked contents to the provided writer. | ||||||
| func (c *chunkedContentCoder) Write(w io.Writer) (int, error) { | // | ||||||
|  | // | ..... data ..... | chunk offsets (varints) | ||||||
|  | // | position of chunk offsets (uint64) | number of offsets (uint64) | | ||||||
|  | // | ||||||
|  | func (c *chunkedContentCoder) Write() (int, error) { | ||||||
| 	var tw int | 	var tw int | ||||||
| 	buf := make([]byte, binary.MaxVarintLen64) |  | ||||||
|  | 	if c.final != nil { | ||||||
|  | 		// write out the data section first | ||||||
|  | 		nw, err := c.w.Write(c.final) | ||||||
|  | 		tw += nw | ||||||
|  | 		if err != nil { | ||||||
|  | 			return tw, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	chunkOffsetsStart := uint64(tw) | ||||||
|  |  | ||||||
|  | 	if cap(c.final) < binary.MaxVarintLen64 { | ||||||
|  | 		c.final = make([]byte, binary.MaxVarintLen64) | ||||||
|  | 	} else { | ||||||
|  | 		c.final = c.final[0:binary.MaxVarintLen64] | ||||||
|  | 	} | ||||||
|  | 	chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) | ||||||
|  | 	// write out the chunk offsets | ||||||
|  | 	for _, chunkOffset := range chunkOffsets { | ||||||
|  | 		n := binary.PutUvarint(c.final, chunkOffset) | ||||||
|  | 		nw, err := c.w.Write(c.final[:n]) | ||||||
|  | 		tw += nw | ||||||
|  | 		if err != nil { | ||||||
|  | 			return tw, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	chunkOffsetsLen := uint64(tw) - chunkOffsetsStart | ||||||
|  |  | ||||||
|  | 	c.final = c.final[0:8] | ||||||
|  | 	// write out the length of chunk offsets | ||||||
|  | 	binary.BigEndian.PutUint64(c.final, chunkOffsetsLen) | ||||||
|  | 	nw, err := c.w.Write(c.final) | ||||||
|  | 	tw += nw | ||||||
|  | 	if err != nil { | ||||||
|  | 		return tw, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// write out the number of chunks | 	// write out the number of chunks | ||||||
| 	n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) | 	binary.BigEndian.PutUint64(c.final, uint64(len(c.chunkLens))) | ||||||
| 	nw, err := w.Write(buf[:n]) | 	nw, err = c.w.Write(c.final) | ||||||
| 	tw += nw |  | ||||||
| 	if err != nil { |  | ||||||
| 		return tw, err |  | ||||||
| 	} |  | ||||||
| 	// write out the chunk lens |  | ||||||
| 	for _, chunkLen := range c.chunkLens { |  | ||||||
| 		n := binary.PutUvarint(buf, uint64(chunkLen)) |  | ||||||
| 		nw, err = w.Write(buf[:n]) |  | ||||||
| 		tw += nw |  | ||||||
| 		if err != nil { |  | ||||||
| 			return tw, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	// write out the data |  | ||||||
| 	nw, err = w.Write(c.final) |  | ||||||
| 	tw += nw | 	tw += nw | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return tw, err | 		return tw, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	c.final = c.final[:0] | ||||||
|  |  | ||||||
| 	return tw, nil | 	return tw, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // ReadDocValueBoundary extracts the start and end offsets from a | ||||||
|  | // metaData header slice | ||||||
|  | func ReadDocValueBoundary(chunk int, metaHeaders []MetaData) (uint64, uint64) { | ||||||
|  | 	var start uint64 | ||||||
|  | 	if chunk > 0 { | ||||||
|  | 		start = metaHeaders[chunk-1].DocDvOffset | ||||||
|  | 	} | ||||||
|  | 	return start, metaHeaders[chunk].DocDvOffset | ||||||
|  | } | ||||||
|   | |||||||
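A worked example of the end-offset scheme that replaced the DocDvLoc/DocDvLen pair: three doc values of lengths 4, 0 and 7 are recorded as cumulative end offsets 4, 4 and 11, and ReadDocValueBoundary recovers each half-open range:

    meta := []MetaData{
        {DocNum: 3, DocDvOffset: 4},
        {DocNum: 5, DocDvOffset: 4}, // empty value for doc 5
        {DocNum: 9, DocDvOffset: 11},
    }
    start, end := ReadDocValueBoundary(1, meta) // start == 4, end == 4
    _, _ = start, end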
							
								
								
									
10 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/count.go (generated, vendored)
							| @@ -17,6 +17,8 @@ package zap | |||||||
| import ( | import ( | ||||||
| 	"hash/crc32" | 	"hash/crc32" | ||||||
| 	"io" | 	"io" | ||||||
|  |  | ||||||
|  | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // CountHashWriter is a wrapper around a Writer which counts the number of | // CountHashWriter is a wrapper around a Writer which counts the number of | ||||||
| @@ -25,6 +27,7 @@ type CountHashWriter struct { | |||||||
| 	w   io.Writer | 	w   io.Writer | ||||||
| 	crc uint32 | 	crc uint32 | ||||||
| 	n   int | 	n   int | ||||||
|  | 	s   segment.StatsReporter | ||||||
| } | } | ||||||
|  |  | ||||||
| // NewCountHashWriter returns a CountHashWriter which wraps the provided Writer | // NewCountHashWriter returns a CountHashWriter which wraps the provided Writer | ||||||
| @@ -32,11 +35,18 @@ func NewCountHashWriter(w io.Writer) *CountHashWriter { | |||||||
| 	return &CountHashWriter{w: w} | 	return &CountHashWriter{w: w} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func NewCountHashWriterWithStatsReporter(w io.Writer, s segment.StatsReporter) *CountHashWriter { | ||||||
|  | 	return &CountHashWriter{w: w, s: s} | ||||||
|  | } | ||||||
|  |  | ||||||
| // Write writes the provided bytes to the wrapped writer and counts the bytes | // Write writes the provided bytes to the wrapped writer and counts the bytes | ||||||
| func (c *CountHashWriter) Write(b []byte) (int, error) { | func (c *CountHashWriter) Write(b []byte) (int, error) { | ||||||
| 	n, err := c.w.Write(b) | 	n, err := c.w.Write(b) | ||||||
| 	c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n]) | 	c.crc = crc32.Update(c.crc, crc32.IEEETable, b[:n]) | ||||||
| 	c.n += n | 	c.n += n | ||||||
|  | 	if c.s != nil { | ||||||
|  | 		c.s.ReportBytesWritten(uint64(n)) | ||||||
|  | 	} | ||||||
| 	return n, err | 	return n, err | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
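A minimal StatsReporter for the new constructor (hypothetical type; needs sync/atomic), tallying the total bytes flushed through the writer:

    type byteCounter struct{ n uint64 }

    func (b *byteCounter) ReportBytesWritten(bytesWritten uint64) {
        atomic.AddUint64(&b.n, bytesWritten)
    }

    // usage: w := NewCountHashWriterWithStatsReporter(file, &byteCounter{})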
							
								
								
									
125 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go (generated, vendored)
							| @@ -15,13 +15,13 @@ | |||||||
| package zap | package zap | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"bytes" | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
| 	"github.com/couchbase/vellum" | 	"github.com/couchbase/vellum" | ||||||
| 	"github.com/couchbase/vellum/regexp" |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // Dictionary is the zap representation of the term dictionary | // Dictionary is the zap representation of the term dictionary | ||||||
| @@ -30,23 +30,36 @@ type Dictionary struct { | |||||||
| 	field     string | 	field     string | ||||||
| 	fieldID   uint16 | 	fieldID   uint16 | ||||||
| 	fst       *vellum.FST | 	fst       *vellum.FST | ||||||
|  | 	fstReader *vellum.Reader | ||||||
| } | } | ||||||
|  |  | ||||||
| // PostingsList returns the postings list for the specified term | // PostingsList returns the postings list for the specified term | ||||||
| func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { | func (d *Dictionary) PostingsList(term []byte, except *roaring.Bitmap, | ||||||
| 	return d.postingsList([]byte(term), except, nil) | 	prealloc segment.PostingsList) (segment.PostingsList, error) { | ||||||
|  | 	var preallocPL *PostingsList | ||||||
|  | 	pl, ok := prealloc.(*PostingsList) | ||||||
|  | 	if ok && pl != nil { | ||||||
|  | 		preallocPL = pl | ||||||
|  | 	} | ||||||
|  | 	return d.postingsList(term, except, preallocPL) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { | func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *PostingsList) (*PostingsList, error) { | ||||||
| 	if d.fst == nil { | 	if d.fstReader == nil { | ||||||
|  | 		if rv == nil || rv == emptyPostingsList { | ||||||
|  | 			return emptyPostingsList, nil | ||||||
|  | 		} | ||||||
| 		return d.postingsListInit(rv, except), nil | 		return d.postingsListInit(rv, except), nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	postingsOffset, exists, err := d.fst.Get(term) | 	postingsOffset, exists, err := d.fstReader.Get(term) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, fmt.Errorf("vellum err: %v", err) | 		return nil, fmt.Errorf("vellum err: %v", err) | ||||||
| 	} | 	} | ||||||
| 	if !exists { | 	if !exists { | ||||||
|  | 		if rv == nil || rv == emptyPostingsList { | ||||||
|  | 			return emptyPostingsList, nil | ||||||
|  | 		} | ||||||
| 		return d.postingsListInit(rv, except), nil | 		return d.postingsListInit(rv, except), nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -65,10 +78,17 @@ func (d *Dictionary) postingsListFromOffset(postingsOffset uint64, except *roari | |||||||
| } | } | ||||||
|  |  | ||||||
| func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { | func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap) *PostingsList { | ||||||
| 	if rv == nil { | 	if rv == nil || rv == emptyPostingsList { | ||||||
| 		rv = &PostingsList{} | 		rv = &PostingsList{} | ||||||
| 	} else { | 	} else { | ||||||
|  | 		postings := rv.postings | ||||||
|  | 		if postings != nil { | ||||||
|  | 			postings.Clear() | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		*rv = PostingsList{} // clear the struct | 		*rv = PostingsList{} // clear the struct | ||||||
|  |  | ||||||
|  | 		rv.postings = postings | ||||||
| 	} | 	} | ||||||
| 	rv.sb = d.sb | 	rv.sb = d.sb | ||||||
| 	rv.except = except | 	rv.except = except | ||||||
| @@ -85,6 +105,8 @@ func (d *Dictionary) Iterator() segment.DictionaryIterator { | |||||||
| 		itr, err := d.fst.Iterator(nil, nil) | 		itr, err := d.fst.Iterator(nil, nil) | ||||||
| 		if err == nil { | 		if err == nil { | ||||||
| 			rv.itr = itr | 			rv.itr = itr | ||||||
|  | 		} else if err != vellum.ErrIteratorDone { | ||||||
|  | 			rv.err = err | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -98,13 +120,15 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator { | |||||||
| 		d: d, | 		d: d, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	kBeg := []byte(prefix) | ||||||
|  | 	kEnd := segment.IncrementBytes(kBeg) | ||||||
|  |  | ||||||
| 	if d.fst != nil { | 	if d.fst != nil { | ||||||
| 		r, err := regexp.New(prefix + ".*") | 		itr, err := d.fst.Iterator(kBeg, kEnd) | ||||||
| 		if err == nil { |  | ||||||
| 			itr, err := d.fst.Search(r, nil, nil) |  | ||||||
| 		if err == nil { | 		if err == nil { | ||||||
| 			rv.itr = itr | 			rv.itr = itr | ||||||
| 			} | 		} else if err != vellum.ErrIteratorDone { | ||||||
|  | 			rv.err = err | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -130,36 +154,103 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator | |||||||
| 		itr, err := d.fst.Iterator([]byte(start), endBytes) | 		itr, err := d.fst.Iterator([]byte(start), endBytes) | ||||||
| 		if err == nil { | 		if err == nil { | ||||||
| 			rv.itr = itr | 			rv.itr = itr | ||||||
|  | 		} else if err != vellum.ErrIteratorDone { | ||||||
|  | 			rv.err = err | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // AutomatonIterator returns an iterator which only visits terms | ||||||
|  | // matching the vellum automaton within the start/end key range | ||||||
|  | func (d *Dictionary) AutomatonIterator(a vellum.Automaton, | ||||||
|  | 	startKeyInclusive, endKeyExclusive []byte) segment.DictionaryIterator { | ||||||
|  | 	rv := &DictionaryIterator{ | ||||||
|  | 		d: d, | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if d.fst != nil { | ||||||
|  | 		itr, err := d.fst.Search(a, startKeyInclusive, endKeyExclusive) | ||||||
|  | 		if err == nil { | ||||||
|  | 			rv.itr = itr | ||||||
|  | 		} else if err != vellum.ErrIteratorDone { | ||||||
|  | 			rv.err = err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (d *Dictionary) OnlyIterator(onlyTerms [][]byte, | ||||||
|  | 	includeCount bool) segment.DictionaryIterator { | ||||||
|  |  | ||||||
|  | 	rv := &DictionaryIterator{ | ||||||
|  | 		d:         d, | ||||||
|  | 		omitCount: !includeCount, | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var buf bytes.Buffer | ||||||
|  | 	builder, err := vellum.New(&buf, nil) | ||||||
|  | 	if err != nil { | ||||||
|  | 		rv.err = err | ||||||
|  | 		return rv | ||||||
|  | 	} | ||||||
|  | 	for _, term := range onlyTerms { | ||||||
|  | 		err = builder.Insert(term, 0) | ||||||
|  | 		if err != nil { | ||||||
|  | 			rv.err = err | ||||||
|  | 			return rv | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	err = builder.Close() | ||||||
|  | 	if err != nil { | ||||||
|  | 		rv.err = err | ||||||
|  | 		return rv | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	onlyFST, err := vellum.Load(buf.Bytes()) | ||||||
|  | 	if err != nil { | ||||||
|  | 		rv.err = err | ||||||
|  | 		return rv | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	itr, err := d.fst.Search(onlyFST, nil, nil) | ||||||
|  | 	if err == nil { | ||||||
|  | 		rv.itr = itr | ||||||
|  | 	} else if err != vellum.ErrIteratorDone { | ||||||
|  | 		rv.err = err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
| // DictionaryIterator is an iterator for term dictionary | // DictionaryIterator is an iterator for term dictionary | ||||||
| type DictionaryIterator struct { | type DictionaryIterator struct { | ||||||
| 	d         *Dictionary | 	d         *Dictionary | ||||||
| 	itr       vellum.Iterator | 	itr       vellum.Iterator | ||||||
| 	err       error | 	err       error | ||||||
| 	tmp       PostingsList | 	tmp       PostingsList | ||||||
|  | 	entry     index.DictEntry | ||||||
|  | 	omitCount bool | ||||||
| } | } | ||||||
|  |  | ||||||
| // Next returns the next entry in the dictionary | // Next returns the next entry in the dictionary | ||||||
| func (i *DictionaryIterator) Next() (*index.DictEntry, error) { | func (i *DictionaryIterator) Next() (*index.DictEntry, error) { | ||||||
| 	if i.itr == nil || i.err == vellum.ErrIteratorDone { | 	if i.err != nil && i.err != vellum.ErrIteratorDone { | ||||||
| 		return nil, nil |  | ||||||
| 	} else if i.err != nil { |  | ||||||
| 		return nil, i.err | 		return nil, i.err | ||||||
|  | 	} else if i.itr == nil || i.err == vellum.ErrIteratorDone { | ||||||
|  | 		return nil, nil | ||||||
| 	} | 	} | ||||||
| 	term, postingsOffset := i.itr.Current() | 	term, postingsOffset := i.itr.Current() | ||||||
|  | 	i.entry.Term = string(term) | ||||||
|  | 	if !i.omitCount { | ||||||
| 		i.err = i.tmp.read(postingsOffset, i.d) | 		i.err = i.tmp.read(postingsOffset, i.d) | ||||||
| 		if i.err != nil { | 		if i.err != nil { | ||||||
| 			return nil, i.err | 			return nil, i.err | ||||||
| 		} | 		} | ||||||
| 	rv := &index.DictEntry{ | 		i.entry.Count = i.tmp.Count() | ||||||
| 		Term:  string(term), |  | ||||||
| 		Count: i.tmp.Count(), |  | ||||||
| 	} | 	} | ||||||
| 	i.err = i.itr.Next() | 	i.err = i.itr.Next() | ||||||
| 	return rv, nil | 	return &i.entry, nil | ||||||
| } | } | ||||||
|   | |||||||
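The dict.go changes above mostly trade allocation for reuse: PostingsList accepts a preallocated result, postingsListInit clears and reuses the roaring bitmap, and DictionaryIterator.Next now returns a pointer to a single reused index.DictEntry instead of allocating one per term. A small sketch of that last pattern, with hypothetical stand-in types, assuming callers copy the entry if they retain it past the next call:

package main

import "fmt"

// entry is a stand-in for index.DictEntry.
type entry struct {
	Term  string
	Count uint64
}

// iterator reuses one entry value across Next calls, like the
// DictionaryIterator above; callers must copy if they retain it.
type iterator struct {
	terms []entry
	pos   int
	cur   entry // reused buffer, mirrors DictionaryIterator.entry
}

func (it *iterator) Next() (*entry, bool) {
	if it.pos >= len(it.terms) {
		return nil, false
	}
	it.cur = it.terms[it.pos] // overwrite in place instead of allocating
	it.pos++
	return &it.cur, true
}

func main() {
	it := &iterator{terms: []entry{{"alpha", 3}, {"beta", 1}}}
	for e, ok := it.Next(); ok; e, ok = it.Next() {
		fmt.Println(e.Term, e.Count)
	}
}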
254 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go (generated, vendored)
							| @@ -19,93 +19,129 @@ import ( | |||||||
| 	"encoding/binary" | 	"encoding/binary" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"math" | 	"math" | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| 	"github.com/golang/snappy" | 	"github.com/golang/snappy" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| type docValueIterator struct { | var reflectStaticSizedocValueReader int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var dvi docValueReader | ||||||
|  | 	reflectStaticSizedocValueReader = int(reflect.TypeOf(dvi).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type docNumTermsVisitor func(docNum uint64, terms []byte) error | ||||||
|  |  | ||||||
|  | type docVisitState struct { | ||||||
|  | 	dvrs    map[uint16]*docValueReader | ||||||
|  | 	segment *Segment | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type docValueReader struct { | ||||||
| 	field          string | 	field          string | ||||||
| 	curChunkNum    uint64 | 	curChunkNum    uint64 | ||||||
| 	numChunks      uint64 | 	chunkOffsets   []uint64 | ||||||
| 	chunkLens      []uint64 |  | ||||||
| 	dvDataLoc      uint64 | 	dvDataLoc      uint64 | ||||||
| 	curChunkHeader []MetaData | 	curChunkHeader []MetaData | ||||||
| 	curChunkData   []byte // compressed data cache | 	curChunkData   []byte // compressed data cache | ||||||
|  | 	uncompressed   []byte // temp buf for snappy decompression | ||||||
| } | } | ||||||
|  |  | ||||||
| func (di *docValueIterator) sizeInBytes() uint64 { | func (di *docValueReader) size() int { | ||||||
| 	// curChunkNum, numChunks, dvDataLoc --> uint64 | 	return reflectStaticSizedocValueReader + size.SizeOfPtr + | ||||||
| 	sizeInBytes := 24 | 		len(di.field) + | ||||||
|  | 		len(di.chunkOffsets)*size.SizeOfUint64 + | ||||||
| 	// field | 		len(di.curChunkHeader)*reflectStaticSizeMetaData + | ||||||
| 	sizeInBytes += (len(di.field) + int(segment.SizeOfString)) | 		len(di.curChunkData) | ||||||
|  |  | ||||||
| 	// chunkLens, curChunkHeader |  | ||||||
| 	sizeInBytes += len(di.chunkLens)*8 + |  | ||||||
| 		len(di.curChunkHeader)*24 + |  | ||||||
| 		int(segment.SizeOfSlice*2) /* overhead from slices */ |  | ||||||
|  |  | ||||||
| 	// curChunkData is mmap'ed, not included |  | ||||||
|  |  | ||||||
| 	return uint64(sizeInBytes) |  | ||||||
| } | } | ||||||
|  |  | ||||||
| func (di *docValueIterator) fieldName() string { | func (di *docValueReader) cloneInto(rv *docValueReader) *docValueReader { | ||||||
|  | 	if rv == nil { | ||||||
|  | 		rv = &docValueReader{} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	rv.field = di.field | ||||||
|  | 	rv.curChunkNum = math.MaxUint64 | ||||||
|  | 	rv.chunkOffsets = di.chunkOffsets // immutable, so it's sharable | ||||||
|  | 	rv.dvDataLoc = di.dvDataLoc | ||||||
|  | 	rv.curChunkHeader = rv.curChunkHeader[:0] | ||||||
|  | 	rv.curChunkData = nil | ||||||
|  | 	rv.uncompressed = rv.uncompressed[:0] | ||||||
|  |  | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (di *docValueReader) fieldName() string { | ||||||
| 	return di.field | 	return di.field | ||||||
| } | } | ||||||
|  |  | ||||||
| func (di *docValueIterator) curChunkNumber() uint64 { | func (di *docValueReader) curChunkNumber() uint64 { | ||||||
| 	return di.curChunkNum | 	return di.curChunkNum | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *SegmentBase) loadFieldDocValueIterator(field string, | func (s *SegmentBase) loadFieldDocValueReader(field string, | ||||||
| 	fieldDvLoc uint64) (*docValueIterator, error) { | 	fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) { | ||||||
| 	// get the docValue offset for the given fields | 	// get the docValue offset for the given fields | ||||||
| 	if fieldDvLoc == fieldNotUninverted { | 	if fieldDvLocStart == fieldNotUninverted { | ||||||
| 		return nil, fmt.Errorf("loadFieldDocValueIterator: "+ | 		return nil, fmt.Errorf("loadFieldDocValueReader: "+ | ||||||
| 			"no docValues found for field: %s", field) | 			"no docValues found for field: %s", field) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// read the number of chunks, chunk lengths | 	// read the number of chunks, and chunk offsets position | ||||||
| 	var offset, clen uint64 | 	var numChunks, chunkOffsetsPosition uint64 | ||||||
| 	numChunks, read := binary.Uvarint(s.mem[fieldDvLoc : fieldDvLoc+binary.MaxVarintLen64]) |  | ||||||
| 	if read <= 0 { |  | ||||||
| 		return nil, fmt.Errorf("failed to read the field "+ |  | ||||||
| 			"doc values for field %s", field) |  | ||||||
| 	} |  | ||||||
| 	offset += uint64(read) |  | ||||||
|  |  | ||||||
| 	fdvIter := &docValueIterator{ | 	if fieldDvLocEnd-fieldDvLocStart > 16 { | ||||||
|  | 		numChunks = binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-8 : fieldDvLocEnd]) | ||||||
|  | 		// read the length of chunk offsets | ||||||
|  | 		chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8]) | ||||||
|  | 		// acquire position of chunk offsets | ||||||
|  | 		chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	fdvIter := &docValueReader{ | ||||||
| 		curChunkNum:  math.MaxUint64, | 		curChunkNum:  math.MaxUint64, | ||||||
| 		field:        field, | 		field:        field, | ||||||
| 		chunkLens:   make([]uint64, int(numChunks)), | 		chunkOffsets: make([]uint64, int(numChunks)), | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	// read the chunk offsets | ||||||
|  | 	var offset uint64 | ||||||
| 	for i := 0; i < int(numChunks); i++ { | 	for i := 0; i < int(numChunks); i++ { | ||||||
| 		clen, read = binary.Uvarint(s.mem[fieldDvLoc+offset : fieldDvLoc+offset+binary.MaxVarintLen64]) | 		loc, read := binary.Uvarint(s.mem[chunkOffsetsPosition+offset : chunkOffsetsPosition+offset+binary.MaxVarintLen64]) | ||||||
| 		if read <= 0 { | 		if read <= 0 { | ||||||
| 			return nil, fmt.Errorf("corrupted chunk length during segment load") | 			return nil, fmt.Errorf("corrupted chunk offset during segment load") | ||||||
| 		} | 		} | ||||||
| 		fdvIter.chunkLens[i] = clen | 		fdvIter.chunkOffsets[i] = loc | ||||||
| 		offset += uint64(read) | 		offset += uint64(read) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	fdvIter.dvDataLoc = fieldDvLoc + offset | 	// set the data offset | ||||||
|  | 	fdvIter.dvDataLoc = fieldDvLocStart | ||||||
|  |  | ||||||
| 	return fdvIter, nil | 	return fdvIter, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func (di *docValueIterator) loadDvChunk(chunkNumber, | func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error { | ||||||
| 	localDocNum uint64, s *SegmentBase) error { |  | ||||||
| 	// advance to the chunk where the docValues | 	// advance to the chunk where the docValues | ||||||
| 	// reside for the given docNum | 	// reside for the given docNum | ||||||
| 	destChunkDataLoc := di.dvDataLoc | 	destChunkDataLoc, curChunkEnd := di.dvDataLoc, di.dvDataLoc | ||||||
| 	for i := 0; i < int(chunkNumber); i++ { | 	start, end := readChunkBoundary(int(chunkNumber), di.chunkOffsets) | ||||||
| 		destChunkDataLoc += di.chunkLens[i] | 	if start >= end { | ||||||
|  | 		di.curChunkHeader = di.curChunkHeader[:0] | ||||||
|  | 		di.curChunkData = nil | ||||||
|  | 		di.curChunkNum = chunkNumber | ||||||
|  | 		di.uncompressed = di.uncompressed[:0] | ||||||
|  | 		return nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	curChunkSize := di.chunkLens[chunkNumber] | 	destChunkDataLoc += start | ||||||
|  | 	curChunkEnd += end | ||||||
|  |  | ||||||
| 	// read the number of docs that reside in the chunk | 	// read the number of docs that reside in the chunk | ||||||
| 	numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) | 	numDocs, read := binary.Uvarint(s.mem[destChunkDataLoc : destChunkDataLoc+binary.MaxVarintLen64]) | ||||||
| 	if read <= 0 { | 	if read <= 0 { | ||||||
| @@ -114,38 +150,81 @@ func (di *docValueIterator) loadDvChunk(chunkNumber, | |||||||
| 	chunkMetaLoc := destChunkDataLoc + uint64(read) | 	chunkMetaLoc := destChunkDataLoc + uint64(read) | ||||||
|  |  | ||||||
| 	offset := uint64(0) | 	offset := uint64(0) | ||||||
|  | 	if cap(di.curChunkHeader) < int(numDocs) { | ||||||
| 		di.curChunkHeader = make([]MetaData, int(numDocs)) | 		di.curChunkHeader = make([]MetaData, int(numDocs)) | ||||||
|  | 	} else { | ||||||
|  | 		di.curChunkHeader = di.curChunkHeader[:int(numDocs)] | ||||||
|  | 	} | ||||||
| 	for i := 0; i < int(numDocs); i++ { | 	for i := 0; i < int(numDocs); i++ { | ||||||
| 		di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) | 		di.curChunkHeader[i].DocNum, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) | ||||||
| 		offset += uint64(read) | 		offset += uint64(read) | ||||||
| 		di.curChunkHeader[i].DocDvLoc, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) | 		di.curChunkHeader[i].DocDvOffset, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) | ||||||
| 		offset += uint64(read) |  | ||||||
| 		di.curChunkHeader[i].DocDvLen, read = binary.Uvarint(s.mem[chunkMetaLoc+offset : chunkMetaLoc+offset+binary.MaxVarintLen64]) |  | ||||||
| 		offset += uint64(read) | 		offset += uint64(read) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	compressedDataLoc := chunkMetaLoc + offset | 	compressedDataLoc := chunkMetaLoc + offset | ||||||
| 	dataLength := destChunkDataLoc + curChunkSize - compressedDataLoc | 	dataLength := curChunkEnd - compressedDataLoc | ||||||
| 	di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength] | 	di.curChunkData = s.mem[compressedDataLoc : compressedDataLoc+dataLength] | ||||||
| 	di.curChunkNum = chunkNumber | 	di.curChunkNum = chunkNumber | ||||||
|  | 	di.uncompressed = di.uncompressed[:0] | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func (di *docValueIterator) visitDocValues(docNum uint64, | func (di *docValueReader) iterateAllDocValues(s *SegmentBase, visitor docNumTermsVisitor) error { | ||||||
| 	visitor index.DocumentFieldTermVisitor) error { | 	for i := 0; i < len(di.chunkOffsets); i++ { | ||||||
| 	// binary search the term locations for the docNum | 		err := di.loadDvChunk(uint64(i), s) | ||||||
| 	start, length := di.getDocValueLocs(docNum) | 		if err != nil { | ||||||
| 	if start == math.MaxUint64 || length == math.MaxUint64 { | 			return err | ||||||
| 		return nil |  | ||||||
| 		} | 		} | ||||||
|  | 		if di.curChunkData == nil || len(di.curChunkHeader) == 0 { | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		// uncompress the already loaded data | 		// uncompress the already loaded data | ||||||
| 	uncompressed, err := snappy.Decode(nil, di.curChunkData) | 		uncompressed, err := snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 		di.uncompressed = uncompressed | ||||||
|  |  | ||||||
|  | 		start := uint64(0) | ||||||
|  | 		for _, entry := range di.curChunkHeader { | ||||||
|  | 			err = visitor(entry.DocNum, uncompressed[start:entry.DocDvOffset]) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
|  | 			start = entry.DocDvOffset | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (di *docValueReader) visitDocValues(docNum uint64, | ||||||
|  | 	visitor index.DocumentFieldTermVisitor) error { | ||||||
|  | 	// binary search the term locations for the docNum | ||||||
|  | 	start, end := di.getDocValueLocs(docNum) | ||||||
|  | 	if start == math.MaxUint64 || end == math.MaxUint64 || start == end { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var uncompressed []byte | ||||||
|  | 	var err error | ||||||
|  | 	// use the uncompressed copy if available | ||||||
|  | 	if len(di.uncompressed) > 0 { | ||||||
|  | 		uncompressed = di.uncompressed | ||||||
|  | 	} else { | ||||||
|  | 		// uncompress the already loaded data | ||||||
|  | 		uncompressed, err = snappy.Decode(di.uncompressed[:cap(di.uncompressed)], di.curChunkData) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 		di.uncompressed = uncompressed | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// pick the terms for the given docNum | 	// pick the terms for the given docNum | ||||||
| 	uncompressed = uncompressed[start : start+length] | 	uncompressed = uncompressed[start:end] | ||||||
| 	for { | 	for { | ||||||
| 		i := bytes.Index(uncompressed, termSeparatorSplitSlice) | 		i := bytes.Index(uncompressed, termSeparatorSplitSlice) | ||||||
| 		if i < 0 { | 		if i < 0 { | ||||||
| @@ -159,55 +238,72 @@ func (di *docValueIterator) visitDocValues(docNum uint64, | |||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func (di *docValueIterator) getDocValueLocs(docNum uint64) (uint64, uint64) { | func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) { | ||||||
| 	i := sort.Search(len(di.curChunkHeader), func(i int) bool { | 	i := sort.Search(len(di.curChunkHeader), func(i int) bool { | ||||||
| 		return di.curChunkHeader[i].DocNum >= docNum | 		return di.curChunkHeader[i].DocNum >= docNum | ||||||
| 	}) | 	}) | ||||||
| 	if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum { | 	if i < len(di.curChunkHeader) && di.curChunkHeader[i].DocNum == docNum { | ||||||
| 		return di.curChunkHeader[i].DocDvLoc, di.curChunkHeader[i].DocDvLen | 		return ReadDocValueBoundary(i, di.curChunkHeader) | ||||||
| 	} | 	} | ||||||
| 	return math.MaxUint64, math.MaxUint64 | 	return math.MaxUint64, math.MaxUint64 | ||||||
| } | } | ||||||
|  |  | ||||||
| // VisitDocumentFieldTerms is an implementation of the | // VisitDocumentFieldTerms is an implementation of the | ||||||
| // DocumentFieldTermVisitable interface | // DocumentFieldTermVisitable interface | ||||||
| func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string, | ||||||
| 	visitor index.DocumentFieldTermVisitor) error { | 	visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) ( | ||||||
| 	fieldIDPlus1 := uint16(0) | 	segment.DocVisitState, error) { | ||||||
| 	ok := true | 	dvs, ok := dvsIn.(*docVisitState) | ||||||
|  | 	if !ok || dvs == nil { | ||||||
|  | 		dvs = &docVisitState{} | ||||||
|  | 	} else { | ||||||
|  | 		if dvs.segment != s { | ||||||
|  | 			dvs.segment = s | ||||||
|  | 			dvs.dvrs = nil | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var fieldIDPlus1 uint16 | ||||||
|  | 	if dvs.dvrs == nil { | ||||||
|  | 		dvs.dvrs = make(map[uint16]*docValueReader, len(fields)) | ||||||
| 		for _, field := range fields { | 		for _, field := range fields { | ||||||
| 			if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { | 			if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { | ||||||
| 				continue | 				continue | ||||||
| 			} | 			} | ||||||
|  | 			fieldID := fieldIDPlus1 - 1 | ||||||
|  | 			if dvIter, exists := s.fieldDvReaders[fieldID]; exists && | ||||||
|  | 				dvIter != nil { | ||||||
|  | 				dvs.dvrs[fieldID] = dvIter.cloneInto(dvs.dvrs[fieldID]) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// find the chunkNumber where the docValues are stored | 	// find the chunkNumber where the docValues are stored | ||||||
| 	docInChunk := localDocNum / uint64(s.chunkFactor) | 	docInChunk := localDocNum / uint64(s.chunkFactor) | ||||||
|  | 	var dvr *docValueReader | ||||||
| 		if dvIter, exists := s.fieldDvIterMap[fieldIDPlus1-1]; exists && | 	for _, field := range fields { | ||||||
| 			dvIter != nil { | 		if fieldIDPlus1, ok = s.fieldsMap[field]; !ok { | ||||||
| 			// check if the chunk is already loaded |  | ||||||
| 			if docInChunk != dvIter.curChunkNumber() { |  | ||||||
| 				err := dvIter.loadDvChunk(docInChunk, localDocNum, s) |  | ||||||
| 				if err != nil { |  | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
|  | 		fieldID := fieldIDPlus1 - 1 | ||||||
|  | 		if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil { | ||||||
|  | 			// check if the chunk is already loaded | ||||||
|  | 			if docInChunk != dvr.curChunkNumber() { | ||||||
|  | 				err := dvr.loadDvChunk(docInChunk, &s.SegmentBase) | ||||||
|  | 				if err != nil { | ||||||
|  | 					return dvs, err | ||||||
|  | 				} | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			_ = dvIter.visitDocValues(localDocNum, visitor) | 			_ = dvr.visitDocValues(localDocNum, visitor) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	return nil | 	return dvs, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // VisitableDocValueFields returns the list of fields with | // VisitableDocValueFields returns the list of fields with | ||||||
| // persisted doc value terms ready to be visitable using the | // persisted doc value terms ready to be visitable using the | ||||||
| // VisitDocumentFieldTerms method. | // VisitDocumentFieldTerms method. | ||||||
| func (s *Segment) VisitableDocValueFields() ([]string, error) { | func (s *Segment) VisitableDocValueFields() ([]string, error) { | ||||||
| 	var rv []string | 	return s.fieldDvNames, nil | ||||||
| 	for fieldID, field := range s.fieldsInv { |  | ||||||
| 		if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok && |  | ||||||
| 			dvIter != nil { |  | ||||||
| 			rv = append(rv, field) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv, nil |  | ||||||
| } | } | ||||||
|   | |||||||
16 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/enumerator.go (generated, vendored)
							| @@ -46,26 +46,27 @@ func newEnumerator(itrs []vellum.Iterator) (*enumerator, error) { | |||||||
| 	for i, itr := range rv.itrs { | 	for i, itr := range rv.itrs { | ||||||
| 		rv.currKs[i], rv.currVs[i] = itr.Current() | 		rv.currKs[i], rv.currVs[i] = itr.Current() | ||||||
| 	} | 	} | ||||||
| 	rv.updateMatches() | 	rv.updateMatches(false) | ||||||
| 	if rv.lowK == nil { | 	if rv.lowK == nil && len(rv.lowIdxs) == 0 { | ||||||
| 		return rv, vellum.ErrIteratorDone | 		return rv, vellum.ErrIteratorDone | ||||||
| 	} | 	} | ||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // updateMatches maintains the low key matches based on the currKs | // updateMatches maintains the low key matches based on the currKs | ||||||
| func (m *enumerator) updateMatches() { | func (m *enumerator) updateMatches(skipEmptyKey bool) { | ||||||
| 	m.lowK = nil | 	m.lowK = nil | ||||||
| 	m.lowIdxs = m.lowIdxs[:0] | 	m.lowIdxs = m.lowIdxs[:0] | ||||||
| 	m.lowCurr = 0 | 	m.lowCurr = 0 | ||||||
|  |  | ||||||
| 	for i, key := range m.currKs { | 	for i, key := range m.currKs { | ||||||
| 		if key == nil { | 		if (key == nil && m.currVs[i] == 0) || // in case of empty iterator | ||||||
|  | 			(len(key) == 0 && skipEmptyKey) { // skip empty keys | ||||||
| 			continue | 			continue | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		cmp := bytes.Compare(key, m.lowK) | 		cmp := bytes.Compare(key, m.lowK) | ||||||
| 		if cmp < 0 || m.lowK == nil { | 		if cmp < 0 || len(m.lowIdxs) == 0 { | ||||||
| 			// reached a new low | 			// reached a new low | ||||||
| 			m.lowK = key | 			m.lowK = key | ||||||
| 			m.lowIdxs = m.lowIdxs[:0] | 			m.lowIdxs = m.lowIdxs[:0] | ||||||
| @@ -102,9 +103,10 @@ func (m *enumerator) Next() error { | |||||||
| 			} | 			} | ||||||
| 			m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current() | 			m.currKs[vi], m.currVs[vi] = m.itrs[vi].Current() | ||||||
| 		} | 		} | ||||||
| 		m.updateMatches() | 		// can skip any empty keys encountered at this point | ||||||
|  | 		m.updateMatches(true) | ||||||
| 	} | 	} | ||||||
| 	if m.lowK == nil { | 	if m.lowK == nil && len(m.lowIdxs) == 0 { | ||||||
| 		return vellum.ErrIteratorDone | 		return vellum.ErrIteratorDone | ||||||
| 	} | 	} | ||||||
| 	return nil | 	return nil | ||||||
|   | |||||||
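For orientation, the enumerator above is a k-way merge over vellum iterators: updateMatches rescans the current keys, records the smallest one (lowK) plus every iterator index positioned at it (lowIdxs), and the new len(lowIdxs) check lets a legitimately empty key act as the low key. A compact sketch of the scan, minus the skipEmptyKey wrinkle:

package main

import (
	"bytes"
	"fmt"
)

// updateMatches finds the smallest current key across iterators and
// the indexes of all iterators positioned at it, skipping nil keys
// (exhausted iterators).
func updateMatches(currKs [][]byte) (lowK []byte, lowIdxs []int) {
	for i, key := range currKs {
		if key == nil {
			continue
		}
		cmp := bytes.Compare(key, lowK)
		if cmp < 0 || len(lowIdxs) == 0 {
			// reached a new low; note len(lowIdxs)==0 rather than
			// lowK==nil, so an empty key can still be the low key
			lowK = key
			lowIdxs = lowIdxs[:0]
			lowIdxs = append(lowIdxs, i)
		} else if cmp == 0 {
			lowIdxs = append(lowIdxs, i)
		}
	}
	return lowK, lowIdxs
}

func main() {
	keys := [][]byte{[]byte("beta"), []byte("alpha"), nil, []byte("alpha")}
	lowK, lowIdxs := updateMatches(keys)
	fmt.Printf("%s %v\n", lowK, lowIdxs) // alpha [1 3]
}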
79 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/intcoder.go (generated, vendored)
							| @@ -18,16 +18,12 @@ import ( | |||||||
| 	"bytes" | 	"bytes" | ||||||
| 	"encoding/binary" | 	"encoding/binary" | ||||||
| 	"io" | 	"io" | ||||||
|  |  | ||||||
| 	"github.com/Smerity/govarint" |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| type chunkedIntCoder struct { | type chunkedIntCoder struct { | ||||||
| 	final     []byte | 	final     []byte | ||||||
| 	maxDocNum uint64 |  | ||||||
| 	chunkSize uint64 | 	chunkSize uint64 | ||||||
| 	chunkBuf  bytes.Buffer | 	chunkBuf  bytes.Buffer | ||||||
| 	encoder   *govarint.Base128Encoder |  | ||||||
| 	chunkLens []uint64 | 	chunkLens []uint64 | ||||||
| 	currChunk uint64 | 	currChunk uint64 | ||||||
|  |  | ||||||
| @@ -41,11 +37,9 @@ func newChunkedIntCoder(chunkSize uint64, maxDocNum uint64) *chunkedIntCoder { | |||||||
| 	total := maxDocNum/chunkSize + 1 | 	total := maxDocNum/chunkSize + 1 | ||||||
| 	rv := &chunkedIntCoder{ | 	rv := &chunkedIntCoder{ | ||||||
| 		chunkSize: chunkSize, | 		chunkSize: chunkSize, | ||||||
| 		maxDocNum: maxDocNum, |  | ||||||
| 		chunkLens: make([]uint64, total), | 		chunkLens: make([]uint64, total), | ||||||
| 		final:     make([]byte, 0, 64), | 		final:     make([]byte, 0, 64), | ||||||
| 	} | 	} | ||||||
| 	rv.encoder = govarint.NewU64Base128Encoder(&rv.chunkBuf) |  | ||||||
|  |  | ||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
| @@ -67,16 +61,18 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { | |||||||
| 	chunk := docNum / c.chunkSize | 	chunk := docNum / c.chunkSize | ||||||
| 	if chunk != c.currChunk { | 	if chunk != c.currChunk { | ||||||
| 		// starting a new chunk | 		// starting a new chunk | ||||||
| 		if c.encoder != nil { |  | ||||||
| 			// close out last |  | ||||||
| 		c.Close() | 		c.Close() | ||||||
| 		c.chunkBuf.Reset() | 		c.chunkBuf.Reset() | ||||||
| 		} |  | ||||||
| 		c.currChunk = chunk | 		c.currChunk = chunk | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	if len(c.buf) < binary.MaxVarintLen64 { | ||||||
|  | 		c.buf = make([]byte, binary.MaxVarintLen64) | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	for _, val := range vals { | 	for _, val := range vals { | ||||||
| 		_, err := c.encoder.PutU64(val) | 		wb := binary.PutUvarint(c.buf, val) | ||||||
|  | 		_, err := c.chunkBuf.Write(c.buf[:wb]) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| @@ -85,13 +81,26 @@ func (c *chunkedIntCoder) Add(docNum uint64, vals ...uint64) error { | |||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (c *chunkedIntCoder) AddBytes(docNum uint64, buf []byte) error { | ||||||
|  | 	chunk := docNum / c.chunkSize | ||||||
|  | 	if chunk != c.currChunk { | ||||||
|  | 		// starting a new chunk | ||||||
|  | 		c.Close() | ||||||
|  | 		c.chunkBuf.Reset() | ||||||
|  | 		c.currChunk = chunk | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	_, err := c.chunkBuf.Write(buf) | ||||||
|  | 	return err | ||||||
|  | } | ||||||
|  |  | ||||||
| // Close indicates you are done calling Add(); this allows the final chunk | // Close indicates you are done calling Add(); this allows the final chunk | ||||||
| // to be encoded. | // to be encoded. | ||||||
| func (c *chunkedIntCoder) Close() { | func (c *chunkedIntCoder) Close() { | ||||||
| 	c.encoder.Close() |  | ||||||
| 	encodingBytes := c.chunkBuf.Bytes() | 	encodingBytes := c.chunkBuf.Bytes() | ||||||
| 	c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) | 	c.chunkLens[c.currChunk] = uint64(len(encodingBytes)) | ||||||
| 	c.final = append(c.final, encodingBytes...) | 	c.final = append(c.final, encodingBytes...) | ||||||
|  | 	c.currChunk = uint64(cap(c.chunkLens)) // sentinel to detect double close | ||||||
| } | } | ||||||
|  |  | ||||||
| // Write commits all the encoded chunked integers to the provided writer. | // Write commits all the encoded chunked integers to the provided writer. | ||||||
| @@ -102,10 +111,13 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { | |||||||
| 	} | 	} | ||||||
| 	buf := c.buf | 	buf := c.buf | ||||||
|  |  | ||||||
| 	// write out the number of chunks & each chunkLen | 	// convert the chunk lengths into chunk offsets | ||||||
| 	n := binary.PutUvarint(buf, uint64(len(c.chunkLens))) | 	chunkOffsets := modifyLengthsToEndOffsets(c.chunkLens) | ||||||
| 	for _, chunkLen := range c.chunkLens { |  | ||||||
| 		n += binary.PutUvarint(buf[n:], uint64(chunkLen)) | 	// write out the number of chunks & each chunk offset | ||||||
|  | 	n := binary.PutUvarint(buf, uint64(len(chunkOffsets))) | ||||||
|  | 	for _, chunkOffset := range chunkOffsets { | ||||||
|  | 		n += binary.PutUvarint(buf[n:], chunkOffset) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	tw, err := w.Write(buf[:n]) | 	tw, err := w.Write(buf[:n]) | ||||||
| @@ -121,3 +133,40 @@ func (c *chunkedIntCoder) Write(w io.Writer) (int, error) { | |||||||
| 	} | 	} | ||||||
| 	return tw, nil | 	return tw, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (c *chunkedIntCoder) FinalSize() int { | ||||||
|  | 	return len(c.final) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // modifyLengthsToEndOffsets converts the chunk length array | ||||||
|  | // to a chunk offset array. The readChunkBoundary | ||||||
|  | // will figure out the start and end of every chunk from | ||||||
|  | // these offsets. The starting offset of the i'th chunk is | ||||||
|  | // stored at position i-1, and its ending offset at position i; | ||||||
|  | // for the 0'th chunk, the starting offset is always zero. | ||||||
|  | // eg: | ||||||
|  | // Lens ->  5 5 5 5 => 5 10 15 20 | ||||||
|  | // Lens ->  0 5 0 5 => 0 5 5 10 | ||||||
|  | // Lens ->  0 0 0 5 => 0 0 0 5 | ||||||
|  | // Lens ->  5 0 0 0 => 5 5 5 5 | ||||||
|  | // Lens ->  0 5 0 0 => 0 5 5 5 | ||||||
|  | // Lens ->  0 0 5 0 => 0 0 5 5 | ||||||
|  | func modifyLengthsToEndOffsets(lengths []uint64) []uint64 { | ||||||
|  | 	var runningOffset uint64 | ||||||
|  | 	var index, i int | ||||||
|  | 	for i = 1; i <= len(lengths); i++ { | ||||||
|  | 		runningOffset += lengths[i-1] | ||||||
|  | 		lengths[index] = runningOffset | ||||||
|  | 		index++ | ||||||
|  | 	} | ||||||
|  | 	return lengths | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) { | ||||||
|  | 	var start uint64 | ||||||
|  | 	if chunk > 0 { | ||||||
|  | 		start = offsets[chunk-1] | ||||||
|  | 	} | ||||||
|  | 	return start, offsets[chunk] | ||||||
|  | } | ||||||
|   | |||||||
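A quick worked check of the two helpers above, reusing their logic in a standalone form: lengths become cumulative end offsets in place, and each chunk's boundary is then recovered as (previous offset, own offset), so a zero-length chunk shows up as start == end:

package main

import "fmt"

// modifyLengthsToEndOffsets converts chunk lengths to cumulative end
// offsets in place, matching the helper above.
func modifyLengthsToEndOffsets(lengths []uint64) []uint64 {
	var runningOffset uint64
	for i := 0; i < len(lengths); i++ {
		runningOffset += lengths[i]
		lengths[i] = runningOffset
	}
	return lengths
}

// readChunkBoundary recovers [start, end) for a chunk from the offsets.
func readChunkBoundary(chunk int, offsets []uint64) (uint64, uint64) {
	var start uint64
	if chunk > 0 {
		start = offsets[chunk-1]
	}
	return start, offsets[chunk]
}

func main() {
	offsets := modifyLengthsToEndOffsets([]uint64{0, 5, 0, 5}) // -> [0 5 5 10]
	fmt.Println(offsets)
	for c := range offsets {
		s, e := readChunkBoundary(c, offsets)
		fmt.Printf("chunk %d: [%d, %d) len=%d\n", c, s, e, e-s)
	}
	// chunk 1 spans [0,5) and chunk 3 spans [5,10); chunks 0 and 2 are empty
}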
544 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go (generated, vendored)
							| @@ -24,11 +24,13 @@ import ( | |||||||
| 	"sort" | 	"sort" | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
| 	"github.com/Smerity/govarint" | 	seg "github.com/blevesearch/bleve/index/scorch/segment" | ||||||
| 	"github.com/couchbase/vellum" | 	"github.com/couchbase/vellum" | ||||||
| 	"github.com/golang/snappy" | 	"github.com/golang/snappy" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var DefaultFileMergerBufferSize = 1024 * 1024 | ||||||
|  |  | ||||||
| const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc | const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc | ||||||
|  |  | ||||||
| // Merge takes a slice of zap segments and bit masks describing which | // Merge takes a slice of zap segments and bit masks describing which | ||||||
| @@ -36,12 +38,24 @@ const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc | |||||||
| // remaining data.  This new segment is built at the specified path, | // remaining data.  This new segment is built at the specified path, | ||||||
| // with the provided chunkFactor. | // with the provided chunkFactor. | ||||||
| func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, | func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, | ||||||
| 	chunkFactor uint32) ([][]uint64, error) { | 	chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( | ||||||
|  | 	[][]uint64, uint64, error) { | ||||||
|  | 	segmentBases := make([]*SegmentBase, len(segments)) | ||||||
|  | 	for segmenti, segment := range segments { | ||||||
|  | 		segmentBases[segmenti] = &segment.SegmentBase | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return MergeSegmentBases(segmentBases, drops, path, chunkFactor, closeCh, s) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func MergeSegmentBases(segmentBases []*SegmentBase, drops []*roaring.Bitmap, path string, | ||||||
|  | 	chunkFactor uint32, closeCh chan struct{}, s seg.StatsReporter) ( | ||||||
|  | 	[][]uint64, uint64, error) { | ||||||
| 	flag := os.O_RDWR | os.O_CREATE | 	flag := os.O_RDWR | os.O_CREATE | ||||||
|  |  | ||||||
| 	f, err := os.OpenFile(path, flag, 0600) | 	f, err := os.OpenFile(path, flag, 0600) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	cleanup := func() { | 	cleanup := func() { | ||||||
| @@ -49,54 +63,49 @@ func Merge(segments []*Segment, drops []*roaring.Bitmap, path string, | |||||||
| 		_ = os.Remove(path) | 		_ = os.Remove(path) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	segmentBases := make([]*SegmentBase, len(segments)) |  | ||||||
| 	for segmenti, segment := range segments { |  | ||||||
| 		segmentBases[segmenti] = &segment.SegmentBase |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// buffer the output | 	// buffer the output | ||||||
| 	br := bufio.NewWriter(f) | 	br := bufio.NewWriterSize(f, DefaultFileMergerBufferSize) | ||||||
|  |  | ||||||
| 	// wrap it for counting (tracking offsets) | 	// wrap it for counting (tracking offsets) | ||||||
| 	cr := NewCountHashWriter(br) | 	cr := NewCountHashWriterWithStatsReporter(br, s) | ||||||
|  |  | ||||||
| 	newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err := | 	newDocNums, numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset, _, _, _, err := | ||||||
| 		MergeToWriter(segmentBases, drops, chunkFactor, cr) | 		MergeToWriter(segmentBases, drops, chunkFactor, cr, closeCh) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		cleanup() | 		cleanup() | ||||||
| 		return nil, err | 		return nil, 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, | 	err = persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, | ||||||
| 		docValueOffset, chunkFactor, cr.Sum32(), cr) | 		docValueOffset, chunkFactor, cr.Sum32(), cr) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		cleanup() | 		cleanup() | ||||||
| 		return nil, err | 		return nil, 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err = br.Flush() | 	err = br.Flush() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		cleanup() | 		cleanup() | ||||||
| 		return nil, err | 		return nil, 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err = f.Sync() | 	err = f.Sync() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		cleanup() | 		cleanup() | ||||||
| 		return nil, err | 		return nil, 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err = f.Close() | 	err = f.Close() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		cleanup() | 		cleanup() | ||||||
| 		return nil, err | 		return nil, 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return newDocNums, nil | 	return newDocNums, uint64(cr.Count()), nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, | func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, | ||||||
| 	chunkFactor uint32, cr *CountHashWriter) ( | 	chunkFactor uint32, cr *CountHashWriter, closeCh chan struct{}) ( | ||||||
| 	newDocNums [][]uint64, | 	newDocNums [][]uint64, | ||||||
| 	numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, | 	numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset uint64, | ||||||
| 	dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16, | 	dictLocs []uint64, fieldsInv []string, fieldsMap map[string]uint16, | ||||||
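isClosed(closeCh) becomes the cancellation check threaded through this file; its body is not part of this diff. The conventional Go implementation of such a check is a non-blocking receive, sketched here as an assumption:

package main

import "fmt"

// isClosed reports whether closeCh has been closed, without blocking.
// This is the standard Go idiom; the actual zap helper is assumed to
// be equivalent, since its body is not shown in this diff.
func isClosed(closeCh chan struct{}) bool {
	select {
	case <-closeCh:
		return true
	default:
		return false
	}
}

func main() {
	ch := make(chan struct{})
	fmt.Println(isClosed(ch)) // false: the merge keeps going
	close(ch)
	fmt.Println(isClosed(ch)) // true: the merge aborts with seg.ErrClosed
}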
| @@ -108,15 +117,21 @@ func MergeToWriter(segments []*SegmentBase, drops []*roaring.Bitmap, | |||||||
| 	fieldsMap = mapFields(fieldsInv) | 	fieldsMap = mapFields(fieldsInv) | ||||||
|  |  | ||||||
| 	numDocs = computeNewDocCount(segments, drops) | 	numDocs = computeNewDocCount(segments, drops) | ||||||
|  |  | ||||||
|  | 	if isClosed(closeCh) { | ||||||
|  | 		return nil, 0, 0, 0, 0, nil, nil, nil, seg.ErrClosed | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	if numDocs > 0 { | 	if numDocs > 0 { | ||||||
| 		storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, | 		storedIndexOffset, newDocNums, err = mergeStoredAndRemap(segments, drops, | ||||||
| 			fieldsMap, fieldsInv, fieldsSame, numDocs, cr) | 			fieldsMap, fieldsInv, fieldsSame, numDocs, cr, closeCh) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, 0, 0, 0, 0, nil, nil, nil, err | 			return nil, 0, 0, 0, 0, nil, nil, nil, err | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		dictLocs, docValueOffset, err = persistMergedRest(segments, drops, fieldsInv, fieldsMap, | 		dictLocs, docValueOffset, err = persistMergedRest(segments, drops, | ||||||
| 			newDocNums, numDocs, chunkFactor, cr) | 			fieldsInv, fieldsMap, fieldsSame, | ||||||
|  | 			newDocNums, numDocs, chunkFactor, cr, closeCh) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, 0, 0, 0, 0, nil, nil, nil, err | 			return nil, 0, 0, 0, 0, nil, nil, nil, err | ||||||
| 		} | 		} | ||||||
| @@ -156,11 +171,10 @@ func computeNewDocCount(segments []*SegmentBase, drops []*roaring.Bitmap) uint64 | |||||||
| } | } | ||||||
|  |  | ||||||
| func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, | func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, | ||||||
| 	fieldsInv []string, fieldsMap map[string]uint16, newDocNumsIn [][]uint64, | 	fieldsInv []string, fieldsMap map[string]uint16, fieldsSame bool, | ||||||
| 	newSegDocCount uint64, chunkFactor uint32, | 	newDocNumsIn [][]uint64, newSegDocCount uint64, chunkFactor uint32, | ||||||
| 	w *CountHashWriter) ([]uint64, uint64, error) { | 	w *CountHashWriter, closeCh chan struct{}) ([]uint64, uint64, error) { | ||||||
|  |  | ||||||
| 	var bufReuse bytes.Buffer |  | ||||||
| 	var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) | 	var bufMaxVarintLen64 []byte = make([]byte, binary.MaxVarintLen64) | ||||||
| 	var bufLoc []uint64 | 	var bufLoc []uint64 | ||||||
|  |  | ||||||
| @@ -168,36 +182,38 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, | |||||||
| 	var postItr *PostingsIterator | 	var postItr *PostingsIterator | ||||||
|  |  | ||||||
| 	rv := make([]uint64, len(fieldsInv)) | 	rv := make([]uint64, len(fieldsInv)) | ||||||
| 	fieldDvLocs := make([]uint64, len(fieldsInv)) | 	fieldDvLocsStart := make([]uint64, len(fieldsInv)) | ||||||
|  | 	fieldDvLocsEnd := make([]uint64, len(fieldsInv)) | ||||||
|  |  | ||||||
| 	tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) | 	tfEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) | ||||||
| 	locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) | 	locEncoder := newChunkedIntCoder(uint64(chunkFactor), newSegDocCount-1) | ||||||
|  |  | ||||||
| 	// docTermMap is keyed by docNum, where the array impl provides |  | ||||||
| 	// better memory usage behavior than a sparse-friendlier hashmap |  | ||||||
| 	// for when docs have much structural similarity (i.e., every doc |  | ||||||
| 	// has a given field) |  | ||||||
| 	var docTermMap [][]byte |  | ||||||
|  |  | ||||||
| 	var vellumBuf bytes.Buffer | 	var vellumBuf bytes.Buffer | ||||||
|  |  | ||||||
| 	// for each field |  | ||||||
| 	for fieldID, fieldName := range fieldsInv { |  | ||||||
| 		if fieldID != 0 { |  | ||||||
| 			vellumBuf.Reset() |  | ||||||
| 		} |  | ||||||
| 	newVellum, err := vellum.New(&vellumBuf, nil) | 	newVellum, err := vellum.New(&vellumBuf, nil) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, 0, err | 		return nil, 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	newRoaring := roaring.NewBitmap() | ||||||
|  |  | ||||||
|  | 	// for each field | ||||||
|  | 	for fieldID, fieldName := range fieldsInv { | ||||||
|  |  | ||||||
| 		// collect FST iterators from all active segments for this field | 		// collect FST iterators from all active segments for this field | ||||||
| 		var newDocNums [][]uint64 | 		var newDocNums [][]uint64 | ||||||
| 		var drops []*roaring.Bitmap | 		var drops []*roaring.Bitmap | ||||||
| 		var dicts []*Dictionary | 		var dicts []*Dictionary | ||||||
| 		var itrs []vellum.Iterator | 		var itrs []vellum.Iterator | ||||||
|  |  | ||||||
|  | 		var segmentsInFocus []*SegmentBase | ||||||
|  |  | ||||||
| 		for segmentI, segment := range segments { | 		for segmentI, segment := range segments { | ||||||
|  |  | ||||||
|  | 			// check for closure in the meantime | ||||||
|  | 			if isClosed(closeCh) { | ||||||
|  | 				return nil, 0, seg.ErrClosed | ||||||
|  | 			} | ||||||
|  |  | ||||||
| 			dict, err2 := segment.dictionary(fieldName) | 			dict, err2 := segment.dictionary(fieldName) | ||||||
| 			if err2 != nil { | 			if err2 != nil { | ||||||
| 				return nil, 0, err2 | 				return nil, 0, err2 | ||||||
| @@ -209,89 +225,63 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, | |||||||
| 				} | 				} | ||||||
| 				if itr != nil { | 				if itr != nil { | ||||||
| 					newDocNums = append(newDocNums, newDocNumsIn[segmentI]) | 					newDocNums = append(newDocNums, newDocNumsIn[segmentI]) | ||||||
|  | 					if dropsIn[segmentI] != nil && !dropsIn[segmentI].IsEmpty() { | ||||||
| 						drops = append(drops, dropsIn[segmentI]) | 						drops = append(drops, dropsIn[segmentI]) | ||||||
|  | 					} else { | ||||||
|  | 						drops = append(drops, nil) | ||||||
|  | 					} | ||||||
| 					dicts = append(dicts, dict) | 					dicts = append(dicts, dict) | ||||||
| 					itrs = append(itrs, itr) | 					itrs = append(itrs, itr) | ||||||
|  | 					segmentsInFocus = append(segmentsInFocus, segment) | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		if uint64(cap(docTermMap)) < newSegDocCount { |  | ||||||
| 			docTermMap = make([][]byte, newSegDocCount) |  | ||||||
| 		} else { |  | ||||||
| 			docTermMap = docTermMap[0:newSegDocCount] |  | ||||||
| 			for docNum := range docTermMap { // reset the docTermMap |  | ||||||
| 				docTermMap[docNum] = docTermMap[docNum][:0] |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		var prevTerm []byte | 		var prevTerm []byte | ||||||
|  |  | ||||||
| 		newRoaring := roaring.NewBitmap() | 		newRoaring.Clear() | ||||||
| 		newRoaringLocs := roaring.NewBitmap() |  | ||||||
|  |  | ||||||
| 		finishTerm := func(term []byte) error { | 		var lastDocNum, lastFreq, lastNorm uint64 | ||||||
| 			if term == nil { |  | ||||||
| 				return nil | 		// determines whether to use "1-hit" encoding optimization | ||||||
|  | 		// when a term appears in only 1 doc, with no loc info, | ||||||
|  | 		// has freq of 1, and the docNum fits into 31-bits | ||||||
|  | 		use1HitEncoding := func(termCardinality uint64) (bool, uint64, uint64) { | ||||||
|  | 			if termCardinality == uint64(1) && locEncoder.FinalSize() <= 0 { | ||||||
|  | 				docNum := uint64(newRoaring.Minimum()) | ||||||
|  | 				if under32Bits(docNum) && docNum == lastDocNum && lastFreq == 1 { | ||||||
|  | 					return true, docNum, lastNorm | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 			return false, 0, 0 | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		finishTerm := func(term []byte) error { | ||||||
| 			tfEncoder.Close() | 			tfEncoder.Close() | ||||||
| 			locEncoder.Close() | 			locEncoder.Close() | ||||||
|  |  | ||||||
| 			if newRoaring.GetCardinality() > 0 { | 			postingsOffset, err := writePostings(newRoaring, | ||||||
| 				// this field/term actually has hits in the new segment, lets write it down | 				tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64) | ||||||
| 				// this field/term actually has hits in the new segment, let's write it down | 				tfEncoder, locEncoder, use1HitEncoding, w, bufMaxVarintLen64) | ||||||
| 				_, err := tfEncoder.Write(w) |  | ||||||
| 				if err != nil { |  | ||||||
| 					return err |  | ||||||
| 				} |  | ||||||
| 				locOffset := uint64(w.Count()) |  | ||||||
| 				_, err = locEncoder.Write(w) |  | ||||||
| 				if err != nil { |  | ||||||
| 					return err |  | ||||||
| 				} |  | ||||||
| 				postingLocOffset := uint64(w.Count()) |  | ||||||
| 				_, err = writeRoaringWithLen(newRoaringLocs, w, &bufReuse, bufMaxVarintLen64) |  | ||||||
| 				if err != nil { |  | ||||||
| 					return err |  | ||||||
| 				} |  | ||||||
| 				postingOffset := uint64(w.Count()) |  | ||||||
|  |  | ||||||
| 				// write out the start of the term info |  | ||||||
| 				n := binary.PutUvarint(bufMaxVarintLen64, freqOffset) |  | ||||||
| 				_, err = w.Write(bufMaxVarintLen64[:n]) |  | ||||||
| 				if err != nil { |  | ||||||
| 					return err |  | ||||||
| 				} |  | ||||||
| 				// write out the start of the loc info |  | ||||||
| 				n = binary.PutUvarint(bufMaxVarintLen64, locOffset) |  | ||||||
| 				_, err = w.Write(bufMaxVarintLen64[:n]) |  | ||||||
| 				if err != nil { |  | ||||||
| 					return err |  | ||||||
| 				} |  | ||||||
| 				// write out the start of the posting locs |  | ||||||
| 				n = binary.PutUvarint(bufMaxVarintLen64, postingLocOffset) |  | ||||||
| 				_, err = w.Write(bufMaxVarintLen64[:n]) |  | ||||||
| 				if err != nil { |  | ||||||
| 					return err |  | ||||||
| 				} |  | ||||||
| 				_, err = writeRoaringWithLen(newRoaring, w, &bufReuse, bufMaxVarintLen64) |  | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 				err = newVellum.Insert(term, postingOffset) | 			if postingsOffset > 0 { | ||||||
|  | 				err = newVellum.Insert(term, postingsOffset) | ||||||
| 				if err != nil { | 				if err != nil { | ||||||
| 					return err | 					return err | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			newRoaring = roaring.NewBitmap() | 			newRoaring.Clear() | ||||||
| 			newRoaringLocs = roaring.NewBitmap() |  | ||||||
|  |  | ||||||
| 			tfEncoder.Reset() | 			tfEncoder.Reset() | ||||||
| 			locEncoder.Reset() | 			locEncoder.Reset() | ||||||
|  |  | ||||||
|  | 			lastDocNum = 0 | ||||||
|  | 			lastFreq = 0 | ||||||
|  | 			lastNorm = 0 | ||||||
|  |  | ||||||
| 			return nil | 			return nil | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
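One detail worth unpacking from the hunk above: use1HitEncoding fires only for a term with a single surviving doc, no location data, frequency 1, and a doc number that fits in 31 bits, which allows the posting to be inlined where an offset would otherwise go. The width check under32Bits is not shown in this diff; a plausible sketch of it, under that assumption:

package main

import "fmt"

const mask31Bits = uint64(0x7fffffff)

// under32Bits is assumed behavior, not taken from this diff: the doc
// number must leave the high bits free so a 1-hit marker bit and the
// norm can share the encoded uint64 with it.
func under32Bits(x uint64) bool {
	return x <= mask31Bits
}

func main() {
	fmt.Println(under32Bits(42))              // true: eligible for 1-hit encoding
	fmt.Println(under32Bits(uint64(1) << 40)) // false: full postings are written
}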
| @@ -301,74 +291,47 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, | |||||||
| 			term, itrI, postingsOffset := enumerator.Current() | 			term, itrI, postingsOffset := enumerator.Current() | ||||||
|  |  | ||||||
| 			if !bytes.Equal(prevTerm, term) { | 			if !bytes.Equal(prevTerm, term) { | ||||||
|  | 				// check for closure in the meantime | ||||||
|  | 				if isClosed(closeCh) { | ||||||
|  | 					return nil, 0, seg.ErrClosed | ||||||
|  | 				} | ||||||
|  |  | ||||||
| 				// if the term changed, write out the info collected | 				// if the term changed, write out the info collected | ||||||
| 				// for the previous term | 				// for the previous term | ||||||
| 				err2 := finishTerm(prevTerm) | 				err = finishTerm(prevTerm) | ||||||
| 				if err2 != nil { | 				if err != nil { | ||||||
| 					return nil, 0, err2 | 					return nil, 0, err | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			var err2 error | 			postings, err = dicts[itrI].postingsListFromOffset( | ||||||
| 			postings, err2 = dicts[itrI].postingsListFromOffset( |  | ||||||
| 				postingsOffset, drops[itrI], postings) | 				postingsOffset, drops[itrI], postings) | ||||||
| 			if err2 != nil { |  | ||||||
| 				return nil, 0, err2 |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			newDocNumsI := newDocNums[itrI] |  | ||||||
|  |  | ||||||
| 			postItr = postings.iterator(postItr) |  | ||||||
| 			next, err2 := postItr.Next() |  | ||||||
| 			for next != nil && err2 == nil { |  | ||||||
| 				hitNewDocNum := newDocNumsI[next.Number()] |  | ||||||
| 				if hitNewDocNum == docDropped { |  | ||||||
| 					return nil, 0, fmt.Errorf("see hit with dropped doc num") |  | ||||||
| 				} |  | ||||||
| 				newRoaring.Add(uint32(hitNewDocNum)) |  | ||||||
| 				// encode norm bits |  | ||||||
| 				norm := next.Norm() |  | ||||||
| 				normBits := math.Float32bits(float32(norm)) |  | ||||||
| 				err = tfEncoder.Add(hitNewDocNum, next.Frequency(), uint64(normBits)) |  | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, 0, err | 				return nil, 0, err | ||||||
| 			} | 			} | ||||||
| 				locs := next.Locations() |  | ||||||
| 				if len(locs) > 0 { | 			postItr = postings.iterator(true, true, true, postItr) | ||||||
| 					newRoaringLocs.Add(uint32(hitNewDocNum)) |  | ||||||
| 					for _, loc := range locs { | 			if fieldsSame { | ||||||
| 						if cap(bufLoc) < 5+len(loc.ArrayPositions()) { | 				// can optimize by copying freq/norm/loc bytes directly | ||||||
| 							bufLoc = make([]uint64, 0, 5+len(loc.ArrayPositions())) | 				lastDocNum, lastFreq, lastNorm, err = mergeTermFreqNormLocsByCopying( | ||||||
|  | 					term, postItr, newDocNums[itrI], newRoaring, | ||||||
|  | 					tfEncoder, locEncoder) | ||||||
|  | 			} else { | ||||||
|  | 				lastDocNum, lastFreq, lastNorm, bufLoc, err = mergeTermFreqNormLocs( | ||||||
|  | 					fieldsMap, term, postItr, newDocNums[itrI], newRoaring, | ||||||
|  | 					tfEncoder, locEncoder, bufLoc) | ||||||
| 			} | 			} | ||||||
| 						args := bufLoc[0:5] |  | ||||||
| 						args[0] = uint64(fieldsMap[loc.Field()] - 1) |  | ||||||
| 						args[1] = loc.Pos() |  | ||||||
| 						args[2] = loc.Start() |  | ||||||
| 						args[3] = loc.End() |  | ||||||
| 						args[4] = uint64(len(loc.ArrayPositions())) |  | ||||||
| 						args = append(args, loc.ArrayPositions()...) |  | ||||||
| 						err = locEncoder.Add(hitNewDocNum, args...) |  | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, 0, err | 				return nil, 0, err | ||||||
| 			} | 			} | ||||||
| 					} |  | ||||||
| 				} |  | ||||||
|  |  | ||||||
| 				docTermMap[hitNewDocNum] = |  | ||||||
| 					append(append(docTermMap[hitNewDocNum], term...), termSeparator) |  | ||||||
|  |  | ||||||
| 				next, err2 = postItr.Next() |  | ||||||
| 			} |  | ||||||
| 			if err2 != nil { |  | ||||||
| 				return nil, 0, err2 |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem | 			prevTerm = prevTerm[:0] // copy to prevTerm in case Next() reuses term mem | ||||||
| 			prevTerm = append(prevTerm, term...) | 			prevTerm = append(prevTerm, term...) | ||||||
|  |  | ||||||
| 			err = enumerator.Next() | 			err = enumerator.Next() | ||||||
| 		} | 		} | ||||||
| 		if err != nil && err != vellum.ErrIteratorDone { | 		if err != vellum.ErrIteratorDone { | ||||||
| 			return nil, 0, err | 			return nil, 0, err | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| @@ -400,26 +363,63 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, | |||||||
|  |  | ||||||
| 		rv[fieldID] = dictOffset | 		rv[fieldID] = dictOffset | ||||||
|  |  | ||||||
|  | 		// get the field doc value offset (start) | ||||||
|  | 		fieldDvLocsStart[fieldID] = uint64(w.Count()) | ||||||
|  |  | ||||||
| 		// update the field doc values | 		// update the field doc values | ||||||
| 		fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1) | 		fdvEncoder := newChunkedContentCoder(uint64(chunkFactor), newSegDocCount-1, w, true) | ||||||
| 		for docNum, docTerms := range docTermMap { |  | ||||||
| 			if len(docTerms) > 0 { | 		fdvReadersAvailable := false | ||||||
| 				err = fdvEncoder.Add(uint64(docNum), docTerms) | 		var dvIterClone *docValueReader | ||||||
|  | 		for segmentI, segment := range segmentsInFocus { | ||||||
|  | 			// check for closure in the meantime | ||||||
|  | 			if isClosed(closeCh) { | ||||||
|  | 				return nil, 0, seg.ErrClosed | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			fieldIDPlus1 := uint16(segment.fieldsMap[fieldName]) | ||||||
|  | 			if dvIter, exists := segment.fieldDvReaders[fieldIDPlus1-1]; exists && | ||||||
|  | 				dvIter != nil { | ||||||
|  | 				fdvReadersAvailable = true | ||||||
|  | 				dvIterClone = dvIter.cloneInto(dvIterClone) | ||||||
|  | 				err = dvIterClone.iterateAllDocValues(segment, func(docNum uint64, terms []byte) error { | ||||||
|  | 					if newDocNums[segmentI][docNum] == docDropped { | ||||||
|  | 						return nil | ||||||
|  | 					} | ||||||
|  | 					err := fdvEncoder.Add(newDocNums[segmentI][docNum], terms) | ||||||
|  | 					if err != nil { | ||||||
|  | 						return err | ||||||
|  | 					} | ||||||
|  | 					return nil | ||||||
|  | 				}) | ||||||
| 				if err != nil { | 				if err != nil { | ||||||
| 					return nil, 0, err | 					return nil, 0, err | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		if fdvReadersAvailable { | ||||||
| 			err = fdvEncoder.Close() | 			err = fdvEncoder.Close() | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, 0, err | 				return nil, 0, err | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 		// get the field doc value offset |  | ||||||
| 		fieldDvLocs[fieldID] = uint64(w.Count()) |  | ||||||
|  |  | ||||||
| 			// persist the doc value details for this field | 			// persist the doc value details for this field | ||||||
| 		_, err = fdvEncoder.Write(w) | 			_, err = fdvEncoder.Write() | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, 0, err | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			// get the field doc value offset (end) | ||||||
|  | 			fieldDvLocsEnd[fieldID] = uint64(w.Count()) | ||||||
|  | 		} else { | ||||||
|  | 			fieldDvLocsStart[fieldID] = fieldNotUninverted | ||||||
|  | 			fieldDvLocsEnd[fieldID] = fieldNotUninverted | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// reset vellum buffer and vellum builder | ||||||
|  | 		vellumBuf.Reset() | ||||||
|  | 		err = newVellum.Reset(&vellumBuf) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, 0, err | 			return nil, 0, err | ||||||
| 		} | 		} | ||||||
| @@ -428,38 +428,210 @@ func persistMergedRest(segments []*SegmentBase, dropsIn []*roaring.Bitmap, | |||||||
| 	fieldDvLocsOffset := uint64(w.Count()) | 	fieldDvLocsOffset := uint64(w.Count()) | ||||||
|  |  | ||||||
| 	buf := bufMaxVarintLen64 | 	buf := bufMaxVarintLen64 | ||||||
| 	for _, offset := range fieldDvLocs { | 	for i := 0; i < len(fieldDvLocsStart); i++ { | ||||||
| 		n := binary.PutUvarint(buf, uint64(offset)) | 		n := binary.PutUvarint(buf, fieldDvLocsStart[i]) | ||||||
| 		_, err := w.Write(buf[:n]) | 		_, err := w.Write(buf[:n]) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, 0, err | 			return nil, 0, err | ||||||
| 		} | 		} | ||||||
|  | 		n = binary.PutUvarint(buf, fieldDvLocsEnd[i]) | ||||||
|  | 		_, err = w.Write(buf[:n]) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, 0, err | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return rv, fieldDvLocsOffset, nil | 	return rv, fieldDvLocsOffset, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func mergeTermFreqNormLocs(fieldsMap map[string]uint16, term []byte, postItr *PostingsIterator, | ||||||
|  | 	newDocNums []uint64, newRoaring *roaring.Bitmap, | ||||||
|  | 	tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder, bufLoc []uint64) ( | ||||||
|  | 	lastDocNum uint64, lastFreq uint64, lastNorm uint64, bufLocOut []uint64, err error) { | ||||||
|  | 	next, err := postItr.Next() | ||||||
|  | 	for next != nil && err == nil { | ||||||
|  | 		hitNewDocNum := newDocNums[next.Number()] | ||||||
|  | 		if hitNewDocNum == docDropped { | ||||||
|  | 			return 0, 0, 0, nil, fmt.Errorf("see hit with dropped docNum") | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		newRoaring.Add(uint32(hitNewDocNum)) | ||||||
|  |  | ||||||
|  | 		nextFreq := next.Frequency() | ||||||
|  | 		nextNorm := uint64(math.Float32bits(float32(next.Norm()))) | ||||||
|  |  | ||||||
|  | 		locs := next.Locations() | ||||||
|  |  | ||||||
|  | 		err = tfEncoder.Add(hitNewDocNum, | ||||||
|  | 			encodeFreqHasLocs(nextFreq, len(locs) > 0), nextNorm) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, 0, 0, nil, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if len(locs) > 0 { | ||||||
|  | 			numBytesLocs := 0 | ||||||
|  | 			for _, loc := range locs { | ||||||
|  | 				ap := loc.ArrayPositions() | ||||||
|  | 				numBytesLocs += totalUvarintBytes(uint64(fieldsMap[loc.Field()]-1), | ||||||
|  | 					loc.Pos(), loc.Start(), loc.End(), uint64(len(ap)), ap) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			err = locEncoder.Add(hitNewDocNum, uint64(numBytesLocs)) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return 0, 0, 0, nil, err | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			for _, loc := range locs { | ||||||
|  | 				ap := loc.ArrayPositions() | ||||||
|  | 				if cap(bufLoc) < 5+len(ap) { | ||||||
|  | 					bufLoc = make([]uint64, 0, 5+len(ap)) | ||||||
|  | 				} | ||||||
|  | 				args := bufLoc[0:5] | ||||||
|  | 				args[0] = uint64(fieldsMap[loc.Field()] - 1) | ||||||
|  | 				args[1] = loc.Pos() | ||||||
|  | 				args[2] = loc.Start() | ||||||
|  | 				args[3] = loc.End() | ||||||
|  | 				args[4] = uint64(len(ap)) | ||||||
|  | 				args = append(args, ap...) | ||||||
|  | 				err = locEncoder.Add(hitNewDocNum, args...) | ||||||
|  | 				if err != nil { | ||||||
|  | 					return 0, 0, 0, nil, err | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		lastDocNum = hitNewDocNum | ||||||
|  | 		lastFreq = nextFreq | ||||||
|  | 		lastNorm = nextNorm | ||||||
|  |  | ||||||
|  | 		next, err = postItr.Next() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return lastDocNum, lastFreq, lastNorm, bufLoc, err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func mergeTermFreqNormLocsByCopying(term []byte, postItr *PostingsIterator, | ||||||
|  | 	newDocNums []uint64, newRoaring *roaring.Bitmap, | ||||||
|  | 	tfEncoder *chunkedIntCoder, locEncoder *chunkedIntCoder) ( | ||||||
|  | 	lastDocNum uint64, lastFreq uint64, lastNorm uint64, err error) { | ||||||
|  | 	nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err := | ||||||
|  | 		postItr.nextBytes() | ||||||
|  | 	for err == nil && len(nextFreqNormBytes) > 0 { | ||||||
|  | 		hitNewDocNum := newDocNums[nextDocNum] | ||||||
|  | 		if hitNewDocNum == docDropped { | ||||||
|  | 			return 0, 0, 0, fmt.Errorf("see hit with dropped doc num") | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		newRoaring.Add(uint32(hitNewDocNum)) | ||||||
|  | 		err = tfEncoder.AddBytes(hitNewDocNum, nextFreqNormBytes) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, 0, 0, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if len(nextLocBytes) > 0 { | ||||||
|  | 			err = locEncoder.AddBytes(hitNewDocNum, nextLocBytes) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return 0, 0, 0, err | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		lastDocNum = hitNewDocNum | ||||||
|  | 		lastFreq = nextFreq | ||||||
|  | 		lastNorm = nextNorm | ||||||
|  |  | ||||||
|  | 		nextDocNum, nextFreq, nextNorm, nextFreqNormBytes, nextLocBytes, err = | ||||||
|  | 			postItr.nextBytes() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return lastDocNum, lastFreq, lastNorm, err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func writePostings(postings *roaring.Bitmap, tfEncoder, locEncoder *chunkedIntCoder, | ||||||
|  | 	use1HitEncoding func(uint64) (bool, uint64, uint64), | ||||||
|  | 	w *CountHashWriter, bufMaxVarintLen64 []byte) ( | ||||||
|  | 	offset uint64, err error) { | ||||||
|  | 	termCardinality := postings.GetCardinality() | ||||||
|  | 	if termCardinality <= 0 { | ||||||
|  | 		return 0, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if use1HitEncoding != nil { | ||||||
|  | 		encodeAs1Hit, docNum1Hit, normBits1Hit := use1HitEncoding(termCardinality) | ||||||
|  | 		if encodeAs1Hit { | ||||||
|  | 			return FSTValEncode1Hit(docNum1Hit, normBits1Hit), nil | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	tfOffset := uint64(w.Count()) | ||||||
|  | 	_, err = tfEncoder.Write(w) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	locOffset := uint64(w.Count()) | ||||||
|  | 	_, err = locEncoder.Write(w) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	postingsOffset := uint64(w.Count()) | ||||||
|  |  | ||||||
|  | 	n := binary.PutUvarint(bufMaxVarintLen64, tfOffset) | ||||||
|  | 	_, err = w.Write(bufMaxVarintLen64[:n]) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	n = binary.PutUvarint(bufMaxVarintLen64, locOffset) | ||||||
|  | 	_, err = w.Write(bufMaxVarintLen64[:n]) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	_, err = writeRoaringWithLen(postings, w, bufMaxVarintLen64) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return postingsOffset, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type varintEncoder func(uint64) (int, error) | ||||||
|  |  | ||||||
| func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, | func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, | ||||||
| 	fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, | 	fieldsMap map[string]uint16, fieldsInv []string, fieldsSame bool, newSegDocCount uint64, | ||||||
| 	w *CountHashWriter) (uint64, [][]uint64, error) { | 	w *CountHashWriter, closeCh chan struct{}) (uint64, [][]uint64, error) { | ||||||
| 	var rv [][]uint64 // The remapped or newDocNums for each segment. | 	var rv [][]uint64 // The remapped or newDocNums for each segment. | ||||||
|  |  | ||||||
| 	var newDocNum uint64 | 	var newDocNum uint64 | ||||||
|  |  | ||||||
| 	var curr int | 	var curr int | ||||||
| 	var metaBuf bytes.Buffer |  | ||||||
| 	var data, compressed []byte | 	var data, compressed []byte | ||||||
|  | 	var metaBuf bytes.Buffer | ||||||
| 	metaEncoder := govarint.NewU64Base128Encoder(&metaBuf) | 	varBuf := make([]byte, binary.MaxVarintLen64) | ||||||
|  | 	metaEncode := func(val uint64) (int, error) { | ||||||
|  | 		wb := binary.PutUvarint(varBuf, val) | ||||||
|  | 		return metaBuf.Write(varBuf[:wb]) | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	vals := make([][][]byte, len(fieldsInv)) | 	vals := make([][][]byte, len(fieldsInv)) | ||||||
| 	typs := make([][]byte, len(fieldsInv)) | 	typs := make([][]byte, len(fieldsInv)) | ||||||
| 	poss := make([][][]uint64, len(fieldsInv)) | 	poss := make([][][]uint64, len(fieldsInv)) | ||||||
|  |  | ||||||
|  | 	var posBuf []uint64 | ||||||
|  |  | ||||||
| 	docNumOffsets := make([]uint64, newSegDocCount) | 	docNumOffsets := make([]uint64, newSegDocCount) | ||||||
|  |  | ||||||
|  | 	vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) | ||||||
|  | 	defer visitDocumentCtxPool.Put(vdc) | ||||||
|  |  | ||||||
| 	// for each segment | 	// for each segment | ||||||
| 	for segI, segment := range segments { | 	for segI, segment := range segments { | ||||||
|  | 		// check for closure in the meantime | ||||||
|  | 		if isClosed(closeCh) { | ||||||
|  | 			return 0, nil, seg.ErrClosed | ||||||
|  | 		} | ||||||
|  |  | ||||||
| 		segNewDocNums := make([]uint64, segment.numDocs) | 		segNewDocNums := make([]uint64, segment.numDocs) | ||||||
|  |  | ||||||
| 		dropsI := drops[segI] | 		dropsI := drops[segI] | ||||||
| @@ -495,7 +667,8 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, | |||||||
| 			curr = 0 | 			curr = 0 | ||||||
| 			metaBuf.Reset() | 			metaBuf.Reset() | ||||||
| 			data = data[:0] | 			data = data[:0] | ||||||
| 			compressed = compressed[:0] |  | ||||||
|  | 			posTemp := posBuf | ||||||
|  |  | ||||||
| 			// collect all the data | 			// collect all the data | ||||||
| 			for i := 0; i < len(fieldsInv); i++ { | 			for i := 0; i < len(fieldsInv); i++ { | ||||||
| @@ -503,42 +676,63 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, | |||||||
| 				typs[i] = typs[i][:0] | 				typs[i] = typs[i][:0] | ||||||
| 				poss[i] = poss[i][:0] | 				poss[i] = poss[i][:0] | ||||||
| 			} | 			} | ||||||
| 			err := segment.VisitDocument(docNum, func(field string, typ byte, value []byte, pos []uint64) bool { | 			err := segment.visitDocument(vdc, docNum, func(field string, typ byte, value []byte, pos []uint64) bool { | ||||||
| 				fieldID := int(fieldsMap[field]) - 1 | 				fieldID := int(fieldsMap[field]) - 1 | ||||||
| 				vals[fieldID] = append(vals[fieldID], value) | 				vals[fieldID] = append(vals[fieldID], value) | ||||||
| 				typs[fieldID] = append(typs[fieldID], typ) | 				typs[fieldID] = append(typs[fieldID], typ) | ||||||
| 				poss[fieldID] = append(poss[fieldID], pos) |  | ||||||
|  | 				// copy array positions to preserve them beyond the scope of this callback | ||||||
|  | 				var curPos []uint64 | ||||||
|  | 				if len(pos) > 0 { | ||||||
|  | 					if cap(posTemp) < len(pos) { | ||||||
|  | 						posBuf = make([]uint64, len(pos)*len(fieldsInv)) | ||||||
|  | 						posTemp = posBuf | ||||||
|  | 					} | ||||||
|  | 					curPos = posTemp[0:len(pos)] | ||||||
|  | 					copy(curPos, pos) | ||||||
|  | 					posTemp = posTemp[len(pos):] | ||||||
|  | 				} | ||||||
|  | 				poss[fieldID] = append(poss[fieldID], curPos) | ||||||
|  |  | ||||||
| 				return true | 				return true | ||||||
| 			}) | 			}) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return 0, nil, err | 				return 0, nil, err | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			// now walk the fields in order | 			// _id field special case optimizes ExternalID() lookups | ||||||
| 			for fieldID := range fieldsInv { | 			idFieldVal := vals[uint16(0)][0] | ||||||
| 				storedFieldValues := vals[int(fieldID)] | 			_, err = metaEncode(uint64(len(idFieldVal))) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return 0, nil, err | ||||||
|  | 			} | ||||||
|  |  | ||||||
| 				stf := typs[int(fieldID)] | 			// now walk the non-"_id" fields in order | ||||||
| 				spf := poss[int(fieldID)] | 			for fieldID := 1; fieldID < len(fieldsInv); fieldID++ { | ||||||
|  | 				storedFieldValues := vals[fieldID] | ||||||
|  |  | ||||||
|  | 				stf := typs[fieldID] | ||||||
|  | 				spf := poss[fieldID] | ||||||
|  |  | ||||||
| 				var err2 error | 				var err2 error | ||||||
| 				curr, data, err2 = persistStoredFieldValues(fieldID, | 				curr, data, err2 = persistStoredFieldValues(fieldID, | ||||||
| 					storedFieldValues, stf, spf, curr, metaEncoder, data) | 					storedFieldValues, stf, spf, curr, metaEncode, data) | ||||||
| 				if err2 != nil { | 				if err2 != nil { | ||||||
| 					return 0, nil, err2 | 					return 0, nil, err2 | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			metaEncoder.Close() |  | ||||||
| 			metaBytes := metaBuf.Bytes() | 			metaBytes := metaBuf.Bytes() | ||||||
|  |  | ||||||
| 			compressed = snappy.Encode(compressed, data) | 			compressed = snappy.Encode(compressed[:cap(compressed)], data) | ||||||
|  |  | ||||||
| 			// record where we're about to start writing | 			// record where we're about to start writing | ||||||
| 			docNumOffsets[newDocNum] = uint64(w.Count()) | 			docNumOffsets[newDocNum] = uint64(w.Count()) | ||||||
|  |  | ||||||
| 			// write out the meta len and compressed data len | 			// write out the meta len and compressed data len | ||||||
| 			_, err = writeUvarints(w, uint64(len(metaBytes)), uint64(len(compressed))) | 			_, err = writeUvarints(w, | ||||||
|  | 				uint64(len(metaBytes)), | ||||||
|  | 				uint64(len(idFieldVal)+len(compressed))) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return 0, nil, err | 				return 0, nil, err | ||||||
| 			} | 			} | ||||||
| @@ -547,6 +741,11 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap, | |||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return 0, nil, err | 				return 0, nil, err | ||||||
| 			} | 			} | ||||||
|  | 			// now write the _id field val (counted as part of the 'compressed' data) | ||||||
|  | 			_, err = w.Write(idFieldVal) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return 0, nil, err | ||||||
|  | 			} | ||||||
| 			// now write the compressed data | 			// now write the compressed data | ||||||
| 			_, err = w.Write(compressed) | 			_, err = w.Write(compressed) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| @@ -644,3 +843,12 @@ func mergeFields(segments []*SegmentBase) (bool, []string) { | |||||||
|  |  | ||||||
| 	return fieldsSame, rv | 	return fieldsSame, rv | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func isClosed(closeCh chan struct{}) bool { | ||||||
|  | 	select { | ||||||
|  | 	case <-closeCh: | ||||||
|  | 		return true | ||||||
|  | 	default: | ||||||
|  | 		return false | ||||||
|  | 	} | ||||||
|  | } | ||||||
826 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go (generated, vendored, new file)
							| @@ -0,0 +1,826 @@ | |||||||
|  | //  Copyright (c) 2018 Couchbase, Inc. | ||||||
|  | // | ||||||
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | // you may not use this file except in compliance with the License. | ||||||
|  | // You may obtain a copy of the License at | ||||||
|  | // | ||||||
|  | // 		http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  | // | ||||||
|  | // Unless required by applicable law or agreed to in writing, software | ||||||
|  | // distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | // See the License for the specific language governing permissions and | ||||||
|  | // limitations under the License. | ||||||
|  |  | ||||||
|  | package zap | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"bytes" | ||||||
|  | 	"encoding/binary" | ||||||
|  | 	"math" | ||||||
|  | 	"sort" | ||||||
|  | 	"sync" | ||||||
|  |  | ||||||
|  | 	"github.com/RoaringBitmap/roaring" | ||||||
|  | 	"github.com/blevesearch/bleve/analysis" | ||||||
|  | 	"github.com/blevesearch/bleve/document" | ||||||
|  | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/couchbase/vellum" | ||||||
|  | 	"github.com/golang/snappy" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | var NewSegmentBufferNumResultsBump int = 100 | ||||||
|  | var NewSegmentBufferNumResultsFactor float64 = 1.0 | ||||||
|  | var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0 | ||||||
|  |  | ||||||
|  | // AnalysisResultsToSegmentBase produces an in-memory zap-encoded | ||||||
|  | // SegmentBase from analysis results | ||||||
|  | func AnalysisResultsToSegmentBase(results []*index.AnalysisResult, | ||||||
|  | 	chunkFactor uint32) (*SegmentBase, uint64, error) { | ||||||
|  | 	s := interimPool.Get().(*interim) | ||||||
|  |  | ||||||
|  | 	var br bytes.Buffer | ||||||
|  | 	if s.lastNumDocs > 0 { | ||||||
|  | 		// use previous results to initialize the buf with an estimated | ||||||
|  | 		// size, but note that the interim instance comes from a | ||||||
|  | 		// global interimPool, so multiple scorch instances indexing | ||||||
|  | 		// different docs can lead to low-quality estimates | ||||||
|  | 		estimateAvgBytesPerDoc := int(float64(s.lastOutSize/s.lastNumDocs) * | ||||||
|  | 			NewSegmentBufferAvgBytesPerDocFactor) | ||||||
|  | 		estimateNumResults := int(float64(len(results)+NewSegmentBufferNumResultsBump) * | ||||||
|  | 			NewSegmentBufferNumResultsFactor) | ||||||
|  | 		br.Grow(estimateAvgBytesPerDoc * estimateNumResults) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	s.results = results | ||||||
|  | 	s.chunkFactor = chunkFactor | ||||||
|  | 	s.w = NewCountHashWriter(&br) | ||||||
|  |  | ||||||
|  | 	storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, | ||||||
|  | 		err := s.convert() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, uint64(0), err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sb, err := InitSegmentBase(br.Bytes(), s.w.Sum32(), chunkFactor, | ||||||
|  | 		s.FieldsMap, s.FieldsInv, uint64(len(results)), | ||||||
|  | 		storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets) | ||||||
|  |  | ||||||
|  | 	if err == nil && s.reset() == nil { | ||||||
|  | 		s.lastNumDocs = len(results) | ||||||
|  | 		s.lastOutSize = len(br.Bytes()) | ||||||
|  | 		interimPool.Put(s) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sb, uint64(len(br.Bytes())), err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | var interimPool = sync.Pool{New: func() interface{} { return &interim{} }} | ||||||
|  |  | ||||||
|  | // interim holds temporary working data used while converting from | ||||||
|  | // analysis results to a zap-encoded segment | ||||||
|  | type interim struct { | ||||||
|  | 	results []*index.AnalysisResult | ||||||
|  |  | ||||||
|  | 	chunkFactor uint32 | ||||||
|  |  | ||||||
|  | 	w *CountHashWriter | ||||||
|  |  | ||||||
|  | 	// FieldsMap adds 1 to field id to avoid zero value issues | ||||||
|  | 	//  name -> field id + 1 | ||||||
|  | 	FieldsMap map[string]uint16 | ||||||
|  |  | ||||||
|  | 	// FieldsInv is the inverse of FieldsMap | ||||||
|  | 	//  field id -> name | ||||||
|  | 	FieldsInv []string | ||||||
|  |  | ||||||
|  | 	// Term dictionaries for each field | ||||||
|  | 	//  field id -> term -> postings list id + 1 | ||||||
|  | 	Dicts []map[string]uint64 | ||||||
|  |  | ||||||
|  | 	// Terms for each field, where terms are sorted ascending | ||||||
|  | 	//  field id -> []term | ||||||
|  | 	DictKeys [][]string | ||||||
|  |  | ||||||
|  | 	// Fields whose IncludeDocValues is true | ||||||
|  | 	//  field id -> bool | ||||||
|  | 	IncludeDocValues []bool | ||||||
|  |  | ||||||
|  | 	// postings id -> bitmap of docNums | ||||||
|  | 	Postings []*roaring.Bitmap | ||||||
|  |  | ||||||
|  | 	// postings id -> freq/norm's, one for each docNum in postings | ||||||
|  | 	FreqNorms        [][]interimFreqNorm | ||||||
|  | 	freqNormsBacking []interimFreqNorm | ||||||
|  |  | ||||||
|  | 	// postings id -> locs, one for each freq | ||||||
|  | 	Locs        [][]interimLoc | ||||||
|  | 	locsBacking []interimLoc | ||||||
|  |  | ||||||
|  | 	numTermsPerPostingsList []int // key is postings list id | ||||||
|  | 	numLocsPerPostingsList  []int // key is postings list id | ||||||
|  |  | ||||||
|  | 	builder    *vellum.Builder | ||||||
|  | 	builderBuf bytes.Buffer | ||||||
|  |  | ||||||
|  | 	metaBuf bytes.Buffer | ||||||
|  |  | ||||||
|  | 	tmp0 []byte | ||||||
|  | 	tmp1 []byte | ||||||
|  |  | ||||||
|  | 	lastNumDocs int | ||||||
|  | 	lastOutSize int | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) reset() (err error) { | ||||||
|  | 	s.results = nil | ||||||
|  | 	s.chunkFactor = 0 | ||||||
|  | 	s.w = nil | ||||||
|  | 	s.FieldsMap = nil | ||||||
|  | 	s.FieldsInv = nil | ||||||
|  | 	for i := range s.Dicts { | ||||||
|  | 		s.Dicts[i] = nil | ||||||
|  | 	} | ||||||
|  | 	s.Dicts = s.Dicts[:0] | ||||||
|  | 	for i := range s.DictKeys { | ||||||
|  | 		s.DictKeys[i] = s.DictKeys[i][:0] | ||||||
|  | 	} | ||||||
|  | 	s.DictKeys = s.DictKeys[:0] | ||||||
|  | 	for i := range s.IncludeDocValues { | ||||||
|  | 		s.IncludeDocValues[i] = false | ||||||
|  | 	} | ||||||
|  | 	s.IncludeDocValues = s.IncludeDocValues[:0] | ||||||
|  | 	for _, idn := range s.Postings { | ||||||
|  | 		idn.Clear() | ||||||
|  | 	} | ||||||
|  | 	s.Postings = s.Postings[:0] | ||||||
|  | 	s.FreqNorms = s.FreqNorms[:0] | ||||||
|  | 	for i := range s.freqNormsBacking { | ||||||
|  | 		s.freqNormsBacking[i] = interimFreqNorm{} | ||||||
|  | 	} | ||||||
|  | 	s.freqNormsBacking = s.freqNormsBacking[:0] | ||||||
|  | 	s.Locs = s.Locs[:0] | ||||||
|  | 	for i := range s.locsBacking { | ||||||
|  | 		s.locsBacking[i] = interimLoc{} | ||||||
|  | 	} | ||||||
|  | 	s.locsBacking = s.locsBacking[:0] | ||||||
|  | 	s.numTermsPerPostingsList = s.numTermsPerPostingsList[:0] | ||||||
|  | 	s.numLocsPerPostingsList = s.numLocsPerPostingsList[:0] | ||||||
|  | 	s.builderBuf.Reset() | ||||||
|  | 	if s.builder != nil { | ||||||
|  | 		err = s.builder.Reset(&s.builderBuf) | ||||||
|  | 	} | ||||||
|  | 	s.metaBuf.Reset() | ||||||
|  | 	s.tmp0 = s.tmp0[:0] | ||||||
|  | 	s.tmp1 = s.tmp1[:0] | ||||||
|  | 	s.lastNumDocs = 0 | ||||||
|  | 	s.lastOutSize = 0 | ||||||
|  |  | ||||||
|  | 	return err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) grabBuf(size int) []byte { | ||||||
|  | 	buf := s.tmp0 | ||||||
|  | 	if cap(buf) < size { | ||||||
|  | 		buf = make([]byte, size) | ||||||
|  | 		s.tmp0 = buf | ||||||
|  | 	} | ||||||
|  | 	return buf[0:size] | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type interimStoredField struct { | ||||||
|  | 	vals      [][]byte | ||||||
|  | 	typs      []byte | ||||||
|  | 	arrayposs [][]uint64 // array positions | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type interimFreqNorm struct { | ||||||
|  | 	freq    uint64 | ||||||
|  | 	norm    float32 | ||||||
|  | 	numLocs int | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type interimLoc struct { | ||||||
|  | 	fieldID   uint16 | ||||||
|  | 	pos       uint64 | ||||||
|  | 	start     uint64 | ||||||
|  | 	end       uint64 | ||||||
|  | 	arrayposs []uint64 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) convert() (uint64, uint64, uint64, []uint64, error) { | ||||||
|  | 	s.FieldsMap = map[string]uint16{} | ||||||
|  |  | ||||||
|  | 	s.getOrDefineField("_id") // _id field is fieldID 0 | ||||||
|  |  | ||||||
|  | 	for _, result := range s.results { | ||||||
|  | 		for _, field := range result.Document.CompositeFields { | ||||||
|  | 			s.getOrDefineField(field.Name()) | ||||||
|  | 		} | ||||||
|  | 		for _, field := range result.Document.Fields { | ||||||
|  | 			s.getOrDefineField(field.Name()) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sort.Strings(s.FieldsInv[1:]) // keep _id as first field | ||||||
|  |  | ||||||
|  | 	for fieldID, fieldName := range s.FieldsInv { | ||||||
|  | 		s.FieldsMap[fieldName] = uint16(fieldID + 1) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if cap(s.IncludeDocValues) >= len(s.FieldsInv) { | ||||||
|  | 		s.IncludeDocValues = s.IncludeDocValues[:len(s.FieldsInv)] | ||||||
|  | 	} else { | ||||||
|  | 		s.IncludeDocValues = make([]bool, len(s.FieldsInv)) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	s.prepareDicts() | ||||||
|  |  | ||||||
|  | 	for _, dict := range s.DictKeys { | ||||||
|  | 		sort.Strings(dict) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	s.processDocuments() | ||||||
|  |  | ||||||
|  | 	storedIndexOffset, err := s.writeStoredFields() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, 0, 0, nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var fdvIndexOffset uint64 | ||||||
|  | 	var dictOffsets []uint64 | ||||||
|  |  | ||||||
|  | 	if len(s.results) > 0 { | ||||||
|  | 		fdvIndexOffset, dictOffsets, err = s.writeDicts() | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, 0, 0, nil, err | ||||||
|  | 		} | ||||||
|  | 	} else { | ||||||
|  | 		dictOffsets = make([]uint64, len(s.FieldsInv)) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	fieldsIndexOffset, err := persistFields(s.FieldsInv, s.w, dictOffsets) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return 0, 0, 0, nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return storedIndexOffset, fieldsIndexOffset, fdvIndexOffset, dictOffsets, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) getOrDefineField(fieldName string) int { | ||||||
|  | 	fieldIDPlus1, exists := s.FieldsMap[fieldName] | ||||||
|  | 	if !exists { | ||||||
|  | 		fieldIDPlus1 = uint16(len(s.FieldsInv) + 1) | ||||||
|  | 		s.FieldsMap[fieldName] = fieldIDPlus1 | ||||||
|  | 		s.FieldsInv = append(s.FieldsInv, fieldName) | ||||||
|  |  | ||||||
|  | 		s.Dicts = append(s.Dicts, make(map[string]uint64)) | ||||||
|  |  | ||||||
|  | 		n := len(s.DictKeys) | ||||||
|  | 		if n < cap(s.DictKeys) { | ||||||
|  | 			s.DictKeys = s.DictKeys[:n+1] | ||||||
|  | 			s.DictKeys[n] = s.DictKeys[n][:0] | ||||||
|  | 		} else { | ||||||
|  | 			s.DictKeys = append(s.DictKeys, []string(nil)) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return int(fieldIDPlus1 - 1) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // fill Dicts and DictKeys from analysis results | ||||||
|  | func (s *interim) prepareDicts() { | ||||||
|  | 	var pidNext int | ||||||
|  |  | ||||||
|  | 	var totTFs int | ||||||
|  | 	var totLocs int | ||||||
|  |  | ||||||
|  | 	visitField := func(fieldID uint16, tfs analysis.TokenFrequencies) { | ||||||
|  | 		dict := s.Dicts[fieldID] | ||||||
|  | 		dictKeys := s.DictKeys[fieldID] | ||||||
|  |  | ||||||
|  | 		for term, tf := range tfs { | ||||||
|  | 			pidPlus1, exists := dict[term] | ||||||
|  | 			if !exists { | ||||||
|  | 				pidNext++ | ||||||
|  | 				pidPlus1 = uint64(pidNext) | ||||||
|  |  | ||||||
|  | 				dict[term] = pidPlus1 | ||||||
|  | 				dictKeys = append(dictKeys, term) | ||||||
|  |  | ||||||
|  | 				s.numTermsPerPostingsList = append(s.numTermsPerPostingsList, 0) | ||||||
|  | 				s.numLocsPerPostingsList = append(s.numLocsPerPostingsList, 0) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			pid := pidPlus1 - 1 | ||||||
|  |  | ||||||
|  | 			s.numTermsPerPostingsList[pid] += 1 | ||||||
|  | 			s.numLocsPerPostingsList[pid] += len(tf.Locations) | ||||||
|  |  | ||||||
|  | 			totLocs += len(tf.Locations) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		totTFs += len(tfs) | ||||||
|  |  | ||||||
|  | 		s.DictKeys[fieldID] = dictKeys | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, result := range s.results { | ||||||
|  | 		// walk each composite field | ||||||
|  | 		for _, field := range result.Document.CompositeFields { | ||||||
|  | 			fieldID := uint16(s.getOrDefineField(field.Name())) | ||||||
|  | 			_, tf := field.Analyze() | ||||||
|  | 			visitField(fieldID, tf) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// walk each field | ||||||
|  | 		for i, field := range result.Document.Fields { | ||||||
|  | 			fieldID := uint16(s.getOrDefineField(field.Name())) | ||||||
|  | 			tf := result.Analyzed[i] | ||||||
|  | 			visitField(fieldID, tf) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	numPostingsLists := pidNext | ||||||
|  |  | ||||||
|  | 	if cap(s.Postings) >= numPostingsLists { | ||||||
|  | 		s.Postings = s.Postings[:numPostingsLists] | ||||||
|  | 	} else { | ||||||
|  | 		postings := make([]*roaring.Bitmap, numPostingsLists) | ||||||
|  | 		copy(postings, s.Postings[:cap(s.Postings)]) | ||||||
|  | 		for i := 0; i < numPostingsLists; i++ { | ||||||
|  | 			if postings[i] == nil { | ||||||
|  | 				postings[i] = roaring.New() | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		s.Postings = postings | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if cap(s.FreqNorms) >= numPostingsLists { | ||||||
|  | 		s.FreqNorms = s.FreqNorms[:numPostingsLists] | ||||||
|  | 	} else { | ||||||
|  | 		s.FreqNorms = make([][]interimFreqNorm, numPostingsLists) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if cap(s.freqNormsBacking) >= totTFs { | ||||||
|  | 		s.freqNormsBacking = s.freqNormsBacking[:totTFs] | ||||||
|  | 	} else { | ||||||
|  | 		s.freqNormsBacking = make([]interimFreqNorm, totTFs) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	freqNormsBacking := s.freqNormsBacking | ||||||
|  | 	for pid, numTerms := range s.numTermsPerPostingsList { | ||||||
|  | 		s.FreqNorms[pid] = freqNormsBacking[0:0] | ||||||
|  | 		freqNormsBacking = freqNormsBacking[numTerms:] | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if cap(s.Locs) >= numPostingsLists { | ||||||
|  | 		s.Locs = s.Locs[:numPostingsLists] | ||||||
|  | 	} else { | ||||||
|  | 		s.Locs = make([][]interimLoc, numPostingsLists) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if cap(s.locsBacking) >= totLocs { | ||||||
|  | 		s.locsBacking = s.locsBacking[:totLocs] | ||||||
|  | 	} else { | ||||||
|  | 		s.locsBacking = make([]interimLoc, totLocs) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	locsBacking := s.locsBacking | ||||||
|  | 	for pid, numLocs := range s.numLocsPerPostingsList { | ||||||
|  | 		s.Locs[pid] = locsBacking[0:0] | ||||||
|  | 		locsBacking = locsBacking[numLocs:] | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) processDocuments() { | ||||||
|  | 	numFields := len(s.FieldsInv) | ||||||
|  | 	reuseFieldLens := make([]int, numFields) | ||||||
|  | 	reuseFieldTFs := make([]analysis.TokenFrequencies, numFields) | ||||||
|  |  | ||||||
|  | 	for docNum, result := range s.results { | ||||||
|  | 		for i := 0; i < numFields; i++ { // clear these for reuse | ||||||
|  | 			reuseFieldLens[i] = 0 | ||||||
|  | 			reuseFieldTFs[i] = nil | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		s.processDocument(uint64(docNum), result, | ||||||
|  | 			reuseFieldLens, reuseFieldTFs) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) processDocument(docNum uint64, | ||||||
|  | 	result *index.AnalysisResult, | ||||||
|  | 	fieldLens []int, fieldTFs []analysis.TokenFrequencies) { | ||||||
|  | 	visitField := func(fieldID uint16, fieldName string, | ||||||
|  | 		ln int, tf analysis.TokenFrequencies) { | ||||||
|  | 		fieldLens[fieldID] += ln | ||||||
|  |  | ||||||
|  | 		existingFreqs := fieldTFs[fieldID] | ||||||
|  | 		if existingFreqs != nil { | ||||||
|  | 			existingFreqs.MergeAll(fieldName, tf) | ||||||
|  | 		} else { | ||||||
|  | 			fieldTFs[fieldID] = tf | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// walk each composite field | ||||||
|  | 	for _, field := range result.Document.CompositeFields { | ||||||
|  | 		fieldID := uint16(s.getOrDefineField(field.Name())) | ||||||
|  | 		ln, tf := field.Analyze() | ||||||
|  | 		visitField(fieldID, field.Name(), ln, tf) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// walk each field | ||||||
|  | 	for i, field := range result.Document.Fields { | ||||||
|  | 		fieldID := uint16(s.getOrDefineField(field.Name())) | ||||||
|  | 		ln := result.Length[i] | ||||||
|  | 		tf := result.Analyzed[i] | ||||||
|  | 		visitField(fieldID, field.Name(), ln, tf) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// now that it's been rolled up into fieldTFs, walk that | ||||||
|  | 	for fieldID, tfs := range fieldTFs { | ||||||
|  | 		dict := s.Dicts[fieldID] | ||||||
|  | 		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID]))) | ||||||
|  |  | ||||||
|  | 		for term, tf := range tfs { | ||||||
|  | 			pid := dict[term] - 1 | ||||||
|  | 			bs := s.Postings[pid] | ||||||
|  | 			bs.Add(uint32(docNum)) | ||||||
|  |  | ||||||
|  | 			s.FreqNorms[pid] = append(s.FreqNorms[pid], | ||||||
|  | 				interimFreqNorm{ | ||||||
|  | 					freq:    uint64(tf.Frequency()), | ||||||
|  | 					norm:    norm, | ||||||
|  | 					numLocs: len(tf.Locations), | ||||||
|  | 				}) | ||||||
|  |  | ||||||
|  | 			if len(tf.Locations) > 0 { | ||||||
|  | 				locs := s.Locs[pid] | ||||||
|  |  | ||||||
|  | 				for _, loc := range tf.Locations { | ||||||
|  | 					var locf = uint16(fieldID) | ||||||
|  | 					if loc.Field != "" { | ||||||
|  | 						locf = uint16(s.getOrDefineField(loc.Field)) | ||||||
|  | 					} | ||||||
|  | 					var arrayposs []uint64 | ||||||
|  | 					if len(loc.ArrayPositions) > 0 { | ||||||
|  | 						arrayposs = loc.ArrayPositions | ||||||
|  | 					} | ||||||
|  | 					locs = append(locs, interimLoc{ | ||||||
|  | 						fieldID:   locf, | ||||||
|  | 						pos:       uint64(loc.Position), | ||||||
|  | 						start:     uint64(loc.Start), | ||||||
|  | 						end:       uint64(loc.End), | ||||||
|  | 						arrayposs: arrayposs, | ||||||
|  | 					}) | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				s.Locs[pid] = locs | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) writeStoredFields() ( | ||||||
|  | 	storedIndexOffset uint64, err error) { | ||||||
|  | 	varBuf := make([]byte, binary.MaxVarintLen64) | ||||||
|  | 	metaEncode := func(val uint64) (int, error) { | ||||||
|  | 		wb := binary.PutUvarint(varBuf, val) | ||||||
|  | 		return s.metaBuf.Write(varBuf[:wb]) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	data, compressed := s.tmp0[:0], s.tmp1[:0] | ||||||
|  | 	defer func() { s.tmp0, s.tmp1 = data, compressed }() | ||||||
|  |  | ||||||
|  | 	// keyed by docNum | ||||||
|  | 	docStoredOffsets := make([]uint64, len(s.results)) | ||||||
|  |  | ||||||
|  | 	// keyed by fieldID, for the current doc in the loop | ||||||
|  | 	docStoredFields := map[uint16]interimStoredField{} | ||||||
|  |  | ||||||
|  | 	for docNum, result := range s.results { | ||||||
|  | 		for fieldID := range docStoredFields { // reset for next doc | ||||||
|  | 			delete(docStoredFields, fieldID) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		for _, field := range result.Document.Fields { | ||||||
|  | 			fieldID := uint16(s.getOrDefineField(field.Name())) | ||||||
|  |  | ||||||
|  | 			opts := field.Options() | ||||||
|  |  | ||||||
|  | 			if opts.IsStored() { | ||||||
|  | 				isf := docStoredFields[fieldID] | ||||||
|  | 				isf.vals = append(isf.vals, field.Value()) | ||||||
|  | 				isf.typs = append(isf.typs, encodeFieldType(field)) | ||||||
|  | 				isf.arrayposs = append(isf.arrayposs, field.ArrayPositions()) | ||||||
|  | 				docStoredFields[fieldID] = isf | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if opts.IncludeDocValues() { | ||||||
|  | 				s.IncludeDocValues[fieldID] = true | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		var curr int | ||||||
|  |  | ||||||
|  | 		s.metaBuf.Reset() | ||||||
|  | 		data = data[:0] | ||||||
|  |  | ||||||
|  | 		// _id field special case optimizes ExternalID() lookups | ||||||
|  | 		idFieldVal := docStoredFields[uint16(0)].vals[0] | ||||||
|  | 		_, err = metaEncode(uint64(len(idFieldVal))) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// handle non-"_id" fields | ||||||
|  | 		for fieldID := 1; fieldID < len(s.FieldsInv); fieldID++ { | ||||||
|  | 			isf, exists := docStoredFields[uint16(fieldID)] | ||||||
|  | 			if exists { | ||||||
|  | 				curr, data, err = persistStoredFieldValues( | ||||||
|  | 					fieldID, isf.vals, isf.typs, isf.arrayposs, | ||||||
|  | 					curr, metaEncode, data) | ||||||
|  | 				if err != nil { | ||||||
|  | 					return 0, err | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		metaBytes := s.metaBuf.Bytes() | ||||||
|  |  | ||||||
|  | 		compressed = snappy.Encode(compressed[:cap(compressed)], data) | ||||||
|  |  | ||||||
|  | 		docStoredOffsets[docNum] = uint64(s.w.Count()) | ||||||
|  |  | ||||||
|  | 		_, err := writeUvarints(s.w, | ||||||
|  | 			uint64(len(metaBytes)), | ||||||
|  | 			uint64(len(idFieldVal)+len(compressed))) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		_, err = s.w.Write(metaBytes) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		_, err = s.w.Write(idFieldVal) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		_, err = s.w.Write(compressed) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	storedIndexOffset = uint64(s.w.Count()) | ||||||
|  |  | ||||||
|  | 	for _, docStoredOffset := range docStoredOffsets { | ||||||
|  | 		err = binary.Write(s.w, binary.BigEndian, docStoredOffset) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return storedIndexOffset, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *interim) writeDicts() (fdvIndexOffset uint64, dictOffsets []uint64, err error) { | ||||||
|  | 	dictOffsets = make([]uint64, len(s.FieldsInv)) | ||||||
|  |  | ||||||
|  | 	fdvOffsetsStart := make([]uint64, len(s.FieldsInv)) | ||||||
|  | 	fdvOffsetsEnd := make([]uint64, len(s.FieldsInv)) | ||||||
|  |  | ||||||
|  | 	buf := s.grabBuf(binary.MaxVarintLen64) | ||||||
|  |  | ||||||
|  | 	tfEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) | ||||||
|  | 	locEncoder := newChunkedIntCoder(uint64(s.chunkFactor), uint64(len(s.results)-1)) | ||||||
|  | 	fdvEncoder := newChunkedContentCoder(uint64(s.chunkFactor), uint64(len(s.results)-1), s.w, false) | ||||||
|  |  | ||||||
|  | 	var docTermMap [][]byte | ||||||
|  |  | ||||||
|  | 	if s.builder == nil { | ||||||
|  | 		s.builder, err = vellum.New(&s.builderBuf, nil) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for fieldID, terms := range s.DictKeys { | ||||||
|  | 		if cap(docTermMap) < len(s.results) { | ||||||
|  | 			docTermMap = make([][]byte, len(s.results)) | ||||||
|  | 		} else { | ||||||
|  | 			docTermMap = docTermMap[0:len(s.results)] | ||||||
|  | 			for docNum := range docTermMap { // reset the docTermMap | ||||||
|  | 				docTermMap[docNum] = docTermMap[docNum][:0] | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		dict := s.Dicts[fieldID] | ||||||
|  |  | ||||||
|  | 		for _, term := range terms { // terms are already sorted | ||||||
|  | 			pid := dict[term] - 1 | ||||||
|  |  | ||||||
|  | 			postingsBS := s.Postings[pid] | ||||||
|  |  | ||||||
|  | 			freqNorms := s.FreqNorms[pid] | ||||||
|  | 			freqNormOffset := 0 | ||||||
|  |  | ||||||
|  | 			locs := s.Locs[pid] | ||||||
|  | 			locOffset := 0 | ||||||
|  |  | ||||||
|  | 			postingsItr := postingsBS.Iterator() | ||||||
|  | 			for postingsItr.HasNext() { | ||||||
|  | 				docNum := uint64(postingsItr.Next()) | ||||||
|  |  | ||||||
|  | 				freqNorm := freqNorms[freqNormOffset] | ||||||
|  |  | ||||||
|  | 				err = tfEncoder.Add(docNum, | ||||||
|  | 					encodeFreqHasLocs(freqNorm.freq, freqNorm.numLocs > 0), | ||||||
|  | 					uint64(math.Float32bits(freqNorm.norm))) | ||||||
|  | 				if err != nil { | ||||||
|  | 					return 0, nil, err | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				if freqNorm.numLocs > 0 { | ||||||
|  | 					numBytesLocs := 0 | ||||||
|  | 					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { | ||||||
|  | 						numBytesLocs += totalUvarintBytes( | ||||||
|  | 							uint64(loc.fieldID), loc.pos, loc.start, loc.end, | ||||||
|  | 							uint64(len(loc.arrayposs)), loc.arrayposs) | ||||||
|  | 					} | ||||||
|  |  | ||||||
|  | 					err = locEncoder.Add(docNum, uint64(numBytesLocs)) | ||||||
|  | 					if err != nil { | ||||||
|  | 						return 0, nil, err | ||||||
|  | 					} | ||||||
|  |  | ||||||
|  | 					for _, loc := range locs[locOffset : locOffset+freqNorm.numLocs] { | ||||||
|  | 						err = locEncoder.Add(docNum, | ||||||
|  | 							uint64(loc.fieldID), loc.pos, loc.start, loc.end, | ||||||
|  | 							uint64(len(loc.arrayposs))) | ||||||
|  | 						if err != nil { | ||||||
|  | 							return 0, nil, err | ||||||
|  | 						} | ||||||
|  |  | ||||||
|  | 						err = locEncoder.Add(docNum, loc.arrayposs...) | ||||||
|  | 						if err != nil { | ||||||
|  | 							return 0, nil, err | ||||||
|  | 						} | ||||||
|  | 					} | ||||||
|  |  | ||||||
|  | 					locOffset += freqNorm.numLocs | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				freqNormOffset++ | ||||||
|  |  | ||||||
|  | 				docTermMap[docNum] = append( | ||||||
|  | 					append(docTermMap[docNum], term...), | ||||||
|  | 					termSeparator) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			tfEncoder.Close() | ||||||
|  | 			locEncoder.Close() | ||||||
|  |  | ||||||
|  | 			postingsOffset, err := | ||||||
|  | 				writePostings(postingsBS, tfEncoder, locEncoder, nil, s.w, buf) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return 0, nil, err | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if postingsOffset > uint64(0) { | ||||||
|  | 				err = s.builder.Insert([]byte(term), postingsOffset) | ||||||
|  | 				if err != nil { | ||||||
|  | 					return 0, nil, err | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			tfEncoder.Reset() | ||||||
|  | 			locEncoder.Reset() | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		err = s.builder.Close() | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, nil, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// record where this dictionary starts | ||||||
|  | 		dictOffsets[fieldID] = uint64(s.w.Count()) | ||||||
|  |  | ||||||
|  | 		vellumData := s.builderBuf.Bytes() | ||||||
|  |  | ||||||
|  | 		// write out the length of the vellum data | ||||||
|  | 		n := binary.PutUvarint(buf, uint64(len(vellumData))) | ||||||
|  | 		_, err = s.w.Write(buf[:n]) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, nil, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// write this vellum to disk | ||||||
|  | 		_, err = s.w.Write(vellumData) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, nil, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// reset vellum for reuse | ||||||
|  | 		s.builderBuf.Reset() | ||||||
|  |  | ||||||
|  | 		err = s.builder.Reset(&s.builderBuf) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, nil, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// write the field doc values | ||||||
|  | 		if s.IncludeDocValues[fieldID] { | ||||||
|  | 			for docNum, docTerms := range docTermMap { | ||||||
|  | 				if len(docTerms) > 0 { | ||||||
|  | 					err = fdvEncoder.Add(uint64(docNum), docTerms) | ||||||
|  | 					if err != nil { | ||||||
|  | 						return 0, nil, err | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 			err = fdvEncoder.Close() | ||||||
|  | 			if err != nil { | ||||||
|  | 				return 0, nil, err | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			fdvOffsetsStart[fieldID] = uint64(s.w.Count()) | ||||||
|  |  | ||||||
|  | 			_, err = fdvEncoder.Write() | ||||||
|  | 			if err != nil { | ||||||
|  | 				return 0, nil, err | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			fdvOffsetsEnd[fieldID] = uint64(s.w.Count()) | ||||||
|  |  | ||||||
|  | 			fdvEncoder.Reset() | ||||||
|  | 		} else { | ||||||
|  | 			fdvOffsetsStart[fieldID] = fieldNotUninverted | ||||||
|  | 			fdvOffsetsEnd[fieldID] = fieldNotUninverted | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	fdvIndexOffset = uint64(s.w.Count()) | ||||||
|  |  | ||||||
|  | 	for i := 0; i < len(fdvOffsetsStart); i++ { | ||||||
|  | 		n := binary.PutUvarint(buf, fdvOffsetsStart[i]) | ||||||
|  | 		_, err := s.w.Write(buf[:n]) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, nil, err | ||||||
|  | 		} | ||||||
|  | 		n = binary.PutUvarint(buf, fdvOffsetsEnd[i]) | ||||||
|  | 		_, err = s.w.Write(buf[:n]) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return 0, nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return fdvIndexOffset, dictOffsets, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func encodeFieldType(f document.Field) byte { | ||||||
|  | 	fieldType := byte('x') | ||||||
|  | 	switch f.(type) { | ||||||
|  | 	case *document.TextField: | ||||||
|  | 		fieldType = 't' | ||||||
|  | 	case *document.NumericField: | ||||||
|  | 		fieldType = 'n' | ||||||
|  | 	case *document.DateTimeField: | ||||||
|  | 		fieldType = 'd' | ||||||
|  | 	case *document.BooleanField: | ||||||
|  | 		fieldType = 'b' | ||||||
|  | 	case *document.GeoPointField: | ||||||
|  | 		fieldType = 'g' | ||||||
|  | 	case *document.CompositeField: | ||||||
|  | 		fieldType = 'c' | ||||||
|  | 	} | ||||||
|  | 	return fieldType | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // returns the total # of bytes needed to encode the given uint64's | ||||||
|  | // into binary.PutUVarint() encoding | ||||||
|  | func totalUvarintBytes(a, b, c, d, e uint64, more []uint64) (n int) { | ||||||
|  | 	n = numUvarintBytes(a) | ||||||
|  | 	n += numUvarintBytes(b) | ||||||
|  | 	n += numUvarintBytes(c) | ||||||
|  | 	n += numUvarintBytes(d) | ||||||
|  | 	n += numUvarintBytes(e) | ||||||
|  | 	for _, v := range more { | ||||||
|  | 		n += numUvarintBytes(v) | ||||||
|  | 	} | ||||||
|  | 	return n | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // returns # of bytes needed to encode x in binary.PutUvarint() encoding | ||||||
|  | func numUvarintBytes(x uint64) (n int) { | ||||||
|  | 	for x >= 0x80 { | ||||||
|  | 		x >>= 7 | ||||||
|  | 		n++ | ||||||
|  | 	} | ||||||
|  | 	return n + 1 | ||||||
|  | } | ||||||
							
								
								
									
789 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go (generated, vendored): file diff suppressed because it is too large
								
								
									
232 vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go (generated, vendored)
							| @@ -20,16 +20,24 @@ import ( | |||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"io" | 	"io" | ||||||
| 	"os" | 	"os" | ||||||
|  | 	"reflect" | ||||||
| 	"sync" | 	"sync" | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
| 	"github.com/Smerity/govarint" |  | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| 	"github.com/couchbase/vellum" | 	"github.com/couchbase/vellum" | ||||||
| 	mmap "github.com/edsrzf/mmap-go" | 	mmap "github.com/edsrzf/mmap-go" | ||||||
| 	"github.com/golang/snappy" | 	"github.com/golang/snappy" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeSegmentBase int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var sb SegmentBase | ||||||
|  | 	reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| // Open returns a zap impl of a segment | // Open returns a zap impl of a segment | ||||||
| func Open(path string) (segment.Segment, error) { | func Open(path string) (segment.Segment, error) { | ||||||
| 	f, err := os.Open(path) | 	f, err := os.Open(path) | ||||||
| @@ -47,13 +55,14 @@ func Open(path string) (segment.Segment, error) { | |||||||
| 		SegmentBase: SegmentBase{ | 		SegmentBase: SegmentBase{ | ||||||
| 			mem:            mm[0 : len(mm)-FooterSize], | 			mem:            mm[0 : len(mm)-FooterSize], | ||||||
| 			fieldsMap:      make(map[string]uint16), | 			fieldsMap:      make(map[string]uint16), | ||||||
| 			fieldDvIterMap: make(map[uint16]*docValueIterator), | 			fieldDvReaders: make(map[uint16]*docValueReader), | ||||||
| 		}, | 		}, | ||||||
| 		f:    f, | 		f:    f, | ||||||
| 		mm:   mm, | 		mm:   mm, | ||||||
| 		path: path, | 		path: path, | ||||||
| 		refs: 1, | 		refs: 1, | ||||||
| 	} | 	} | ||||||
|  | 	rv.SegmentBase.updateSize() | ||||||
|  |  | ||||||
| 	err = rv.loadConfig() | 	err = rv.loadConfig() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| @@ -67,7 +76,7 @@ func Open(path string) (segment.Segment, error) { | |||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err = rv.loadDvIterators() | 	err = rv.loadDvReaders() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		_ = rv.Close() | 		_ = rv.Close() | ||||||
| 		return nil, err | 		return nil, err | ||||||
| @@ -89,7 +98,39 @@ type SegmentBase struct { | |||||||
| 	fieldsIndexOffset uint64 | 	fieldsIndexOffset uint64 | ||||||
| 	docValueOffset    uint64 | 	docValueOffset    uint64 | ||||||
| 	dictLocs          []uint64 | 	dictLocs          []uint64 | ||||||
| 	fieldDvIterMap    map[uint16]*docValueIterator // naive chunk cache per field | 	fieldDvReaders    map[uint16]*docValueReader // naive chunk cache per field | ||||||
|  | 	fieldDvNames      []string                   // field names cached in fieldDvReaders | ||||||
|  | 	size              uint64 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (sb *SegmentBase) Size() int { | ||||||
|  | 	return int(sb.size) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (sb *SegmentBase) updateSize() { | ||||||
|  | 	sizeInBytes := reflectStaticSizeSegmentBase + | ||||||
|  | 		cap(sb.mem) | ||||||
|  |  | ||||||
|  | 	// fieldsMap | ||||||
|  | 	for k := range sb.fieldsMap { | ||||||
|  | 		sizeInBytes += (len(k) + size.SizeOfString) + size.SizeOfUint16 | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// fieldsInv, dictLocs | ||||||
|  | 	for _, entry := range sb.fieldsInv { | ||||||
|  | 		sizeInBytes += len(entry) + size.SizeOfString | ||||||
|  | 	} | ||||||
|  | 	sizeInBytes += len(sb.dictLocs) * size.SizeOfUint64 | ||||||
|  |  | ||||||
|  | 	// fieldDvReaders | ||||||
|  | 	for _, v := range sb.fieldDvReaders { | ||||||
|  | 		sizeInBytes += size.SizeOfUint16 + size.SizeOfPtr | ||||||
|  | 		if v != nil { | ||||||
|  | 			sizeInBytes += v.size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sb.size = uint64(sizeInBytes) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (sb *SegmentBase) AddRef()             {} | func (sb *SegmentBase) AddRef()             {} | ||||||
| @@ -111,56 +152,19 @@ type Segment struct { | |||||||
| 	refs int64 | 	refs int64 | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Segment) SizeInBytes() uint64 { | func (s *Segment) Size() int { | ||||||
| 	// 8 /* size of file pointer */ | 	// 8 /* size of file pointer */ | ||||||
| 	// 4 /* size of version -> uint32 */ | 	// 4 /* size of version -> uint32 */ | ||||||
| 	// 4 /* size of crc -> uint32 */ | 	// 4 /* size of crc -> uint32 */ | ||||||
| 	sizeOfUints := 16 | 	sizeOfUints := 16 | ||||||
|  |  | ||||||
| 	sizeInBytes := (len(s.path) + int(segment.SizeOfString)) + sizeOfUints | 	sizeInBytes := (len(s.path) + size.SizeOfString) + sizeOfUints | ||||||
|  |  | ||||||
| 	// mutex, refs -> int64 | 	// mutex, refs -> int64 | ||||||
| 	sizeInBytes += 16 | 	sizeInBytes += 16 | ||||||
|  |  | ||||||
| 	// do not include the mmap'ed part | 	// do not include the mmap'ed part | ||||||
| 	return uint64(sizeInBytes) + s.SegmentBase.SizeInBytes() - uint64(len(s.mem)) | 	return sizeInBytes + s.SegmentBase.Size() - cap(s.mem) | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *SegmentBase) SizeInBytes() uint64 { |  | ||||||
| 	// 4 /* size of memCRC -> uint32 */ |  | ||||||
| 	// 4 /* size of chunkFactor -> uint32 */ |  | ||||||
| 	// 8 /* size of numDocs -> uint64 */ |  | ||||||
| 	// 8 /* size of storedIndexOffset -> uint64 */ |  | ||||||
| 	// 8 /* size of fieldsIndexOffset -> uint64 */ |  | ||||||
| 	// 8 /* size of docValueOffset -> uint64 */ |  | ||||||
| 	sizeInBytes := 40 |  | ||||||
|  |  | ||||||
| 	sizeInBytes += len(s.mem) + int(segment.SizeOfSlice) |  | ||||||
|  |  | ||||||
| 	// fieldsMap |  | ||||||
| 	for k, _ := range s.fieldsMap { |  | ||||||
| 		sizeInBytes += (len(k) + int(segment.SizeOfString)) + 2 /* size of uint16 */ |  | ||||||
| 	} |  | ||||||
| 	sizeInBytes += int(segment.SizeOfMap) /* overhead from map */ |  | ||||||
|  |  | ||||||
| 	// fieldsInv, dictLocs |  | ||||||
| 	for _, entry := range s.fieldsInv { |  | ||||||
| 		sizeInBytes += (len(entry) + int(segment.SizeOfString)) |  | ||||||
| 	} |  | ||||||
| 	sizeInBytes += len(s.dictLocs) * 8          /* size of uint64 */ |  | ||||||
| 	sizeInBytes += int(segment.SizeOfSlice) * 3 /* overhead from slices */ |  | ||||||
|  |  | ||||||
| 	// fieldDvIterMap |  | ||||||
| 	sizeInBytes += len(s.fieldDvIterMap) * |  | ||||||
| 		int(segment.SizeOfPointer+2 /* size of uint16 */) |  | ||||||
| 	for _, entry := range s.fieldDvIterMap { |  | ||||||
| 		if entry != nil { |  | ||||||
| 			sizeInBytes += int(entry.sizeInBytes()) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	sizeInBytes += int(segment.SizeOfMap) |  | ||||||
|  |  | ||||||
| 	return uint64(sizeInBytes) |  | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Segment) AddRef() { | func (s *Segment) AddRef() { | ||||||
| @@ -185,7 +189,7 @@ func (s *Segment) loadConfig() error { | |||||||
|  |  | ||||||
| 	verOffset := crcOffset - 4 | 	verOffset := crcOffset - 4 | ||||||
| 	s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4]) | 	s.version = binary.BigEndian.Uint32(s.mm[verOffset : verOffset+4]) | ||||||
| 	if s.version != version { | 	if s.version != Version { | ||||||
| 		return fmt.Errorf("unsupported version %d", s.version) | 		return fmt.Errorf("unsupported version %d", s.version) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -207,7 +211,7 @@ func (s *Segment) loadConfig() error { | |||||||
| } | } | ||||||
|  |  | ||||||
| func (s *SegmentBase) loadFields() error { | func (s *SegmentBase) loadFields() error { | ||||||
| 	// NOTE for now we assume the fields index immediately preceeds | 	// NOTE for now we assume the fields index immediately precedes | ||||||
| 	// the footer, and if this changes, need to adjust accordingly (or | 	// the footer, and if this changes, need to adjust accordingly (or | ||||||
| 	// store explicit length), where s.mem was sliced from s.mm in Open(). | 	// store explicit length), where s.mem was sliced from s.mm in Open(). | ||||||
| 	fieldsIndexEnd := uint64(len(s.mem)) | 	fieldsIndexEnd := uint64(len(s.mem)) | ||||||
| @@ -262,6 +266,10 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { | |||||||
| 				if err != nil { | 				if err != nil { | ||||||
| 					return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) | 					return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err) | ||||||
| 				} | 				} | ||||||
|  | 				rv.fstReader, err = rv.fst.Reader() | ||||||
|  | 				if err != nil { | ||||||
|  | 					return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err) | ||||||
|  | 				} | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @@ -269,50 +277,90 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) { | |||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // visitDocumentCtx holds data structures that are reusable across | ||||||
|  | // multiple VisitDocument() calls to avoid memory allocations | ||||||
|  | type visitDocumentCtx struct { | ||||||
|  | 	buf      []byte | ||||||
|  | 	reader   bytes.Reader | ||||||
|  | 	arrayPos []uint64 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | var visitDocumentCtxPool = sync.Pool{ | ||||||
|  | 	New: func() interface{} { | ||||||
|  | 		reuse := &visitDocumentCtx{} | ||||||
|  | 		return reuse | ||||||
|  | 	}, | ||||||
|  | } | ||||||
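visitDocumentCtxPool lets concurrent VisitDocument calls borrow and return scratch state instead of allocating it on every call. A self-contained sketch of the same sync.Pool pattern, with illustrative names:

package main

import (
	"fmt"
	"sync"
)

type scratch struct {
	buf []byte
}

var scratchPool = sync.Pool{
	New: func() interface{} { return &scratch{} },
}

func visit(doc []byte) int {
	s := scratchPool.Get().(*scratch)
	defer scratchPool.Put(s)

	// Reuse the buffer's capacity across calls; only grow when needed.
	s.buf = append(s.buf[:0], doc...)
	return len(s.buf)
}

func main() {
	fmt.Println(visit([]byte("hello")))        // 5
	fmt.Println(visit([]byte("hello, again"))) // 12, likely reusing the first buffer
}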
|  |  | ||||||
| // VisitDocument invokes the DocumentFieldValueVisitor for each stored field | // VisitDocument invokes the DocumentFieldValueVisitor for each stored field | ||||||
| // for the specified doc number | // for the specified doc number | ||||||
| func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { | func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error { | ||||||
|  | 	vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) | ||||||
|  | 	defer visitDocumentCtxPool.Put(vdc) | ||||||
|  | 	return s.visitDocument(vdc, num, visitor) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *SegmentBase) visitDocument(vdc *visitDocumentCtx, num uint64, | ||||||
|  | 	visitor segment.DocumentFieldValueVisitor) error { | ||||||
| 	// first make sure this is a valid number in this segment | 	// first make sure this is a valid number in this segment | ||||||
| 	if num < s.numDocs { | 	if num < s.numDocs { | ||||||
| 		meta, compressed := s.getDocStoredMetaAndCompressed(num) | 		meta, compressed := s.getDocStoredMetaAndCompressed(num) | ||||||
| 		uncompressed, err := snappy.Decode(nil, compressed) |  | ||||||
|  | 		vdc.reader.Reset(meta) | ||||||
|  |  | ||||||
|  | 		// handle _id field special case | ||||||
|  | 		idFieldValLen, err := binary.ReadUvarint(&vdc.reader) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 		idFieldVal := compressed[:idFieldValLen] | ||||||
|  |  | ||||||
|  | 		keepGoing := visitor("_id", byte('t'), idFieldVal, nil) | ||||||
|  | 		if !keepGoing { | ||||||
|  | 			visitDocumentCtxPool.Put(vdc) | ||||||
|  | 			return nil | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// handle non-"_id" fields | ||||||
|  | 		compressed = compressed[idFieldValLen:] | ||||||
|  |  | ||||||
|  | 		uncompressed, err := snappy.Decode(vdc.buf[:cap(vdc.buf)], compressed) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| 		// now decode meta and process |  | ||||||
| 		reader := bytes.NewReader(meta) |  | ||||||
| 		decoder := govarint.NewU64Base128Decoder(reader) |  | ||||||
|  |  | ||||||
| 		keepGoing := true |  | ||||||
| 		for keepGoing { | 		for keepGoing { | ||||||
| 			field, err := decoder.GetU64() | 			field, err := binary.ReadUvarint(&vdc.reader) | ||||||
| 			if err == io.EOF { | 			if err == io.EOF { | ||||||
| 				break | 				break | ||||||
| 			} | 			} | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
| 			typ, err := decoder.GetU64() | 			typ, err := binary.ReadUvarint(&vdc.reader) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
| 			offset, err := decoder.GetU64() | 			offset, err := binary.ReadUvarint(&vdc.reader) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
| 			l, err := decoder.GetU64() | 			l, err := binary.ReadUvarint(&vdc.reader) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
| 			numap, err := decoder.GetU64() | 			numap, err := binary.ReadUvarint(&vdc.reader) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
| 			var arrayPos []uint64 | 			var arrayPos []uint64 | ||||||
| 			if numap > 0 { | 			if numap > 0 { | ||||||
| 				arrayPos = make([]uint64, numap) | 				if cap(vdc.arrayPos) < int(numap) { | ||||||
|  | 					vdc.arrayPos = make([]uint64, numap) | ||||||
|  | 				} | ||||||
|  | 				arrayPos = vdc.arrayPos[:numap] | ||||||
| 				for i := 0; i < int(numap); i++ { | 				for i := 0; i < int(numap); i++ { | ||||||
| 					ap, err := decoder.GetU64() | 					ap, err := binary.ReadUvarint(&vdc.reader) | ||||||
| 					if err != nil { | 					if err != nil { | ||||||
| 						return err | 						return err | ||||||
| 					} | 					} | ||||||
| @@ -323,10 +371,36 @@ func (s *SegmentBase) VisitDocument(num uint64, visitor segment.DocumentFieldVal | |||||||
| 			value := uncompressed[offset : offset+l] | 			value := uncompressed[offset : offset+l] | ||||||
| 			keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) | 			keepGoing = visitor(s.fieldsInv[field], byte(typ), value, arrayPos) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		vdc.buf = uncompressed | ||||||
| 	} | 	} | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // DocID returns the value of the _id field for the given docNum | ||||||
|  | func (s *SegmentBase) DocID(num uint64) ([]byte, error) { | ||||||
|  | 	if num >= s.numDocs { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	vdc := visitDocumentCtxPool.Get().(*visitDocumentCtx) | ||||||
|  |  | ||||||
|  | 	meta, compressed := s.getDocStoredMetaAndCompressed(num) | ||||||
|  |  | ||||||
|  | 	vdc.reader.Reset(meta) | ||||||
|  |  | ||||||
|  | 	// handle _id field special case | ||||||
|  | 	idFieldValLen, err := binary.ReadUvarint(&vdc.reader) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	idFieldVal := compressed[:idFieldValLen] | ||||||
|  |  | ||||||
|  | 	visitDocumentCtxPool.Put(vdc) | ||||||
|  |  | ||||||
|  | 	return idFieldVal, nil | ||||||
|  | } | ||||||
|  |  | ||||||
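Both visitDocument and DocID above rely on the same stored-document layout: the meta block is a sequence of uvarints whose first value is the length of the raw _id, the _id bytes sit uncompressed at the front of the data blob, and everything after them is snappy-compressed field data. A hedged sketch of that decode path, under the stated layout assumption:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"

	"github.com/golang/snappy"
)

func decodeStored(meta, blob []byte) (id, fields []byte, err error) {
	r := bytes.NewReader(meta)
	idLen, err := binary.ReadUvarint(r)
	if err != nil {
		return nil, nil, err
	}
	id = blob[:idLen]
	fields, err = snappy.Decode(nil, blob[idLen:])
	return id, fields, err
}

func main() {
	// Assemble a blob the way the writer presumably does: raw _id first,
	// then the snappy-compressed field data.
	id := []byte("doc-1")
	var meta bytes.Buffer
	var tmp [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(tmp[:], uint64(len(id)))
	meta.Write(tmp[:n])

	blob := append(append([]byte{}, id...), snappy.Encode(nil, []byte("field data"))...)

	gotID, fields, err := decodeStored(meta.Bytes(), blob)
	fmt.Println(string(gotID), string(fields), err) // doc-1 field data <nil>
}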
| // Count returns the number of documents in this segment. | // Count returns the number of documents in this segment. | ||||||
| func (s *SegmentBase) Count() uint64 { | func (s *SegmentBase) Count() uint64 { | ||||||
| 	return s.numDocs | 	return s.numDocs | ||||||
| @@ -343,16 +417,27 @@ func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) { | |||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		var postings *PostingsList | 		postingsList := emptyPostingsList | ||||||
| 		for _, id := range ids { |  | ||||||
| 			postings, err = idDict.postingsList([]byte(id), nil, postings) | 		sMax, err := idDict.fst.GetMaxKey() | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
| 			if postings.postings != nil { | 		sMaxStr := string(sMax) | ||||||
| 				rv.Or(postings.postings) | 		filteredIds := make([]string, 0, len(ids)) | ||||||
|  | 		for _, id := range ids { | ||||||
|  | 			if id <= sMaxStr { | ||||||
|  | 				filteredIds = append(filteredIds, id) | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		for _, id := range filteredIds { | ||||||
|  | 			postingsList, err = idDict.postingsList([]byte(id), nil, postingsList) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
|  | 			postingsList.OrInto(rv) | ||||||
|  | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return rv, nil | 	return rv, nil | ||||||
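The rewritten DocNumbers avoids pointless dictionary probes: no key can exceed the FST's maximum key, so any candidate id lexicographically above it is dropped before the postings lookup. A small illustrative sketch of the pre-filter, not bleve's API:

package main

import "fmt"

// filterByMaxKey keeps only candidates that could exist in a
// dictionary whose largest key is maxKey.
func filterByMaxKey(ids []string, maxKey string) []string {
	out := make([]string, 0, len(ids))
	for _, id := range ids {
		if id <= maxKey {
			out = append(out, id)
		}
	}
	return out
}

func main() {
	fmt.Println(filterByMaxKey([]string{"a", "m", "z"}, "q")) // [a m]
}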
| @@ -441,19 +526,32 @@ func (s *Segment) DictAddr(field string) (uint64, error) { | |||||||
| 	return s.dictLocs[fieldIDPlus1-1], nil | 	return s.dictLocs[fieldIDPlus1-1], nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *SegmentBase) loadDvIterators() error { | func (s *SegmentBase) loadDvReaders() error { | ||||||
| 	if s.docValueOffset == fieldNotUninverted { | 	if s.docValueOffset == fieldNotUninverted { | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	var read uint64 | 	var read uint64 | ||||||
| 	for fieldID, field := range s.fieldsInv { | 	for fieldID, field := range s.fieldsInv { | ||||||
| 		fieldLoc, n := binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) | 		var fieldLocStart, fieldLocEnd uint64 | ||||||
|  | 		var n int | ||||||
|  | 		fieldLocStart, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) | ||||||
| 		if n <= 0 { | 		if n <= 0 { | ||||||
| 			return fmt.Errorf("loadDvIterators: failed to read the docvalue offsets for field %d", fieldID) | 			return fmt.Errorf("loadDvReaders: failed to read the docvalue offset start for field %d", fieldID) | ||||||
| 		} | 		} | ||||||
| 		s.fieldDvIterMap[uint16(fieldID)], _ = s.loadFieldDocValueIterator(field, fieldLoc) |  | ||||||
| 		read += uint64(n) | 		read += uint64(n) | ||||||
|  | 		fieldLocEnd, n = binary.Uvarint(s.mem[s.docValueOffset+read : s.docValueOffset+read+binary.MaxVarintLen64]) | ||||||
|  | 		if n <= 0 { | ||||||
|  | 			return fmt.Errorf("loadDvReaders: failed to read the docvalue offset end for field %d", fieldID) | ||||||
| 		} | 		} | ||||||
|  | 		read += uint64(n) | ||||||
|  |  | ||||||
|  | 		fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd) | ||||||
|  | 		if fieldDvReader != nil { | ||||||
|  | 			s.fieldDvReaders[uint16(fieldID)] = fieldDvReader | ||||||
|  | 			s.fieldDvNames = append(s.fieldDvNames, field) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
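loadDvReaders now reads two consecutive uvarints per field, the start and end offsets of its doc-value chunk. A standalone sketch of that decoding, with example values:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Encode start and end offsets back to back, as the writer would.
	var buf []byte
	var tmp [binary.MaxVarintLen64]byte
	for _, v := range []uint64{1024, 4096} {
		n := binary.PutUvarint(tmp[:], v)
		buf = append(buf, tmp[:n]...)
	}

	start, n := binary.Uvarint(buf)
	end, m := binary.Uvarint(buf[n:])
	if n <= 0 || m <= 0 {
		panic("failed to read the docvalue offsets")
	}
	fmt.Println(start, end) // 1024 4096
}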
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/write.go (22 changes, generated, vendored)
							| @@ -15,7 +15,6 @@ | |||||||
| package zap | package zap | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bytes" |  | ||||||
| 	"encoding/binary" | 	"encoding/binary" | ||||||
| 	"io" | 	"io" | ||||||
|  |  | ||||||
| @@ -25,28 +24,29 @@ import ( | |||||||
| // writes out the length of the roaring bitmap in bytes as varint | // writes out the length of the roaring bitmap in bytes as varint | ||||||
| // then writes out the roaring bitmap itself | // then writes out the roaring bitmap itself | ||||||
| func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, | func writeRoaringWithLen(r *roaring.Bitmap, w io.Writer, | ||||||
| 	reuseBuf *bytes.Buffer, reuseBufVarint []byte) (int, error) { | 	reuseBufVarint []byte) (int, error) { | ||||||
| 	reuseBuf.Reset() | 	buf, err := r.ToBytes() | ||||||
|  |  | ||||||
| 	// write out postings list to memory so we know the len |  | ||||||
| 	postingsListLen, err := r.WriteTo(reuseBuf) |  | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return 0, err | 		return 0, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	var tw int | 	var tw int | ||||||
| 	// write out the length of this postings list |  | ||||||
| 	n := binary.PutUvarint(reuseBufVarint, uint64(postingsListLen)) | 	// write out the length | ||||||
|  | 	n := binary.PutUvarint(reuseBufVarint, uint64(len(buf))) | ||||||
| 	nw, err := w.Write(reuseBufVarint[:n]) | 	nw, err := w.Write(reuseBufVarint[:n]) | ||||||
| 	tw += nw | 	tw += nw | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return tw, err | 		return tw, err | ||||||
| 	} | 	} | ||||||
| 	// write out the postings list itself |  | ||||||
| 	nw, err = w.Write(reuseBuf.Bytes()) | 	// write out the roaring bytes | ||||||
|  | 	nw, err = w.Write(buf) | ||||||
| 	tw += nw | 	tw += nw | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return tw, err | 		return tw, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return tw, nil | 	return tw, nil | ||||||
| } | } | ||||||
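The simplified writeRoaringWithLen frames the bitmap as a uvarint length followed by the payload, using roaring's ToBytes instead of staging through a bytes.Buffer. A runnable sketch of the same framing:

package main

import (
	"bytes"
	"encoding/binary"
	"fmt"

	"github.com/RoaringBitmap/roaring"
)

func main() {
	bm := roaring.BitmapOf(1, 2, 3, 100000)
	payload, err := bm.ToBytes()
	if err != nil {
		panic(err)
	}

	// Write out the length, then the roaring bytes.
	var w bytes.Buffer
	var lenBuf [binary.MaxVarintLen64]byte
	n := binary.PutUvarint(lenBuf[:], uint64(len(payload)))
	w.Write(lenBuf[:n])
	w.Write(payload)

	fmt.Println("frame bytes:", w.Len())
}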
|  |  | ||||||
| @@ -118,7 +118,7 @@ func persistFooter(numDocs, storedIndexOffset, fieldsIndexOffset, docValueOffset | |||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	// write out 32-bit version | 	// write out 32-bit version | ||||||
| 	err = binary.Write(w, binary.BigEndian, version) | 	err = binary.Write(w, binary.BigEndian, Version) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go (428 changes, generated, vendored)
							| @@ -15,10 +15,10 @@ | |||||||
| package scorch | package scorch | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bytes" |  | ||||||
| 	"container/heap" | 	"container/heap" | ||||||
| 	"encoding/binary" | 	"encoding/binary" | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
| 	"sync" | 	"sync" | ||||||
| 	"sync/atomic" | 	"sync/atomic" | ||||||
| @@ -27,8 +27,13 @@ import ( | |||||||
| 	"github.com/blevesearch/bleve/document" | 	"github.com/blevesearch/bleve/document" | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
|  | 	"github.com/couchbase/vellum" | ||||||
|  | 	lev2 "github.com/couchbase/vellum/levenshtein2" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | // reusable, thread-safe levenshtein builders | ||||||
|  | var lb1, lb2 *lev2.LevenshteinAutomatonBuilder | ||||||
|  |  | ||||||
| type asynchSegmentResult struct { | type asynchSegmentResult struct { | ||||||
| 	dictItr segment.DictionaryIterator | 	dictItr segment.DictionaryIterator | ||||||
|  |  | ||||||
| @@ -40,15 +45,36 @@ type asynchSegmentResult struct { | |||||||
| 	err error | 	err error | ||||||
| } | } | ||||||
|  |  | ||||||
|  | var reflectStaticSizeIndexSnapshot int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var is interface{} = IndexSnapshot{} | ||||||
|  | 	reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) | ||||||
|  | 	var err error | ||||||
|  | 	lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true) | ||||||
|  | 	if err != nil { | ||||||
|  | 		panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err)) | ||||||
|  | 	} | ||||||
|  | 	lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true) | ||||||
|  | 	if err != nil { | ||||||
|  | 		panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err)) | ||||||
|  | 	} | ||||||
|  | } | ||||||
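The two builders are constructed once in init because building the parametric automaton tables is the expensive step; producing a per-term DFA afterwards is cheap and thread-safe. A hedged usage sketch, where the Start/Accept/IsMatch walk assumes the returned DFA implements vellum.Automaton, as its use below suggests:

package main

import (
	"fmt"

	lev2 "github.com/couchbase/vellum/levenshtein2"
)

func main() {
	// Done once, reused for every fuzzy query of edit distance 1.
	lb, err := lev2.NewLevenshteinAutomatonBuilder(1, true)
	if err != nil {
		panic(err)
	}

	dfa, err := lb.BuildDfa("summer", 1)
	if err != nil {
		panic(err)
	}

	// Walk a candidate term through the DFA byte by byte.
	state := dfa.Start()
	for _, b := range []byte("sumner") {
		state = dfa.Accept(state, b)
	}
	fmt.Println("within distance 1:", dfa.IsMatch(state)) // true
}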
|  |  | ||||||
| type IndexSnapshot struct { | type IndexSnapshot struct { | ||||||
| 	parent   *Scorch | 	parent   *Scorch | ||||||
| 	segment  []*SegmentSnapshot | 	segment  []*SegmentSnapshot | ||||||
| 	offsets  []uint64 | 	offsets  []uint64 | ||||||
| 	internal map[string][]byte | 	internal map[string][]byte | ||||||
| 	epoch    uint64 | 	epoch    uint64 | ||||||
|  | 	size     uint64 | ||||||
|  | 	creator  string | ||||||
|  |  | ||||||
| 	m    sync.Mutex // Protects the fields that follow. | 	m    sync.Mutex // Protects the fields that follow. | ||||||
| 	refs int64 | 	refs int64 | ||||||
|  |  | ||||||
|  | 	m2        sync.Mutex                                 // Protects the fields that follow. | ||||||
|  | 	fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's | ||||||
| } | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshot) Segments() []*SegmentSnapshot { | func (i *IndexSnapshot) Segments() []*SegmentSnapshot { | ||||||
| @@ -85,12 +111,27 @@ func (i *IndexSnapshot) DecRef() (err error) { | |||||||
| 	return err | 	return err | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) Close() error { | ||||||
|  | 	return i.DecRef() | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) Size() int { | ||||||
|  | 	return int(i.size) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) updateSize() { | ||||||
|  | 	i.size += uint64(reflectStaticSizeIndexSnapshot) | ||||||
|  | 	for _, s := range i.segment { | ||||||
|  | 		i.size += uint64(s.Size()) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { | func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) { | ||||||
|  |  | ||||||
| 	results := make(chan *asynchSegmentResult) | 	results := make(chan *asynchSegmentResult) | ||||||
| 	for index, segment := range i.segment { | 	for index, segment := range i.segment { | ||||||
| 		go func(index int, segment *SegmentSnapshot) { | 		go func(index int, segment *SegmentSnapshot) { | ||||||
| 			dict, err := segment.Dictionary(field) | 			dict, err := segment.segment.Dictionary(field) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				results <- &asynchSegmentResult{err: err} | 				results <- &asynchSegmentResult{err: err} | ||||||
| 			} else { | 			} else { | ||||||
| @@ -116,7 +157,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s | |||||||
| 			if next != nil { | 			if next != nil { | ||||||
| 				rv.cursors = append(rv.cursors, &segmentDictCursor{ | 				rv.cursors = append(rv.cursors, &segmentDictCursor{ | ||||||
| 					itr:  asr.dictItr, | 					itr:  asr.dictItr, | ||||||
| 					curr: next, | 					curr: *next, | ||||||
| 				}) | 				}) | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| @@ -151,6 +192,56 @@ func (i *IndexSnapshot) FieldDictPrefix(field string, | |||||||
| 	}) | 	}) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) FieldDictRegexp(field string, | ||||||
|  | 	termRegex string) (index.FieldDict, error) { | ||||||
|  | 	// TODO: potential optimization where the literal prefix represents | ||||||
|  | 	//       the entire regexp, allowing us to use PrefixIterator(prefixTerm)? | ||||||
|  |  | ||||||
|  | 	a, prefixBeg, prefixEnd, err := segment.ParseRegexp(termRegex) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | ||||||
|  | 		return i.AutomatonIterator(a, prefixBeg, prefixEnd) | ||||||
|  | 	}) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) getLevAutomaton(term string, | ||||||
|  | 	fuzziness uint8) (vellum.Automaton, error) { | ||||||
|  | 	if fuzziness == 1 { | ||||||
|  | 		return lb1.BuildDfa(term, fuzziness) | ||||||
|  | 	} else if fuzziness == 2 { | ||||||
|  | 		return lb2.BuildDfa(term, fuzziness) | ||||||
|  | 	} | ||||||
|  | 	return nil, fmt.Errorf("fuzziness exceeds the max limit") | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) FieldDictFuzzy(field string, | ||||||
|  | 	term string, fuzziness int, prefix string) (index.FieldDict, error) { | ||||||
|  | 	a, err := i.getLevAutomaton(term, uint8(fuzziness)) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var prefixBeg, prefixEnd []byte | ||||||
|  | 	if prefix != "" { | ||||||
|  | 		prefixBeg = []byte(prefix) | ||||||
|  | 		prefixEnd = segment.IncrementBytes(prefixBeg) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | ||||||
|  | 		return i.AutomatonIterator(a, prefixBeg, prefixEnd) | ||||||
|  | 	}) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) FieldDictOnly(field string, | ||||||
|  | 	onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) { | ||||||
|  | 	return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { | ||||||
|  | 		return i.OnlyIterator(onlyTerms, includeCount) | ||||||
|  | 	}) | ||||||
|  | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { | func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) { | ||||||
| 	results := make(chan *asynchSegmentResult) | 	results := make(chan *asynchSegmentResult) | ||||||
| 	for index, segment := range i.segment { | 	for index, segment := range i.segment { | ||||||
| @@ -264,21 +355,26 @@ func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) { | |||||||
| 	segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) | 	segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) | ||||||
|  |  | ||||||
| 	rv = document.NewDocument(id) | 	rv = document.NewDocument(id) | ||||||
| 	err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool { | 	err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool { | ||||||
| 		if name == "_id" { | 		if name == "_id" { | ||||||
| 			return true | 			return true | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		// copy value, array positions to preserve them beyond the scope of this callback | ||||||
|  | 		value := append([]byte(nil), val...) | ||||||
|  | 		arrayPos := append([]uint64(nil), pos...) | ||||||
|  |  | ||||||
| 		switch typ { | 		switch typ { | ||||||
| 		case 't': | 		case 't': | ||||||
| 			rv.AddField(document.NewTextField(name, pos, value)) | 			rv.AddField(document.NewTextField(name, arrayPos, value)) | ||||||
| 		case 'n': | 		case 'n': | ||||||
| 			rv.AddField(document.NewNumericFieldFromBytes(name, pos, value)) | 			rv.AddField(document.NewNumericFieldFromBytes(name, arrayPos, value)) | ||||||
| 		case 'd': | 		case 'd': | ||||||
| 			rv.AddField(document.NewDateTimeFieldFromBytes(name, pos, value)) | 			rv.AddField(document.NewDateTimeFieldFromBytes(name, arrayPos, value)) | ||||||
| 		case 'b': | 		case 'b': | ||||||
| 			rv.AddField(document.NewBooleanFieldFromBytes(name, pos, value)) | 			rv.AddField(document.NewBooleanFieldFromBytes(name, arrayPos, value)) | ||||||
| 		case 'g': | 		case 'g': | ||||||
| 			rv.AddField(document.NewGeoPointFieldFromBytes(name, pos, value)) | 			rv.AddField(document.NewGeoPointFieldFromBytes(name, arrayPos, value)) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		return true | 		return true | ||||||
| @@ -307,24 +403,15 @@ func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) { | |||||||
| 	} | 	} | ||||||
| 	segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) | 	segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) | ||||||
|  |  | ||||||
| 	var found bool | 	v, err := i.segment[segmentIndex].DocID(localDocNum) | ||||||
| 	var rv string |  | ||||||
| 	err = i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool { |  | ||||||
| 		if field == "_id" { |  | ||||||
| 			found = true |  | ||||||
| 			rv = string(value) |  | ||||||
| 			return false |  | ||||||
| 		} |  | ||||||
| 		return true |  | ||||||
| 	}) |  | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return "", err | 		return "", err | ||||||
| 	} | 	} | ||||||
|  | 	if v == nil { | ||||||
| 	if found { |  | ||||||
| 		return rv, nil |  | ||||||
| 	} |  | ||||||
| 		return "", fmt.Errorf("document number %d not found", docNum) | 		return "", fmt.Errorf("document number %d not found", docNum) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return string(v), nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) { | func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) { | ||||||
| @@ -349,33 +436,81 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err | |||||||
|  |  | ||||||
| func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, | func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, | ||||||
| 	includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { | 	includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { | ||||||
|  | 	rv := i.allocTermFieldReaderDicts(field) | ||||||
|  |  | ||||||
| 	rv := &IndexSnapshotTermFieldReader{ | 	rv.term = term | ||||||
| 		term:               term, | 	rv.field = field | ||||||
| 		field:              field, | 	rv.snapshot = i | ||||||
| 		snapshot:           i, | 	if rv.postings == nil { | ||||||
| 		postings:           make([]segment.PostingsList, len(i.segment)), | 		rv.postings = make([]segment.PostingsList, len(i.segment)) | ||||||
| 		iterators:          make([]segment.PostingsIterator, len(i.segment)), |  | ||||||
| 		includeFreq:        includeFreq, |  | ||||||
| 		includeNorm:        includeNorm, |  | ||||||
| 		includeTermVectors: includeTermVectors, |  | ||||||
| 	} | 	} | ||||||
|  | 	if rv.iterators == nil { | ||||||
|  | 		rv.iterators = make([]segment.PostingsIterator, len(i.segment)) | ||||||
|  | 	} | ||||||
|  | 	rv.segmentOffset = 0 | ||||||
|  | 	rv.includeFreq = includeFreq | ||||||
|  | 	rv.includeNorm = includeNorm | ||||||
|  | 	rv.includeTermVectors = includeTermVectors | ||||||
|  | 	rv.currPosting = nil | ||||||
|  | 	rv.currID = rv.currID[:0] | ||||||
|  |  | ||||||
|  | 	if rv.dicts == nil { | ||||||
|  | 		rv.dicts = make([]segment.TermDictionary, len(i.segment)) | ||||||
| 		for i, segment := range i.segment { | 		for i, segment := range i.segment { | ||||||
| 		dict, err := segment.Dictionary(field) | 			dict, err := segment.segment.Dictionary(field) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 		pl, err := dict.PostingsList(string(term), nil) | 			rv.dicts[i] = dict | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for i, segment := range i.segment { | ||||||
|  | 		pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i]) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
| 		rv.postings[i] = pl | 		rv.postings[i] = pl | ||||||
| 		rv.iterators[i] = pl.Iterator() | 		rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) | ||||||
| 	} | 	} | ||||||
| 	atomic.AddUint64(&i.parent.stats.termSearchersStarted, uint64(1)) | 	atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1)) | ||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) { | ||||||
|  | 	i.m2.Lock() | ||||||
|  | 	if i.fieldTFRs != nil { | ||||||
|  | 		tfrs := i.fieldTFRs[field] | ||||||
|  | 		last := len(tfrs) - 1 | ||||||
|  | 		if last >= 0 { | ||||||
|  | 			tfr = tfrs[last] | ||||||
|  | 			tfrs[last] = nil | ||||||
|  | 			i.fieldTFRs[field] = tfrs[:last] | ||||||
|  | 			i.m2.Unlock() | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	i.m2.Unlock() | ||||||
|  | 	return &IndexSnapshotTermFieldReader{} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) { | ||||||
|  | 	i.parent.rootLock.RLock() | ||||||
|  | 	obsolete := i.parent.root != i | ||||||
|  | 	i.parent.rootLock.RUnlock() | ||||||
|  | 	if obsolete { | ||||||
|  | 		// if we're not the current root (mutations happened), don't bother recycling | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	i.m2.Lock() | ||||||
|  | 	if i.fieldTFRs == nil { | ||||||
|  | 		i.fieldTFRs = map[string][]*IndexSnapshotTermFieldReader{} | ||||||
|  | 	} | ||||||
|  | 	i.fieldTFRs[tfr.field] = append(i.fieldTFRs[tfr.field], tfr) | ||||||
|  | 	i.m2.Unlock() | ||||||
|  | } | ||||||
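allocTermFieldReaderDicts and recycleTermFieldReader implement a per-field free list, so repeated term searches against the same snapshot reuse reader state (dicts, postings, iterators) instead of reallocating it. A generic sketch of that mutex-guarded pattern, with illustrative types:

package main

import (
	"fmt"
	"sync"
)

type reader struct{ field string }

type pool struct {
	mu   sync.Mutex
	free map[string][]*reader
}

func (p *pool) get(field string) *reader {
	p.mu.Lock()
	defer p.mu.Unlock()
	if rs := p.free[field]; len(rs) > 0 {
		r := rs[len(rs)-1]
		rs[len(rs)-1] = nil // avoid retaining the pointer
		p.free[field] = rs[:len(rs)-1]
		return r
	}
	return &reader{field: field}
}

func (p *pool) put(r *reader) {
	p.mu.Lock()
	defer p.mu.Unlock()
	if p.free == nil {
		p.free = map[string][]*reader{}
	}
	p.free[r.field] = append(p.free[r.field], r)
}

func main() {
	p := &pool{}
	r := p.get("title")
	p.put(r)
	fmt.Println(p.get("title") == r) // true: recycled
}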
|  |  | ||||||
| func docNumberToBytes(buf []byte, in uint64) []byte { | func docNumberToBytes(buf []byte, in uint64) []byte { | ||||||
| 	if len(buf) != 8 { | 	if len(buf) != 8 { | ||||||
| 		if cap(buf) >= 8 { | 		if cap(buf) >= 8 { | ||||||
| @@ -389,115 +524,172 @@ func docNumberToBytes(buf []byte, in uint64) []byte { | |||||||
| } | } | ||||||
|  |  | ||||||
| func docInternalToNumber(in index.IndexInternalID) (uint64, error) { | func docInternalToNumber(in index.IndexInternalID) (uint64, error) { | ||||||
| 	var res uint64 | 	if len(in) != 8 { | ||||||
| 	err := binary.Read(bytes.NewReader(in), binary.BigEndian, &res) | 		return 0, fmt.Errorf("wrong len for IndexInternalID: %q", in) | ||||||
| 	if err != nil { |  | ||||||
| 		return 0, err |  | ||||||
| 	} | 	} | ||||||
| 	return res, nil | 	return binary.BigEndian.Uint64(in), nil | ||||||
| } | } | ||||||
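Internal IDs are fixed-width 8-byte big-endian doc numbers, so the rewrite replaces the reflection-driven binary.Read with a length check plus a direct decode. A tiny round-trip sketch:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Encode: a fixed 8-byte big-endian doc number.
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, 42)

	// Decode: a bounds check stands in for the old binary.Read round trip.
	if len(buf) != 8 {
		panic("wrong len for IndexInternalID")
	}
	fmt.Println(binary.BigEndian.Uint64(buf)) // 42
}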
|  |  | ||||||
| func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, | func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, | ||||||
| 	fields []string, visitor index.DocumentFieldTermVisitor) error { | 	fields []string, visitor index.DocumentFieldTermVisitor) error { | ||||||
|  | 	_, err := i.documentVisitFieldTerms(id, fields, visitor, nil) | ||||||
|  | 	return err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) documentVisitFieldTerms(id index.IndexInternalID, | ||||||
|  | 	fields []string, visitor index.DocumentFieldTermVisitor, | ||||||
|  | 	dvs segment.DocVisitState) (segment.DocVisitState, error) { | ||||||
|  | 	docNum, err := docInternalToNumber(id) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) | ||||||
|  | 	if segmentIndex >= len(i.segment) { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	_, dvs, err = i.documentVisitFieldTermsOnSegment( | ||||||
|  | 		segmentIndex, localDocNum, fields, nil, visitor, dvs) | ||||||
|  |  | ||||||
|  | 	return dvs, err | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( | ||||||
|  | 	segmentIndex int, localDocNum uint64, fields []string, cFields []string, | ||||||
|  | 	visitor index.DocumentFieldTermVisitor, dvs segment.DocVisitState) ( | ||||||
|  | 	cFieldsOut []string, dvsOut segment.DocVisitState, err error) { | ||||||
|  | 	ss := i.segment[segmentIndex] | ||||||
|  |  | ||||||
|  | 	var vFields []string // fields that are visitable via the segment | ||||||
|  |  | ||||||
|  | 	ssv, ssvOk := ss.segment.(segment.DocumentFieldTermVisitable) | ||||||
|  | 	if ssvOk && ssv != nil { | ||||||
|  | 		vFields, err = ssv.VisitableDocValueFields() | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var errCh chan error | ||||||
|  |  | ||||||
|  | 	// cFields represents the fields that we'll need from the | ||||||
|  | 	// cachedDocs, and might optionally be provided by the caller, | ||||||
|  | 	// if the caller happens to know we're on the same segmentIndex | ||||||
|  | 	// from a previous invocation | ||||||
|  | 	if cFields == nil { | ||||||
|  | 		cFields = subtractStrings(fields, vFields) | ||||||
|  |  | ||||||
|  | 		if !ss.cachedDocs.hasFields(cFields) { | ||||||
|  | 			errCh = make(chan error, 1) | ||||||
|  |  | ||||||
|  | 			go func() { | ||||||
|  | 				err := ss.cachedDocs.prepareFields(cFields, ss) | ||||||
|  | 				if err != nil { | ||||||
|  | 					errCh <- err | ||||||
|  | 				} | ||||||
|  | 				close(errCh) | ||||||
|  | 			}() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if ssvOk && ssv != nil && len(vFields) > 0 { | ||||||
|  | 		dvs, err = ssv.VisitDocumentFieldTerms(localDocNum, fields, visitor, dvs) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if errCh != nil { | ||||||
|  | 		err = <-errCh | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if len(cFields) > 0 { | ||||||
|  | 		ss.cachedDocs.visitDoc(localDocNum, cFields, visitor) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return cFields, dvs, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) DocValueReader(fields []string) ( | ||||||
|  | 	index.DocValueReader, error) { | ||||||
|  | 	return &DocValueReader{i: i, fields: fields, currSegmentIndex: -1}, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type DocValueReader struct { | ||||||
|  | 	i      *IndexSnapshot | ||||||
|  | 	fields []string | ||||||
|  | 	dvs    segment.DocVisitState | ||||||
|  |  | ||||||
|  | 	currSegmentIndex int | ||||||
|  | 	currCachedFields []string | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, | ||||||
|  | 	visitor index.DocumentFieldTermVisitor) (err error) { | ||||||
| 	docNum, err := docInternalToNumber(id) | 	docNum, err := docInternalToNumber(id) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) |  | ||||||
| 	if segmentIndex >= len(i.segment) { | 	segmentIndex, localDocNum := dvr.i.segmentIndexAndLocalDocNumFromGlobal(docNum) | ||||||
|  | 	if segmentIndex >= len(dvr.i.segment) { | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	ss := i.segment[segmentIndex] | 	if dvr.currSegmentIndex != segmentIndex { | ||||||
|  | 		dvr.currSegmentIndex = segmentIndex | ||||||
|  | 		dvr.currCachedFields = nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	dvr.currCachedFields, dvr.dvs, err = dvr.i.documentVisitFieldTermsOnSegment( | ||||||
|  | 		dvr.currSegmentIndex, localDocNum, dvr.fields, dvr.currCachedFields, visitor, dvr.dvs) | ||||||
|  |  | ||||||
| 	if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok { |  | ||||||
| 		// get the list of doc value persisted fields |  | ||||||
| 		pFields, err := zaps.VisitableDocValueFields() |  | ||||||
| 		if err != nil { |  | ||||||
| 	return err | 	return err | ||||||
| 		} | } | ||||||
| 		// assort the fields for which terms look up have to |  | ||||||
| 		// be performed runtime |  | ||||||
| 		dvPendingFields := extractDvPendingFields(fields, pFields) |  | ||||||
| 		if len(dvPendingFields) == 0 { |  | ||||||
| 			// all fields are doc value persisted |  | ||||||
| 			return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// concurrently trigger the runtime doc value preparations for |  | ||||||
| 		// pending fields as well as the visit of the persisted doc values |  | ||||||
| 		errCh := make(chan error, 1) |  | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshot) DumpAll() chan interface{} { | ||||||
|  | 	rv := make(chan interface{}) | ||||||
| 	go func() { | 	go func() { | ||||||
| 			defer close(errCh) | 		close(rv) | ||||||
| 			err := ss.cachedDocs.prepareFields(fields, ss) |  | ||||||
| 			if err != nil { |  | ||||||
| 				errCh <- err |  | ||||||
| 			} |  | ||||||
| 	}() | 	}() | ||||||
|  | 	return rv | ||||||
| 		// visit the persisted dv while the cache preparation is in progress |  | ||||||
| 		err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// err out if fieldCache preparation failed |  | ||||||
| 		err = <-errCh |  | ||||||
| 		if err != nil { |  | ||||||
| 			return err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor) |  | ||||||
| 		return nil |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor) |  | ||||||
| } | } | ||||||
|  |  | ||||||
| func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string, | func (i *IndexSnapshot) DumpDoc(id string) chan interface{} { | ||||||
| 	ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) error { | 	rv := make(chan interface{}) | ||||||
| 	err := ss.cachedDocs.prepareFields(fields, ss) | 	go func() { | ||||||
| 	if err != nil { | 		close(rv) | ||||||
| 		return err | 	}() | ||||||
| 	} | 	return rv | ||||||
|  |  | ||||||
| 	visitDocumentFieldCacheTerms(localDocNum, fields, ss, visitor) |  | ||||||
| 	return nil |  | ||||||
| } | } | ||||||
|  |  | ||||||
| func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string, | func (i *IndexSnapshot) DumpFields() chan interface{} { | ||||||
| 	ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) { | 	rv := make(chan interface{}) | ||||||
|  | 	go func() { | ||||||
| 	for _, field := range fields { | 		close(rv) | ||||||
| 		if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists { | 	}() | ||||||
| 			if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { | 	return rv | ||||||
| 				for { |  | ||||||
| 					i := bytes.Index(tlist, TermSeparatorSplitSlice) |  | ||||||
| 					if i < 0 { |  | ||||||
| 						break |  | ||||||
| 					} |  | ||||||
| 					visitor(field, tlist[0:i]) |  | ||||||
| 					tlist = tlist[i+1:] |  | ||||||
| 				} |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
| func extractDvPendingFields(requestedFields, persistedFields []string) []string { | // subtractStrings returns the elements of set a that are not in set b. | ||||||
| 	removeMap := map[string]struct{}{} | func subtractStrings(a, b []string) []string { | ||||||
| 	for _, str := range persistedFields { | 	if len(b) == 0 { | ||||||
| 		removeMap[str] = struct{}{} | 		return a | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	rv := make([]string, 0, len(requestedFields)) | 	rv := make([]string, 0, len(a)) | ||||||
| 	for _, s := range requestedFields { | OUTER: | ||||||
| 		if _, ok := removeMap[s]; !ok { | 	for _, as := range a { | ||||||
| 			rv = append(rv, s) | 		for _, bs := range b { | ||||||
|  | 			if as == bs { | ||||||
|  | 				continue OUTER | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  | 		rv = append(rv, as) | ||||||
|  | 	} | ||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go (17 changes, generated, vendored)
							| @@ -23,12 +23,13 @@ import ( | |||||||
|  |  | ||||||
| type segmentDictCursor struct { | type segmentDictCursor struct { | ||||||
| 	itr  segment.DictionaryIterator | 	itr  segment.DictionaryIterator | ||||||
| 	curr *index.DictEntry | 	curr index.DictEntry | ||||||
| } | } | ||||||
|  |  | ||||||
| type IndexSnapshotFieldDict struct { | type IndexSnapshotFieldDict struct { | ||||||
| 	snapshot *IndexSnapshot | 	snapshot *IndexSnapshot | ||||||
| 	cursors  []*segmentDictCursor | 	cursors  []*segmentDictCursor | ||||||
|  | 	entry    index.DictEntry | ||||||
| } | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) } | func (i *IndexSnapshotFieldDict) Len() int { return len(i.cursors) } | ||||||
| @@ -51,10 +52,10 @@ func (i *IndexSnapshotFieldDict) Pop() interface{} { | |||||||
| } | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { | func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { | ||||||
| 	if len(i.cursors) <= 0 { | 	if len(i.cursors) == 0 { | ||||||
| 		return nil, nil | 		return nil, nil | ||||||
| 	} | 	} | ||||||
| 	rv := i.cursors[0].curr | 	i.entry = i.cursors[0].curr | ||||||
| 	next, err := i.cursors[0].itr.Next() | 	next, err := i.cursors[0].itr.Next() | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| @@ -64,12 +65,12 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { | |||||||
| 		heap.Pop(i) | 		heap.Pop(i) | ||||||
| 	} else { | 	} else { | ||||||
| 		// modified heap, fix it | 		// modified heap, fix it | ||||||
| 		i.cursors[0].curr = next | 		i.cursors[0].curr = *next | ||||||
| 		heap.Fix(i, 0) | 		heap.Fix(i, 0) | ||||||
| 	} | 	} | ||||||
| 	// look for any other entries with the exact same term | 	// look for any other entries with the exact same term | ||||||
| 	for len(i.cursors) > 0 && i.cursors[0].curr.Term == rv.Term { | 	for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term { | ||||||
| 		rv.Count += i.cursors[0].curr.Count | 		i.entry.Count += i.cursors[0].curr.Count | ||||||
| 		next, err := i.cursors[0].itr.Next() | 		next, err := i.cursors[0].itr.Next() | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| @@ -79,12 +80,12 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) { | |||||||
| 			heap.Pop(i) | 			heap.Pop(i) | ||||||
| 		} else { | 		} else { | ||||||
| 			// modified heap, fix it | 			// modified heap, fix it | ||||||
| 			i.cursors[0].curr = next | 			i.cursors[0].curr = *next | ||||||
| 			heap.Fix(i, 0) | 			heap.Fix(i, 0) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return rv, nil | 	return &i.entry, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshotFieldDict) Close() error { | func (i *IndexSnapshotFieldDict) Close() error { | ||||||
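Next merges the per-segment cursors with a min-heap, summing counts for identical terms; the patch also switches cursors to hold DictEntry by value, so the returned entry is a copy rather than a pointer into a cursor that is about to advance. A self-contained sketch of that k-way merge, with illustrative types:

package main

import (
	"container/heap"
	"fmt"
)

type dictEntry struct {
	Term  string
	Count uint64
}

type cursor struct {
	curr dictEntry
	rest []dictEntry // remaining entries, sorted by Term
}

type dictHeap []*cursor

func (h dictHeap) Len() int            { return len(h) }
func (h dictHeap) Less(i, j int) bool  { return h[i].curr.Term < h[j].curr.Term }
func (h dictHeap) Swap(i, j int)       { h[i], h[j] = h[j], h[i] }
func (h *dictHeap) Push(x interface{}) { *h = append(*h, x.(*cursor)) }
func (h *dictHeap) Pop() interface{} {
	old := *h
	x := old[len(old)-1]
	*h = old[:len(old)-1]
	return x
}

// step advances the top cursor, dropping it once exhausted.
func step(h *dictHeap) {
	c := (*h)[0]
	if len(c.rest) == 0 {
		heap.Pop(h)
		return
	}
	c.curr, c.rest = c.rest[0], c.rest[1:]
	heap.Fix(h, 0)
}

func next(h *dictHeap) *dictEntry {
	if h.Len() == 0 {
		return nil
	}
	rv := (*h)[0].curr // copy by value, like the patched cursors
	step(h)
	// Merge duplicate terms across segments, summing counts.
	for h.Len() > 0 && (*h)[0].curr.Term == rv.Term {
		rv.Count += (*h)[0].curr.Count
		step(h)
	}
	return &rv
}

func main() {
	h := &dictHeap{
		{curr: dictEntry{Term: "apple", Count: 2}, rest: []dictEntry{{Term: "pear", Count: 1}}},
		{curr: dictEntry{Term: "apple", Count: 3}, rest: []dictEntry{{Term: "zoo", Count: 4}}},
	}
	heap.Init(h)
	for e := next(h); e != nil; e = next(h) {
		fmt.Println(e.Term, e.Count) // apple 5, pear 1, zoo 4
	}
}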
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_doc.go (13 changes, generated, vendored)
							| @@ -16,17 +16,30 @@ package scorch | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bytes" | 	"bytes" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeIndexSnapshotDocIDReader int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var isdr IndexSnapshotDocIDReader | ||||||
|  | 	reflectStaticSizeIndexSnapshotDocIDReader = int(reflect.TypeOf(isdr).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type IndexSnapshotDocIDReader struct { | type IndexSnapshotDocIDReader struct { | ||||||
| 	snapshot      *IndexSnapshot | 	snapshot      *IndexSnapshot | ||||||
| 	iterators     []roaring.IntIterable | 	iterators     []roaring.IntIterable | ||||||
| 	segmentOffset int | 	segmentOffset int | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshotDocIDReader) Size() int { | ||||||
|  | 	return reflectStaticSizeIndexSnapshotDocIDReader + size.SizeOfPtr | ||||||
|  | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { | func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) { | ||||||
| 	for i.segmentOffset < len(i.iterators) { | 	for i.segmentOffset < len(i.iterators) { | ||||||
| 		if !i.iterators[i.segmentOffset].HasNext() { | 		if !i.iterators[i.segmentOffset].HasNext() { | ||||||
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_tfr.go (83 changes, generated, vendored)
							| @@ -16,16 +16,27 @@ package scorch | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bytes" | 	"bytes" | ||||||
|  | 	"fmt" | ||||||
|  | 	"reflect" | ||||||
| 	"sync/atomic" | 	"sync/atomic" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeIndexSnapshotTermFieldReader int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var istfr IndexSnapshotTermFieldReader | ||||||
|  | 	reflectStaticSizeIndexSnapshotTermFieldReader = int(reflect.TypeOf(istfr).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type IndexSnapshotTermFieldReader struct { | type IndexSnapshotTermFieldReader struct { | ||||||
| 	term               []byte | 	term               []byte | ||||||
| 	field              string | 	field              string | ||||||
| 	snapshot           *IndexSnapshot | 	snapshot           *IndexSnapshot | ||||||
|  | 	dicts              []segment.TermDictionary | ||||||
| 	postings           []segment.PostingsList | 	postings           []segment.PostingsList | ||||||
| 	iterators          []segment.PostingsIterator | 	iterators          []segment.PostingsIterator | ||||||
| 	segmentOffset      int | 	segmentOffset      int | ||||||
| @@ -36,13 +47,34 @@ type IndexSnapshotTermFieldReader struct { | |||||||
| 	currID             index.IndexInternalID | 	currID             index.IndexInternalID | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (i *IndexSnapshotTermFieldReader) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr + | ||||||
|  | 		len(i.term) + | ||||||
|  | 		len(i.field) + | ||||||
|  | 		len(i.currID) | ||||||
|  |  | ||||||
|  | 	for _, entry := range i.postings { | ||||||
|  | 		sizeInBytes += entry.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range i.iterators { | ||||||
|  | 		sizeInBytes += entry.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if i.currPosting != nil { | ||||||
|  | 		sizeInBytes += i.currPosting.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { | func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) { | ||||||
| 	rv := preAlloced | 	rv := preAlloced | ||||||
| 	if rv == nil { | 	if rv == nil { | ||||||
| 		rv = &index.TermFieldDoc{} | 		rv = &index.TermFieldDoc{} | ||||||
| 	} | 	} | ||||||
| 	// find the next hit | 	// find the next hit | ||||||
| 	for i.segmentOffset < len(i.postings) { | 	for i.segmentOffset < len(i.iterators) { | ||||||
| 		next, err := i.iterators[i.segmentOffset].Next() | 		next, err := i.iterators[i.segmentOffset].Next() | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| @@ -72,9 +104,16 @@ func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Postin | |||||||
| 	} | 	} | ||||||
| 	if i.includeTermVectors { | 	if i.includeTermVectors { | ||||||
| 		locs := next.Locations() | 		locs := next.Locations() | ||||||
|  | 		if cap(rv.Vectors) < len(locs) { | ||||||
| 			rv.Vectors = make([]*index.TermFieldVector, len(locs)) | 			rv.Vectors = make([]*index.TermFieldVector, len(locs)) | ||||||
|  | 			backing := make([]index.TermFieldVector, len(locs)) | ||||||
|  | 			for i := range backing { | ||||||
|  | 				rv.Vectors[i] = &backing[i] | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 		rv.Vectors = rv.Vectors[:len(locs)] | ||||||
| 		for i, loc := range locs { | 		for i, loc := range locs { | ||||||
| 			rv.Vectors[i] = &index.TermFieldVector{ | 			*rv.Vectors[i] = index.TermFieldVector{ | ||||||
| 				Start:          loc.Start(), | 				Start:          loc.Start(), | ||||||
| 				End:            loc.End(), | 				End:            loc.End(), | ||||||
| 				Pos:            loc.Pos(), | 				Pos:            loc.Pos(), | ||||||
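The term-vector change above allocates one backing array for a whole batch of TermFieldVector values and points the pointer slice at its elements, turning N+1 small allocations into two and letting the capacity be reused on the next call. The trick in isolation:

package main

import "fmt"

type vector struct{ start, end uint64 }

func main() {
	n := 4
	ptrs := make([]*vector, n)
	backing := make([]vector, n) // one slab holds all n values
	for i := range backing {
		ptrs[i] = &backing[i]
	}

	*ptrs[2] = vector{start: 10, end: 16}
	fmt.Println(backing[2]) // {10 16}: the pointers alias the slab
}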
| @@ -96,24 +135,37 @@ func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAllo | |||||||
| 		} | 		} | ||||||
| 		*i = *(i2.(*IndexSnapshotTermFieldReader)) | 		*i = *(i2.(*IndexSnapshotTermFieldReader)) | ||||||
| 	} | 	} | ||||||
| 	// FIXME do something better | 	num, err := docInternalToNumber(ID) | ||||||
| 	next, err := i.Next(preAlloced) | 	if err != nil { | ||||||
|  | 		return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err) | ||||||
|  | 	} | ||||||
|  | 	segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num) | ||||||
|  | 	if segIndex >= len(i.snapshot.segment) { | ||||||
|  | 		return nil, fmt.Errorf("computed segment index %d out of bounds %d", | ||||||
|  | 			segIndex, len(i.snapshot.segment)) | ||||||
|  | 	} | ||||||
|  | 	// skip directly to the target segment | ||||||
|  | 	i.segmentOffset = segIndex | ||||||
|  | 	next, err := i.iterators[i.segmentOffset].Advance(ldocNum) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
| 	if next == nil { | 	if next == nil { | ||||||
| 		return nil, nil | 		// we jumped directly to the segment that should have contained it | ||||||
|  | 		// but it wasn't there, so reuse Next() which should correctly | ||||||
|  | 		// get the next hit after it (we moved i.segmentOffset) | ||||||
|  | 		return i.Next(preAlloced) | ||||||
| 	} | 	} | ||||||
| 	for bytes.Compare(next.ID, ID) < 0 { |  | ||||||
| 		next, err = i.Next(preAlloced) | 	if preAlloced == nil { | ||||||
| 		if err != nil { | 		preAlloced = &index.TermFieldDoc{} | ||||||
| 			return nil, err |  | ||||||
| 	} | 	} | ||||||
| 		if next == nil { | 	preAlloced.ID = docNumberToBytes(preAlloced.ID, next.Number()+ | ||||||
| 			break | 		i.snapshot.offsets[segIndex]) | ||||||
| 		} | 	i.postingToTermFieldDoc(next, preAlloced) | ||||||
| 	} | 	i.currID = preAlloced.ID | ||||||
| 	return next, nil | 	i.currPosting = next | ||||||
|  | 	return preAlloced, nil | ||||||
| } | } | ||||||
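Advance can now jump straight to the owning segment because a global doc number decomposes into a segment index and a local number via the snapshot's offsets table. A sketch of that decomposition, assuming offsets holds each segment's base doc number in ascending order, consistent with the docNumberToBytes and offsets usage above:

package main

import (
	"fmt"
	"sort"
)

func segmentIndexAndLocal(offsets []uint64, global uint64) (int, uint64) {
	// Find the first segment whose base offset exceeds global, then
	// back up one: that segment owns the doc number.
	i := sort.Search(len(offsets), func(k int) bool {
		return offsets[k] > global
	}) - 1
	return i, global - offsets[i]
}

func main() {
	offsets := []uint64{0, 100, 250} // segment base doc numbers (example)
	seg, local := segmentIndexAndLocal(offsets, 180)
	fmt.Println(seg, local) // 1 80
}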
|  |  | ||||||
| func (i *IndexSnapshotTermFieldReader) Count() uint64 { | func (i *IndexSnapshotTermFieldReader) Count() uint64 { | ||||||
| @@ -126,7 +178,8 @@ func (i *IndexSnapshotTermFieldReader) Count() uint64 { | |||||||
|  |  | ||||||
| func (i *IndexSnapshotTermFieldReader) Close() error { | func (i *IndexSnapshotTermFieldReader) Close() error { | ||||||
| 	if i.snapshot != nil { | 	if i.snapshot != nil { | ||||||
| 		atomic.AddUint64(&i.snapshot.parent.stats.termSearchersFinished, uint64(1)) | 		atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, uint64(1)) | ||||||
|  | 		i.snapshot.recycleTermFieldReader(i) | ||||||
| 	} | 	} | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|   | |||||||
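
The Advance() rewrite above is the substantive change in this file: instead of the old "call Next() until the ID is reached" loop flagged by the FIXME, it decodes the target ID into a global document number, maps that to a (segment, local doc number) pair, and advances only that segment's iterator, falling back to Next() when the document is absent from its expected segment. A minimal sketch of the global-to-local mapping it relies on, assuming (as in scorch) that offsets holds each segment's first global doc number; the helper name here is illustrative, not the vendored one:

	// segmentIndexAndLocalDocNum maps a global doc number onto the segment
	// that contains it, plus the doc number local to that segment.
	func segmentIndexAndLocalDocNum(offsets []uint64, global uint64) (segIndex int, local uint64) {
		for i := 1; i < len(offsets); i++ { // linear scan for clarity
			if offsets[i] > global {
				break
			}
			segIndex = i
		}
		return segIndex, global - offsets[segIndex]
	}
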
2  vendor/github.com/blevesearch/bleve/index/scorch/snapshot_rollback.go  (generated, vendored)
							| @@ -19,7 +19,7 @@ import ( | |||||||
| 	"log" | 	"log" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
| 	"github.com/boltdb/bolt" | 	bolt "github.com/etcd-io/bbolt" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| type RollbackPoint struct { | type RollbackPoint struct { | ||||||
|   | |||||||
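
This file (like the boltdb store files below) changes only its storage import: github.com/boltdb/bolt is archived upstream, and the vendored code moves to the etcd-io/bbolt fork. Because the import is aliased back to bolt, every call site compiles unchanged; a sketch of the idiom:

	package main

	import bolt "github.com/etcd-io/bbolt"

	// open compiles identically against the old boltdb/bolt package,
	// since the alias preserves the bolt.* call sites.
	func open(path string) (*bolt.DB, error) {
		return bolt.Open(path, 0600, nil)
	}
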
148  vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go  (generated, vendored)
							| @@ -15,42 +15,25 @@ | |||||||
| package scorch | package scorch | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"bytes" | ||||||
| 	"sync" | 	"sync" | ||||||
|  | 	"sync/atomic" | ||||||
|  |  | ||||||
| 	"github.com/RoaringBitmap/roaring" | 	"github.com/RoaringBitmap/roaring" | ||||||
|  | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/scorch/segment" | 	"github.com/blevesearch/bleve/index/scorch/segment" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| var TermSeparator byte = 0xff | var TermSeparator byte = 0xff | ||||||
|  |  | ||||||
| var TermSeparatorSplitSlice = []byte{TermSeparator} | var TermSeparatorSplitSlice = []byte{TermSeparator} | ||||||
|  |  | ||||||
| type SegmentDictionarySnapshot struct { |  | ||||||
| 	s *SegmentSnapshot |  | ||||||
| 	d segment.TermDictionary |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) { |  | ||||||
| 	// TODO: if except is non-nil, perhaps need to OR it with s.s.deleted? |  | ||||||
| 	return s.d.PostingsList(term, s.s.deleted) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator { |  | ||||||
| 	return s.d.Iterator() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator { |  | ||||||
| 	return s.d.PrefixIterator(prefix) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator { |  | ||||||
| 	return s.d.RangeIterator(start, end) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| type SegmentSnapshot struct { | type SegmentSnapshot struct { | ||||||
| 	id      uint64 | 	id      uint64 | ||||||
| 	segment segment.Segment | 	segment segment.Segment | ||||||
| 	deleted *roaring.Bitmap | 	deleted *roaring.Bitmap | ||||||
|  | 	creator string | ||||||
|  |  | ||||||
| 	cachedDocs *cachedDocs | 	cachedDocs *cachedDocs | ||||||
| } | } | ||||||
| @@ -83,8 +66,11 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel | |||||||
| 	return s.segment.VisitDocument(num, visitor) | 	return s.segment.VisitDocument(num, visitor) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *SegmentSnapshot) Count() uint64 { | func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) { | ||||||
|  | 	return s.segment.DocID(num) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *SegmentSnapshot) Count() uint64 { | ||||||
| 	rv := s.segment.Count() | 	rv := s.segment.Count() | ||||||
| 	if s.deleted != nil { | 	if s.deleted != nil { | ||||||
| 		rv -= s.deleted.GetCardinality() | 		rv -= s.deleted.GetCardinality() | ||||||
| @@ -92,17 +78,6 @@ func (s *SegmentSnapshot) Count() uint64 { | |||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) { |  | ||||||
| 	d, err := s.segment.Dictionary(field) |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} |  | ||||||
| 	return &SegmentDictionarySnapshot{ |  | ||||||
| 		s: s, |  | ||||||
| 		d: d, |  | ||||||
| 	}, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { | func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { | ||||||
| 	rv, err := s.segment.DocNumbers(docIDs) | 	rv, err := s.segment.DocNumbers(docIDs) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| @@ -114,7 +89,7 @@ func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) { | |||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // DocNumbersLive returns bitset containing doc numbers for all live docs | // DocNumbersLive returns a bitmap containing doc numbers for all live docs | ||||||
| func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap { | func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap { | ||||||
| 	rv := roaring.NewBitmap() | 	rv := roaring.NewBitmap() | ||||||
| 	rv.AddRange(0, s.segment.Count()) | 	rv.AddRange(0, s.segment.Count()) | ||||||
| @@ -128,36 +103,68 @@ func (s *SegmentSnapshot) Fields() []string { | |||||||
| 	return s.segment.Fields() | 	return s.segment.Fields() | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *SegmentSnapshot) Size() (rv int) { | ||||||
|  | 	rv = s.segment.Size() | ||||||
|  | 	if s.deleted != nil { | ||||||
|  | 		rv += int(s.deleted.GetSizeInBytes()) | ||||||
|  | 	} | ||||||
|  | 	rv += s.cachedDocs.Size() | ||||||
|  | 	return | ||||||
|  | } | ||||||
|  |  | ||||||
| type cachedFieldDocs struct { | type cachedFieldDocs struct { | ||||||
|  | 	m       sync.Mutex | ||||||
| 	readyCh chan struct{}     // closed when the cachedFieldDocs.docs is ready to be used. | 	readyCh chan struct{}     // closed when the cachedFieldDocs.docs is ready to be used. | ||||||
| 	err     error             // Non-nil if there was an error when preparing this cachedFieldDocs. | 	err     error             // Non-nil if there was an error when preparing this cachedFieldDocs. | ||||||
| 	docs    map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. | 	docs    map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF. | ||||||
|  | 	size    uint64 | ||||||
| } | } | ||||||
|  |  | ||||||
| func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { | func (cfd *cachedFieldDocs) Size() int { | ||||||
| 	defer close(cfd.readyCh) | 	var rv int | ||||||
|  | 	cfd.m.Lock() | ||||||
|  | 	for _, entry := range cfd.docs { | ||||||
|  | 		rv += 8 /* size of uint64 */ + len(entry) | ||||||
|  | 	} | ||||||
|  | 	cfd.m.Unlock() | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) { | ||||||
|  | 	cfd.m.Lock() | ||||||
|  | 	defer func() { | ||||||
|  | 		close(cfd.readyCh) | ||||||
|  | 		cfd.m.Unlock() | ||||||
|  | 	}() | ||||||
|  |  | ||||||
|  | 	cfd.size += uint64(size.SizeOfUint64) /* size field */ | ||||||
| 	dict, err := ss.segment.Dictionary(field) | 	dict, err := ss.segment.Dictionary(field) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		cfd.err = err | 		cfd.err = err | ||||||
| 		return | 		return | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	var postings segment.PostingsList | ||||||
|  | 	var postingsItr segment.PostingsIterator | ||||||
|  |  | ||||||
| 	dictItr := dict.Iterator() | 	dictItr := dict.Iterator() | ||||||
| 	next, err := dictItr.Next() | 	next, err := dictItr.Next() | ||||||
| 	for err == nil && next != nil { | 	for err == nil && next != nil { | ||||||
| 		postings, err1 := dict.PostingsList(next.Term, nil) | 		var err1 error | ||||||
|  | 		postings, err1 = dict.PostingsList([]byte(next.Term), nil, postings) | ||||||
| 		if err1 != nil { | 		if err1 != nil { | ||||||
| 			cfd.err = err1 | 			cfd.err = err1 | ||||||
| 			return | 			return | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		postingsItr := postings.Iterator() | 		cfd.size += uint64(size.SizeOfUint64) /* map key */ | ||||||
|  | 		postingsItr = postings.Iterator(false, false, false, postingsItr) | ||||||
| 		nextPosting, err2 := postingsItr.Next() | 		nextPosting, err2 := postingsItr.Next() | ||||||
| 		for err2 == nil && nextPosting != nil { | 		for err2 == nil && nextPosting != nil { | ||||||
| 			docNum := nextPosting.Number() | 			docNum := nextPosting.Number() | ||||||
| 			cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) | 			cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...) | ||||||
| 			cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) | 			cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator) | ||||||
|  | 			cfd.size += uint64(len(next.Term) + 1) // map value | ||||||
| 			nextPosting, err2 = postingsItr.Next() | 			nextPosting, err2 = postingsItr.Next() | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| @@ -178,10 +185,12 @@ func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) { | |||||||
| type cachedDocs struct { | type cachedDocs struct { | ||||||
| 	m     sync.Mutex                  // As the cache is asynchronously prepared, need a lock | 	m     sync.Mutex                  // As the cache is asynchronously prepared, need a lock | ||||||
| 	cache map[string]*cachedFieldDocs // Keyed by field | 	cache map[string]*cachedFieldDocs // Keyed by field | ||||||
|  | 	size  uint64 | ||||||
| } | } | ||||||
|  |  | ||||||
| func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { | func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error { | ||||||
| 	c.m.Lock() | 	c.m.Lock() | ||||||
|  |  | ||||||
| 	if c.cache == nil { | 	if c.cache == nil { | ||||||
| 		c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields())) | 		c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields())) | ||||||
| 	} | 	} | ||||||
| @@ -194,7 +203,7 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e | |||||||
| 				docs:    make(map[uint64][]byte), | 				docs:    make(map[uint64][]byte), | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			go c.cache[field].prepareFields(field, ss) | 			go c.cache[field].prepareField(field, ss) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -209,21 +218,62 @@ func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) e | |||||||
| 		c.m.Lock() | 		c.m.Lock() | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	c.updateSizeLOCKED() | ||||||
|  |  | ||||||
| 	c.m.Unlock() | 	c.m.Unlock() | ||||||
| 	return nil | 	return nil | ||||||
| } | } | ||||||
|  |  | ||||||
| func (c *cachedDocs) sizeInBytes() uint64 { | // hasFields returns true if the cache has all the given fields | ||||||
| 	sizeInBytes := 0 | func (c *cachedDocs) hasFields(fields []string) bool { | ||||||
| 	c.m.Lock() | 	c.m.Lock() | ||||||
| 	for k, v := range c.cache { // cachedFieldDocs | 	for _, field := range fields { | ||||||
| 		sizeInBytes += len(k) | 		if _, exists := c.cache[field]; !exists { | ||||||
| 		if v != nil { | 			c.m.Unlock() | ||||||
| 			for _, entry := range v.docs { // docs | 			return false // found a field not in cache | ||||||
| 				sizeInBytes += 8 /* size of uint64 */ + len(entry) |  | ||||||
| 			} |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	c.m.Unlock() | 	c.m.Unlock() | ||||||
| 	return uint64(sizeInBytes) | 	return true | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cachedDocs) Size() int { | ||||||
|  | 	return int(atomic.LoadUint64(&c.size)) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cachedDocs) updateSizeLOCKED() { | ||||||
|  | 	sizeInBytes := 0 | ||||||
|  | 	for k, v := range c.cache { // cachedFieldDocs | ||||||
|  | 		sizeInBytes += len(k) | ||||||
|  | 		if v != nil { | ||||||
|  | 			sizeInBytes += v.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	atomic.StoreUint64(&c.size, uint64(sizeInBytes)) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (c *cachedDocs) visitDoc(localDocNum uint64, | ||||||
|  | 	fields []string, visitor index.DocumentFieldTermVisitor) { | ||||||
|  | 	c.m.Lock() | ||||||
|  |  | ||||||
|  | 	for _, field := range fields { | ||||||
|  | 		if cachedFieldDocs, exists := c.cache[field]; exists { | ||||||
|  | 			c.m.Unlock() | ||||||
|  | 			<-cachedFieldDocs.readyCh | ||||||
|  | 			c.m.Lock() | ||||||
|  |  | ||||||
|  | 			if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists { | ||||||
|  | 				for { | ||||||
|  | 					i := bytes.Index(tlist, TermSeparatorSplitSlice) | ||||||
|  | 					if i < 0 { | ||||||
|  | 						break | ||||||
|  | 					} | ||||||
|  | 					visitor(field, tlist[0:i]) | ||||||
|  | 					tlist = tlist[i+1:] | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	c.m.Unlock() | ||||||
| } | } | ||||||
|   | |||||||
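
Besides dropping the now-unused SegmentDictionarySnapshot wrapper, this file adds memory accounting (Size() on SegmentSnapshot, cachedFieldDocs, and cachedDocs, with the cached total updated under the lock and read atomically) and a visitDoc helper that replays a document's cached terms by splitting the 0xff-delimited byte list. The split loop in isolation, as a standalone sketch:

	package main

	import (
		"bytes"
		"fmt"
	)

	var termSeparator = []byte{0xff}

	// visitTerms mirrors cachedDocs.visitDoc: one document's terms are stored
	// as a single []byte with a 0xff separator byte after every term.
	func visitTerms(tlist []byte, visit func(term []byte)) {
		for {
			i := bytes.Index(tlist, termSeparator)
			if i < 0 {
				return
			}
			visit(tlist[:i])
			tlist = tlist[i+1:]
		}
	}

	func main() {
		tlist := []byte("red\xffgreen\xff")
		visitTerms(tlist, func(t []byte) { fmt.Println(string(t)) })
	}
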
156  vendor/github.com/blevesearch/bleve/index/scorch/stats.go  (generated, vendored)
							| @@ -16,63 +16,125 @@ package scorch | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"encoding/json" | 	"encoding/json" | ||||||
| 	"io/ioutil" | 	"reflect" | ||||||
| 	"sync/atomic" | 	"sync/atomic" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // Stats tracks statistics about the index | // Stats tracks statistics about the index, fields that are | ||||||
|  | // prefixed like CurXxxx are gauges (can go up and down), | ||||||
|  | // and fields that are prefixed like TotXxxx are monotonically | ||||||
|  | // increasing counters. | ||||||
| type Stats struct { | type Stats struct { | ||||||
| 	updates, deletes, batches, errors uint64 | 	TotUpdates uint64 | ||||||
| 	analysisTime, indexTime           uint64 | 	TotDeletes uint64 | ||||||
| 	termSearchersStarted              uint64 |  | ||||||
| 	termSearchersFinished             uint64 | 	TotBatches        uint64 | ||||||
| 	numPlainTextBytesIndexed          uint64 | 	TotBatchesEmpty   uint64 | ||||||
| 	numItemsIntroduced                uint64 | 	TotBatchIntroTime uint64 | ||||||
| 	numItemsPersisted                 uint64 | 	MaxBatchIntroTime uint64 | ||||||
| 	i                                 *Scorch |  | ||||||
|  | 	CurRootEpoch       uint64 | ||||||
|  | 	LastPersistedEpoch uint64 | ||||||
|  | 	LastMergedEpoch    uint64 | ||||||
|  |  | ||||||
|  | 	TotOnErrors uint64 | ||||||
|  |  | ||||||
|  | 	TotAnalysisTime uint64 | ||||||
|  | 	TotIndexTime    uint64 | ||||||
|  |  | ||||||
|  | 	TotIndexedPlainTextBytes uint64 | ||||||
|  |  | ||||||
|  | 	TotTermSearchersStarted  uint64 | ||||||
|  | 	TotTermSearchersFinished uint64 | ||||||
|  |  | ||||||
|  | 	TotIntroduceLoop       uint64 | ||||||
|  | 	TotIntroduceSegmentBeg uint64 | ||||||
|  | 	TotIntroduceSegmentEnd uint64 | ||||||
|  | 	TotIntroducePersistBeg uint64 | ||||||
|  | 	TotIntroducePersistEnd uint64 | ||||||
|  | 	TotIntroduceMergeBeg   uint64 | ||||||
|  | 	TotIntroduceMergeEnd   uint64 | ||||||
|  | 	TotIntroduceRevertBeg  uint64 | ||||||
|  | 	TotIntroduceRevertEnd  uint64 | ||||||
|  |  | ||||||
|  | 	TotIntroducedItems         uint64 | ||||||
|  | 	TotIntroducedSegmentsBatch uint64 | ||||||
|  | 	TotIntroducedSegmentsMerge uint64 | ||||||
|  |  | ||||||
|  | 	TotPersistLoopBeg          uint64 | ||||||
|  | 	TotPersistLoopErr          uint64 | ||||||
|  | 	TotPersistLoopProgress     uint64 | ||||||
|  | 	TotPersistLoopWait         uint64 | ||||||
|  | 	TotPersistLoopWaitNotified uint64 | ||||||
|  | 	TotPersistLoopEnd          uint64 | ||||||
|  |  | ||||||
|  | 	TotPersistedItems    uint64 | ||||||
|  | 	TotItemsToPersist    uint64 | ||||||
|  | 	TotPersistedSegments uint64 | ||||||
|  |  | ||||||
|  | 	TotPersisterSlowMergerPause  uint64 | ||||||
|  | 	TotPersisterSlowMergerResume uint64 | ||||||
|  |  | ||||||
|  | 	TotPersisterNapPauseCompleted uint64 | ||||||
|  | 	TotPersisterMergerNapBreak    uint64 | ||||||
|  |  | ||||||
|  | 	TotFileMergeLoopBeg uint64 | ||||||
|  | 	TotFileMergeLoopErr uint64 | ||||||
|  | 	TotFileMergeLoopEnd uint64 | ||||||
|  |  | ||||||
|  | 	TotFileMergePlan     uint64 | ||||||
|  | 	TotFileMergePlanErr  uint64 | ||||||
|  | 	TotFileMergePlanNone uint64 | ||||||
|  | 	TotFileMergePlanOk   uint64 | ||||||
|  |  | ||||||
|  | 	TotFileMergePlanTasks              uint64 | ||||||
|  | 	TotFileMergePlanTasksDone          uint64 | ||||||
|  | 	TotFileMergePlanTasksErr           uint64 | ||||||
|  | 	TotFileMergePlanTasksSegments      uint64 | ||||||
|  | 	TotFileMergePlanTasksSegmentsEmpty uint64 | ||||||
|  |  | ||||||
|  | 	TotFileMergeSegmentsEmpty uint64 | ||||||
|  | 	TotFileMergeSegments      uint64 | ||||||
|  | 	TotFileSegmentsAtRoot     uint64 | ||||||
|  | 	TotFileMergeWrittenBytes  uint64 | ||||||
|  |  | ||||||
|  | 	TotFileMergeZapBeg  uint64 | ||||||
|  | 	TotFileMergeZapEnd  uint64 | ||||||
|  | 	TotFileMergeZapTime uint64 | ||||||
|  | 	MaxFileMergeZapTime uint64 | ||||||
|  |  | ||||||
|  | 	TotFileMergeIntroductions        uint64 | ||||||
|  | 	TotFileMergeIntroductionsDone    uint64 | ||||||
|  | 	TotFileMergeIntroductionsSkipped uint64 | ||||||
|  |  | ||||||
|  | 	TotMemMergeBeg          uint64 | ||||||
|  | 	TotMemMergeErr          uint64 | ||||||
|  | 	TotMemMergeDone         uint64 | ||||||
|  | 	TotMemMergeZapBeg       uint64 | ||||||
|  | 	TotMemMergeZapEnd       uint64 | ||||||
|  | 	TotMemMergeZapTime      uint64 | ||||||
|  | 	MaxMemMergeZapTime      uint64 | ||||||
|  | 	TotMemMergeSegments     uint64 | ||||||
|  | 	TotMemorySegmentsAtRoot uint64 | ||||||
| } | } | ||||||
|  |  | ||||||
| func (s *Stats) statsMap() (map[string]interface{}, error) { | // atomically populates the returned map | ||||||
|  | func (s *Stats) ToMap() map[string]interface{} { | ||||||
| 	m := map[string]interface{}{} | 	m := map[string]interface{}{} | ||||||
| 	m["updates"] = atomic.LoadUint64(&s.updates) | 	sve := reflect.ValueOf(s).Elem() | ||||||
| 	m["deletes"] = atomic.LoadUint64(&s.deletes) | 	svet := sve.Type() | ||||||
| 	m["batches"] = atomic.LoadUint64(&s.batches) | 	for i := 0; i < svet.NumField(); i++ { | ||||||
| 	m["errors"] = atomic.LoadUint64(&s.errors) | 		svef := sve.Field(i) | ||||||
| 	m["analysis_time"] = atomic.LoadUint64(&s.analysisTime) | 		if svef.CanAddr() { | ||||||
| 	m["index_time"] = atomic.LoadUint64(&s.indexTime) | 			svefp := svef.Addr().Interface() | ||||||
| 	m["term_searchers_started"] = atomic.LoadUint64(&s.termSearchersStarted) | 			m[svet.Field(i).Name] = atomic.LoadUint64(svefp.(*uint64)) | ||||||
| 	m["term_searchers_finished"] = atomic.LoadUint64(&s.termSearchersFinished) |  | ||||||
| 	m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&s.numPlainTextBytesIndexed) |  | ||||||
| 	m["num_items_introduced"] = atomic.LoadUint64(&s.numItemsIntroduced) |  | ||||||
| 	m["num_items_persisted"] = atomic.LoadUint64(&s.numItemsPersisted) |  | ||||||
|  |  | ||||||
| 	if s.i.path != "" { |  | ||||||
| 		finfos, err := ioutil.ReadDir(s.i.path) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		var numFilesOnDisk, numBytesUsedDisk uint64 |  | ||||||
|  |  | ||||||
| 		for _, finfo := range finfos { |  | ||||||
| 			if !finfo.IsDir() { |  | ||||||
| 				numBytesUsedDisk += uint64(finfo.Size()) |  | ||||||
| 				numFilesOnDisk++ |  | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  | 	return m | ||||||
| 		m["num_bytes_used_disk"] = numBytesUsedDisk |  | ||||||
| 		m["num_files_on_disk"] = numFilesOnDisk |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return m, nil |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // MarshalJSON implements json.Marshaler | // MarshalJSON implements json.Marshaler, and in contrast to standard | ||||||
|  | // json marshaling provides atomic safety | ||||||
| func (s *Stats) MarshalJSON() ([]byte, error) { | func (s *Stats) MarshalJSON() ([]byte, error) { | ||||||
| 	m, err := s.statsMap() | 	return json.Marshal(s.ToMap()) | ||||||
| 	if err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} |  | ||||||
| 	return json.Marshal(m) |  | ||||||
| } | } | ||||||
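
Stats becomes a flat struct of exported uint64 gauges and counters, and ToMap() snapshots it generically: it reflects over the fields and does one atomic.LoadUint64 per field, which is safe precisely because every field is an addressable uint64 reached through a pointer. The same pattern on a toy struct:

	package main

	import (
		"fmt"
		"reflect"
		"sync/atomic"
	)

	type counters struct {
		TotOps uint64
		CurLag uint64
	}

	// toMap atomically snapshots every uint64 field, as Stats.ToMap does.
	func toMap(c *counters) map[string]uint64 {
		m := map[string]uint64{}
		v := reflect.ValueOf(c).Elem()
		for i := 0; i < v.NumField(); i++ {
			m[v.Type().Field(i).Name] = atomic.LoadUint64(v.Field(i).Addr().Interface().(*uint64))
		}
		return m
	}

	func main() {
		c := &counters{}
		atomic.AddUint64(&c.TotOps, 3)
		fmt.Println(toMap(c)) // map[CurLag:0 TotOps:3]
	}
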
2  vendor/github.com/blevesearch/bleve/index/store/boltdb/iterator.go  (generated, vendored)
							| @@ -17,7 +17,7 @@ package boltdb | |||||||
| import ( | import ( | ||||||
| 	"bytes" | 	"bytes" | ||||||
|  |  | ||||||
| 	"github.com/boltdb/bolt" | 	bolt "github.com/etcd-io/bbolt" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| type Iterator struct { | type Iterator struct { | ||||||
|   | |||||||
2  vendor/github.com/blevesearch/bleve/index/store/boltdb/reader.go  (generated, vendored)
							| @@ -16,7 +16,7 @@ package boltdb | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"github.com/blevesearch/bleve/index/store" | 	"github.com/blevesearch/bleve/index/store" | ||||||
| 	"github.com/boltdb/bolt" | 	bolt "github.com/etcd-io/bbolt" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| type Reader struct { | type Reader struct { | ||||||
|   | |||||||
8  vendor/github.com/blevesearch/bleve/index/store/boltdb/store.go  (generated, vendored)
							| @@ -30,7 +30,7 @@ import ( | |||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index/store" | 	"github.com/blevesearch/bleve/index/store" | ||||||
| 	"github.com/blevesearch/bleve/registry" | 	"github.com/blevesearch/bleve/registry" | ||||||
| 	"github.com/boltdb/bolt" | 	bolt "github.com/etcd-io/bbolt" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| const ( | const ( | ||||||
| @@ -74,6 +74,12 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, | |||||||
| 		bo.ReadOnly = ro | 		bo.ReadOnly = ro | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	if initialMmapSize, ok := config["initialMmapSize"].(int); ok { | ||||||
|  | 		bo.InitialMmapSize = initialMmapSize | ||||||
|  | 	} else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok { | ||||||
|  | 		bo.InitialMmapSize = int(initialMmapSize) | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	db, err := bolt.Open(path, 0600, bo) | 	db, err := bolt.Open(path, 0600, bo) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
|   | |||||||
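
The new initialMmapSize option is read twice on purpose: a config map built in Go code carries an int, but one decoded from JSON carries a float64, because encoding/json decodes every number into float64 when the target is interface{}. A sketch of the same defensive read on a generic option map:

	package main

	import "fmt"

	// intOption accepts a numeric option whether it was set as a Go int
	// or arrived as a JSON-decoded float64, as store.New now does.
	func intOption(config map[string]interface{}, key string) (int, bool) {
		switch v := config[key].(type) {
		case int:
			return v, true
		case float64:
			return int(v), true
		}
		return 0, false
	}

	func main() {
		cfg := map[string]interface{}{"initialMmapSize": float64(1 << 30)}
		fmt.Println(intOption(cfg, "initialMmapSize")) // 1073741824 true
	}
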
23  vendor/github.com/blevesearch/bleve/index/upsidedown/index_reader.go  (generated, vendored)
							| @@ -15,11 +15,20 @@ | |||||||
| package upsidedown | package upsidedown | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/document" | 	"github.com/blevesearch/bleve/document" | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/store" | 	"github.com/blevesearch/bleve/index/store" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeIndexReader int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var ir IndexReader | ||||||
|  | 	reflectStaticSizeIndexReader = int(reflect.TypeOf(ir).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type IndexReader struct { | type IndexReader struct { | ||||||
| 	index    *UpsideDownCouch | 	index    *UpsideDownCouch | ||||||
| 	kvreader store.KVReader | 	kvreader store.KVReader | ||||||
| @@ -201,3 +210,17 @@ func incrementBytes(in []byte) []byte { | |||||||
| 	} | 	} | ||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (i *IndexReader) DocValueReader(fields []string) (index.DocValueReader, error) { | ||||||
|  | 	return &DocValueReader{i: i, fields: fields}, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type DocValueReader struct { | ||||||
|  | 	i      *IndexReader | ||||||
|  | 	fields []string | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, | ||||||
|  | 	visitor index.DocumentFieldTermVisitor) error { | ||||||
|  | 	return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor) | ||||||
|  | } | ||||||
|   | |||||||
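
upsidedown gains the index.DocValueReader interface by wrapping its existing DocumentVisitFieldTerms in a tiny adapter: the field set is fixed once when the reader is created, then reused for every visited document. The adapter shape, with the bleve types reduced to plain functions for illustration only:

	// Illustrative only: bind the fields up front, delegate per document,
	// which is exactly the shape of the DocValueReader added above.
	type docValueReader struct {
		fields []string
		visit  func(id []byte, fields []string, each func(field string, term []byte)) error
	}

	func (dvr *docValueReader) VisitDocValues(id []byte, each func(field string, term []byte)) error {
		return dvr.visit(id, dvr.fields, each)
	}
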
39  vendor/github.com/blevesearch/bleve/index/upsidedown/reader.go  (generated, vendored)
							| @@ -16,13 +16,27 @@ package upsidedown | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"bytes" | 	"bytes" | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
| 	"sync/atomic" | 	"sync/atomic" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/index/store" | 	"github.com/blevesearch/bleve/index/store" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeUpsideDownCouchTermFieldReader int | ||||||
|  | var reflectStaticSizeUpsideDownCouchDocIDReader int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var tfr UpsideDownCouchTermFieldReader | ||||||
|  | 	reflectStaticSizeUpsideDownCouchTermFieldReader = | ||||||
|  | 		int(reflect.TypeOf(tfr).Size()) | ||||||
|  | 	var cdr UpsideDownCouchDocIDReader | ||||||
|  | 	reflectStaticSizeUpsideDownCouchDocIDReader = | ||||||
|  | 		int(reflect.TypeOf(cdr).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type UpsideDownCouchTermFieldReader struct { | type UpsideDownCouchTermFieldReader struct { | ||||||
| 	count              uint64 | 	count              uint64 | ||||||
| 	indexReader        *IndexReader | 	indexReader        *IndexReader | ||||||
| @@ -35,6 +49,19 @@ type UpsideDownCouchTermFieldReader struct { | |||||||
| 	includeTermVectors bool | 	includeTermVectors bool | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (r *UpsideDownCouchTermFieldReader) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr + | ||||||
|  | 		len(r.term) + | ||||||
|  | 		r.tfrPrealloc.Size() + | ||||||
|  | 		len(r.keyBuf) | ||||||
|  |  | ||||||
|  | 	if r.tfrNext != nil { | ||||||
|  | 		sizeInBytes += r.tfrNext.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { | func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { | ||||||
| 	bufNeeded := termFrequencyRowKeySize(term, nil) | 	bufNeeded := termFrequencyRowKeySize(term, nil) | ||||||
| 	if bufNeeded < dictionaryRowKeySize(term) { | 	if bufNeeded < dictionaryRowKeySize(term) { | ||||||
| @@ -174,8 +201,18 @@ type UpsideDownCouchDocIDReader struct { | |||||||
| 	onlyMode    bool | 	onlyMode    bool | ||||||
| } | } | ||||||
|  |  | ||||||
| func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { | func (r *UpsideDownCouchDocIDReader) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeUpsideDownCouchDocIDReader + | ||||||
|  | 		reflectStaticSizeIndexReader + size.SizeOfPtr | ||||||
|  |  | ||||||
|  | 	for _, entry := range r.only { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(entry) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) { | ||||||
| 	startBytes := []byte{0x0} | 	startBytes := []byte{0x0} | ||||||
| 	endBytes := []byte{0xff} | 	endBytes := []byte{0xff} | ||||||
|  |  | ||||||
|   | |||||||
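
Both reader types pick up Size() methods for the new memory accounting. The fixed struct overhead is computed once, at package init, via reflect.TypeOf(x).Size(); per instance, only the variable-length members (buffers, the only list) are added on top. The pattern in isolation:

	package main

	import (
		"fmt"
		"reflect"
	)

	type reader struct {
		term   []byte
		keyBuf []byte
	}

	// computed once: the static footprint of the struct header itself
	var reflectStaticSizeReader = int(reflect.TypeOf(reader{}).Size())

	// size adds the variable-length parts to the precomputed overhead
	func (r *reader) size() int {
		return reflectStaticSizeReader + len(r.term) + len(r.keyBuf)
	}

	func main() {
		r := &reader{term: []byte("bleve")}
		fmt.Println(r.size())
	}
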
31  vendor/github.com/blevesearch/bleve/index/upsidedown/row.go  (generated, vendored)
							| @@ -20,10 +20,22 @@ import ( | |||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"io" | 	"io" | ||||||
| 	"math" | 	"math" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| 	"github.com/golang/protobuf/proto" | 	"github.com/golang/protobuf/proto" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeTermFrequencyRow int | ||||||
|  | var reflectStaticSizeTermVector int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var tfr TermFrequencyRow | ||||||
|  | 	reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(tfr).Size()) | ||||||
|  | 	var tv TermVector | ||||||
|  | 	reflectStaticSizeTermVector = int(reflect.TypeOf(tv).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| const ByteSeparator byte = 0xff | const ByteSeparator byte = 0xff | ||||||
|  |  | ||||||
| type UpsideDownCouchRowStream chan UpsideDownCouchRow | type UpsideDownCouchRowStream chan UpsideDownCouchRow | ||||||
| @@ -358,6 +370,11 @@ type TermVector struct { | |||||||
| 	end            uint64 | 	end            uint64 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (tv *TermVector) Size() int { | ||||||
|  | 	return reflectStaticSizeTermVector + size.SizeOfPtr + | ||||||
|  | 		len(tv.arrayPositions)*size.SizeOfUint64 | ||||||
|  | } | ||||||
|  |  | ||||||
| func (tv *TermVector) String() string { | func (tv *TermVector) String() string { | ||||||
| 	return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions) | 	return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions) | ||||||
| } | } | ||||||
| @@ -371,6 +388,18 @@ type TermFrequencyRow struct { | |||||||
| 	field   uint16 | 	field   uint16 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (tfr *TermFrequencyRow) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeTermFrequencyRow + | ||||||
|  | 		len(tfr.term) + | ||||||
|  | 		len(tfr.doc) | ||||||
|  |  | ||||||
|  | 	for _, entry := range tfr.vectors { | ||||||
|  | 		sizeInBytes += entry.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (tfr *TermFrequencyRow) Term() []byte { | func (tfr *TermFrequencyRow) Term() []byte { | ||||||
| 	return tfr.term | 	return tfr.term | ||||||
| } | } | ||||||
| @@ -555,7 +584,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error { | |||||||
|  |  | ||||||
| func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error { | func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error { | ||||||
| 	tfr.doc = key[3+len(term)+1:] | 	tfr.doc = key[3+len(term)+1:] | ||||||
| 	if len(tfr.doc) <= 0 { | 	if len(tfr.doc) == 0 { | ||||||
| 		return fmt.Errorf("invalid term frequency key, empty docid") | 		return fmt.Errorf("invalid term frequency key, empty docid") | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
9  vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go  (generated, vendored)
							| @@ -775,7 +775,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis. | |||||||
| } | } | ||||||
|  |  | ||||||
| func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector { | func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector { | ||||||
| 	if len(in) <= 0 { | 	if len(in) == 0 { | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| @@ -810,6 +810,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	if len(batch.IndexOps) > 0 { | ||||||
| 		go func() { | 		go func() { | ||||||
| 			for _, doc := range batch.IndexOps { | 			for _, doc := range batch.IndexOps { | ||||||
| 				if doc != nil { | 				if doc != nil { | ||||||
| @@ -819,6 +820,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		}() | 		}() | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// retrieve back index rows concurrent with analysis | 	// retrieve back index rows concurrent with analysis | ||||||
| 	docBackIndexRowErr := error(nil) | 	docBackIndexRowErr := error(nil) | ||||||
| @@ -958,6 +960,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { | |||||||
| 	} else { | 	} else { | ||||||
| 		atomic.AddUint64(&udc.stats.errors, 1) | 		atomic.AddUint64(&udc.stats.errors, 1) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	persistedCallback := batch.PersistedCallback() | ||||||
|  | 	if persistedCallback != nil { | ||||||
|  | 		persistedCallback(err) | ||||||
|  | 	} | ||||||
| 	return | 	return | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
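
Two behavioral fixes land in Batch(): the analysis goroutine is only spawned when the batch actually contains index operations (previously an empty batch still paid for a goroutine), and any persisted callback registered on the batch is invoked with the final error once the outcome is known. A hedged usage sketch; the setter name is assumed as the pair of the PersistedCallback getter this diff calls:

	package main

	import (
		"log"

		"github.com/blevesearch/bleve/index"
	)

	func newLoggedBatch() *index.Batch {
		b := index.NewBatch()
		// assumed setter paired with the PersistedCallback() getter above
		b.SetPersistedCallback(func(err error) {
			if err != nil {
				log.Printf("batch not persisted: %v", err)
			}
		})
		return b
	}
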
1  vendor/github.com/blevesearch/bleve/index_alias_impl.go  (generated, vendored)
							| @@ -433,6 +433,7 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest { | |||||||
| 		Explain:          req.Explain, | 		Explain:          req.Explain, | ||||||
| 		Sort:             req.Sort.Copy(), | 		Sort:             req.Sort.Copy(), | ||||||
| 		IncludeLocations: req.IncludeLocations, | 		IncludeLocations: req.IncludeLocations, | ||||||
|  | 		Score:            req.Score, | ||||||
| 	} | 	} | ||||||
| 	return &rv | 	return &rv | ||||||
| } | } | ||||||
|   | |||||||
105  vendor/github.com/blevesearch/bleve/index_impl.go  (generated, vendored)
							| @@ -50,6 +50,12 @@ const storePath = "store" | |||||||
|  |  | ||||||
| var mappingInternalKey = []byte("_mapping") | var mappingInternalKey = []byte("_mapping") | ||||||
|  |  | ||||||
|  | const SearchQueryStartCallbackKey = "_search_query_start_callback_key" | ||||||
|  | const SearchQueryEndCallbackKey = "_search_query_end_callback_key" | ||||||
|  |  | ||||||
|  | type SearchQueryStartCallbackFn func(size uint64) error | ||||||
|  | type SearchQueryEndCallbackFn func(size uint64) error | ||||||
|  |  | ||||||
| func indexStorePath(path string) string { | func indexStorePath(path string) string { | ||||||
| 	return path + string(os.PathSeparator) + storePath | 	return path + string(os.PathSeparator) + storePath | ||||||
| } | } | ||||||
| @@ -362,6 +368,68 @@ func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) { | |||||||
| 	return i.SearchInContext(context.Background(), req) | 	return i.SearchInContext(context.Background(), req) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | var documentMatchEmptySize int | ||||||
|  | var searchContextEmptySize int | ||||||
|  | var facetResultEmptySize int | ||||||
|  | var documentEmptySize int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var dm search.DocumentMatch | ||||||
|  | 	documentMatchEmptySize = dm.Size() | ||||||
|  |  | ||||||
|  | 	var sc search.SearchContext | ||||||
|  | 	searchContextEmptySize = sc.Size() | ||||||
|  |  | ||||||
|  | 	var fr search.FacetResult | ||||||
|  | 	facetResultEmptySize = fr.Size() | ||||||
|  |  | ||||||
|  | 	var d document.Document | ||||||
|  | 	documentEmptySize = d.Size() | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // memNeededForSearch is a helper function that returns an estimate of RAM | ||||||
|  | // needed to execute a search request. | ||||||
|  | func memNeededForSearch(req *SearchRequest, | ||||||
|  | 	searcher search.Searcher, | ||||||
|  | 	topnCollector *collector.TopNCollector) uint64 { | ||||||
|  |  | ||||||
|  | 	backingSize := req.Size + req.From + 1 | ||||||
|  | 	if req.Size+req.From > collector.PreAllocSizeSkipCap { | ||||||
|  | 		backingSize = collector.PreAllocSizeSkipCap + 1 | ||||||
|  | 	} | ||||||
|  | 	numDocMatches := backingSize + searcher.DocumentMatchPoolSize() | ||||||
|  |  | ||||||
|  | 	estimate := 0 | ||||||
|  |  | ||||||
|  | 	// overhead, size in bytes from collector | ||||||
|  | 	estimate += topnCollector.Size() | ||||||
|  |  | ||||||
|  | 	// pre-allocing DocumentMatchPool | ||||||
|  | 	estimate += searchContextEmptySize + numDocMatches*documentMatchEmptySize | ||||||
|  |  | ||||||
|  | 	// searcher overhead | ||||||
|  | 	estimate += searcher.Size() | ||||||
|  |  | ||||||
|  | 	// overhead from results, lowestMatchOutsideResults | ||||||
|  | 	estimate += (numDocMatches + 1) * documentMatchEmptySize | ||||||
|  |  | ||||||
|  | 	// additional overhead from SearchResult | ||||||
|  | 	estimate += reflectStaticSizeSearchResult + reflectStaticSizeSearchStatus | ||||||
|  |  | ||||||
|  | 	// overhead from facet results | ||||||
|  | 	if req.Facets != nil { | ||||||
|  | 		estimate += len(req.Facets) * facetResultEmptySize | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// highlighting, store | ||||||
|  | 	if len(req.Fields) > 0 || req.Highlight != nil { | ||||||
|  | 		// Size + From => number of hits | ||||||
|  | 		estimate += (req.Size + req.From) * documentEmptySize | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return uint64(estimate) | ||||||
|  | } | ||||||
|  |  | ||||||
| // SearchInContext executes a search request operation within the provided | // SearchInContext executes a search request operation within the provided | ||||||
| // Context. Returns a SearchResult object or an error. | // Context. Returns a SearchResult object or an error. | ||||||
| func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) { | func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) { | ||||||
| @@ -390,6 +458,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||||||
| 	searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ | 	searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ | ||||||
| 		Explain:            req.Explain, | 		Explain:            req.Explain, | ||||||
| 		IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, | 		IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, | ||||||
|  | 		Score:              req.Score, | ||||||
| 	}) | 	}) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| @@ -428,6 +497,24 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||||||
| 		collector.SetFacetsBuilder(facetsBuilder) | 		collector.SetFacetsBuilder(facetsBuilder) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	memNeeded := memNeededForSearch(req, searcher, collector) | ||||||
|  | 	if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil { | ||||||
|  | 		if cbF, ok := cb.(SearchQueryStartCallbackFn); ok { | ||||||
|  | 			err = cbF(memNeeded) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if cb := ctx.Value(SearchQueryEndCallbackKey); cb != nil { | ||||||
|  | 		if cbF, ok := cb.(SearchQueryEndCallbackFn); ok { | ||||||
|  | 			defer func() { | ||||||
|  | 				_ = cbF(memNeeded) | ||||||
|  | 			}() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	err = collector.Collect(ctx, searcher, indexReader) | 	err = collector.Collect(ctx, searcher, indexReader) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| @@ -459,7 +546,8 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||||||
| 			doc, err := indexReader.Document(hit.ID) | 			doc, err := indexReader.Document(hit.ID) | ||||||
| 			if err == nil && doc != nil { | 			if err == nil && doc != nil { | ||||||
| 				if len(req.Fields) > 0 { | 				if len(req.Fields) > 0 { | ||||||
| 					for _, f := range req.Fields { | 					fieldsToLoad := deDuplicate(req.Fields) | ||||||
|  | 					for _, f := range fieldsToLoad { | ||||||
| 						for _, docF := range doc.Fields { | 						for _, docF := range doc.Fields { | ||||||
| 							if f == "*" || docF.Name() == f { | 							if f == "*" || docF.Name() == f { | ||||||
| 								var value interface{} | 								var value interface{} | ||||||
| @@ -533,9 +621,7 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr | |||||||
| 	return &SearchResult{ | 	return &SearchResult{ | ||||||
| 		Status: &SearchStatus{ | 		Status: &SearchStatus{ | ||||||
| 			Total:      1, | 			Total:      1, | ||||||
| 			Failed:     0, |  | ||||||
| 			Successful: 1, | 			Successful: 1, | ||||||
| 			Errors:     make(map[string]error), |  | ||||||
| 		}, | 		}, | ||||||
| 		Request:  req, | 		Request:  req, | ||||||
| 		Hits:     hits, | 		Hits:     hits, | ||||||
| @@ -755,3 +841,16 @@ func (f *indexImplFieldDict) Close() error { | |||||||
| 	} | 	} | ||||||
| 	return f.indexReader.Close() | 	return f.indexReader.Close() | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // helper function to remove duplicate entries from slice of strings | ||||||
|  | func deDuplicate(fields []string) []string { | ||||||
|  | 	entries := make(map[string]struct{}) | ||||||
|  | 	ret := []string{} | ||||||
|  | 	for _, entry := range fields { | ||||||
|  | 		if _, exists := entries[entry]; !exists { | ||||||
|  | 			entries[entry] = struct{}{} | ||||||
|  | 			ret = append(ret, entry) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return ret | ||||||
|  | } | ||||||
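
Three additions to index_impl.go work together: memNeededForSearch estimates a query's RAM footprint from sizes precomputed at init (collector, searcher, document-match pool, facet and highlight overheads); SearchInContext then consults optional start/end callbacks stored on the context, letting an embedder refuse or meter a search by its estimate; and deDuplicate stops repeated entries in req.Fields from loading the same stored field twice. A sketch of budgeting through the start callback, using the exported names this diff introduces:

	package main

	import (
		"context"
		"fmt"

		"github.com/blevesearch/bleve"
	)

	func searchWithBudget(idx bleve.Index, req *bleve.SearchRequest, budget uint64) (*bleve.SearchResult, error) {
		cb := bleve.SearchQueryStartCallbackFn(func(size uint64) error {
			if size > budget {
				return fmt.Errorf("query needs ~%d bytes, budget is %d", size, budget)
			}
			return nil // allow the search to proceed
		})
		ctx := context.WithValue(context.Background(), bleve.SearchQueryStartCallbackKey, cb)
		return idx.SearchInContext(ctx, req)
	}
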
3  vendor/github.com/blevesearch/bleve/index_meta.go  (generated, vendored)
							| @@ -18,6 +18,7 @@ import ( | |||||||
| 	"encoding/json" | 	"encoding/json" | ||||||
| 	"io/ioutil" | 	"io/ioutil" | ||||||
| 	"os" | 	"os" | ||||||
|  | 	"path/filepath" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index/upsidedown" | 	"github.com/blevesearch/bleve/index/upsidedown" | ||||||
| ) | ) | ||||||
| @@ -92,5 +93,5 @@ func (i *indexMeta) Save(path string) (err error) { | |||||||
| } | } | ||||||
|  |  | ||||||
| func indexMetaPath(path string) string { | func indexMetaPath(path string) string { | ||||||
| 	return path + string(os.PathSeparator) + metaFilename | 	return filepath.Join(path, metaFilename) | ||||||
| } | } | ||||||
|   | |||||||
12  vendor/github.com/blevesearch/bleve/mapping/document.go  (generated, vendored)
							| @@ -42,7 +42,7 @@ type DocumentMapping struct { | |||||||
| 	Dynamic         bool                        `json:"dynamic"` | 	Dynamic         bool                        `json:"dynamic"` | ||||||
| 	Properties      map[string]*DocumentMapping `json:"properties,omitempty"` | 	Properties      map[string]*DocumentMapping `json:"properties,omitempty"` | ||||||
| 	Fields          []*FieldMapping             `json:"fields,omitempty"` | 	Fields          []*FieldMapping             `json:"fields,omitempty"` | ||||||
| 	DefaultAnalyzer string                      `json:"default_analyzer"` | 	DefaultAnalyzer string                      `json:"default_analyzer,omitempty"` | ||||||
|  |  | ||||||
| 	// StructTagKey overrides "json" when looking for field names in struct tags | 	// StructTagKey overrides "json" when looking for field names in struct tags | ||||||
| 	StructTagKey string `json:"struct_tag_key,omitempty"` | 	StructTagKey string `json:"struct_tag_key,omitempty"` | ||||||
| @@ -324,13 +324,17 @@ func (dm *DocumentMapping) defaultAnalyzerName(path []string) string { | |||||||
| } | } | ||||||
|  |  | ||||||
| func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) { | func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) { | ||||||
| 	// allow default "json" tag to be overriden | 	// allow default "json" tag to be overridden | ||||||
| 	structTagKey := dm.StructTagKey | 	structTagKey := dm.StructTagKey | ||||||
| 	if structTagKey == "" { | 	if structTagKey == "" { | ||||||
| 		structTagKey = "json" | 		structTagKey = "json" | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	val := reflect.ValueOf(data) | 	val := reflect.ValueOf(data) | ||||||
|  | 	if !val.IsValid() { | ||||||
|  | 		return | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	typ := val.Type() | 	typ := val.Type() | ||||||
| 	switch typ.Kind() { | 	switch typ.Kind() { | ||||||
| 	case reflect.Map: | 	case reflect.Map: | ||||||
| @@ -420,8 +424,12 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string, | |||||||
| 		if subDocMapping != nil { | 		if subDocMapping != nil { | ||||||
| 			// index by explicit mapping | 			// index by explicit mapping | ||||||
| 			for _, fieldMapping := range subDocMapping.Fields { | 			for _, fieldMapping := range subDocMapping.Fields { | ||||||
|  | 				if fieldMapping.Type == "geopoint" { | ||||||
|  | 					fieldMapping.processGeoPoint(property, pathString, path, indexes, context) | ||||||
|  | 				} else { | ||||||
| 					fieldMapping.processString(propertyValueString, pathString, path, indexes, context) | 					fieldMapping.processString(propertyValueString, pathString, path, indexes, context) | ||||||
| 				} | 				} | ||||||
|  | 			} | ||||||
| 		} else if closestDocMapping.Dynamic { | 		} else if closestDocMapping.Dynamic { | ||||||
| 			// automatic indexing behavior | 			// automatic indexing behavior | ||||||
|  |  | ||||||
|   | |||||||
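
Two robustness fixes here: walkDocument (and lookupPropertyPathPart in mapping/reflect.go below) now bails out when reflect.ValueOf returns the zero Value, which is what an untyped nil produces; without the guard, the following val.Type() call panics. And explicitly mapped geopoint fields are routed through processGeoPoint rather than being coerced through the string path. The nil guard in isolation:

	package main

	import (
		"fmt"
		"reflect"
	)

	func describe(data interface{}) {
		val := reflect.ValueOf(data)
		if !val.IsValid() { // true for an untyped nil interface
			fmt.Println("nil value, skipping")
			return
		}
		fmt.Println("kind:", val.Type().Kind())
	}

	func main() {
		describe(nil)              // nil value, skipping
		describe(map[string]int{}) // kind: map
	}
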
2  vendor/github.com/blevesearch/bleve/mapping/index.go  (generated, vendored)
							| @@ -320,8 +320,8 @@ func (im *IndexMappingImpl) determineType(data interface{}) string { | |||||||
| func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error { | func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error { | ||||||
| 	docType := im.determineType(data) | 	docType := im.determineType(data) | ||||||
| 	docMapping := im.mappingForType(docType) | 	docMapping := im.mappingForType(docType) | ||||||
| 	walkContext := im.newWalkContext(doc, docMapping) |  | ||||||
| 	if docMapping.Enabled { | 	if docMapping.Enabled { | ||||||
|  | 		walkContext := im.newWalkContext(doc, docMapping) | ||||||
| 		docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) | 		docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) | ||||||
|  |  | ||||||
| 		// see if the _all field was disabled | 		// see if the _all field was disabled | ||||||
|   | |||||||
3  vendor/github.com/blevesearch/bleve/mapping/reflect.go  (generated, vendored)
							| @@ -35,6 +35,9 @@ func lookupPropertyPath(data interface{}, path string) interface{} { | |||||||
|  |  | ||||||
| func lookupPropertyPathPart(data interface{}, part string) interface{} { | func lookupPropertyPathPart(data interface{}, part string) interface{} { | ||||||
| 	val := reflect.ValueOf(data) | 	val := reflect.ValueOf(data) | ||||||
|  | 	if !val.IsValid() { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
| 	typ := val.Type() | 	typ := val.Type() | ||||||
| 	switch typ.Kind() { | 	switch typ.Kind() { | ||||||
| 	case reflect.Map: | 	case reflect.Map: | ||||||
|   | |||||||
2  vendor/github.com/blevesearch/bleve/numeric/bin.go  (generated, vendored)
							| @@ -14,7 +14,7 @@ var interleaveShift = []uint{1, 2, 4, 8, 16} | |||||||
|  |  | ||||||
| // Interleave the first 32 bits of each uint64 | // Interleave the first 32 bits of each uint64 | ||||||
| // apdated from org.apache.lucene.util.BitUtil | // apdated from org.apache.lucene.util.BitUtil | ||||||
| // whcih was adapted from: | // which was adapted from: | ||||||
| // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN | // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN | ||||||
| func Interleave(v1, v2 uint64) uint64 { | func Interleave(v1, v2 uint64) uint64 { | ||||||
| 	v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4] | 	v1 = (v1 | (v1 << interleaveShift[4])) & interleaveMagic[4] | ||||||
|   | |||||||
4  vendor/github.com/blevesearch/bleve/numeric/prefix_coded.go  (generated, vendored)
							| @@ -77,6 +77,10 @@ func (p PrefixCoded) Int64() (int64, error) { | |||||||
| } | } | ||||||
|  |  | ||||||
| func ValidPrefixCodedTerm(p string) (bool, int) { | func ValidPrefixCodedTerm(p string) (bool, int) { | ||||||
|  | 	return ValidPrefixCodedTermBytes([]byte(p)) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func ValidPrefixCodedTermBytes(p []byte) (bool, int) { | ||||||
| 	if len(p) > 0 { | 	if len(p) > 0 { | ||||||
| 		if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 { | 		if p[0] < ShiftStartInt64 || p[0] > ShiftStartInt64+63 { | ||||||
| 			return false, 0 | 			return false, 0 | ||||||
|   | |||||||
77  vendor/github.com/blevesearch/bleve/search.go  (generated, vendored)
							| @@ -17,15 +17,29 @@ package bleve | |||||||
| import ( | import ( | ||||||
| 	"encoding/json" | 	"encoding/json" | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"reflect" | ||||||
| 	"time" | 	"time" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/analysis" | 	"github.com/blevesearch/bleve/analysis" | ||||||
| 	"github.com/blevesearch/bleve/analysis/datetime/optional" | 	"github.com/blevesearch/bleve/analysis/datetime/optional" | ||||||
|  | 	"github.com/blevesearch/bleve/document" | ||||||
| 	"github.com/blevesearch/bleve/registry" | 	"github.com/blevesearch/bleve/registry" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/search/collector" | ||||||
| 	"github.com/blevesearch/bleve/search/query" | 	"github.com/blevesearch/bleve/search/query" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeSearchResult int | ||||||
|  | var reflectStaticSizeSearchStatus int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var sr SearchResult | ||||||
|  | 	reflectStaticSizeSearchResult = int(reflect.TypeOf(sr).Size()) | ||||||
|  | 	var ss SearchStatus | ||||||
|  | 	reflectStaticSizeSearchStatus = int(reflect.TypeOf(ss).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| var cache = registry.NewCache() | var cache = registry.NewCache() | ||||||
|  |  | ||||||
| const defaultDateTimeParser = optional.Name | const defaultDateTimeParser = optional.Name | ||||||
| @@ -247,6 +261,7 @@ func (h *HighlightRequest) AddField(field string) { | |||||||
| // Explain triggers inclusion of additional search | // Explain triggers inclusion of additional search | ||||||
| // result score explanations. | // result score explanations. | ||||||
| // Sort describes the desired order for the results to be returned. | // Sort describes the desired order for the results to be returned. | ||||||
|  | // Score controls the kind of scoring performed. | ||||||
| // | // | ||||||
| // A special field named "*" can be used to return all fields. | // A special field named "*" can be used to return all fields. | ||||||
| type SearchRequest struct { | type SearchRequest struct { | ||||||
| @@ -259,6 +274,7 @@ type SearchRequest struct { | |||||||
| 	Explain          bool              `json:"explain"` | 	Explain          bool              `json:"explain"` | ||||||
| 	Sort             search.SortOrder  `json:"sort"` | 	Sort             search.SortOrder  `json:"sort"` | ||||||
| 	IncludeLocations bool              `json:"includeLocations"` | 	IncludeLocations bool              `json:"includeLocations"` | ||||||
|  | 	Score            string            `json:"score,omitempty"` | ||||||
| } | } | ||||||
|  |  | ||||||
| func (r *SearchRequest) Validate() error { | func (r *SearchRequest) Validate() error { | ||||||
| @@ -308,6 +324,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||||||
| 		Explain          bool              `json:"explain"` | 		Explain          bool              `json:"explain"` | ||||||
| 		Sort             []json.RawMessage `json:"sort"` | 		Sort             []json.RawMessage `json:"sort"` | ||||||
| 		IncludeLocations bool              `json:"includeLocations"` | 		IncludeLocations bool              `json:"includeLocations"` | ||||||
|  | 		Score            string            `json:"score"` | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err := json.Unmarshal(input, &temp) | 	err := json.Unmarshal(input, &temp) | ||||||
| @@ -334,6 +351,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { | |||||||
| 	r.Fields = temp.Fields | 	r.Fields = temp.Fields | ||||||
| 	r.Facets = temp.Facets | 	r.Facets = temp.Facets | ||||||
| 	r.IncludeLocations = temp.IncludeLocations | 	r.IncludeLocations = temp.IncludeLocations | ||||||
|  | 	r.Score = temp.Score | ||||||
| 	r.Query, err = query.ParseQuery(temp.Q) | 	r.Query, err = query.ParseQuery(temp.Q) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return err | 		return err | ||||||
| @@ -432,6 +450,24 @@ type SearchResult struct { | |||||||
| 	Facets   search.FacetResults            `json:"facets"` | 	Facets   search.FacetResults            `json:"facets"` | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (sr *SearchResult) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeSearchResult + size.SizeOfPtr + | ||||||
|  | 		reflectStaticSizeSearchStatus | ||||||
|  |  | ||||||
|  | 	for _, entry := range sr.Hits { | ||||||
|  | 		if entry != nil { | ||||||
|  | 			sizeInBytes += entry.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for k, v := range sr.Facets { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			v.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (sr *SearchResult) String() string { | func (sr *SearchResult) String() string { | ||||||
| 	rv := "" | 	rv := "" | ||||||
| 	if sr.Total > 0 { | 	if sr.Total > 0 { | ||||||
| @@ -488,3 +524,44 @@ func (sr *SearchResult) Merge(other *SearchResult) { | |||||||
|  |  | ||||||
| 	sr.Facets.Merge(other.Facets) | 	sr.Facets.Merge(other.Facets) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // MemoryNeededForSearchResult is an exported helper function to determine the RAM | ||||||
|  | // needed to accommodate the results for a given search request. | ||||||
|  | func MemoryNeededForSearchResult(req *SearchRequest) uint64 { | ||||||
|  | 	if req == nil { | ||||||
|  | 		return 0 | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	numDocMatches := req.Size + req.From | ||||||
|  | 	if req.Size+req.From > collector.PreAllocSizeSkipCap { | ||||||
|  | 		numDocMatches = collector.PreAllocSizeSkipCap | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	estimate := 0 | ||||||
|  |  | ||||||
|  | 	// overhead from the SearchResult structure | ||||||
|  | 	var sr SearchResult | ||||||
|  | 	estimate += sr.Size() | ||||||
|  |  | ||||||
|  | 	var dm search.DocumentMatch | ||||||
|  | 	sizeOfDocumentMatch := dm.Size() | ||||||
|  |  | ||||||
|  | 	// overhead from results | ||||||
|  | 	estimate += numDocMatches * sizeOfDocumentMatch | ||||||
|  |  | ||||||
|  | 	// overhead from facet results | ||||||
|  | 	if req.Facets != nil { | ||||||
|  | 		var fr search.FacetResult | ||||||
|  | 		estimate += len(req.Facets) * fr.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// highlighting, store | ||||||
|  | 	var d document.Document | ||||||
|  | 	if len(req.Fields) > 0 || req.Highlight != nil { | ||||||
|  | 		for i := 0; i < (req.Size + req.From); i++ { | ||||||
|  | 			estimate += (req.Size + req.From) * d.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return uint64(estimate) | ||||||
|  | } | ||||||
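The new Size methods feed this helper, which callers can use for admission control before running a query. A hypothetical sketch (the quota value is made up; MemoryNeededForSearchResult and its signature come from the hunk above):

    package main

    import (
        "fmt"

        "github.com/blevesearch/bleve"
    )

    func main() {
        req := bleve.NewSearchRequest(bleve.NewMatchQuery("gitea"))
        req.Size, req.From = 100, 0

        const quotaBytes = 64 << 20 // hypothetical per-query RAM budget
        if est := bleve.MemoryNeededForSearchResult(req); est > quotaBytes {
            fmt.Printf("rejecting query: estimated %d bytes exceeds quota\n", est)
            return
        }
        // otherwise proceed with index.Search(req)
    }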
vendor/github.com/blevesearch/bleve/search/collector.go | 20 (generated, vendored)
							| @@ -30,3 +30,23 @@ type Collector interface { | |||||||
| 	SetFacetsBuilder(facetsBuilder *FacetsBuilder) | 	SetFacetsBuilder(facetsBuilder *FacetsBuilder) | ||||||
| 	FacetResults() FacetResults | 	FacetResults() FacetResults | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // DocumentMatchHandler is the type of document match callback | ||||||
|  | // bleve will invoke during the search. | ||||||
|  | // Eventually, bleve will indicate the completion of an ongoing search | ||||||
|  | // by passing a nil value for the document match callback. | ||||||
|  | // The application should take a copy of the hit/documentMatch | ||||||
|  | // if it wishes to own it or needs prolonged access to it. | ||||||
|  | type DocumentMatchHandler func(hit *DocumentMatch) error | ||||||
|  |  | ||||||
|  | type MakeDocumentMatchHandlerKeyType string | ||||||
|  |  | ||||||
|  | var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType( | ||||||
|  | 	"MakeDocumentMatchHandlerKey") | ||||||
|  |  | ||||||
|  | // MakeDocumentMatchHandler is an optional DocumentMatchHandler | ||||||
|  | // builder function which applications can pass to bleve. | ||||||
|  | // The builder gives bleve a DocumentMatchHandler, | ||||||
|  | // which bleve then invokes on every document match. | ||||||
|  | type MakeDocumentMatchHandler func(ctx *SearchContext) ( | ||||||
|  | 	callback DocumentMatchHandler, loadID bool, err error) | ||||||
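A sketch of how an application might plug in a streaming handler: build a context carrying a MakeDocumentMatchHandler under MakeDocumentMatchHandlerKey, then run the search with that context; the TopNCollector hunks below show the lookup side. Only types visible in this diff are used; the printing handler itself is made up:

    package main

    import (
        "context"
        "fmt"

        "github.com/blevesearch/bleve/search"
    )

    // streamHits builds a handler that processes each hit as it arrives;
    // returning loadID=true asks the collector to resolve document IDs.
    func streamHits(ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
        handler := func(hit *search.DocumentMatch) error {
            if hit == nil { // nil marks the end of the search
                fmt.Println("search complete")
                return nil
            }
            fmt.Println("hit:", hit.ID)
            return nil
        }
        return handler, true, nil
    }

    func withStreamingHandler(parent context.Context) context.Context {
        var maker search.MakeDocumentMatchHandler = streamHits
        return context.WithValue(parent, search.MakeDocumentMatchHandlerKey, maker)
    }

    func main() {
        ctx := withStreamingHandler(context.Background())
        _ = ctx // would then be passed to the search, e.g. Index.SearchInContext(ctx, req)
    }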
vendor/github.com/blevesearch/bleve/search/collector/heap.go | 4 (generated, vendored)
							| @@ -25,9 +25,9 @@ type collectStoreHeap struct { | |||||||
| 	compare collectorCompare | 	compare collectorCompare | ||||||
| } | } | ||||||
|  |  | ||||||
| func newStoreHeap(cap int, compare collectorCompare) *collectStoreHeap { | func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap { | ||||||
| 	rv := &collectStoreHeap{ | 	rv := &collectStoreHeap{ | ||||||
| 		heap:    make(search.DocumentMatchCollection, 0, cap), | 		heap:    make(search.DocumentMatchCollection, 0, capacity), | ||||||
| 		compare: compare, | 		compare: compare, | ||||||
| 	} | 	} | ||||||
| 	heap.Init(rv) | 	heap.Init(rv) | ||||||
vendor/github.com/blevesearch/bleve/search/collector/list.go | 5 (generated, vendored)
							| @@ -25,7 +25,7 @@ type collectStoreList struct { | |||||||
| 	compare collectorCompare | 	compare collectorCompare | ||||||
| } | } | ||||||
|  |  | ||||||
| func newStoreList(cap int, compare collectorCompare) *collectStoreList { | func newStoreList(capacity int, compare collectorCompare) *collectStoreList { | ||||||
| 	rv := &collectStoreList{ | 	rv := &collectStoreList{ | ||||||
| 		results: list.New(), | 		results: list.New(), | ||||||
| 		compare: compare, | 		compare: compare, | ||||||
| @@ -34,8 +34,7 @@ func newStoreList(cap int, compare collectorCompare) *collectStoreList { | |||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
|  |  | ||||||
| func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, | func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch { | ||||||
| 	size int) *search.DocumentMatch { |  | ||||||
| 	c.add(doc) | 	c.add(doc) | ||||||
| 	if c.len() > size { | 	if c.len() > size { | ||||||
| 		return c.removeLast() | 		return c.removeLast() | ||||||
vendor/github.com/blevesearch/bleve/search/collector/slice.go | 4 (generated, vendored)
							| @@ -21,9 +21,9 @@ type collectStoreSlice struct { | |||||||
| 	compare collectorCompare | 	compare collectorCompare | ||||||
| } | } | ||||||
|  |  | ||||||
| func newStoreSlice(cap int, compare collectorCompare) *collectStoreSlice { | func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice { | ||||||
| 	rv := &collectStoreSlice{ | 	rv := &collectStoreSlice{ | ||||||
| 		slice:   make(search.DocumentMatchCollection, 0, cap), | 		slice:   make(search.DocumentMatchCollection, 0, capacity), | ||||||
| 		compare: compare, | 		compare: compare, | ||||||
| 	} | 	} | ||||||
| 	return rv | 	return rv | ||||||
vendor/github.com/blevesearch/bleve/search/collector/topn.go | 105 (generated, vendored)
							| @@ -16,12 +16,21 @@ package collector | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"context" | 	"context" | ||||||
|  | 	"reflect" | ||||||
| 	"time" | 	"time" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeTopNCollector int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var coll TopNCollector | ||||||
|  | 	reflectStaticSizeTopNCollector = int(reflect.TypeOf(coll).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type collectorStore interface { | type collectorStore interface { | ||||||
| 	// Add the document, and if the new store size exceeds the provided size | 	// Add the document, and if the new store size exceeds the provided size | ||||||
| 	// the last element is removed and returned.  If the size has not been | 	// the last element is removed and returned.  If the size has not been | ||||||
| @@ -58,6 +67,8 @@ type TopNCollector struct { | |||||||
| 	cachedDesc    []bool | 	cachedDesc    []bool | ||||||
|  |  | ||||||
| 	lowestMatchOutsideResults *search.DocumentMatch | 	lowestMatchOutsideResults *search.DocumentMatch | ||||||
|  | 	updateFieldVisitor        index.DocumentFieldTermVisitor | ||||||
|  | 	dvReader                  index.DocValueReader | ||||||
| } | } | ||||||
|  |  | ||||||
| // CheckDoneEvery controls how frequently we check the context deadline | // CheckDoneEvery controls how frequently we check the context deadline | ||||||
| @@ -98,6 +109,22 @@ func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector | |||||||
| 	return hc | 	return hc | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (hc *TopNCollector) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeTopNCollector + size.SizeOfPtr | ||||||
|  |  | ||||||
|  | 	if hc.facetsBuilder != nil { | ||||||
|  | 		sizeInBytes += hc.facetsBuilder.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range hc.neededFields { | ||||||
|  | 		sizeInBytes += len(entry) + size.SizeOfString | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sizeInBytes += len(hc.cachedScoring) + len(hc.cachedDesc) | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| // Collect goes to the index to find the matching documents | // Collect goes to the index to find the matching documents | ||||||
| func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { | func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error { | ||||||
| 	startTime := time.Now() | 	startTime := time.Now() | ||||||
| @@ -113,8 +140,34 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, | |||||||
| 	} | 	} | ||||||
| 	searchContext := &search.SearchContext{ | 	searchContext := &search.SearchContext{ | ||||||
| 		DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), | 		DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)), | ||||||
|  | 		Collector:         hc, | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	hc.dvReader, err = reader.DocValueReader(hc.neededFields) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	hc.updateFieldVisitor = func(field string, term []byte) { | ||||||
|  | 		if hc.facetsBuilder != nil { | ||||||
|  | 			hc.facetsBuilder.UpdateVisitor(field, term) | ||||||
|  | 		} | ||||||
|  | 		hc.sort.UpdateVisitor(field, term) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	dmHandlerMaker := MakeTopNDocumentMatchHandler | ||||||
|  | 	if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil { | ||||||
|  | 		dmHandlerMaker = cv.(search.MakeDocumentMatchHandler) | ||||||
|  | 	} | ||||||
|  | 	// use the application-given builder to make the custom document match | ||||||
|  | 	// handler, and perform callbacks/invocations on the newly made handler. | ||||||
|  | 	dmHandler, loadID, err := dmHandlerMaker(searchContext) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	hc.needDocIds = hc.needDocIds || loadID | ||||||
|  |  | ||||||
| 	select { | 	select { | ||||||
| 	case <-ctx.Done(): | 	case <-ctx.Done(): | ||||||
| 		return ctx.Err() | 		return ctx.Err() | ||||||
| @@ -130,13 +183,26 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, | |||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		err = hc.collectSingle(searchContext, reader, next) | 		err = hc.prepareDocumentMatch(searchContext, reader, next) | ||||||
|  | 		if err != nil { | ||||||
|  | 			break | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		err = dmHandler(next) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			break | 			break | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		next, err = searcher.Next(searchContext) | 		next, err = searcher.Next(searchContext) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	// help finalize/flush the results in case | ||||||
|  | 	// of custom document match handlers. | ||||||
|  | 	err = dmHandler(nil) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// compute search duration | 	// compute search duration | ||||||
| 	hc.took = time.Since(startTime) | 	hc.took = time.Since(startTime) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| @@ -152,8 +218,8 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, | |||||||
|  |  | ||||||
| var sortByScoreOpt = []string{"_score"} | var sortByScoreOpt = []string{"_score"} | ||||||
|  |  | ||||||
| func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.IndexReader, d *search.DocumentMatch) error { | func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext, | ||||||
| 	var err error | 	reader index.IndexReader, d *search.DocumentMatch) (err error) { | ||||||
|  |  | ||||||
| 	// visit field terms for features that require it (sort, facets) | 	// visit field terms for features that require it (sort, facets) | ||||||
| 	if len(hc.neededFields) > 0 { | 	if len(hc.neededFields) > 0 { | ||||||
| @@ -187,11 +253,24 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I | |||||||
| 		hc.sort.Value(d) | 		hc.sort.Value(d) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func MakeTopNDocumentMatchHandler( | ||||||
|  | 	ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) { | ||||||
|  | 	var hc *TopNCollector | ||||||
|  | 	var ok bool | ||||||
|  | 	if hc, ok = ctx.Collector.(*TopNCollector); ok { | ||||||
|  | 		return func(d *search.DocumentMatch) error { | ||||||
|  | 			if d == nil { | ||||||
|  | 				return nil | ||||||
|  | 			} | ||||||
| 			// optimization, we track lowest sorting hit already removed from heap | 			// optimization, we track lowest sorting hit already removed from heap | ||||||
| 			// with this one comparison, we can avoid all heap operations if | 			// with this one comparison, we can avoid all heap operations if | ||||||
| 			// this hit would have been added and then immediately removed | 			// this hit would have been added and then immediately removed | ||||||
| 			if hc.lowestMatchOutsideResults != nil { | 			if hc.lowestMatchOutsideResults != nil { | ||||||
| 		cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.lowestMatchOutsideResults) | 				cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, | ||||||
|  | 					hc.lowestMatchOutsideResults) | ||||||
| 				if cmp >= 0 { | 				if cmp >= 0 { | ||||||
| 					// this hit can't possibly be in the result set, so avoid heap ops | 					// this hit can't possibly be in the result set, so avoid heap ops | ||||||
| 					ctx.DocumentMatchPool.Put(d) | 					ctx.DocumentMatchPool.Put(d) | ||||||
| @@ -204,7 +283,8 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I | |||||||
| 				if hc.lowestMatchOutsideResults == nil { | 				if hc.lowestMatchOutsideResults == nil { | ||||||
| 					hc.lowestMatchOutsideResults = removed | 					hc.lowestMatchOutsideResults = removed | ||||||
| 				} else { | 				} else { | ||||||
| 			cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, removed, hc.lowestMatchOutsideResults) | 					cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, | ||||||
|  | 						removed, hc.lowestMatchOutsideResults) | ||||||
| 					if cmp < 0 { | 					if cmp < 0 { | ||||||
| 						tmp := hc.lowestMatchOutsideResults | 						tmp := hc.lowestMatchOutsideResults | ||||||
| 						hc.lowestMatchOutsideResults = removed | 						hc.lowestMatchOutsideResults = removed | ||||||
| @@ -212,8 +292,10 @@ func (hc *TopNCollector) collectSingle(ctx *search.SearchContext, reader index.I | |||||||
| 					} | 					} | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 			return nil | 			return nil | ||||||
|  | 		}, false, nil | ||||||
|  | 	} | ||||||
|  | 	return nil, false, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // visitFieldTerms is responsible for visiting the field terms of the | // visitFieldTerms is responsible for visiting the field terms of the | ||||||
| @@ -223,13 +305,7 @@ func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.Doc | |||||||
| 		hc.facetsBuilder.StartDoc() | 		hc.facetsBuilder.StartDoc() | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	err := reader.DocumentVisitFieldTerms(d.IndexInternalID, hc.neededFields, func(field string, term []byte) { | 	err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor) | ||||||
| 		if hc.facetsBuilder != nil { |  | ||||||
| 			hc.facetsBuilder.UpdateVisitor(field, term) |  | ||||||
| 		} |  | ||||||
| 		hc.sort.UpdateVisitor(field, term) |  | ||||||
| 	}) |  | ||||||
|  |  | ||||||
| 	if hc.facetsBuilder != nil { | 	if hc.facetsBuilder != nil { | ||||||
| 		hc.facetsBuilder.EndDoc() | 		hc.facetsBuilder.EndDoc() | ||||||
| 	} | 	} | ||||||
| @@ -257,6 +333,7 @@ func (hc *TopNCollector) finalizeResults(r index.IndexReader) error { | |||||||
| 				return err | 				return err | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  | 		doc.Complete(nil) | ||||||
| 		return nil | 		return nil | ||||||
| 	}) | 	}) | ||||||
|  |  | ||||||
| @@ -288,5 +365,5 @@ func (hc *TopNCollector) FacetResults() search.FacetResults { | |||||||
| 	if hc.facetsBuilder != nil { | 	if hc.facetsBuilder != nil { | ||||||
| 		return hc.facetsBuilder.Results() | 		return hc.facetsBuilder.Results() | ||||||
| 	} | 	} | ||||||
| 	return search.FacetResults{} | 	return nil | ||||||
| } | } | ||||||
vendor/github.com/blevesearch/bleve/search/explanation.go | 21 (generated, vendored)
							| @@ -17,8 +17,18 @@ package search | |||||||
| import ( | import ( | ||||||
| 	"encoding/json" | 	"encoding/json" | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeExplanation int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var e Explanation | ||||||
|  | 	reflectStaticSizeExplanation = int(reflect.TypeOf(e).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type Explanation struct { | type Explanation struct { | ||||||
| 	Value    float64        `json:"value"` | 	Value    float64        `json:"value"` | ||||||
| 	Message  string         `json:"message"` | 	Message  string         `json:"message"` | ||||||
| @@ -32,3 +42,14 @@ func (expl *Explanation) String() string { | |||||||
| 	} | 	} | ||||||
| 	return string(js) | 	return string(js) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (expl *Explanation) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeExplanation + size.SizeOfPtr + | ||||||
|  | 		len(expl.Message) | ||||||
|  |  | ||||||
|  | 	for _, entry := range expl.Children { | ||||||
|  | 		sizeInBytes += entry.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
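This file shows the full shape of the sizing pattern this commit threads through the codebase: an init() snapshots the fixed struct layout once via reflection, and Size() adds the variable-length parts on top. A minimal standalone illustration of the same pattern:

    package main

    import (
        "fmt"
        "reflect"
    )

    type node struct {
        label    string
        children []*node
    }

    // fixed part of the layout, computed once at startup
    var staticNodeSize = int(reflect.TypeOf(node{}).Size())

    // size approximates heap usage: static layout plus variable-length contents.
    func (n *node) size() int {
        total := staticNodeSize + len(n.label)
        for _, c := range n.children {
            total += c.size()
        }
        return total
    }

    func main() {
        root := &node{label: "root", children: []*node{{label: "leaf"}}}
        fmt.Println(root.size())
    }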
vendor/github.com/blevesearch/bleve/search/facet/facet_builder_datetime.go | 29 (generated, vendored)
							| @@ -15,13 +15,25 @@ | |||||||
| package facet | package facet | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
| 	"time" | 	"time" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/numeric" | 	"github.com/blevesearch/bleve/numeric" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeDateTimeFacetBuilder int | ||||||
|  | var reflectStaticSizedateTimeRange int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var dtfb DateTimeFacetBuilder | ||||||
|  | 	reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(dtfb).Size()) | ||||||
|  | 	var dtr dateTimeRange | ||||||
|  | 	reflectStaticSizedateTimeRange = int(reflect.TypeOf(dtr).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type dateTimeRange struct { | type dateTimeRange struct { | ||||||
| 	start time.Time | 	start time.Time | ||||||
| 	end   time.Time | 	end   time.Time | ||||||
| @@ -46,6 +58,23 @@ func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (fb *DateTimeFacetBuilder) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr + | ||||||
|  | 		len(fb.field) | ||||||
|  |  | ||||||
|  | 	for k, _ := range fb.termsCount { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			size.SizeOfInt | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for k, _ := range fb.ranges { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			size.SizeOfPtr + reflectStaticSizedateTimeRange | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) { | func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) { | ||||||
| 	r := dateTimeRange{ | 	r := dateTimeRange{ | ||||||
| 		start: start, | 		start: start, | ||||||
vendor/github.com/blevesearch/bleve/search/facet/facet_builder_numeric.go | 29 (generated, vendored)
							| @@ -15,12 +15,24 @@ | |||||||
| package facet | package facet | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/numeric" | 	"github.com/blevesearch/bleve/numeric" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeNumericFacetBuilder int | ||||||
|  | var reflectStaticSizenumericRange int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var nfb NumericFacetBuilder | ||||||
|  | 	reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(nfb).Size()) | ||||||
|  | 	var nr numericRange | ||||||
|  | 	reflectStaticSizenumericRange = int(reflect.TypeOf(nr).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type numericRange struct { | type numericRange struct { | ||||||
| 	min *float64 | 	min *float64 | ||||||
| 	max *float64 | 	max *float64 | ||||||
| @@ -45,6 +57,23 @@ func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (fb *NumericFacetBuilder) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr + | ||||||
|  | 		len(fb.field) | ||||||
|  |  | ||||||
|  | 	for k, _ := range fb.termsCount { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			size.SizeOfInt | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for k, _ := range fb.ranges { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			size.SizeOfPtr + reflectStaticSizenumericRange | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) { | func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) { | ||||||
| 	r := numericRange{ | 	r := numericRange{ | ||||||
| 		min: min, | 		min: min, | ||||||
vendor/github.com/blevesearch/bleve/search/facet/facet_builder_terms.go | 21 (generated, vendored)
							| @@ -15,11 +15,20 @@ | |||||||
| package facet | package facet | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeTermsFacetBuilder int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var tfb TermsFacetBuilder | ||||||
|  | 	reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(tfb).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type TermsFacetBuilder struct { | type TermsFacetBuilder struct { | ||||||
| 	size       int | 	size       int | ||||||
| 	field      string | 	field      string | ||||||
| @@ -37,6 +46,18 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder { | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (fb *TermsFacetBuilder) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr + | ||||||
|  | 		len(fb.field) | ||||||
|  |  | ||||||
|  | 	for k, _ := range fb.termsCount { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			size.SizeOfInt | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (fb *TermsFacetBuilder) Field() string { | func (fb *TermsFacetBuilder) Field() string { | ||||||
| 	return fb.field | 	return fb.field | ||||||
| } | } | ||||||
vendor/github.com/blevesearch/bleve/search/facets_builder.go | 56 (generated, vendored)
							| @@ -15,11 +15,32 @@ | |||||||
| package search | package search | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeFacetsBuilder int | ||||||
|  | var reflectStaticSizeFacetResult int | ||||||
|  | var reflectStaticSizeTermFacet int | ||||||
|  | var reflectStaticSizeNumericRangeFacet int | ||||||
|  | var reflectStaticSizeDateRangeFacet int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var fb FacetsBuilder | ||||||
|  | 	reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(fb).Size()) | ||||||
|  | 	var fr FacetResult | ||||||
|  | 	reflectStaticSizeFacetResult = int(reflect.TypeOf(fr).Size()) | ||||||
|  | 	var tf TermFacet | ||||||
|  | 	reflectStaticSizeTermFacet = int(reflect.TypeOf(tf).Size()) | ||||||
|  | 	var nrf NumericRangeFacet | ||||||
|  | 	reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(nrf).Size()) | ||||||
|  | 	var drf DateRangeFacet | ||||||
|  | 	reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(drf).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type FacetBuilder interface { | type FacetBuilder interface { | ||||||
| 	StartDoc() | 	StartDoc() | ||||||
| 	UpdateVisitor(field string, term []byte) | 	UpdateVisitor(field string, term []byte) | ||||||
| @@ -27,23 +48,40 @@ type FacetBuilder interface { | |||||||
|  |  | ||||||
| 	Result() *FacetResult | 	Result() *FacetResult | ||||||
| 	Field() string | 	Field() string | ||||||
|  |  | ||||||
|  | 	Size() int | ||||||
| } | } | ||||||
|  |  | ||||||
| type FacetsBuilder struct { | type FacetsBuilder struct { | ||||||
| 	indexReader index.IndexReader | 	indexReader index.IndexReader | ||||||
| 	facets      map[string]FacetBuilder | 	facetNames  []string | ||||||
|  | 	facets      []FacetBuilder | ||||||
| 	fields      []string | 	fields      []string | ||||||
| } | } | ||||||
|  |  | ||||||
| func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { | func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { | ||||||
| 	return &FacetsBuilder{ | 	return &FacetsBuilder{ | ||||||
| 		indexReader: indexReader, | 		indexReader: indexReader, | ||||||
| 		facets:      make(map[string]FacetBuilder, 0), |  | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (fb *FacetsBuilder) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeFacetsBuilder + size.SizeOfPtr | ||||||
|  |  | ||||||
|  | 	for k, v := range fb.facets { | ||||||
|  | 		sizeInBytes += size.SizeOfString + v.Size() + len(fb.facetNames[k]) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range fb.fields { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(entry) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { | func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { | ||||||
| 	fb.facets[name] = facetBuilder | 	fb.facetNames = append(fb.facetNames, name) | ||||||
|  | 	fb.facets = append(fb.facets, facetBuilder) | ||||||
| 	fb.fields = append(fb.fields, facetBuilder.Field()) | 	fb.fields = append(fb.fields, facetBuilder.Field()) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -213,6 +251,14 @@ type FacetResult struct { | |||||||
| 	DateRanges    DateRangeFacets    `json:"date_ranges,omitempty"` | 	DateRanges    DateRangeFacets    `json:"date_ranges,omitempty"` | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (fr *FacetResult) Size() int { | ||||||
|  | 	return reflectStaticSizeFacetResult + size.SizeOfPtr + | ||||||
|  | 		len(fr.Field) + | ||||||
|  | 		len(fr.Terms)*(reflectStaticSizeTermFacet+size.SizeOfPtr) + | ||||||
|  | 		len(fr.NumericRanges)*(reflectStaticSizeNumericRangeFacet+size.SizeOfPtr) + | ||||||
|  | 		len(fr.DateRanges)*(reflectStaticSizeDateRangeFacet+size.SizeOfPtr) | ||||||
|  | } | ||||||
|  |  | ||||||
| func (fr *FacetResult) Merge(other *FacetResult) { | func (fr *FacetResult) Merge(other *FacetResult) { | ||||||
| 	fr.Total += other.Total | 	fr.Total += other.Total | ||||||
| 	fr.Missing += other.Missing | 	fr.Missing += other.Missing | ||||||
| @@ -287,9 +333,9 @@ func (fr FacetResults) Fixup(name string, size int) { | |||||||
|  |  | ||||||
| func (fb *FacetsBuilder) Results() FacetResults { | func (fb *FacetsBuilder) Results() FacetResults { | ||||||
| 	fr := make(FacetResults) | 	fr := make(FacetResults) | ||||||
| 	for facetName, facetBuilder := range fb.facets { | 	for i, facetBuilder := range fb.facets { | ||||||
| 		facetResult := facetBuilder.Result() | 		facetResult := facetBuilder.Result() | ||||||
| 		fr[facetName] = facetResult | 		fr[fb.facetNames[i]] = facetResult | ||||||
| 	} | 	} | ||||||
| 	return fr | 	return fr | ||||||
| } | } | ||||||
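Replacing the map with the paired facetNames/facets slices gives deterministic iteration (Go randomizes map order) and cheaper traversal; Add appends to both and Results rejoins them by index. The same idea in isolation, with a made-up stand-in for FacetBuilder:

    package main

    import "fmt"

    // Parallel slices keep builders in insertion order; a map's
    // iteration order is randomized in Go.
    type registry struct {
        names    []string
        builders []func() int // stand-in for the FacetBuilder interface
    }

    func (r *registry) add(name string, b func() int) {
        r.names = append(r.names, name)
        r.builders = append(r.builders, b)
    }

    func (r *registry) results() {
        for i, b := range r.builders {
            fmt.Println(r.names[i], b()) // index i pairs a name with its builder
        }
    }

    func main() {
        r := &registry{}
        r.add("tags", func() int { return 3 })
        r.add("dates", func() int { return 2 })
        r.results() // always prints tags before dates
    }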
vendor/github.com/blevesearch/bleve/search/levenshtein.go | 17 (generated, vendored)
							| @@ -57,15 +57,24 @@ func LevenshteinDistance(a, b string) int { | |||||||
| // in which case the first return val will be the max | // in which case the first return val will be the max | ||||||
| // and the second will be true, indicating max was exceeded | // and the second will be true, indicating max was exceeded | ||||||
| func LevenshteinDistanceMax(a, b string, max int) (int, bool) { | func LevenshteinDistanceMax(a, b string, max int) (int, bool) { | ||||||
|  | 	v, wasMax, _ := LevenshteinDistanceMaxReuseSlice(a, b, max, nil) | ||||||
|  | 	return v, wasMax | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, []int) { | ||||||
| 	la := len(a) | 	la := len(a) | ||||||
| 	lb := len(b) | 	lb := len(b) | ||||||
|  |  | ||||||
| 	ld := int(math.Abs(float64(la - lb))) | 	ld := int(math.Abs(float64(la - lb))) | ||||||
| 	if ld > max { | 	if ld > max { | ||||||
| 		return max, true | 		return max, true, d | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	d := make([]int, la+1) | 	if cap(d) < la+1 { | ||||||
|  | 		d = make([]int, la+1) | ||||||
|  | 	} | ||||||
|  | 	d = d[:la+1] | ||||||
|  |  | ||||||
| 	var lastdiag, olddiag, temp int | 	var lastdiag, olddiag, temp int | ||||||
|  |  | ||||||
| 	for i := 1; i <= la; i++ { | 	for i := 1; i <= la; i++ { | ||||||
| @@ -98,8 +107,8 @@ func LevenshteinDistanceMax(a, b string, max int) (int, bool) { | |||||||
| 		} | 		} | ||||||
| 		// after each row if rowmin isn't less than max stop | 		// after each row if rowmin isn't less than max stop | ||||||
| 		if rowmin > max { | 		if rowmin > max { | ||||||
| 			return max, true | 			return max, true, d | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	return d[la], false | 	return d[la], false, d | ||||||
| } | } | ||||||
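The ReuseSlice variant threads the scratch row back to the caller, so comparing one needle against many candidates allocates the row at most once. Hypothetical usage of the signature from this hunk:

    package main

    import (
        "fmt"

        "github.com/blevesearch/bleve/search"
    )

    func main() {
        needle := "gitea"
        candidates := []string{"gitee", "gotea", "git"}

        var scratch []int // reused across calls; grown only when too small
        for _, c := range candidates {
            d, exceeded, s := search.LevenshteinDistanceMaxReuseSlice(needle, c, 2, scratch)
            scratch = s // thread the returned slice into the next call
            fmt.Println(c, d, exceeded)
        }
    }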
vendor/github.com/blevesearch/bleve/search/pool.go | 11 (generated, vendored)
							| @@ -14,6 +14,17 @@ | |||||||
|  |  | ||||||
| package search | package search | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"reflect" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeDocumentMatchPool int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var dmp DocumentMatchPool | ||||||
|  | 	reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(dmp).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| // DocumentMatchPoolTooSmall is a callback function that can be executed | // DocumentMatchPoolTooSmall is a callback function that can be executed | ||||||
| // when the DocumentMatchPool does not have sufficient capacity | // when the DocumentMatchPool does not have sufficient capacity | ||||||
| // By default we just perform just-in-time allocation, but you could log | // By default we just perform just-in-time allocation, but you could log | ||||||
vendor/github.com/blevesearch/bleve/search/query/conjunction.go | 2 (generated, vendored)
							| @@ -70,9 +70,11 @@ func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||||||
| 		} | 		} | ||||||
| 		ss = append(ss, sr) | 		ss = append(ss, sr) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if len(ss) < 1 { | 	if len(ss) < 1 { | ||||||
| 		return searcher.NewMatchNoneSearcher(i) | 		return searcher.NewMatchNoneSearcher(i) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return searcher.NewConjunctionSearcher(i, ss, options) | 	return searcher.NewConjunctionSearcher(i, ss, options) | ||||||
| } | } | ||||||
|  |  | ||||||
vendor/github.com/blevesearch/bleve/search/query/disjunction.go | 11 (generated, vendored)
							| @@ -58,7 +58,8 @@ func (q *DisjunctionQuery) SetMin(m float64) { | |||||||
| 	q.Min = m | 	q.Min = m | ||||||
| } | } | ||||||
|  |  | ||||||
| func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) { | func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | ||||||
|  | 	options search.SearcherOptions) (search.Searcher, error) { | ||||||
| 	ss := make([]search.Searcher, 0, len(q.Disjuncts)) | 	ss := make([]search.Searcher, 0, len(q.Disjuncts)) | ||||||
| 	for _, disjunct := range q.Disjuncts { | 	for _, disjunct := range q.Disjuncts { | ||||||
| 		sr, err := disjunct.Searcher(i, m, options) | 		sr, err := disjunct.Searcher(i, m, options) | ||||||
| @@ -76,9 +77,17 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, | |||||||
| 		} | 		} | ||||||
| 		ss = append(ss, sr) | 		ss = append(ss, sr) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if len(ss) < 1 { | 	if len(ss) < 1 { | ||||||
| 		return searcher.NewMatchNoneSearcher(i) | 		return searcher.NewMatchNoneSearcher(i) | ||||||
|  | 	} else if len(ss) == 1 && int(q.Min) == ss[0].Min() { | ||||||
|  | 		// apply optimization only if both conditions below are satisfied: | ||||||
|  | 		// - disjunction searcher has only 1 child searcher | ||||||
|  | 		// - parent searcher's min setting is equal to child searcher's min | ||||||
|  |  | ||||||
|  | 		return ss[0], nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) | 	return searcher.NewDisjunctionSearcher(i, ss, q.Min, options) | ||||||
| } | } | ||||||
|  |  | ||||||
vendor/github.com/blevesearch/bleve/search/query/query.go | 12 (generated, vendored)
							| @@ -296,32 +296,28 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) { | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	expand = func(query Query) (Query, error) { | 	expand = func(query Query) (Query, error) { | ||||||
| 		switch query.(type) { | 		switch q := query.(type) { | ||||||
| 		case *QueryStringQuery: | 		case *QueryStringQuery: | ||||||
| 			q := query.(*QueryStringQuery) |  | ||||||
| 			parsed, err := parseQuerySyntax(q.Query) | 			parsed, err := parseQuerySyntax(q.Query) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err) | 				return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err) | ||||||
| 			} | 			} | ||||||
| 			return expand(parsed) | 			return expand(parsed) | ||||||
| 		case *ConjunctionQuery: | 		case *ConjunctionQuery: | ||||||
| 			q := *query.(*ConjunctionQuery) |  | ||||||
| 			children, err := expandSlice(q.Conjuncts) | 			children, err := expandSlice(q.Conjuncts) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 			q.Conjuncts = children | 			q.Conjuncts = children | ||||||
| 			return &q, nil | 			return q, nil | ||||||
| 		case *DisjunctionQuery: | 		case *DisjunctionQuery: | ||||||
| 			q := *query.(*DisjunctionQuery) |  | ||||||
| 			children, err := expandSlice(q.Disjuncts) | 			children, err := expandSlice(q.Disjuncts) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 			q.Disjuncts = children | 			q.Disjuncts = children | ||||||
| 			return &q, nil | 			return q, nil | ||||||
| 		case *BooleanQuery: | 		case *BooleanQuery: | ||||||
| 			q := *query.(*BooleanQuery) |  | ||||||
| 			var err error | 			var err error | ||||||
| 			q.Must, err = expand(q.Must) | 			q.Must, err = expand(q.Must) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| @@ -335,7 +331,7 @@ func expandQuery(m mapping.IndexMapping, query Query) (Query, error) { | |||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 			return &q, nil | 			return q, nil | ||||||
| 		default: | 		default: | ||||||
| 			return query, nil | 			return query, nil | ||||||
| 		} | 		} | ||||||
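The refactor replaces switch query.(type) plus a per-case assertion and struct copy (q := *query.(*ConjunctionQuery); return &q) with the binding form switch q := query.(type), which scopes a correctly typed q into each case; note the cases now mutate and return the original query value rather than a shallow copy. The idiom in isolation:

    package main

    import "fmt"

    func describe(v interface{}) string {
        // q is re-declared with the concrete type inside each case,
        // so no second type assertion (or copy) is needed.
        switch q := v.(type) {
        case string:
            return "string of length " + fmt.Sprint(len(q))
        case []int:
            return fmt.Sprintf("%d ints", len(q))
        default:
            return fmt.Sprintf("unhandled %T", q)
        }
    }

    func main() {
        fmt.Println(describe("abc"), describe([]int{1, 2, 3}))
    }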
vendor/github.com/blevesearch/bleve/search/query/query_string_lex.go | 1 (generated, vendored)
							| @@ -273,6 +273,7 @@ func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) { | |||||||
| 	// see where to go | 	// see where to go | ||||||
| 	if !l.seenDot && next == '.' { | 	if !l.seenDot && next == '.' { | ||||||
| 		// stay in this state | 		// stay in this state | ||||||
|  | 		l.seenDot = true | ||||||
| 		l.buf += string(next) | 		l.buf += string(next) | ||||||
| 		return inNumOrStrState, true | 		return inNumOrStrState, true | ||||||
| 	} else if unicode.IsDigit(next) { | 	} else if unicode.IsDigit(next) { | ||||||
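This one-line fix makes the lexer actually record that a decimal point was consumed; without it, the !l.seenDot guard never tripped, and input like 1.2.3 could keep being consumed as a single numeric candidate. A minimal sketch of the intended bookkeeping (not the vendored lexer):

    package main

    import (
        "fmt"
        "unicode"
    )

    // acceptNumber consumes digits plus at most one '.', mirroring the
    // seenDot bookkeeping the fix restores.
    func acceptNumber(s string) string {
        seenDot := false
        for i, r := range s {
            if r == '.' && !seenDot {
                seenDot = true // the upstream bug: this assignment was missing
                continue
            }
            if !unicode.IsDigit(r) {
                return s[:i]
            }
        }
        return s
    }

    func main() {
        fmt.Println(acceptNumber("1.2.3")) // "1.2"
    }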
vendor/github.com/blevesearch/bleve/search/query/regexp.go | 37 (generated, vendored)
							| @@ -15,7 +15,6 @@ | |||||||
| package query | package query | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"regexp" |  | ||||||
| 	"strings" | 	"strings" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| @@ -28,7 +27,6 @@ type RegexpQuery struct { | |||||||
| 	Regexp   string `json:"regexp"` | 	Regexp   string `json:"regexp"` | ||||||
| 	FieldVal string `json:"field,omitempty"` | 	FieldVal string `json:"field,omitempty"` | ||||||
| 	BoostVal *Boost `json:"boost,omitempty"` | 	BoostVal *Boost `json:"boost,omitempty"` | ||||||
| 	compiled *regexp.Regexp |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // NewRegexpQuery creates a new Query which finds | // NewRegexpQuery creates a new Query which finds | ||||||
| @@ -64,33 +62,20 @@ func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, opti | |||||||
| 	if q.FieldVal == "" { | 	if q.FieldVal == "" { | ||||||
| 		field = m.DefaultSearchField() | 		field = m.DefaultSearchField() | ||||||
| 	} | 	} | ||||||
| 	err := q.compile() |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options) | 	// require that pattern NOT be anchored to start and end of term. | ||||||
| } | 	// do not attempt to remove trailing $, its presence is not | ||||||
|  | 	// known to interfere with LiteralPrefix() the way ^ does | ||||||
| func (q *RegexpQuery) Validate() error { | 	// and removing $ introduces possible ambiguities with escaped \$, \\$, etc | ||||||
| 	return q.compile() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (q *RegexpQuery) compile() error { |  | ||||||
| 	if q.compiled == nil { |  | ||||||
| 		// require that pattern NOT be anchored to start and end of term |  | ||||||
| 	actualRegexp := q.Regexp | 	actualRegexp := q.Regexp | ||||||
| 	if strings.HasPrefix(actualRegexp, "^") { | 	if strings.HasPrefix(actualRegexp, "^") { | ||||||
| 		actualRegexp = actualRegexp[1:] // remove leading ^ | 		actualRegexp = actualRegexp[1:] // remove leading ^ | ||||||
| 	} | 	} | ||||||
| 		// do not attempt to remove trailing $, it's presence is not |  | ||||||
| 		// known to interfere with LiteralPrefix() the way ^ does | 	return searcher.NewRegexpStringSearcher(i, actualRegexp, field, | ||||||
| 		// and removing $ introduces possible ambiguities with escaped \$, \\$, etc | 		q.BoostVal.Value(), options) | ||||||
| 		var err error | } | ||||||
| 		q.compiled, err = regexp.Compile(actualRegexp) |  | ||||||
| 		if err != nil { | func (q *RegexpQuery) Validate() error { | ||||||
| 			return err | 	return nil // real validation delayed until searcher constructor | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return nil |  | ||||||
| } | } | ||||||
vendor/github.com/blevesearch/bleve/search/query/wildcard.go | 23 (generated, vendored)
							| @@ -15,7 +15,6 @@ | |||||||
| package query | package query | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"regexp" |  | ||||||
| 	"strings" | 	"strings" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| @@ -47,7 +46,6 @@ type WildcardQuery struct { | |||||||
| 	Wildcard string `json:"wildcard"` | 	Wildcard string `json:"wildcard"` | ||||||
| 	FieldVal string `json:"field,omitempty"` | 	FieldVal string `json:"field,omitempty"` | ||||||
| 	BoostVal *Boost `json:"boost,omitempty"` | 	BoostVal *Boost `json:"boost,omitempty"` | ||||||
| 	compiled *regexp.Regexp |  | ||||||
| } | } | ||||||
|  |  | ||||||
| // NewWildcardQuery creates a new Query which finds | // NewWildcardQuery creates a new Query which finds | ||||||
| @@ -83,24 +81,13 @@ func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, op | |||||||
| 	if q.FieldVal == "" { | 	if q.FieldVal == "" { | ||||||
| 		field = m.DefaultSearchField() | 		field = m.DefaultSearchField() | ||||||
| 	} | 	} | ||||||
| 	if q.compiled == nil { |  | ||||||
| 		var err error |  | ||||||
| 		q.compiled, err = q.convertToRegexp() |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return searcher.NewRegexpSearcher(i, q.compiled, field, q.BoostVal.Value(), options) | 	regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) | ||||||
|  |  | ||||||
|  | 	return searcher.NewRegexpStringSearcher(i, regexpString, field, | ||||||
|  | 		q.BoostVal.Value(), options) | ||||||
| } | } | ||||||
|  |  | ||||||
| func (q *WildcardQuery) Validate() error { | func (q *WildcardQuery) Validate() error { | ||||||
| 	var err error | 	return nil // real validation delayed until searcher constructor | ||||||
| 	q.compiled, err = q.convertToRegexp() |  | ||||||
| 	return err |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) { |  | ||||||
| 	regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) |  | ||||||
| 	return regexp.Compile(regexpString) |  | ||||||
| } | } | ||||||
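As with RegexpQuery above, compilation now happens inside NewRegexpStringSearcher; the query only translates wildcard syntax into a regexp string. The translation escapes regexp metacharacters and maps * to .* and ? to . — a rough standalone equivalent with an abridged escape list, not the vendored replacer table:

    package main

    import (
        "fmt"
        "strings"
    )

    // abridged sketch of a wildcard-to-regexp translation:
    // escape common regexp metacharacters, then map the wildcard operators.
    var wildcardToRegexp = strings.NewReplacer(
        ".", `\.`, "+", `\+`, "(", `\(`, ")", `\)`,
        "*", ".*", // any run of characters
        "?", ".", // any single character
    )

    func main() {
        fmt.Println(wildcardToRegexp.Replace("fo?bar*")) // fo.bar.*
    }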
vendor/github.com/blevesearch/bleve/search/scorer/scorer_conjunction.go | 25 (generated, vendored)
							| @@ -15,13 +15,27 @@ | |||||||
| package scorer | package scorer | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeConjunctionQueryScorer int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var cqs ConjunctionQueryScorer | ||||||
|  | 	reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(cqs).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type ConjunctionQueryScorer struct { | type ConjunctionQueryScorer struct { | ||||||
| 	options search.SearcherOptions | 	options search.SearcherOptions | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *ConjunctionQueryScorer) Size() int { | ||||||
|  | 	return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr | ||||||
|  | } | ||||||
|  |  | ||||||
| func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer { | func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer { | ||||||
| 	return &ConjunctionQueryScorer{ | 	return &ConjunctionQueryScorer{ | ||||||
| 		options: options, | 		options: options, | ||||||
| @@ -35,15 +49,11 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ | |||||||
| 		childrenExplanations = make([]*search.Explanation, len(constituents)) | 		childrenExplanations = make([]*search.Explanation, len(constituents)) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	locations := []search.FieldTermLocationMap{} |  | ||||||
| 	for i, docMatch := range constituents { | 	for i, docMatch := range constituents { | ||||||
| 		sum += docMatch.Score | 		sum += docMatch.Score | ||||||
| 		if s.options.Explain { | 		if s.options.Explain { | ||||||
| 			childrenExplanations[i] = docMatch.Expl | 			childrenExplanations[i] = docMatch.Expl | ||||||
| 		} | 		} | ||||||
| 		if docMatch.Locations != nil { |  | ||||||
| 			locations = append(locations, docMatch.Locations) |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
| 	newScore := sum | 	newScore := sum | ||||||
| 	var newExpl *search.Explanation | 	var newExpl *search.Explanation | ||||||
| @@ -55,11 +65,8 @@ func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ | |||||||
| 	rv := constituents[0] | 	rv := constituents[0] | ||||||
| 	rv.Score = newScore | 	rv.Score = newScore | ||||||
| 	rv.Expl = newExpl | 	rv.Expl = newExpl | ||||||
| 	if len(locations) == 1 { | 	rv.FieldTermLocations = search.MergeFieldTermLocations( | ||||||
| 		rv.Locations = locations[0] | 		rv.FieldTermLocations, constituents[1:]) | ||||||
| 	} else if len(locations) > 1 { |  | ||||||
| 		rv.Locations = search.MergeLocations(locations) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
|   | |||||||
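Every scorer and searcher touched by this commit gains a Size() method built on the same recipe: measure the struct's fixed footprint once with reflect at package init, then add variable-length members per call. A minimal sketch of the recipe, with unsafe.Sizeof standing in for bleve's size.SizeOfPtr helper:

package main

import (
	"fmt"
	"reflect"
	"unsafe"
)

type exampleScorer struct {
	queryField string
	queryBoost float64
}

// Measured once at init, so Size() itself never calls into reflect.
var reflectStaticSizeExampleScorer int

func init() {
	var es exampleScorer
	reflectStaticSizeExampleScorer = int(reflect.TypeOf(es).Size())
}

// Static struct size, plus one pointer for the receiver, plus the bytes
// backing variable-length fields: the same shape as the vendored methods.
func (s *exampleScorer) Size() int {
	return reflectStaticSizeExampleScorer +
		int(unsafe.Sizeof(uintptr(0))) + // stands in for size.SizeOfPtr
		len(s.queryField)
}

func main() {
	s := &exampleScorer{queryField: "description", queryBoost: 2.0}
	fmt.Println(s.Size())
}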
vendor/github.com/blevesearch/bleve/search/scorer/scorer_constant.go (generated, vendored): 19 changes
							| @@ -16,11 +16,20 @@ package scorer | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeConstantScorer int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var cs ConstantScorer | ||||||
|  | 	reflectStaticSizeConstantScorer = int(reflect.TypeOf(cs).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type ConstantScorer struct { | type ConstantScorer struct { | ||||||
| 	constant               float64 | 	constant               float64 | ||||||
| 	boost                  float64 | 	boost                  float64 | ||||||
| @@ -30,6 +39,16 @@ type ConstantScorer struct { | |||||||
| 	queryWeightExplanation *search.Explanation | 	queryWeightExplanation *search.Explanation | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *ConstantScorer) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeConstantScorer + size.SizeOfPtr | ||||||
|  |  | ||||||
|  | 	if s.queryWeightExplanation != nil { | ||||||
|  | 		sizeInBytes += s.queryWeightExplanation.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer { | func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer { | ||||||
| 	rv := ConstantScorer{ | 	rv := ConstantScorer{ | ||||||
| 		options:     options, | 		options:     options, | ||||||
|   | |||||||
vendor/github.com/blevesearch/bleve/search/scorer/scorer_disjunction.go (generated, vendored): 24 changes
							| @@ -16,14 +16,27 @@ package scorer | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeDisjunctionQueryScorer int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var dqs DisjunctionQueryScorer | ||||||
|  | 	reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(dqs).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type DisjunctionQueryScorer struct { | type DisjunctionQueryScorer struct { | ||||||
| 	options search.SearcherOptions | 	options search.SearcherOptions | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionQueryScorer) Size() int { | ||||||
|  | 	return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr | ||||||
|  | } | ||||||
|  |  | ||||||
| func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer { | func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer { | ||||||
| 	return &DisjunctionQueryScorer{ | 	return &DisjunctionQueryScorer{ | ||||||
| 		options: options, | 		options: options, | ||||||
| @@ -37,15 +50,11 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ | |||||||
| 		childrenExplanations = make([]*search.Explanation, len(constituents)) | 		childrenExplanations = make([]*search.Explanation, len(constituents)) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	var locations []search.FieldTermLocationMap |  | ||||||
| 	for i, docMatch := range constituents { | 	for i, docMatch := range constituents { | ||||||
| 		sum += docMatch.Score | 		sum += docMatch.Score | ||||||
| 		if s.options.Explain { | 		if s.options.Explain { | ||||||
| 			childrenExplanations[i] = docMatch.Expl | 			childrenExplanations[i] = docMatch.Expl | ||||||
| 		} | 		} | ||||||
| 		if docMatch.Locations != nil { |  | ||||||
| 			locations = append(locations, docMatch.Locations) |  | ||||||
| 		} |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	var rawExpl *search.Explanation | 	var rawExpl *search.Explanation | ||||||
| @@ -67,11 +76,8 @@ func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents [ | |||||||
| 	rv := constituents[0] | 	rv := constituents[0] | ||||||
| 	rv.Score = newScore | 	rv.Score = newScore | ||||||
| 	rv.Expl = newExpl | 	rv.Expl = newExpl | ||||||
| 	if len(locations) == 1 { | 	rv.FieldTermLocations = search.MergeFieldTermLocations( | ||||||
| 		rv.Locations = locations[0] | 		rv.FieldTermLocations, constituents[1:]) | ||||||
| 	} else if len(locations) > 1 { |  | ||||||
| 		rv.Locations = search.MergeLocations(locations) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return rv | 	return rv | ||||||
| } | } | ||||||
|   | |||||||
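Both composite scorers now skip per-constituent Locations maps entirely and defer to search.MergeFieldTermLocations, whose body is not part of this diff. Conceptually it concatenates the flat location records of the remaining constituents onto the winning match; the toy version below is a sketch under that assumption (the real helper presumably also copies location data, since constituent matches return to a pool).

package main

import "fmt"

type fieldTermLocation struct{ Field, Term string }

type documentMatch struct{ FieldTermLocations []fieldTermLocation }

// Sketch of what a MergeFieldTermLocations-style helper must do: append
// every remaining constituent's flat records onto the first match's slice,
// leaving map construction to DocumentMatch.Complete at the end of search.
func mergeFieldTermLocations(dst []fieldTermLocation,
	rest []*documentMatch) []fieldTermLocation {
	for _, dm := range rest {
		dst = append(dst, dm.FieldTermLocations...)
	}
	return dst
}

func main() {
	first := &documentMatch{FieldTermLocations: []fieldTermLocation{{"body", "go"}}}
	second := &documentMatch{FieldTermLocations: []fieldTermLocation{{"title", "go"}}}
	merged := mergeFieldTermLocations(first.FieldTermLocations, []*documentMatch{second})
	fmt.Println(len(merged)) // 2
}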
vendor/github.com/blevesearch/bleve/search/scorer/scorer_term.go (generated, vendored): 86 changes
							| @@ -17,13 +17,22 @@ package scorer | |||||||
| import ( | import ( | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"math" | 	"math" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeTermQueryScorer int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var tqs TermQueryScorer | ||||||
|  | 	reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(tqs).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type TermQueryScorer struct { | type TermQueryScorer struct { | ||||||
| 	queryTerm              []byte | 	queryTerm              string | ||||||
| 	queryField             string | 	queryField             string | ||||||
| 	queryBoost             float64 | 	queryBoost             float64 | ||||||
| 	docTerm                uint64 | 	docTerm                uint64 | ||||||
| @@ -36,9 +45,24 @@ type TermQueryScorer struct { | |||||||
| 	queryWeightExplanation *search.Explanation | 	queryWeightExplanation *search.Explanation | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *TermQueryScorer) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeTermQueryScorer + size.SizeOfPtr + | ||||||
|  | 		len(s.queryTerm) + len(s.queryField) | ||||||
|  |  | ||||||
|  | 	if s.idfExplanation != nil { | ||||||
|  | 		sizeInBytes += s.idfExplanation.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if s.queryWeightExplanation != nil { | ||||||
|  | 		sizeInBytes += s.queryWeightExplanation.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { | func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer { | ||||||
| 	rv := TermQueryScorer{ | 	rv := TermQueryScorer{ | ||||||
| 		queryTerm:   queryTerm, | 		queryTerm:   string(queryTerm), | ||||||
| 		queryField:  queryField, | 		queryField:  queryField, | ||||||
| 		queryBoost:  queryBoost, | 		queryBoost:  queryBoost, | ||||||
| 		docTerm:     docTerm, | 		docTerm:     docTerm, | ||||||
| @@ -82,7 +106,7 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) { | |||||||
| 		} | 		} | ||||||
| 		s.queryWeightExplanation = &search.Explanation{ | 		s.queryWeightExplanation = &search.Explanation{ | ||||||
| 			Value:    s.queryWeight, | 			Value:    s.queryWeight, | ||||||
| 			Message:  fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost), | 			Message:  fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, s.queryTerm, s.queryBoost), | ||||||
| 			Children: childrenExplanations, | 			Children: childrenExplanations, | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @@ -104,7 +128,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term | |||||||
| 		childrenExplanations := make([]*search.Explanation, 3) | 		childrenExplanations := make([]*search.Explanation, 3) | ||||||
| 		childrenExplanations[0] = &search.Explanation{ | 		childrenExplanations[0] = &search.Explanation{ | ||||||
| 			Value:   tf, | 			Value:   tf, | ||||||
| 			Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq), | 			Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq), | ||||||
| 		} | 		} | ||||||
| 		childrenExplanations[1] = &search.Explanation{ | 		childrenExplanations[1] = &search.Explanation{ | ||||||
| 			Value:   termMatch.Norm, | 			Value:   termMatch.Norm, | ||||||
| @@ -113,7 +137,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term | |||||||
| 		childrenExplanations[2] = s.idfExplanation | 		childrenExplanations[2] = s.idfExplanation | ||||||
| 		scoreExplanation = &search.Explanation{ | 		scoreExplanation = &search.Explanation{ | ||||||
| 			Value:    score, | 			Value:    score, | ||||||
| 			Message:  fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID), | 			Message:  fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID), | ||||||
| 			Children: childrenExplanations, | 			Children: childrenExplanations, | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @@ -127,7 +151,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term | |||||||
| 			childExplanations[1] = scoreExplanation | 			childExplanations[1] = scoreExplanation | ||||||
| 			scoreExplanation = &search.Explanation{ | 			scoreExplanation = &search.Explanation{ | ||||||
| 				Value:    score, | 				Value:    score, | ||||||
| 				Message:  fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID), | 				Message:  fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID), | ||||||
| 				Children: childExplanations, | 				Children: childExplanations, | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| @@ -140,41 +164,31 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term | |||||||
| 		rv.Expl = scoreExplanation | 		rv.Expl = scoreExplanation | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 { | 	if len(termMatch.Vectors) > 0 { | ||||||
| 		locs := make([]search.Location, len(termMatch.Vectors)) | 		if cap(rv.FieldTermLocations) < len(termMatch.Vectors) { | ||||||
| 		locsUsed := 0 | 			rv.FieldTermLocations = make([]search.FieldTermLocation, 0, len(termMatch.Vectors)) | ||||||
|  |  | ||||||
| 		totalPositions := 0 |  | ||||||
| 		for _, v := range termMatch.Vectors { |  | ||||||
| 			totalPositions += len(v.ArrayPositions) |  | ||||||
| 		} |  | ||||||
| 		positions := make(search.ArrayPositions, totalPositions) |  | ||||||
| 		positionsUsed := 0 |  | ||||||
|  |  | ||||||
| 		rv.Locations = make(search.FieldTermLocationMap) |  | ||||||
| 		for _, v := range termMatch.Vectors { |  | ||||||
| 			tlm := rv.Locations[v.Field] |  | ||||||
| 			if tlm == nil { |  | ||||||
| 				tlm = make(search.TermLocationMap) |  | ||||||
| 				rv.Locations[v.Field] = tlm |  | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 			loc := &locs[locsUsed] | 		for _, v := range termMatch.Vectors { | ||||||
| 			locsUsed++ | 			var ap search.ArrayPositions | ||||||
|  |  | ||||||
| 			loc.Pos = v.Pos |  | ||||||
| 			loc.Start = v.Start |  | ||||||
| 			loc.End = v.End |  | ||||||
|  |  | ||||||
| 			if len(v.ArrayPositions) > 0 { | 			if len(v.ArrayPositions) > 0 { | ||||||
| 				loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)] | 				n := len(rv.FieldTermLocations) | ||||||
| 				for i, ap := range v.ArrayPositions { | 				if n < cap(rv.FieldTermLocations) { // reuse ap slice if available | ||||||
| 					loc.ArrayPositions[i] = ap | 					ap = rv.FieldTermLocations[:n+1][n].Location.ArrayPositions[:0] | ||||||
| 				} | 				} | ||||||
| 				positionsUsed += len(v.ArrayPositions) | 				ap = append(ap, v.ArrayPositions...) | ||||||
| 			} | 			} | ||||||
|  | 			rv.FieldTermLocations = | ||||||
| 			tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc) | 				append(rv.FieldTermLocations, search.FieldTermLocation{ | ||||||
|  | 					Field: v.Field, | ||||||
|  | 					Term:  s.queryTerm, | ||||||
|  | 					Location: search.Location{ | ||||||
|  | 						Pos:            v.Pos, | ||||||
|  | 						Start:          v.Start, | ||||||
|  | 						End:            v.End, | ||||||
|  | 						ArrayPositions: ap, | ||||||
|  | 					}, | ||||||
|  | 				}) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
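The rewritten vector loop above leans on a grow-or-reuse idiom: because pooled DocumentMatch values keep their FieldTermLocations backing array across hits, a fresh allocation is needed only when an incoming batch outgrows the existing capacity. The idiom in isolation:

package main

import "fmt"

// Reuse the backing array when capacity suffices; otherwise allocate once
// for the larger batch. This is the cap() check the term scorer performs
// before appending a hit's term vectors.
func ensureCap(buf []int, n int) []int {
	if cap(buf) < n {
		return make([]int, 0, n)
	}
	return buf[:0]
}

func main() {
	buf := make([]int, 0, 4)
	buf = ensureCap(buf, 3)
	fmt.Println(len(buf), cap(buf)) // 0 4: reused
	buf = ensureCap(buf, 8)
	fmt.Println(len(buf), cap(buf)) // 0 8: reallocated
}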
vendor/github.com/blevesearch/bleve/search/search.go (generated, vendored): 155 changes
							| @@ -16,11 +16,25 @@ package search | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"fmt" | 	"fmt" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/document" |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeDocumentMatch int | ||||||
|  | var reflectStaticSizeSearchContext int | ||||||
|  | var reflectStaticSizeLocation int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var dm DocumentMatch | ||||||
|  | 	reflectStaticSizeDocumentMatch = int(reflect.TypeOf(dm).Size()) | ||||||
|  | 	var sc SearchContext | ||||||
|  | 	reflectStaticSizeSearchContext = int(reflect.TypeOf(sc).Size()) | ||||||
|  | 	var l Location | ||||||
|  | 	reflectStaticSizeLocation = int(reflect.TypeOf(l).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type ArrayPositions []uint64 | type ArrayPositions []uint64 | ||||||
|  |  | ||||||
| func (ap ArrayPositions) Equals(other ArrayPositions) bool { | func (ap ArrayPositions) Equals(other ArrayPositions) bool { | ||||||
| @@ -47,6 +61,11 @@ type Location struct { | |||||||
| 	ArrayPositions ArrayPositions `json:"array_positions"` | 	ArrayPositions ArrayPositions `json:"array_positions"` | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (l *Location) Size() int { | ||||||
|  | 	return reflectStaticSizeLocation + size.SizeOfPtr + | ||||||
|  | 		len(l.ArrayPositions)*size.SizeOfUint64 | ||||||
|  | } | ||||||
|  |  | ||||||
| type Locations []*Location | type Locations []*Location | ||||||
|  |  | ||||||
| type TermLocationMap map[string]Locations | type TermLocationMap map[string]Locations | ||||||
| @@ -57,6 +76,12 @@ func (t TermLocationMap) AddLocation(term string, location *Location) { | |||||||
|  |  | ||||||
| type FieldTermLocationMap map[string]TermLocationMap | type FieldTermLocationMap map[string]TermLocationMap | ||||||
|  |  | ||||||
|  | type FieldTermLocation struct { | ||||||
|  | 	Field    string | ||||||
|  | 	Term     string | ||||||
|  | 	Location Location | ||||||
|  | } | ||||||
|  |  | ||||||
| type FieldFragmentMap map[string][]string | type FieldFragmentMap map[string][]string | ||||||
|  |  | ||||||
| type DocumentMatch struct { | type DocumentMatch struct { | ||||||
| @@ -74,11 +99,14 @@ type DocumentMatch struct { | |||||||
| 	// fields as float64s and date fields as time.RFC3339 formatted strings. | 	// fields as float64s and date fields as time.RFC3339 formatted strings. | ||||||
| 	Fields map[string]interface{} `json:"fields,omitempty"` | 	Fields map[string]interface{} `json:"fields,omitempty"` | ||||||
|  |  | ||||||
| 	// if we load the document for this hit, remember it so we dont load again |  | ||||||
| 	Document *document.Document `json:"-"` |  | ||||||
|  |  | ||||||
| 	// used to maintain natural index order | 	// used to maintain natural index order | ||||||
| 	HitNumber uint64 `json:"-"` | 	HitNumber uint64 `json:"-"` | ||||||
|  |  | ||||||
|  | 	// used to temporarily hold field term location information during | ||||||
|  | 	// search processing in an efficient, recycle-friendly manner, to | ||||||
|  | 	// be later incorporated into the Locations map when search | ||||||
|  | 	// results are completed | ||||||
|  | 	FieldTermLocations []FieldTermLocation `json:"-"` | ||||||
| } | } | ||||||
|  |  | ||||||
| func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { | func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) { | ||||||
| @@ -108,15 +136,116 @@ func (dm *DocumentMatch) Reset() *DocumentMatch { | |||||||
| 	indexInternalID := dm.IndexInternalID | 	indexInternalID := dm.IndexInternalID | ||||||
| 	// remember the []interface{} used for sort | 	// remember the []interface{} used for sort | ||||||
| 	sort := dm.Sort | 	sort := dm.Sort | ||||||
|  | 	// remember the FieldTermLocations backing array | ||||||
|  | 	ftls := dm.FieldTermLocations | ||||||
|  | 	for i := range ftls { // recycle the ArrayPositions of each location | ||||||
|  | 		ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0] | ||||||
|  | 	} | ||||||
| 	// idiom to copy over from empty DocumentMatch (0 allocations) | 	// idiom to copy over from empty DocumentMatch (0 allocations) | ||||||
| 	*dm = DocumentMatch{} | 	*dm = DocumentMatch{} | ||||||
| 	// reuse the []byte already allocated (and reset len to 0) | 	// reuse the []byte already allocated (and reset len to 0) | ||||||
| 	dm.IndexInternalID = indexInternalID[:0] | 	dm.IndexInternalID = indexInternalID[:0] | ||||||
| 	// reuse the []interface{} already allocated (and reset len to 0) | 	// reuse the []interface{} already allocated (and reset len to 0) | ||||||
| 	dm.Sort = sort[:0] | 	dm.Sort = sort[:0] | ||||||
|  | 	// reuse the FieldTermLocations already allocated (and reset len to 0) | ||||||
|  | 	dm.FieldTermLocations = ftls[:0] | ||||||
| 	return dm | 	return dm | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (dm *DocumentMatch) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeDocumentMatch + size.SizeOfPtr + | ||||||
|  | 		len(dm.Index) + | ||||||
|  | 		len(dm.ID) + | ||||||
|  | 		len(dm.IndexInternalID) | ||||||
|  |  | ||||||
|  | 	if dm.Expl != nil { | ||||||
|  | 		sizeInBytes += dm.Expl.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for k, v := range dm.Locations { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) | ||||||
|  | 		for k1, v1 := range v { | ||||||
|  | 			sizeInBytes += size.SizeOfString + len(k1) + | ||||||
|  | 				size.SizeOfSlice | ||||||
|  | 			for _, entry := range v1 { | ||||||
|  | 				sizeInBytes += entry.Size() | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for k, v := range dm.Fragments { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			size.SizeOfSlice | ||||||
|  |  | ||||||
|  | 		for _, entry := range v { | ||||||
|  | 			sizeInBytes += size.SizeOfString + len(entry) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range dm.Sort { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(entry) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for k, _ := range dm.Fields { | ||||||
|  | 		sizeInBytes += size.SizeOfString + len(k) + | ||||||
|  | 			size.SizeOfPtr | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Complete performs final preparation & transformation of the | ||||||
|  | // DocumentMatch at the end of search processing, also allowing the | ||||||
|  | // caller to provide an optional preallocated locations slice | ||||||
|  | func (dm *DocumentMatch) Complete(prealloc []Location) []Location { | ||||||
|  | 	// transform the FieldTermLocations slice into the Locations map | ||||||
|  | 	nlocs := len(dm.FieldTermLocations) | ||||||
|  | 	if nlocs > 0 { | ||||||
|  | 		if cap(prealloc) < nlocs { | ||||||
|  | 			prealloc = make([]Location, nlocs) | ||||||
|  | 		} | ||||||
|  | 		prealloc = prealloc[:nlocs] | ||||||
|  |  | ||||||
|  | 		var lastField string | ||||||
|  | 		var tlm TermLocationMap | ||||||
|  |  | ||||||
|  | 		for i, ftl := range dm.FieldTermLocations { | ||||||
|  | 			if lastField != ftl.Field { | ||||||
|  | 				lastField = ftl.Field | ||||||
|  |  | ||||||
|  | 				if dm.Locations == nil { | ||||||
|  | 					dm.Locations = make(FieldTermLocationMap) | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				tlm = dm.Locations[ftl.Field] | ||||||
|  | 				if tlm == nil { | ||||||
|  | 					tlm = make(TermLocationMap) | ||||||
|  | 					dm.Locations[ftl.Field] = tlm | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			loc := &prealloc[i] | ||||||
|  | 			*loc = ftl.Location | ||||||
|  |  | ||||||
|  | 			if len(loc.ArrayPositions) > 0 { // copy | ||||||
|  | 				loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			tlm[ftl.Term] = append(tlm[ftl.Term], loc) | ||||||
|  |  | ||||||
|  | 			dm.FieldTermLocations[i] = FieldTermLocation{ // recycle | ||||||
|  | 				Location: Location{ | ||||||
|  | 					ArrayPositions: ftl.Location.ArrayPositions[:0], | ||||||
|  | 				}, | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle | ||||||
|  |  | ||||||
|  | 	return prealloc | ||||||
|  | } | ||||||
|  |  | ||||||
| func (dm *DocumentMatch) String() string { | func (dm *DocumentMatch) String() string { | ||||||
| 	return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) | 	return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score) | ||||||
| } | } | ||||||
| @@ -135,6 +264,7 @@ type Searcher interface { | |||||||
| 	SetQueryNorm(float64) | 	SetQueryNorm(float64) | ||||||
| 	Count() uint64 | 	Count() uint64 | ||||||
| 	Min() int | 	Min() int | ||||||
|  | 	Size() int | ||||||
|  |  | ||||||
| 	DocumentMatchPoolSize() int | 	DocumentMatchPoolSize() int | ||||||
| } | } | ||||||
| @@ -142,9 +272,26 @@ type Searcher interface { | |||||||
| type SearcherOptions struct { | type SearcherOptions struct { | ||||||
| 	Explain            bool | 	Explain            bool | ||||||
| 	IncludeTermVectors bool | 	IncludeTermVectors bool | ||||||
|  | 	Score              string | ||||||
| } | } | ||||||
|  |  | ||||||
| // SearchContext represents the context around a single search | // SearchContext represents the context around a single search | ||||||
| type SearchContext struct { | type SearchContext struct { | ||||||
| 	DocumentMatchPool *DocumentMatchPool | 	DocumentMatchPool *DocumentMatchPool | ||||||
|  | 	Collector         Collector | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (sc *SearchContext) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeSearchContext + size.SizeOfPtr + | ||||||
|  | 		reflectStaticSizeDocumentMatchPool + size.SizeOfPtr | ||||||
|  |  | ||||||
|  | 	if sc.DocumentMatchPool != nil { | ||||||
|  | 		for _, entry := range sc.DocumentMatchPool.avail { | ||||||
|  | 			if entry != nil { | ||||||
|  | 				sizeInBytes += entry.Size() | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
| } | } | ||||||
|   | |||||||
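Complete is where the flat slice finally becomes the nested Locations map that the public API exposes. The compact re-enactment below uses local stand-in types to show just the grouping step; the real method additionally reuses a caller-supplied prealloc buffer and recycles the source slice, as seen in the diff.

package main

import "fmt"

type location struct{ Pos, Start, End uint64 }

type fieldTermLocation struct {
	Field, Term string
	Location    location
}

// Fold the flat, recycle-friendly records into field -> term -> locations.
// Pointers into prealloc stay valid because prealloc never grows here.
func complete(ftls []fieldTermLocation) map[string]map[string][]*location {
	locations := make(map[string]map[string][]*location)
	prealloc := make([]location, len(ftls))
	for i, ftl := range ftls {
		tlm := locations[ftl.Field]
		if tlm == nil {
			tlm = make(map[string][]*location)
			locations[ftl.Field] = tlm
		}
		prealloc[i] = ftl.Location
		tlm[ftl.Term] = append(tlm[ftl.Term], &prealloc[i])
	}
	return locations
}

func main() {
	out := complete([]fieldTermLocation{
		{Field: "body", Term: "go", Location: location{Pos: 1, Start: 0, End: 2}},
		{Field: "body", Term: "go", Location: location{Pos: 9, Start: 40, End: 42}},
	})
	fmt.Println(len(out["body"]["go"])) // 2
}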
vendor/github.com/blevesearch/bleve/search/searcher/search_boolean.go (generated, vendored): 47 changes
							| @@ -16,12 +16,21 @@ package searcher | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"math" | 	"math" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
| 	"github.com/blevesearch/bleve/search/scorer" | 	"github.com/blevesearch/bleve/search/scorer" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeBooleanSearcher int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var bs BooleanSearcher | ||||||
|  | 	reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(bs).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type BooleanSearcher struct { | type BooleanSearcher struct { | ||||||
| 	indexReader     index.IndexReader | 	indexReader     index.IndexReader | ||||||
| 	mustSearcher    search.Searcher | 	mustSearcher    search.Searcher | ||||||
| @@ -52,6 +61,32 @@ func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searc | |||||||
| 	return &rv, nil | 	return &rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *BooleanSearcher) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeBooleanSearcher + size.SizeOfPtr | ||||||
|  |  | ||||||
|  | 	if s.mustSearcher != nil { | ||||||
|  | 		sizeInBytes += s.mustSearcher.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if s.shouldSearcher != nil { | ||||||
|  | 		sizeInBytes += s.shouldSearcher.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if s.mustNotSearcher != nil { | ||||||
|  | 		sizeInBytes += s.mustNotSearcher.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sizeInBytes += s.scorer.Size() | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.matches { | ||||||
|  | 		if entry != nil { | ||||||
|  | 			sizeInBytes += entry.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (s *BooleanSearcher) computeQueryNorm() { | func (s *BooleanSearcher) computeQueryNorm() { | ||||||
| 	// first calculate sum of squared weights | 	// first calculate sum of squared weights | ||||||
| 	sumOfSquaredWeights := 0.0 | 	sumOfSquaredWeights := 0.0 | ||||||
| @@ -284,6 +319,7 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch | |||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	return rv, nil | 	return rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -296,6 +332,14 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	// Advance the searchers only if the currentID cursor is trailing the lookup ID, | ||||||
|  | 	// additionally if the mustNotSearcher has been initialized, ensure that the | ||||||
|  | 	// cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by | ||||||
|  | 	// currentID) is trailing the lookup ID as well - for in the case where currentID | ||||||
|  | 	// is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT | ||||||
|  | 	// advance the currentID or the currMustNot cursors. | ||||||
|  | 	if (s.currentID == nil || s.currentID.Compare(ID) < 0) && | ||||||
|  | 		(s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) { | ||||||
| 		var err error | 		var err error | ||||||
| 		if s.mustSearcher != nil { | 		if s.mustSearcher != nil { | ||||||
| 			if s.currMust != nil { | 			if s.currMust != nil { | ||||||
| @@ -306,6 +350,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		if s.shouldSearcher != nil { | 		if s.shouldSearcher != nil { | ||||||
| 			if s.currShould != nil { | 			if s.currShould != nil { | ||||||
| 				ctx.DocumentMatchPool.Put(s.currShould) | 				ctx.DocumentMatchPool.Put(s.currShould) | ||||||
| @@ -315,6 +360,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 		if s.mustNotSearcher != nil { | 		if s.mustNotSearcher != nil { | ||||||
| 			if s.currMustNot != nil { | 			if s.currMustNot != nil { | ||||||
| 				ctx.DocumentMatchPool.Put(s.currMustNot) | 				ctx.DocumentMatchPool.Put(s.currMustNot) | ||||||
| @@ -332,6 +378,7 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter | |||||||
| 		} else { | 		} else { | ||||||
| 			s.currentID = nil | 			s.currentID = nil | ||||||
| 		} | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return s.Next(ctx) | 	return s.Next(ctx) | ||||||
| } | } | ||||||
|   | |||||||
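The Advance fix adds a double guard before any cursor moves. Restated outside the searcher with plain byte slices standing in for index.IndexInternalID (whose Compare is assumed here to order like bytes.Compare):

package main

import (
	"bytes"
	"fmt"
)

// Advance only when the current hit trails the target ID, and, if the
// must-not cursor exists, only when it trails the target too. When
// currentID is nil but currMustNot is already at or past the target,
// advancing would lose must-not state, so the guard refuses.
func shouldAdvance(currentID, currMustNot, target []byte) bool {
	currTrails := currentID == nil || bytes.Compare(currentID, target) < 0
	mustNotTrails := currMustNot == nil || bytes.Compare(currMustNot, target) < 0
	return currTrails && mustNotTrails
}

func main() {
	fmt.Println(shouldAdvance(nil, []byte{5}, []byte{5}))       // false
	fmt.Println(shouldAdvance([]byte{3}, nil, []byte{5}))       // true
	fmt.Println(shouldAdvance([]byte{3}, []byte{4}, []byte{5})) // true
}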
vendor/github.com/blevesearch/bleve/search/searcher/search_conjunction.go (generated, vendored): 57 changes
							| @@ -16,13 +16,22 @@ package searcher | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"math" | 	"math" | ||||||
|  | 	"reflect" | ||||||
| 	"sort" | 	"sort" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
| 	"github.com/blevesearch/bleve/search/scorer" | 	"github.com/blevesearch/bleve/search/scorer" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeConjunctionSearcher int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var cs ConjunctionSearcher | ||||||
|  | 	reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(cs).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| type ConjunctionSearcher struct { | type ConjunctionSearcher struct { | ||||||
| 	indexReader index.IndexReader | 	indexReader index.IndexReader | ||||||
| 	searchers   OrderedSearcherList | 	searchers   OrderedSearcherList | ||||||
| @@ -34,14 +43,27 @@ type ConjunctionSearcher struct { | |||||||
| 	options     search.SearcherOptions | 	options     search.SearcherOptions | ||||||
| } | } | ||||||
|  |  | ||||||
| func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, options search.SearcherOptions) (*ConjunctionSearcher, error) { | func NewConjunctionSearcher(indexReader index.IndexReader, | ||||||
| 	// build the downstream searchers | 	qsearchers []search.Searcher, options search.SearcherOptions) ( | ||||||
|  | 	search.Searcher, error) { | ||||||
|  | 	// build the sorted downstream searchers | ||||||
| 	searchers := make(OrderedSearcherList, len(qsearchers)) | 	searchers := make(OrderedSearcherList, len(qsearchers)) | ||||||
| 	for i, searcher := range qsearchers { | 	for i, searcher := range qsearchers { | ||||||
| 		searchers[i] = searcher | 		searchers[i] = searcher | ||||||
| 	} | 	} | ||||||
| 	// sort the searchers |  | ||||||
| 	sort.Sort(searchers) | 	sort.Sort(searchers) | ||||||
|  |  | ||||||
|  | 	// attempt the "unadorned" conjunction optimization only when we | ||||||
|  | 	// do not need extra information like freq-norm's or term vectors | ||||||
|  | 	if len(searchers) > 1 && | ||||||
|  | 		options.Score == "none" && !options.IncludeTermVectors { | ||||||
|  | 		rv, err := optimizeCompositeSearcher("conjunction:unadorned", | ||||||
|  | 			indexReader, searchers, options) | ||||||
|  | 		if err != nil || rv != nil { | ||||||
|  | 			return rv, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// build our searcher | 	// build our searcher | ||||||
| 	rv := ConjunctionSearcher{ | 	rv := ConjunctionSearcher{ | ||||||
| 		indexReader: indexReader, | 		indexReader: indexReader, | ||||||
| @@ -51,9 +73,36 @@ func NewConjunctionSearcher(indexReader index.IndexReader, qsearchers []search.S | |||||||
| 		scorer:      scorer.NewConjunctionQueryScorer(options), | 		scorer:      scorer.NewConjunctionQueryScorer(options), | ||||||
| 	} | 	} | ||||||
| 	rv.computeQueryNorm() | 	rv.computeQueryNorm() | ||||||
|  |  | ||||||
|  | 	// attempt push-down conjunction optimization when there's >1 searchers | ||||||
|  | 	if len(searchers) > 1 { | ||||||
|  | 		rv, err := optimizeCompositeSearcher("conjunction", | ||||||
|  | 			indexReader, searchers, options) | ||||||
|  | 		if err != nil || rv != nil { | ||||||
|  | 			return rv, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	return &rv, nil | 	return &rv, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *ConjunctionSearcher) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr + | ||||||
|  | 		s.scorer.Size() | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.searchers { | ||||||
|  | 		sizeInBytes += entry.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.currs { | ||||||
|  | 		if entry != nil { | ||||||
|  | 			sizeInBytes += entry.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
| func (s *ConjunctionSearcher) computeQueryNorm() { | func (s *ConjunctionSearcher) computeQueryNorm() { | ||||||
| 	// first calculate sum of squared weights | 	// first calculate sum of squared weights | ||||||
| 	sumOfSquaredWeights := 0.0 | 	sumOfSquaredWeights := 0.0 | ||||||
| @@ -108,7 +157,7 @@ func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentM | |||||||
| 	var rv *search.DocumentMatch | 	var rv *search.DocumentMatch | ||||||
| 	var err error | 	var err error | ||||||
| OUTER: | OUTER: | ||||||
| 	for s.currs[s.maxIDIdx] != nil { | 	for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil { | ||||||
| 		maxID := s.currs[s.maxIDIdx].IndexInternalID | 		maxID := s.currs[s.maxIDIdx].IndexInternalID | ||||||
|  |  | ||||||
| 		i := 0 | 		i := 0 | ||||||
|   | |||||||
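Note that the constructor's return type widens from *ConjunctionSearcher to the search.Searcher interface, which is what lets both optimizeCompositeSearcher call sites hand back a different concrete searcher. The pattern reduced to its core, with illustrative names that are not bleve's:

package main

import "fmt"

type searcher interface{ Name() string }

type general struct{}

func (general) Name() string { return "general conjunction" }

type unadorned struct{}

func (unadorned) Name() string { return "unadorned: IDs only, no scores or vectors" }

// Returning the interface lets the factory substitute a cheaper searcher
// when the caller asked for no scoring and no term vectors.
func newSearcher(score string, termVectors bool, clauses int) searcher {
	if clauses > 1 && score == "none" && !termVectors {
		return unadorned{}
	}
	return general{}
}

func main() {
	fmt.Println(newSearcher("none", false, 2).Name())
	fmt.Println(newSearcher("", true, 2).Name())
}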
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction.go (generated, vendored): 309 changes
							| @@ -1,4 +1,4 @@ | |||||||
| //  Copyright (c) 2014 Couchbase, Inc. | //  Copyright (c) 2018 Couchbase, Inc. | ||||||
| // | // | ||||||
| // Licensed under the Apache License, Version 2.0 (the "License"); | // Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
| // you may not use this file except in compliance with the License. | // you may not use this file except in compliance with the License. | ||||||
| @@ -16,12 +16,9 @@ package searcher | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"math" |  | ||||||
| 	"sort" |  | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
| 	"github.com/blevesearch/bleve/search/scorer" |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // DisjunctionMaxClauseCount is a compile time setting that applications can | // DisjunctionMaxClauseCount is a compile time setting that applications can | ||||||
| @@ -29,17 +26,74 @@ import ( | |||||||
| // error instead of executing searches when the size exceeds this value. | // error instead of executing searches when the size exceeds this value. | ||||||
| var DisjunctionMaxClauseCount = 0 | var DisjunctionMaxClauseCount = 0 | ||||||
|  |  | ||||||
| type DisjunctionSearcher struct { | // DisjunctionHeapTakeover is a compile time setting that applications can | ||||||
| 	indexReader  index.IndexReader | // adjust to control when the DisjunctionSearcher will switch from a simple | ||||||
| 	searchers    OrderedSearcherList | // slice implementation to a heap implementation. | ||||||
| 	numSearchers int | var DisjunctionHeapTakeover = 10 | ||||||
| 	queryNorm    float64 |  | ||||||
| 	currs        []*search.DocumentMatch | func NewDisjunctionSearcher(indexReader index.IndexReader, | ||||||
| 	scorer       *scorer.DisjunctionQueryScorer | 	qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( | ||||||
| 	min          int | 	search.Searcher, error) { | ||||||
| 	matching     []*search.DocumentMatch | 	return newDisjunctionSearcher(indexReader, qsearchers, min, options, true) | ||||||
| 	matchingIdxs []int | } | ||||||
| 	initialized  bool |  | ||||||
|  | func newDisjunctionSearcher(indexReader index.IndexReader, | ||||||
|  | 	qsearchers []search.Searcher, min float64, options search.SearcherOptions, | ||||||
|  | 	limit bool) (search.Searcher, error) { | ||||||
|  | 	// attempt the "unadorned" disjunction optimization only when we | ||||||
|  | 	// do not need extra information like freq-norm's or term vectors | ||||||
|  | 	// and the requested min is simple | ||||||
|  | 	if len(qsearchers) > 1 && min <= 1 && | ||||||
|  | 		options.Score == "none" && !options.IncludeTermVectors { | ||||||
|  | 		rv, err := optimizeCompositeSearcher("disjunction:unadorned", | ||||||
|  | 			indexReader, qsearchers, options) | ||||||
|  | 		if err != nil || rv != nil { | ||||||
|  | 			return rv, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if len(qsearchers) > DisjunctionHeapTakeover { | ||||||
|  | 		return newDisjunctionHeapSearcher(indexReader, qsearchers, min, options, | ||||||
|  | 			limit) | ||||||
|  | 	} | ||||||
|  | 	return newDisjunctionSliceSearcher(indexReader, qsearchers, min, options, | ||||||
|  | 		limit) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func optimizeCompositeSearcher(optimizationKind string, | ||||||
|  | 	indexReader index.IndexReader, qsearchers []search.Searcher, | ||||||
|  | 	options search.SearcherOptions) (search.Searcher, error) { | ||||||
|  | 	var octx index.OptimizableContext | ||||||
|  |  | ||||||
|  | 	for _, searcher := range qsearchers { | ||||||
|  | 		o, ok := searcher.(index.Optimizable) | ||||||
|  | 		if !ok { | ||||||
|  | 			return nil, nil | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		var err error | ||||||
|  | 		octx, err = o.Optimize(optimizationKind, octx) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		if octx == nil { | ||||||
|  | 			return nil, nil | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	optimized, err := octx.Finish() | ||||||
|  | 	if err != nil || optimized == nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	tfr, ok := optimized.(index.TermFieldReader) | ||||||
|  | 	if !ok { | ||||||
|  | 		return nil, nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return newTermSearcherFromReader(indexReader, tfr, | ||||||
|  | 		[]byte(optimizationKind), "*", 1.0, options) | ||||||
| } | } | ||||||
|  |  | ||||||
| func tooManyClauses(count int) bool { | func tooManyClauses(count int) bool { | ||||||
| @@ -49,226 +103,7 @@ func tooManyClauses(count int) bool { | |||||||
| 	return false | 	return false | ||||||
| } | } | ||||||
|  |  | ||||||
| func tooManyClausesErr() error { | func tooManyClausesErr(count int) error { | ||||||
| 	return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", | 	return fmt.Errorf("TooManyClauses[%d > maxClauseCount, which is set to %d]", | ||||||
| 		DisjunctionMaxClauseCount) | 		count, DisjunctionMaxClauseCount) | ||||||
| } |  | ||||||
|  |  | ||||||
| func NewDisjunctionSearcher(indexReader index.IndexReader, |  | ||||||
| 	qsearchers []search.Searcher, min float64, options search.SearcherOptions) ( |  | ||||||
| 	*DisjunctionSearcher, error) { |  | ||||||
| 	return newDisjunctionSearcher(indexReader, qsearchers, min, options, |  | ||||||
| 		true) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func newDisjunctionSearcher(indexReader index.IndexReader, |  | ||||||
| 	qsearchers []search.Searcher, min float64, options search.SearcherOptions, |  | ||||||
| 	limit bool) ( |  | ||||||
| 	*DisjunctionSearcher, error) { |  | ||||||
| 	if limit && tooManyClauses(len(qsearchers)) { |  | ||||||
| 		return nil, tooManyClausesErr() |  | ||||||
| 	} |  | ||||||
| 	// build the downstream searchers |  | ||||||
| 	searchers := make(OrderedSearcherList, len(qsearchers)) |  | ||||||
| 	for i, searcher := range qsearchers { |  | ||||||
| 		searchers[i] = searcher |  | ||||||
| 	} |  | ||||||
| 	// sort the searchers |  | ||||||
| 	sort.Sort(sort.Reverse(searchers)) |  | ||||||
| 	// build our searcher |  | ||||||
| 	rv := DisjunctionSearcher{ |  | ||||||
| 		indexReader:  indexReader, |  | ||||||
| 		searchers:    searchers, |  | ||||||
| 		numSearchers: len(searchers), |  | ||||||
| 		currs:        make([]*search.DocumentMatch, len(searchers)), |  | ||||||
| 		scorer:       scorer.NewDisjunctionQueryScorer(options), |  | ||||||
| 		min:          int(min), |  | ||||||
| 		matching:     make([]*search.DocumentMatch, len(searchers)), |  | ||||||
| 		matchingIdxs: make([]int, len(searchers)), |  | ||||||
| 	} |  | ||||||
| 	rv.computeQueryNorm() |  | ||||||
| 	return &rv, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) computeQueryNorm() { |  | ||||||
| 	// first calculate sum of squared weights |  | ||||||
| 	sumOfSquaredWeights := 0.0 |  | ||||||
| 	for _, searcher := range s.searchers { |  | ||||||
| 		sumOfSquaredWeights += searcher.Weight() |  | ||||||
| 	} |  | ||||||
| 	// now compute query norm from this |  | ||||||
| 	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) |  | ||||||
| 	// finally tell all the downstream searchers the norm |  | ||||||
| 	for _, searcher := range s.searchers { |  | ||||||
| 		searcher.SetQueryNorm(s.queryNorm) |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) initSearchers(ctx *search.SearchContext) error { |  | ||||||
| 	var err error |  | ||||||
| 	// get all searchers pointing at their first match |  | ||||||
| 	for i, searcher := range s.searchers { |  | ||||||
| 		if s.currs[i] != nil { |  | ||||||
| 			ctx.DocumentMatchPool.Put(s.currs[i]) |  | ||||||
| 		} |  | ||||||
| 		s.currs[i], err = searcher.Next(ctx) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	err = s.updateMatches() |  | ||||||
| 	if err != nil { |  | ||||||
| 		return err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	s.initialized = true |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) updateMatches() error { |  | ||||||
| 	matching := s.matching[:0] |  | ||||||
| 	matchingIdxs := s.matchingIdxs[:0] |  | ||||||
|  |  | ||||||
| 	for i := 0; i < len(s.currs); i++ { |  | ||||||
| 		curr := s.currs[i] |  | ||||||
| 		if curr == nil { |  | ||||||
| 			continue |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		if len(matching) > 0 { |  | ||||||
| 			cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID) |  | ||||||
| 			if cmp > 0 { |  | ||||||
| 				continue |  | ||||||
| 			} |  | ||||||
|  |  | ||||||
| 			if cmp < 0 { |  | ||||||
| 				matching = matching[:0] |  | ||||||
| 				matchingIdxs = matchingIdxs[:0] |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		matching = append(matching, curr) |  | ||||||
| 		matchingIdxs = append(matchingIdxs, i) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	s.matching = matching |  | ||||||
| 	s.matchingIdxs = matchingIdxs |  | ||||||
|  |  | ||||||
| 	return nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) Weight() float64 { |  | ||||||
| 	var rv float64 |  | ||||||
| 	for _, searcher := range s.searchers { |  | ||||||
| 		rv += searcher.Weight() |  | ||||||
| 	} |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) SetQueryNorm(qnorm float64) { |  | ||||||
| 	for _, searcher := range s.searchers { |  | ||||||
| 		searcher.SetQueryNorm(qnorm) |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) Next(ctx *search.SearchContext) ( |  | ||||||
| 	*search.DocumentMatch, error) { |  | ||||||
| 	if !s.initialized { |  | ||||||
| 		err := s.initSearchers(ctx) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	var err error |  | ||||||
| 	var rv *search.DocumentMatch |  | ||||||
|  |  | ||||||
| 	found := false |  | ||||||
| 	for !found && len(s.matching) > 0 { |  | ||||||
| 		if len(s.matching) >= s.min { |  | ||||||
| 			found = true |  | ||||||
| 			// score this match |  | ||||||
| 			rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers) |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		// invoke next on all the matching searchers |  | ||||||
| 		for _, i := range s.matchingIdxs { |  | ||||||
| 			searcher := s.searchers[i] |  | ||||||
| 			if s.currs[i] != rv { |  | ||||||
| 				ctx.DocumentMatchPool.Put(s.currs[i]) |  | ||||||
| 			} |  | ||||||
| 			s.currs[i], err = searcher.Next(ctx) |  | ||||||
| 			if err != nil { |  | ||||||
| 				return nil, err |  | ||||||
| 			} |  | ||||||
| 		} |  | ||||||
|  |  | ||||||
| 		err = s.updateMatches() |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) Advance(ctx *search.SearchContext, |  | ||||||
| 	ID index.IndexInternalID) (*search.DocumentMatch, error) { |  | ||||||
| 	if !s.initialized { |  | ||||||
| 		err := s.initSearchers(ctx) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	// get all searchers pointing at their first match |  | ||||||
| 	var err error |  | ||||||
| 	for i, searcher := range s.searchers { |  | ||||||
| 		if s.currs[i] != nil { |  | ||||||
| 			if s.currs[i].IndexInternalID.Compare(ID) >= 0 { |  | ||||||
| 				continue |  | ||||||
| 			} |  | ||||||
| 			ctx.DocumentMatchPool.Put(s.currs[i]) |  | ||||||
| 		} |  | ||||||
| 		s.currs[i], err = searcher.Advance(ctx, ID) |  | ||||||
| 		if err != nil { |  | ||||||
| 			return nil, err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	err = s.updateMatches() |  | ||||||
| 	if err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	return s.Next(ctx) |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) Count() uint64 { |  | ||||||
| 	// for now return a worst case |  | ||||||
| 	var sum uint64 |  | ||||||
| 	for _, searcher := range s.searchers { |  | ||||||
| 		sum += searcher.Count() |  | ||||||
| 	} |  | ||||||
| 	return sum |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) Close() (rv error) { |  | ||||||
| 	for _, searcher := range s.searchers { |  | ||||||
| 		err := searcher.Close() |  | ||||||
| 		if err != nil && rv == nil { |  | ||||||
| 			rv = err |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return rv |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) Min() int { |  | ||||||
| 	return s.min |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func (s *DisjunctionSearcher) DocumentMatchPoolSize() int { |  | ||||||
| 	rv := len(s.currs) |  | ||||||
| 	for _, s := range s.searchers { |  | ||||||
| 		rv += s.DocumentMatchPoolSize() |  | ||||||
| 	} |  | ||||||
| 	return rv |  | ||||||
| } | } | ||||||
|   | |||||||
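optimizeCompositeSearcher threads one index.OptimizableContext through every child searcher and abandons the attempt, without raising an error, the moment any child does not implement index.Optimizable or declines to participate. The all-or-nothing shape of that loop, mimicked with toy types:

package main

import "fmt"

type optimizable interface {
	optimize(ctx []string) ([]string, error)
}

type termLike struct{ term string }

func (t termLike) optimize(ctx []string) ([]string, error) {
	return append(ctx, t.term), nil
}

// One non-optimizable child, one error, or one nil context cancels the
// whole attempt, and the caller silently falls back to the general path.
func optimizeAll(children []interface{}) []string {
	var ctx []string
	for _, c := range children {
		o, ok := c.(optimizable)
		if !ok {
			return nil
		}
		var err error
		ctx, err = o.optimize(ctx)
		if err != nil || ctx == nil {
			return nil
		}
	}
	return ctx
}

func main() {
	fmt.Println(optimizeAll([]interface{}{termLike{"a"}, termLike{"b"}})) // [a b]
	fmt.Println(optimizeAll([]interface{}{termLike{"a"}, 42}))           // []
}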
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_heap.go (generated, vendored, new file): 343 changes
							| @@ -0,0 +1,343 @@ | |||||||
|  | //  Copyright (c) 2018 Couchbase, Inc. | ||||||
|  | // | ||||||
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | // you may not use this file except in compliance with the License. | ||||||
|  | // You may obtain a copy of the License at | ||||||
|  | // | ||||||
|  | // 		http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  | // | ||||||
|  | // Unless required by applicable law or agreed to in writing, software | ||||||
|  | // distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | // See the License for the specific language governing permissions and | ||||||
|  | // limitations under the License. | ||||||
|  |  | ||||||
|  | package searcher | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"bytes" | ||||||
|  | 	"container/heap" | ||||||
|  | 	"math" | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
|  | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/search/scorer" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeDisjunctionHeapSearcher int | ||||||
|  | var reflectStaticSizeSearcherCurr int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var dhs DisjunctionHeapSearcher | ||||||
|  | 	reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(dhs).Size()) | ||||||
|  |  | ||||||
|  | 	var sc SearcherCurr | ||||||
|  | 	reflectStaticSizeSearcherCurr = int(reflect.TypeOf(sc).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type SearcherCurr struct { | ||||||
|  | 	searcher search.Searcher | ||||||
|  | 	curr     *search.DocumentMatch | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type DisjunctionHeapSearcher struct { | ||||||
|  | 	indexReader index.IndexReader | ||||||
|  |  | ||||||
|  | 	numSearchers int | ||||||
|  | 	scorer       *scorer.DisjunctionQueryScorer | ||||||
|  | 	min          int | ||||||
|  | 	queryNorm    float64 | ||||||
|  | 	initialized  bool | ||||||
|  | 	searchers    []search.Searcher | ||||||
|  | 	heap         []*SearcherCurr | ||||||
|  |  | ||||||
|  | 	matching      []*search.DocumentMatch | ||||||
|  | 	matchingCurrs []*SearcherCurr | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func newDisjunctionHeapSearcher(indexReader index.IndexReader, | ||||||
|  | 	searchers []search.Searcher, min float64, options search.SearcherOptions, | ||||||
|  | 	limit bool) ( | ||||||
|  | 	*DisjunctionHeapSearcher, error) { | ||||||
|  | 	if limit && tooManyClauses(len(searchers)) { | ||||||
|  | 		return nil, tooManyClausesErr(len(searchers)) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// build our searcher | ||||||
|  | 	rv := DisjunctionHeapSearcher{ | ||||||
|  | 		indexReader:   indexReader, | ||||||
|  | 		searchers:     searchers, | ||||||
|  | 		numSearchers:  len(searchers), | ||||||
|  | 		scorer:        scorer.NewDisjunctionQueryScorer(options), | ||||||
|  | 		min:           int(min), | ||||||
|  | 		matching:      make([]*search.DocumentMatch, len(searchers)), | ||||||
|  | 		matchingCurrs: make([]*SearcherCurr, len(searchers)), | ||||||
|  | 		heap:          make([]*SearcherCurr, 0, len(searchers)), | ||||||
|  | 	} | ||||||
|  | 	rv.computeQueryNorm() | ||||||
|  | 	return &rv, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr + | ||||||
|  | 		s.scorer.Size() | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.searchers { | ||||||
|  | 		sizeInBytes += entry.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.matching { | ||||||
|  | 		if entry != nil { | ||||||
|  | 			sizeInBytes += entry.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// for matchingCurrs and heap, just use static size * len | ||||||
|  | 	// since searchers and document matches already counted above | ||||||
|  | 	sizeInBytes += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr | ||||||
|  | 	sizeInBytes += len(s.heap) * reflectStaticSizeSearcherCurr | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) computeQueryNorm() { | ||||||
|  | 	// first calculate sum of squared weights | ||||||
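|  | 	// (each child's Weight() is assumed to already return its squared | ||||||
|  | 	// boost, so a plain sum here is the sum of squared weights) | ||||||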
|  | 	sumOfSquaredWeights := 0.0 | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		sumOfSquaredWeights += searcher.Weight() | ||||||
|  | 	} | ||||||
|  | 	// now compute query norm from this | ||||||
|  | 	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) | ||||||
|  | 	// finally tell all the downstream searchers the norm | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		searcher.SetQueryNorm(s.queryNorm) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error { | ||||||
|  | 	// alloc a single block of SearcherCurrs | ||||||
|  | 	block := make([]SearcherCurr, len(s.searchers)) | ||||||
|  |  | ||||||
|  | 	// get all searchers pointing at their first match | ||||||
|  | 	for i, searcher := range s.searchers { | ||||||
|  | 		curr, err := searcher.Next(ctx) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 		if curr != nil { | ||||||
|  | 			block[i].searcher = searcher | ||||||
|  | 			block[i].curr = curr | ||||||
|  | 			heap.Push(s, &block[i]) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	err := s.updateMatches() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	s.initialized = true | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) updateMatches() error { | ||||||
|  | 	matching := s.matching[:0] | ||||||
|  | 	matchingCurrs := s.matchingCurrs[:0] | ||||||
|  |  | ||||||
|  | 	if len(s.heap) > 0 { | ||||||
|  |  | ||||||
|  | 		// top of the heap is our next hit | ||||||
|  | 		next := heap.Pop(s).(*SearcherCurr) | ||||||
|  | 		matching = append(matching, next.curr) | ||||||
|  | 		matchingCurrs = append(matchingCurrs, next) | ||||||
|  |  | ||||||
|  | 		// now as long as top of heap matches, keep popping | ||||||
|  | 		for len(s.heap) > 0 && bytes.Equal(next.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) { | ||||||
|  | 			next = heap.Pop(s).(*SearcherCurr) | ||||||
|  | 			matching = append(matching, next.curr) | ||||||
|  | 			matchingCurrs = append(matchingCurrs, next) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	s.matching = matching | ||||||
|  | 	s.matchingCurrs = matchingCurrs | ||||||
|  |  | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Weight() float64 { | ||||||
|  | 	var rv float64 | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		rv += searcher.Weight() | ||||||
|  | 	} | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) { | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		searcher.SetQueryNorm(qnorm) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) ( | ||||||
|  | 	*search.DocumentMatch, error) { | ||||||
|  | 	if !s.initialized { | ||||||
|  | 		err := s.initSearchers(ctx) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var rv *search.DocumentMatch | ||||||
|  | 	found := false | ||||||
|  | 	for !found && len(s.matching) > 0 { | ||||||
|  | 		if len(s.matching) >= s.min { | ||||||
|  | 			found = true | ||||||
|  | 			// score this match | ||||||
|  | 			rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// invoke next on all the matching searchers | ||||||
|  | 		for _, matchingCurr := range s.matchingCurrs { | ||||||
|  | 			if matchingCurr.curr != rv { | ||||||
|  | 				ctx.DocumentMatchPool.Put(matchingCurr.curr) | ||||||
|  | 			} | ||||||
|  | 			curr, err := matchingCurr.searcher.Next(ctx) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
|  | 			if curr != nil { | ||||||
|  | 				matchingCurr.curr = curr | ||||||
|  | 				heap.Push(s, matchingCurr) | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		err := s.updateMatches() | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return rv, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext, | ||||||
|  | 	ID index.IndexInternalID) (*search.DocumentMatch, error) { | ||||||
|  | 	if !s.initialized { | ||||||
|  | 		err := s.initSearchers(ctx) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// if there is anything in matching, toss it back onto the heap | ||||||
|  | 	for _, matchingCurr := range s.matchingCurrs { | ||||||
|  | 		heap.Push(s, matchingCurr) | ||||||
|  | 	} | ||||||
|  | 	s.matching = s.matching[:0] | ||||||
|  | 	s.matchingCurrs = s.matchingCurrs[:0] | ||||||
|  |  | ||||||
|  | 	// find all searchers that actually need to be advanced | ||||||
|  | 	// advance them, using s.matchingCurrs as temp storage | ||||||
|  | 	for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 { | ||||||
|  | 		searcherCurr := heap.Pop(s).(*SearcherCurr) | ||||||
|  | 		ctx.DocumentMatchPool.Put(searcherCurr.curr) | ||||||
|  | 		curr, err := searcherCurr.searcher.Advance(ctx, ID) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 		if curr != nil { | ||||||
|  | 			searcherCurr.curr = curr | ||||||
|  | 			s.matchingCurrs = append(s.matchingCurrs, searcherCurr) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	// now all of the searchers that we advanced have to be pushed back | ||||||
|  | 	for _, matchingCurr := range s.matchingCurrs { | ||||||
|  | 		heap.Push(s, matchingCurr) | ||||||
|  | 	} | ||||||
|  | 	// reset our temp space | ||||||
|  | 	s.matchingCurrs = s.matchingCurrs[:0] | ||||||
|  |  | ||||||
|  | 	err := s.updateMatches() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return s.Next(ctx) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Count() uint64 { | ||||||
|  | 	// for now return a worst case: summing the child counts | ||||||
|  | 	// double-counts documents matched by more than one clause | ||||||
|  | 	var sum uint64 | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		sum += searcher.Count() | ||||||
|  | 	} | ||||||
|  | 	return sum | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Close() (rv error) { | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		err := searcher.Close() | ||||||
|  | 		if err != nil && rv == nil { | ||||||
|  | 			rv = err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Min() int { | ||||||
|  | 	return s.min | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int { | ||||||
|  | 	rv := len(s.searchers) | ||||||
|  | 	for _, child := range s.searchers { | ||||||
|  | 		rv += child.DocumentMatchPoolSize() | ||||||
|  | 	} | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // a disjunction searcher implements the index.Optimizable interface | ||||||
|  | // but only activates on an edge case where the disjunction is a | ||||||
|  | // wrapper around a single Optimizable child searcher | ||||||
|  | func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) ( | ||||||
|  | 	index.OptimizableContext, error) { | ||||||
|  | 	if len(s.searchers) == 1 { | ||||||
|  | 		o, ok := s.searchers[0].(index.Optimizable) | ||||||
|  | 		if ok { | ||||||
|  | 			return o.Optimize(kind, octx) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return octx, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // heap impl | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Less(i, j int) bool { | ||||||
|  | 	if s.heap[i].curr == nil { | ||||||
|  | 		return true | ||||||
|  | 	} else if s.heap[j].curr == nil { | ||||||
|  | 		return false | ||||||
|  | 	} | ||||||
|  | 	return bytes.Compare(s.heap[i].curr.IndexInternalID, s.heap[j].curr.IndexInternalID) < 0 | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Swap(i, j int) { | ||||||
|  | 	s.heap[i], s.heap[j] = s.heap[j], s.heap[i] | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Push(x interface{}) { | ||||||
|  | 	s.heap = append(s.heap, x.(*SearcherCurr)) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionHeapSearcher) Pop() interface{} { | ||||||
|  | 	old := s.heap | ||||||
|  | 	n := len(old) | ||||||
|  | 	x := old[n-1] | ||||||
|  | 	s.heap = old[0 : n-1] | ||||||
|  | 	return x | ||||||
|  | } | ||||||
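The heap-based disjunction above keeps one SearcherCurr cursor per child searcher and always pops the smallest IndexInternalID, so advancing a disjunction of n clauses costs O(log n) per hit rather than a linear scan. A minimal, self-contained sketch of the same k-way merge pattern over plain sorted int slices (illustrative only; cursor and kWayMerge are invented names, not bleve API):

package main

import (
	"container/heap"
	"fmt"
)

// cursor tracks one sorted stream and the current position within it.
type cursor struct {
	vals []int
	pos  int
}

type cursorHeap []*cursor

func (h cursorHeap) Len() int           { return len(h) }
func (h cursorHeap) Less(i, j int) bool { return h[i].vals[h[i].pos] < h[j].vals[h[j].pos] }
func (h cursorHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

func (h *cursorHeap) Push(x interface{}) { *h = append(*h, x.(*cursor)) }
func (h *cursorHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[:n-1]
	return x
}

// kWayMerge emits the deduplicated union of the sorted inputs, the way the
// heap searcher surfaces each document ID exactly once.
func kWayMerge(streams ...[]int) []int {
	h := &cursorHeap{}
	for _, s := range streams {
		if len(s) > 0 {
			heap.Push(h, &cursor{vals: s})
		}
	}
	var out []int
	for h.Len() > 0 {
		c := heap.Pop(h).(*cursor)
		v := c.vals[c.pos]
		// group equal IDs, as updateMatches does before scoring
		if len(out) == 0 || out[len(out)-1] != v {
			out = append(out, v)
		}
		if c.pos++; c.pos < len(c.vals) {
			heap.Push(h, c)
		}
	}
	return out
}

func main() {
	fmt.Println(kWayMerge([]int{1, 3, 5}, []int{3, 4}, []int{5})) // [1 3 4 5]
}

The vendored searcher additionally pops every cursor that shares the top ID into s.matching, so the scorer can see how many clauses matched a given document before those cursors are advanced and re-pushed.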
							
								
								
									
vendor/github.com/blevesearch/bleve/search/searcher/search_disjunction_slice.go (298 lines, generated, vendored, new file)
							| @@ -0,0 +1,298 @@ | |||||||
|  | //  Copyright (c) 2018 Couchbase, Inc. | ||||||
|  | // | ||||||
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | // you may not use this file except in compliance with the License. | ||||||
|  | // You may obtain a copy of the License at | ||||||
|  | // | ||||||
|  | // 		http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  | // | ||||||
|  | // Unless required by applicable law or agreed to in writing, software | ||||||
|  | // distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | // See the License for the specific language governing permissions and | ||||||
|  | // limitations under the License. | ||||||
|  |  | ||||||
|  | package searcher | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"math" | ||||||
|  | 	"reflect" | ||||||
|  | 	"sort" | ||||||
|  |  | ||||||
|  | 	"github.com/blevesearch/bleve/index" | ||||||
|  | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/search/scorer" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeDisjunctionSliceSearcher int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var ds DisjunctionSliceSearcher | ||||||
|  | 	reflectStaticSizeDisjunctionSliceSearcher = int(reflect.TypeOf(ds).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type DisjunctionSliceSearcher struct { | ||||||
|  | 	indexReader  index.IndexReader | ||||||
|  | 	searchers    OrderedSearcherList | ||||||
|  | 	numSearchers int | ||||||
|  | 	queryNorm    float64 | ||||||
|  | 	currs        []*search.DocumentMatch | ||||||
|  | 	scorer       *scorer.DisjunctionQueryScorer | ||||||
|  | 	min          int | ||||||
|  | 	matching     []*search.DocumentMatch | ||||||
|  | 	matchingIdxs []int | ||||||
|  | 	initialized  bool | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func newDisjunctionSliceSearcher(indexReader index.IndexReader, | ||||||
|  | 	qsearchers []search.Searcher, min float64, options search.SearcherOptions, | ||||||
|  | 	limit bool) ( | ||||||
|  | 	*DisjunctionSliceSearcher, error) { | ||||||
|  | 	if limit && tooManyClauses(len(qsearchers)) { | ||||||
|  | 		return nil, tooManyClausesErr(len(qsearchers)) | ||||||
|  | 	} | ||||||
|  | 	// build the downstream searchers | ||||||
|  | 	searchers := make(OrderedSearcherList, len(qsearchers)) | ||||||
|  | 	for i, searcher := range qsearchers { | ||||||
|  | 		searchers[i] = searcher | ||||||
|  | 	} | ||||||
|  | 	// sort the searchers | ||||||
|  | 	sort.Sort(sort.Reverse(searchers)) | ||||||
|  | 	// build our searcher | ||||||
|  | 	rv := DisjunctionSliceSearcher{ | ||||||
|  | 		indexReader:  indexReader, | ||||||
|  | 		searchers:    searchers, | ||||||
|  | 		numSearchers: len(searchers), | ||||||
|  | 		currs:        make([]*search.DocumentMatch, len(searchers)), | ||||||
|  | 		scorer:       scorer.NewDisjunctionQueryScorer(options), | ||||||
|  | 		min:          int(min), | ||||||
|  | 		matching:     make([]*search.DocumentMatch, len(searchers)), | ||||||
|  | 		matchingIdxs: make([]int, len(searchers)), | ||||||
|  | 	} | ||||||
|  | 	rv.computeQueryNorm() | ||||||
|  | 	return &rv, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) Size() int { | ||||||
|  | 	sizeInBytes := reflectStaticSizeDisjunctionSliceSearcher + size.SizeOfPtr + | ||||||
|  | 		s.scorer.Size() | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.searchers { | ||||||
|  | 		sizeInBytes += entry.Size() | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.currs { | ||||||
|  | 		if entry != nil { | ||||||
|  | 			sizeInBytes += entry.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, entry := range s.matching { | ||||||
|  | 		if entry != nil { | ||||||
|  | 			sizeInBytes += entry.Size() | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sizeInBytes += len(s.matchingIdxs) * size.SizeOfInt | ||||||
|  |  | ||||||
|  | 	return sizeInBytes | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) computeQueryNorm() { | ||||||
|  | 	// first calculate sum of squared weights | ||||||
|  | 	sumOfSquaredWeights := 0.0 | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		sumOfSquaredWeights += searcher.Weight() | ||||||
|  | 	} | ||||||
|  | 	// now compute query norm from this | ||||||
|  | 	s.queryNorm = 1.0 / math.Sqrt(sumOfSquaredWeights) | ||||||
|  | 	// finally tell all the downstream searchers the norm | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		searcher.SetQueryNorm(s.queryNorm) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) initSearchers(ctx *search.SearchContext) error { | ||||||
|  | 	var err error | ||||||
|  | 	// get all searchers pointing at their first match | ||||||
|  | 	for i, searcher := range s.searchers { | ||||||
|  | 		if s.currs[i] != nil { | ||||||
|  | 			ctx.DocumentMatchPool.Put(s.currs[i]) | ||||||
|  | 		} | ||||||
|  | 		s.currs[i], err = searcher.Next(ctx) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	err = s.updateMatches() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	s.initialized = true | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) updateMatches() error { | ||||||
|  | 	matching := s.matching[:0] | ||||||
|  | 	matchingIdxs := s.matchingIdxs[:0] | ||||||
|  |  | ||||||
|  | 	for i := 0; i < len(s.currs); i++ { | ||||||
|  | 		curr := s.currs[i] | ||||||
|  | 		if curr == nil { | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
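|  | 		// keep only the cursors positioned at the smallest ID seen so far: | ||||||
|  | 		// a larger ID is skipped, a smaller one resets the matching set | ||||||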
|  | 		if len(matching) > 0 { | ||||||
|  | 			cmp := curr.IndexInternalID.Compare(matching[0].IndexInternalID) | ||||||
|  | 			if cmp > 0 { | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if cmp < 0 { | ||||||
|  | 				matching = matching[:0] | ||||||
|  | 				matchingIdxs = matchingIdxs[:0] | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		matching = append(matching, curr) | ||||||
|  | 		matchingIdxs = append(matchingIdxs, i) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	s.matching = matching | ||||||
|  | 	s.matchingIdxs = matchingIdxs | ||||||
|  |  | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) Weight() float64 { | ||||||
|  | 	var rv float64 | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		rv += searcher.Weight() | ||||||
|  | 	} | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) SetQueryNorm(qnorm float64) { | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		searcher.SetQueryNorm(qnorm) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) Next(ctx *search.SearchContext) ( | ||||||
|  | 	*search.DocumentMatch, error) { | ||||||
|  | 	if !s.initialized { | ||||||
|  | 		err := s.initSearchers(ctx) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	var err error | ||||||
|  | 	var rv *search.DocumentMatch | ||||||
|  |  | ||||||
|  | 	found := false | ||||||
|  | 	for !found && len(s.matching) > 0 { | ||||||
|  | 		if len(s.matching) >= s.min { | ||||||
|  | 			found = true | ||||||
|  | 			// score this match | ||||||
|  | 			rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// invoke next on all the matching searchers | ||||||
|  | 		for _, i := range s.matchingIdxs { | ||||||
|  | 			searcher := s.searchers[i] | ||||||
|  | 			if s.currs[i] != rv { | ||||||
|  | 				ctx.DocumentMatchPool.Put(s.currs[i]) | ||||||
|  | 			} | ||||||
|  | 			s.currs[i], err = searcher.Next(ctx) | ||||||
|  | 			if err != nil { | ||||||
|  | 				return nil, err | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		err = s.updateMatches() | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return rv, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) Advance(ctx *search.SearchContext, | ||||||
|  | 	ID index.IndexInternalID) (*search.DocumentMatch, error) { | ||||||
|  | 	if !s.initialized { | ||||||
|  | 		err := s.initSearchers(ctx) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	// get all searchers pointing at their first match | ||||||
|  | 	var err error | ||||||
|  | 	for i, searcher := range s.searchers { | ||||||
|  | 		if s.currs[i] != nil { | ||||||
|  | 			if s.currs[i].IndexInternalID.Compare(ID) >= 0 { | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  | 			ctx.DocumentMatchPool.Put(s.currs[i]) | ||||||
|  | 		} | ||||||
|  | 		s.currs[i], err = searcher.Advance(ctx, ID) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	err = s.updateMatches() | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return s.Next(ctx) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) Count() uint64 { | ||||||
|  | 	// for now return a worst case: summing the child counts | ||||||
|  | 	// double-counts documents matched by more than one clause | ||||||
|  | 	var sum uint64 | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		sum += searcher.Count() | ||||||
|  | 	} | ||||||
|  | 	return sum | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) Close() (rv error) { | ||||||
|  | 	for _, searcher := range s.searchers { | ||||||
|  | 		err := searcher.Close() | ||||||
|  | 		if err != nil && rv == nil { | ||||||
|  | 			rv = err | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) Min() int { | ||||||
|  | 	return s.min | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int { | ||||||
|  | 	rv := len(s.currs) | ||||||
|  | 	for _, child := range s.searchers { | ||||||
|  | 		rv += child.DocumentMatchPoolSize() | ||||||
|  | 	} | ||||||
|  | 	return rv | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // a disjunction searcher implements the index.Optimizable interface | ||||||
|  | // but only activates on an edge case where the disjunction is a | ||||||
|  | // wrapper around a single Optimizable child searcher | ||||||
|  | func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableContext) ( | ||||||
|  | 	index.OptimizableContext, error) { | ||||||
|  | 	if len(s.searchers) == 1 { | ||||||
|  | 		o, ok := s.searchers[0].(index.Optimizable) | ||||||
|  | 		if ok { | ||||||
|  | 			return o.Optimize(kind, octx) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return octx, nil | ||||||
|  | } | ||||||
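By contrast, the slice-based searcher above has no heap: each step rescans every live cursor and keeps those positioned at the minimum ID, which is O(n) per hit but allocation-free and cheap in constants for small clause counts (the surrounding bleve code presumably selects between the two variants by clause count). A hedged sketch of that min-scan over plain ints, with minScanStep as an invented name:

package main

import "fmt"

// minScanStep mirrors DisjunctionSliceSearcher.updateMatches: it returns the
// smallest current value and the indexes of all cursors positioned on it.
func minScanStep(currs []int) (min int, matching []int) {
	for i, v := range currs {
		switch {
		case len(matching) == 0 || v < min:
			min = v
			matching = append(matching[:0], i) // smaller ID found: reset the set
		case v == min:
			matching = append(matching, i) // another clause matches the same ID
		}
	}
	return min, matching
}

func main() {
	min, idxs := minScanStep([]int{7, 3, 9, 3})
	fmt.Println(min, idxs) // 3 [1 3]
}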
							
								
								
									
vendor/github.com/blevesearch/bleve/search/searcher/search_docid.go (16 changes, generated, vendored)
							| @@ -15,11 +15,21 @@ | |||||||
| package searcher | package searcher | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
| 	"github.com/blevesearch/bleve/search/scorer" | 	"github.com/blevesearch/bleve/search/scorer" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeDocIDSearcher int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var ds DocIDSearcher | ||||||
|  | 	reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(ds).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| // DocIDSearcher returns documents matching a predefined set of identifiers. | // DocIDSearcher returns documents matching a predefined set of identifiers. | ||||||
| type DocIDSearcher struct { | type DocIDSearcher struct { | ||||||
| 	reader index.DocIDReader | 	reader index.DocIDReader | ||||||
| @@ -42,6 +52,12 @@ func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64 | |||||||
| 	}, nil | 	}, nil | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (s *DocIDSearcher) Size() int { | ||||||
|  | 	return reflectStaticSizeDocIDSearcher + size.SizeOfPtr + | ||||||
|  | 		s.reader.Size() + | ||||||
|  | 		s.scorer.Size() | ||||||
|  | } | ||||||
|  |  | ||||||
| func (s *DocIDSearcher) Count() uint64 { | func (s *DocIDSearcher) Count() uint64 { | ||||||
| 	return uint64(s.count) | 	return uint64(s.count) | ||||||
| } | } | ||||||
|   | |||||||
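The change repeated across the smaller files in this diff is memory accounting: each searcher computes reflect.TypeOf(T).Size() once in init() and its Size() method adds that fixed layout to the dynamically sized data it owns, presumably so callers can estimate memory held by in-flight searches. A minimal sketch of the pattern with a hypothetical node type (sizeOfPtr stands in for bleve's size.SizeOfPtr):

package main

import (
	"fmt"
	"reflect"
	"unsafe"
)

// node is a hypothetical struct standing in for one of the searchers.
type node struct {
	ids []byte
}

// sizeOfPtr approximates bleve's size.SizeOfPtr constant.
const sizeOfPtr = int(unsafe.Sizeof(uintptr(0)))

// computed once, mirroring the reflectStaticSize* vars added in this diff
var reflectStaticSizeNode = int(reflect.TypeOf(node{}).Size())

// Size estimates the footprint: the fixed struct layout, the pointer that
// refers to it, and the backing array the struct owns.
func (n *node) Size() int {
	return reflectStaticSizeNode + sizeOfPtr + cap(n.ids)
}

func main() {
	n := &node{ids: make([]byte, 0, 64)}
	fmt.Println(n.Size())
}

Caching the reflect call in init() matters because Size() runs on hot paths, while reflect.TypeOf(...).Size() is comparatively expensive to evaluate per call.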
							
								
								
									
vendor/github.com/blevesearch/bleve/search/searcher/search_filter.go (15 changes, generated, vendored)
							| @@ -15,10 +15,20 @@ | |||||||
| package searcher | package searcher | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"reflect" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
|  | 	"github.com/blevesearch/bleve/size" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var reflectStaticSizeFilteringSearcher int | ||||||
|  |  | ||||||
|  | func init() { | ||||||
|  | 	var fs FilteringSearcher | ||||||
|  | 	reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(fs).Size()) | ||||||
|  | } | ||||||
|  |  | ||||||
| // FilterFunc defines a function which can filter documents | // FilterFunc defines a function which can filter documents | ||||||
| // returning true means keep the document | // returning true means keep the document | ||||||
| // returning false means do not keep the document | // returning false means do not keep the document | ||||||
| @@ -38,6 +48,11 @@ func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearch | |||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|  | func (f *FilteringSearcher) Size() int { | ||||||
|  | 	return reflectStaticSizeFilteringSearcher + size.SizeOfPtr + | ||||||
|  | 		f.child.Size() | ||||||
|  | } | ||||||
|  |  | ||||||
| func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { | func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) { | ||||||
| 	next, err := f.child.Next(ctx) | 	next, err := f.child.Next(ctx) | ||||||
| 	for next != nil && err == nil { | 	for next != nil && err == nil { | ||||||
|   | |||||||
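The FilteringSearcher touched above is a plain decorator: Next() keeps pulling from the wrapped child until the FilterFunc accepts a hit or the child is exhausted, and the new Size() simply adds the child's. The same shape works for any pull-style iterator; a sketch with a hypothetical intIterator rather than the bleve API:

package main

import "fmt"

// intIterator is a pull-style iterator; ok is false once exhausted.
type intIterator interface {
	Next() (v int, ok bool)
}

// sliceIter walks a fixed slice.
type sliceIter struct {
	vals []int
	pos  int
}

func (s *sliceIter) Next() (int, bool) {
	if s.pos >= len(s.vals) {
		return 0, false
	}
	v := s.vals[s.pos]
	s.pos++
	return v, true
}

// filterIter mirrors FilteringSearcher: pull from the child until the
// predicate keeps a value or the child runs out.
type filterIter struct {
	child intIterator
	keep  func(int) bool
}

func (f *filterIter) Next() (int, bool) {
	for {
		v, ok := f.child.Next()
		if !ok {
			return 0, false
		}
		if f.keep(v) {
			return v, true
		}
	}
}

func main() {
	it := &filterIter{
		child: &sliceIter{vals: []int{1, 2, 3, 4, 5, 6}},
		keep:  func(v int) bool { return v%2 == 0 },
	}
	for v, ok := it.Next(); ok; v, ok = it.Next() {
		fmt.Println(v) // 2, 4, 6
	}
}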
							
								
								
									
vendor/github.com/blevesearch/bleve/search/searcher/search_fuzzy.go (49 changes, generated, vendored)
							| @@ -15,13 +15,26 @@ | |||||||
| package searcher | package searcher | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
|  | 	"fmt" | ||||||
|  |  | ||||||
| 	"github.com/blevesearch/bleve/index" | 	"github.com/blevesearch/bleve/index" | ||||||
| 	"github.com/blevesearch/bleve/search" | 	"github.com/blevesearch/bleve/search" | ||||||
| ) | ) | ||||||
|  |  | ||||||
|  | var MaxFuzziness = 2 | ||||||
|  |  | ||||||
| func NewFuzzySearcher(indexReader index.IndexReader, term string, | func NewFuzzySearcher(indexReader index.IndexReader, term string, | ||||||
| 	prefix, fuzziness int, field string, boost float64, | 	prefix, fuzziness int, field string, boost float64, | ||||||
| 	options search.SearcherOptions) (search.Searcher, error) { | 	options search.SearcherOptions) (search.Searcher, error) { | ||||||
|  |  | ||||||
|  | 	if fuzziness > MaxFuzziness { | ||||||
|  | 		return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if fuzziness < 0 { | ||||||
|  | 		return nil, fmt.Errorf("invalid fuzziness, cannot be negative") | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	// Note: we don't byte slice the term for a prefix because of runes. | 	// Note: we don't byte slice the term for a prefix because of runes. | ||||||
| 	prefixTerm := "" | 	prefixTerm := "" | ||||||
| 	for i, r := range term { | 	for i, r := range term { | ||||||
| @@ -31,7 +44,6 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, | |||||||
| 			break | 			break | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, | 	candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, | ||||||
| 		field, prefixTerm) | 		field, prefixTerm) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| @@ -45,12 +57,40 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, | |||||||
| func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, | func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, | ||||||
| 	fuzziness int, field, prefixTerm string) (rv []string, err error) { | 	fuzziness int, field, prefixTerm string) (rv []string, err error) { | ||||||
| 	rv = make([]string, 0) | 	rv = make([]string, 0) | ||||||
|  |  | ||||||
|  | 	// advanced reader implementations may expose a Levenshtein-automaton | ||||||
|  | 	// based dictionary iterator; when available, call it directly to | ||||||
|  | 	// collect the candidate terms | ||||||
|  | 	if ir, ok := indexReader.(index.IndexReaderFuzzy); ok { | ||||||
|  | 		fieldDict, err := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm) | ||||||
|  | 		if err != nil { | ||||||
|  | 			return nil, err | ||||||
|  | 		} | ||||||
|  | 		defer func() { | ||||||
|  | 			if cerr := fieldDict.Close(); cerr != nil && err == nil { | ||||||
|  | 				err = cerr | ||||||
|  | 			} | ||||||
|  | 		}() | ||||||
|  | 		tfd, err := fieldDict.Next() | ||||||
|  | 		for err == nil && tfd != nil { | ||||||
|  | 			rv = append(rv, tfd.Term) | ||||||
|  | 			if tooManyClauses(len(rv)) { | ||||||
|  | 				return nil, tooManyClausesErr(len(rv)) | ||||||
|  | 			} | ||||||
|  | 			tfd, err = fieldDict.Next() | ||||||
|  | 		} | ||||||
|  | 		return rv, err | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	var fieldDict index.FieldDict | 	var fieldDict index.FieldDict | ||||||
| 	if len(prefixTerm) > 0 { | 	if len(prefixTerm) > 0 { | ||||||
| 		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) | 		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) | ||||||
| 	} else { | 	} else { | ||||||
| 		fieldDict, err = indexReader.FieldDict(field) | 		fieldDict, err = indexReader.FieldDict(field) | ||||||
| 	} | 	} | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
| 	defer func() { | 	defer func() { | ||||||
| 		if cerr := fieldDict.Close(); cerr != nil && err == nil { | 		if cerr := fieldDict.Close(); cerr != nil && err == nil { | ||||||
| 			err = cerr | 			err = cerr | ||||||
| @@ -58,13 +98,16 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, | |||||||
| 	}() | 	}() | ||||||
|  |  | ||||||
| 	// enumerate terms and check levenshtein distance | 	// enumerate terms and check levenshtein distance | ||||||
|  | 	var reuse []int | ||||||
| 	tfd, err := fieldDict.Next() | 	tfd, err := fieldDict.Next() | ||||||
| 	for err == nil && tfd != nil { | 	for err == nil && tfd != nil { | ||||||
| 		ld, exceeded := search.LevenshteinDistanceMax(term, tfd.Term, fuzziness) | 		var ld int | ||||||
|  | 		var exceeded bool | ||||||
|  | 		ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse) | ||||||
| 		if !exceeded && ld <= fuzziness { | 		if !exceeded && ld <= fuzziness { | ||||||
| 			rv = append(rv, tfd.Term) | 			rv = append(rv, tfd.Term) | ||||||
| 			if tooManyClauses(len(rv)) { | 			if tooManyClauses(len(rv)) { | ||||||
| 				return rv, tooManyClausesErr() | 				return nil, tooManyClausesErr(len(rv)) | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		tfd, err = fieldDict.Next() | 		tfd, err = fieldDict.Next() | ||||||
|   | |||||||
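Two things change in the fuzzy searcher: fuzziness is now validated against MaxFuzziness, and the candidate scan threads one scratch slice through the whole dictionary walk (LevenshteinDistanceMaxReuseSlice) instead of allocating a DP row per term, bailing out early once no cell of the current row can stay within the budget. A sketch of that general technique — single-row bounded Levenshtein with buffer reuse — assuming ASCII terms; this is the idea, not bleve's exact implementation:

package main

import "fmt"

// levenshteinMaxReuse computes the edit distance between a and b, reporting
// exceeded=true as soon as every cell in the current DP row is above max.
// The caller hands the returned slice back on the next invocation, so a
// whole dictionary scan reuses a single allocation.
func levenshteinMaxReuse(a, b string, max int, row []int) (int, bool, []int) {
	if cap(row) < len(b)+1 {
		row = make([]int, len(b)+1)
	}
	row = row[:len(b)+1]
	for j := range row {
		row[j] = j
	}
	for i := 1; i <= len(a); i++ {
		prev := row[0] // holds row[i-1][j-1] as j advances
		row[0] = i
		best := row[0]
		for j := 1; j <= len(b); j++ {
			cost := 1
			if a[i-1] == b[j-1] {
				cost = 0
			}
			d := minInt(prev+cost, minInt(row[j]+1, row[j-1]+1))
			prev = row[j]
			row[j] = d
			if d < best {
				best = d
			}
		}
		// every path to the final cell passes through this row, so once the
		// row minimum exceeds max the distance cannot come back under it
		if best > max {
			return best, true, row
		}
	}
	return row[len(b)], row[len(b)] > max, row
}

func minInt(x, y int) int {
	if x < y {
		return x
	}
	return y
}

func main() {
	var scratch []int
	for _, term := range []string{"kitten", "mitten", "sitting"} {
		// prints the distance (or a lower bound when exceeded) and the flag
		d, over, s := levenshteinMaxReuse("kitten", term, 1, scratch)
		scratch = s
		fmt.Println(term, d, over)
	}
}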
Some files were not shown because too many files have changed in this diff.