File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed
Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -84,7 +84,7 @@ export class RegexChunker {
8484 const segments = cleaned . split ( this . regex ) . filter ( ( s ) => s . trim ( ) . length > 0 )
8585
8686 if ( segments . length <= 1 ) {
87- logger . warn ( 'Regex pattern did not produce any splits, falling back to character splitting' )
87+ logger . warn ( 'Regex pattern did not produce any splits, falling back to word-boundary splitting' )
8888 const chunkSizeChars = tokensToChars ( this . chunkSize )
8989 let chunks = splitAtWordBoundaries ( cleaned , chunkSizeChars )
9090 if ( this . chunkOverlap > 0 ) {
Original file line number Diff line number Diff line change @@ -28,7 +28,7 @@ export class SentenceChunker {
2828 private splitSentences ( text : string ) : string [ ] {
2929 return text
3030 . split (
31- / (?< ! \b (?: M r | M r s | M s | D r | P r o f | S r | J r | S t | R e v | G e n | S g t | C a p t | L t | C o l | M a j | N o | F i g | V o l | C h | v s | e t c | I n c | L t d | C o r p | C o | a p p r o x | d e p t | e s t | g o v t | A v e | B l v d | R d | J a n | F e b | M a r | A p r | A u g | S e p | O c t | N o v | D e c | i \. e | e \. g ) \. ) (?< ! [ A - Z ] \. ) (?< ! \. \. ) (?< ! \d \. ) (?< = [ . ! ? ] ) \s + /
31+ / (?< ! \b (?: M r | M r s | M s | D r | P r o f | S r | J r | S t | R e v | G e n | S g t | C a p t | L t | C o l | M a j | N o | F i g | V o l | C h | v s | e t c | I n c | L t d | C o r p | C o | a p p r o x | d e p t | e s t | g o v t | A v e | B l v d | R d | J a n | F e b | M a r | A p r | J u n | J u l | A u g | S e p | O c t | N o v | D e c | i \. e | e \. g ) \. ) (?< ! [ A - Z ] \. ) (?< ! \. \. ) (?< ! \d \. ) (?< = [ . ! ? ] ) \s + /
3232 )
3333 . filter ( ( s ) => s . trim ( ) . length > 0 )
3434 }
You can’t perform that action at this time.
0 commit comments