Enum textwrap::WordSeparator

source · [−]

pub enum WordSeparator {
    AsciiSpace,
    UnicodeBreakProperties,
    Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>),
}

Expand description

Describes where words occur in a line of text.

The simplest approach is to say that words are separated by one or more ASCII spaces (' '). This works for Western languages without emojis. A more complex approach is to use the Unicode line breaking algorithm, which finds break points in non-ASCII text.

Line breaks occur between words. Please see WordSplitter for options on how to handle hyphenation of individual words.

Examples

use textwrap::core::Word;
use textwrap::WordSeparator::AsciiSpace;

let words = AsciiSpace.find_words("Hello World!").collect::<Vec<_>>();
assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]);

Variants

`AsciiSpace`

Find words by splitting on runs of ' ' characters.

Examples

use textwrap::core::Word;
use textwrap::WordSeparator::AsciiSpace;

let words = AsciiSpace.find_words("Hello   World!").collect::<Vec<_>>();
assert_eq!(words, vec![Word::from("Hello   "),
                       Word::from("World!")]);

`UnicodeBreakProperties`

Split line into words using Unicode break properties.

This word separator uses the Unicode line breaking algorithm described in Unicode Standard Annex #14 to find legal places to break lines. There is a small difference in that the U+002D (Hyphen-Minus) and U+00AD (Soft Hyphen) don’t create a line break: to allow a line break at a hyphen, use WordSplitter::HyphenSplitter. Soft hyphens are not currently supported.

Examples

Unlike WordSeparator::AsciiSpace, the Unicode line breaking algorithm will find line break opportunities between some characters with no intervening whitespace:

#[cfg(feature = "unicode-linebreak")] {
use textwrap::core::Word;
use textwrap::WordSeparator::UnicodeBreakProperties;

assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂😍").collect::<Vec<_>>(),
           vec![Word::from("Emojis: "),
                Word::from("😂"),
                Word::from("😍")]);

assert_eq!(UnicodeBreakProperties.find_words("CJK: 你好").collect::<Vec<_>>(),
           vec![Word::from("CJK: "),
                Word::from("你"),
                Word::from("好")]);
}

A U+2060 (Word Joiner) character can be inserted if you want to manually override the defaults and keep the characters together:

#[cfg(feature = "unicode-linebreak")] {
use textwrap::core::Word;
use textwrap::WordSeparator::UnicodeBreakProperties;

assert_eq!(UnicodeBreakProperties.find_words("Emojis: 😂\u{2060}😍").collect::<Vec<_>>(),
           vec![Word::from("Emojis: "),
                Word::from("😂\u{2060}😍")]);
}

The Unicode line breaking algorithm will also automatically suppress line breaks around certain punctuation characters:

#[cfg(feature = "unicode-linebreak")] {
use textwrap::core::Word;
use textwrap::WordSeparator::UnicodeBreakProperties;

assert_eq!(UnicodeBreakProperties.find_words("[ foo ] bar !").collect::<Vec<_>>(),
           vec![Word::from("[ foo ] "),
                Word::from("bar !")]);
}

`Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>)`

Find words using a custom word separator.

Enum textwrap::WordSeparator

Variants

AsciiSpace

UnicodeBreakProperties

Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>)

Implementations

impl WordSeparator

pub fn find_words<'a>( &self, line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a>

Trait Implementations

impl Clone for WordSeparator

fn clone(&self) -> WordSeparator

fn clone_from(&mut self, source: &Self)

impl Debug for WordSeparator

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Copy for WordSeparator

Auto Trait Implementations

impl RefUnwindSafe for WordSeparator

impl Send for WordSeparator

impl Sync for WordSeparator

impl Unpin for WordSeparator

impl UnwindSafe for WordSeparator

Blanket Implementations

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

`AsciiSpace`

`UnicodeBreakProperties`

`Custom(fn(line: &str) -> Box<dyn Iterator<Item = Word<'_>> + '_>)`

pub fn find_words<'a>(
&self,
line: &'a str
) -> Box<dyn Iterator<Item = Word<'a>> + 'a>

impl<T> Any for T where
T: 'static + ?Sized,

impl<T> Borrow<T> for T where
T: ?Sized,

impl<T> BorrowMut<T> for T where
T: ?Sized,

impl<T, U> Into<U> for T where
U: From<T>,

impl<T> ToOwned for T where
T: Clone,

impl<T, U> TryFrom<U> for T where
U: Into<T>,

impl<T, U> TryInto<U> for T where
U: TryFrom<T>,