Struct unic_langid_impl::LanguageIdentifier[−][src]

pub struct LanguageIdentifier {
    pub language: Language,
    pub script: Option<Script>,
    pub region: Option<Region>,
    // some fields omitted
}

Expand description

LanguageIdentifier is a core struct representing a Unicode Language Identifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let li: LanguageIdentifier = "en-US".parse()
    .expect("Failed to parse.");

assert_eq!(li.language, "en");
assert_eq!(li.script, None);
assert_eq!(li.region.as_ref().map(Into::into), Some("US"));
assert_eq!(li.variants().len(), 0);

Parsing

Unicode recognizes three levels of standard conformance for any language identifier:

well-formed - syntactically correct
valid - well-formed and only uses registered language subtags, extensions, keywords, types…
canonical - valid and no deprecated codes or structure.

At the moment, parsing normalizes a well-formed language identifier by converting _ separators to - and adjusting casing to conform to the Unicode standard.

Any malformed (bogus) subtag will cause parsing to fail with an error. Beyond this well-formedness check, no subtag validation is performed.

Examples:

use unic_langid_impl::LanguageIdentifier;

let li: LanguageIdentifier = "eN_latn_Us-Valencia".parse()
    .expect("Failed to parse.");

assert_eq!(li.language, "en");
assert_eq!(li.script.as_ref().map(Into::into), Some("Latn"));
assert_eq!(li.region.as_ref().map(Into::into), Some("US"));
assert_eq!(li.variants().map(|v| v.as_str()).collect::<Vec<_>>(), &["valencia"]);

Fields

language: Language
script: Option<Script>
region: Option<Region>

Implementations

[src]

impl LanguageIdentifier

[src]

pub fn from_bytes(v: &[u8]) -> Result<Self, LanguageIdentifierError>

A constructor which takes a UTF-8 byte slice, parses it, and produces a well-formed LanguageIdentifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let li = LanguageIdentifier::from_bytes("en-US".as_bytes())
    .expect("Parsing failed.");

assert_eq!(li.to_string(), "en-US");

[src]

pub fn from_parts(
    language: Language,
    script: Option<Script>,
    region: Option<Region>,
    variants: &[Variant]
) -> Self

A constructor which takes already-parsed subtags (a language, an optional script, an optional region, and a slice of variants) and produces a well-formed LanguageIdentifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let li = LanguageIdentifier::from_parts(
    "fr".parse().expect("Parsing failed."),
    None,
    Some("CA".parse().expect("Parsing failed.")),
    &[]
);

assert_eq!(li.to_string(), "fr-CA");

[src]

pub const fn from_raw_parts_unchecked(
    language: Language,
    script: Option<Script>,
    region: Option<Region>,
    variants: Option<Box<[Variant]>>
) -> Self

Unchecked

This function accepts the subtags as given, expecting variants to be already deduplicated and ordered.

[src]

pub fn into_parts(
self
) -> (Language, Option<Script>, Option<Region>, Vec<Variant>)

Consumes the LanguageIdentifier and returns its subtags as a (Language, Option<Script>, Option<Region>, Vec<Variant>) tuple.

Primarily used for storing internal representation and restoring via from_raw_parts_unchecked.

Examples

use unic_langid_impl::LanguageIdentifier;
use tinystr::{TinyStr8, TinyStr4};

let li: LanguageIdentifier = "en-US".parse()
    .expect("Parsing failed.");

let (lang, script, region, variants) = li.into_parts();

// let li2 = LanguageIdentifier::from_raw_parts_unchecked(
//     lang.map(|l| unsafe { TinyStr8::new_unchecked(l) }),
//    script.map(|s| unsafe { TinyStr4::new_unchecked(s) }),
//    region.map(|r| unsafe { TinyStr4::new_unchecked(r) }),
//    variants.map(|v| v.into_iter().map(|v| unsafe { TinyStr8::new_unchecked(*v) }).collect()),
//);

//assert_eq!(li2.to_string(), "en-US");

[src]

pub fn matches<O: AsRef<Self>>(
    &self,
    other: &O,
    self_as_range: bool,
    other_as_range: bool
) -> bool

Compares a LanguageIdentifier to another AsRef<LanguageIdentifier>, allowing either side to treat its missing fields as wildcards.

This allows for matching between en (treated as en-*-*-*) and en-US.

Examples

use unic_langid_impl::LanguageIdentifier;

let li1: LanguageIdentifier = "en".parse()
    .expect("Parsing failed.");

let li2: LanguageIdentifier = "en-US".parse()
    .expect("Parsing failed.");

assert_ne!(li1, li2); // "en" != "en-US"
assert_ne!(li1.to_string(), li2.to_string()); // "en" != "en-US"

assert_eq!(li1.matches(&li2, false, false), false); // "en" != "en-US"
assert_eq!(li1.matches(&li2, true, false), true); // "en-*-*-*" == "en-US"
assert_eq!(li1.matches(&li2, false, true), false); // "en" != "en-*-US-*"
assert_eq!(li1.matches(&li2, true, true), true); // "en-*-*-*" == "en-*-US-*"

[src]

pub fn variants(&self) -> impl ExactSizeIterator<Item = &Variant>

Returns an iterator over the variant subtags of the LanguageIdentifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let li1: LanguageIdentifier = "ca-ES-valencia".parse()
    .expect("Parsing failed.");

assert_eq!(li1.variants().map(|v| v.as_str()).collect::<Vec<_>>(), &["valencia"]);

let li2: LanguageIdentifier = "de".parse()
    .expect("Parsing failed.");

assert_eq!(li2.variants().len(), 0);

[src]

pub fn set_variants(&mut self, variants: &[Variant])

Sets variant subtags of the LanguageIdentifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "ca-ES".parse()
    .expect("Parsing failed.");

li.set_variants(&["valencia".parse().expect("Parsing failed.")]);

assert_eq!(li.to_string(), "ca-ES-valencia");

[src]

pub fn has_variant(&self, variant: Variant) -> bool

Tests if a variant subtag is present in the LanguageIdentifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "ca-ES-macos".parse()
    .expect("Parsing failed.");

assert_eq!(li.has_variant("valencia".parse().unwrap()), false);
assert_eq!(li.has_variant("macos".parse().unwrap()), true);

[src]

pub fn clear_variants(&mut self)

Clears variant subtags of the LanguageIdentifier.

Examples

use unic_langid_impl::LanguageIdentifier;

let mut li: LanguageIdentifier = "ca-ES-valencia".parse()
    .expect("Parsing failed.");

li.clear_variants();

assert_eq!(li.to_string(), "ca-ES");

[src]

pub fn character_direction(&self) -> CharacterDirection

Returns character direction of the LanguageIdentifier.

Examples

use unic_langid_impl::{LanguageIdentifier, CharacterDirection};

let li1: LanguageIdentifier = "es-AR".parse()
    .expect("Parsing failed.");
let li2: LanguageIdentifier = "fa".parse()
    .expect("Parsing failed.");

assert_eq!(li1.character_direction(), CharacterDirection::LTR);
assert_eq!(li2.character_direction(), CharacterDirection::RTL);