unidata

package
v1.1.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 31, 2020 License: MIT Imports: 3 Imported by: 0

Documentation

Overview

Package unidata contains information about Unicode characters.

Index

Constants

View Source
// Gender values, numbered 0 through 2 as untyped integer constants.
// NOTE(review): presumably the values held in Emoji.Genders — confirm.
const (
	GenderNone = iota
	GenderSign
	GenderRole
)
View Source
// East Asian Width property values, numbered consecutively from 0 as uint8.
// Each trailing comment gives the property's long name and its abbreviation.
const (
	WidthAmbiguous = uint8(iota) // Ambiguous, A
	WidthFullWidth               // Fullwidth, F
	WidthHalfWidth               // Halfwidth, H
	WidthNarrow                  // Narrow, Na
	WidthNeutral                 // Neutral (Not East Asian), N
	WidthWide                    // Wide, W
)
View Source
// General_Category values. The constants are consecutive uint8 values
// starting at 0, in the order listed below. The grouping entries (LC, L, M,
// N, P, S, Z, C) are ordinary members of that sequence, not bitmasks of the
// categories named in their comments.
const (
	CatUppercaseLetter      = uint8(iota) // Lu – an uppercase letter
	CatLowercaseLetter                    // Ll – a lowercase letter
	CatTitlecaseLetter                    // Lt – a digraphic character, with first part uppercase
	CatCasedLetter                        // LC – Lu | Ll | Lt
	CatModifierLetter                     // Lm – a modifier letter
	CatOtherLetter                        // Lo – other letters, including syllables and ideographs
	CatLetter                             // L  – Lu | Ll | Lt | Lm | Lo
	CatNonspacingMark                     // Mn – a nonspacing combining mark (zero advance width)
	CatSpacingMark                        // Mc – a spacing combining mark (positive advance width)
	CatEnclosingMark                      // Me – an enclosing combining mark
	CatMark                               // M  – Mn | Mc | Me
	CatDecimalNumber                      // Nd – a decimal digit
	CatLetterNumber                       // Nl – a letterlike numeric character
	CatOtherNumber                        // No – a numeric character of other type
	CatNumber                             // N  – Nd | Nl | No
	CatConnectorPunctuation               // Pc – a connecting punctuation mark, like a tie
	CatDashPunctuation                    // Pd – a dash or hyphen punctuation mark
	CatOpenPunctuation                    // Ps – an opening punctuation mark (of a pair)
	CatClosePunctuation                   // Pe – a closing punctuation mark (of a pair)
	CatInitialPunctuation                 // Pi – an initial quotation mark
	CatFinalPunctuation                   // Pf – a final quotation mark
	CatOtherPunctuation                   // Po – a punctuation mark of other type
	CatPunctuation                        // P  – Pc | Pd | Ps | Pe | Pi | Pf | Po
	CatMathSymbol                         // Sm – a symbol of mathematical use
	CatCurrencySymbol                     // Sc – a currency sign
	CatModifierSymbol                     // Sk – a non-letterlike modifier symbol
	CatOtherSymbol                        // So – a symbol of other type
	CatSymbol                             // S  – Sm | Sc | Sk | So
	CatSpaceSeparator                     // Zs – a space character (of various non-zero widths)
	CatLineSeparator                      // Zl – U+2028 LINE SEPARATOR only
	CatParagraphSeparator                 // Zp – U+2029 PARAGRAPH SEPARATOR only
	CatSeparator                          // Z  – Zs | Zl | Zp
	CatControl                            // Cc – a C0 or C1 control code
	CatFormat                             // Cf – a format control character
	CatSurrogate                          // Cs – a surrogate code point
	CatPrivateUse                         // Co – a private-use character
	CatUnassigned                         // Cn – a reserved unassigned code point or a noncharacter
	CatOther                              // C  – Cc | Cf | Cs | Co | Cn
)

See http://www.unicode.org/reports/tr44/#General_Category_Values

View Source
// UnknownCodepoint is the placeholder string "CODEPOINT NOT IN UNICODE".
// NOTE(review): presumably used as the name for codepoints that cannot be
// found — confirm against the lookup code.
const UnknownCodepoint = "CODEPOINT NOT IN UNICODE"

Variables

View Source
var (
	// Blocks holds a []uint32 per string key; its 300 entries are elided in
	// this listing.
	// NOTE(review): presumably block name -> codepoint range — confirm.
	Blocks = map[string][]uint32{}/* 300 elements not displayed */

	// Blockmap is created as an empty map[string]string.
	// NOTE(review): presumably populated during package initialisation — confirm.
	Blockmap = make(map[string]string)
)

Data source: https://www.unicode.org/Public/UCD/latest/ucd/Blocks.txt (TODO: generate this from the data file.)

View Source
var (
	// Catmap holds a uint8 per string key; its 141 entries are elided in this
	// listing.
	// NOTE(review): presumably category name or alias -> Cat* constant — confirm.
	Catmap = map[string]uint8{}/* 141 elements not displayed */

	// Catnames maps each Cat* constant to its long category name, for example
	// CatUppercaseLetter to "Uppercase_Letter".
	Catnames = map[uint8]string{
		CatUppercaseLetter:      "Uppercase_Letter",
		CatLowercaseLetter:      "Lowercase_Letter",
		CatTitlecaseLetter:      "Titlecase_Letter",
		CatCasedLetter:          "Cased_Letter",
		CatModifierLetter:       "Modifier_Letter",
		CatOtherLetter:          "Other_Letter",
		CatLetter:               "Letter",
		CatNonspacingMark:       "Nonspacing_Mark",
		CatSpacingMark:          "Spacing_Mark",
		CatEnclosingMark:        "Enclosing_Mark",
		CatMark:                 "Mark",
		CatDecimalNumber:        "Decimal_Number",
		CatLetterNumber:         "Letter_Number",
		CatOtherNumber:          "Other_Number",
		CatNumber:               "Number",
		CatConnectorPunctuation: "Connector_Punctuation",
		CatDashPunctuation:      "Dash_Punctuation",
		CatOpenPunctuation:      "Open_Punctuation",
		CatClosePunctuation:     "Close_Punctuation",
		CatInitialPunctuation:   "Initial_Punctuation",
		CatFinalPunctuation:     "Final_Punctuation",
		CatOtherPunctuation:     "Other_Punctuation",
		CatPunctuation:          "Punctuation",
		CatMathSymbol:           "Math_Symbol",
		CatCurrencySymbol:       "Currency_Symbol",
		CatModifierSymbol:       "Modifier_Symbol",
		CatOtherSymbol:          "Other_Symbol",
		CatSymbol:               "Symbol",
		CatSpaceSeparator:       "Space_Separator",
		CatLineSeparator:        "Line_Separator",
		CatParagraphSeparator:   "Paragraph_Separator",
		CatSeparator:            "Separator",
		CatControl:              "Control",
		CatFormat:               "Format",
		CatSurrogate:            "Surrogate",
		CatPrivateUse:           "Private_Use",
		CatUnassigned:           "Unassigned",
		CatOther:                "Other",
	}
)
View Source
// Codepoints holds Codepoint values keyed by string; its 33797 entries are
// elided in this listing.
// NOTE(review): the key format is not visible here — confirm before relying on it.
var Codepoints = map[string]Codepoint{}/* 33797 elements not displayed */
View Source
// EmojiGroups lists the ten emoji group names. Each name is also a key of
// EmojiSubgroups.
var EmojiGroups = []string{
	"Smileys & Emotion",
	"People & Body",
	"Component",
	"Animals & Nature",
	"Food & Drink",
	"Travel & Places",
	"Activities",
	"Objects",
	"Symbols",
	"Flags",
}
View Source
// EmojiSubgroups maps each emoji group name to the ordered list of subgroup
// names inside that group.
var EmojiSubgroups = map[string][]string{
	"Activities": {
		"event", "award-medal", "sport", "game", "arts & crafts",
	},
	"Animals & Nature": {
		"animal-mammal", "animal-bird", "animal-amphibian", "animal-reptile",
		"animal-marine", "animal-bug", "plant-flower", "plant-other",
	},
	"Component": {
		"skin-tone", "hair-style",
	},
	"Flags": {
		"flag", "country-flag", "subdivision-flag",
	},
	"Food & Drink": {
		"food-fruit", "food-vegetable", "food-prepared", "food-asian",
		"food-marine", "food-sweet", "drink", "dishware",
	},
	"Objects": {
		"clothing", "sound", "music", "musical-instrument", "phone",
		"computer", "light & video", "book-paper", "money", "mail",
		"writing", "office", "lock", "tool", "science", "medical",
		"household", "other-object",
	},
	"People & Body": {
		"hand-fingers-open", "hand-fingers-partial", "hand-single-finger",
		"hand-fingers-closed", "hands", "hand-prop", "body-parts", "person",
		"person-gesture", "person-role", "person-fantasy", "person-activity",
		"person-sport", "person-resting", "family", "person-symbol",
	},
	"Smileys & Emotion": {
		"face-smiling", "face-affection", "face-tongue", "face-hand",
		"face-neutral-skeptical", "face-sleepy", "face-unwell", "face-hat",
		"face-glasses", "face-concerned", "face-negative", "face-costume",
		"cat-face", "monkey-face", "emotion",
	},
	"Symbols": {
		"transport-sign", "warning", "arrow", "religion", "zodiac",
		"av-symbol", "gender", "math", "punctuation", "currency",
		"other-symbol", "keycap", "alphanum", "geometric",
	},
	"Travel & Places": {
		"place-map", "place-geographic", "place-building", "place-religious",
		"place-other", "transport-ground", "transport-water", "transport-air",
		"hotel", "time", "sky & weather",
	},
}
View Source
// Emojis is a slice of Emoji values; its 1667 entries are elided in this
// listing.
var Emojis = []Emoji{}/* 1667 elements not displayed */
View Source
// Entities holds a string per rune; its 1446 entries are elided in this
// listing.
// NOTE(review): presumably the HTML entity name for each rune — confirm.
var Entities = map[rune]string{}/* 1446 elements not displayed */

Functions

func CanonicalCategory

func CanonicalCategory(cat string) string

CanonicalCategory transforms a category name to the canonical representation.

func ToCodepoint added in v1.1.0

func ToCodepoint(s string) (int64, error)

ToCodepoint converts a human input string to a codepoint.

The input can be given as U+41, U+0041, U41, 0x41, 0o101, or 0b1000001.

Types

type Codepoint

// Codepoint describes one codepoint: two uint8 attributes, the numeric
// codepoint value, and a name.
type Codepoint struct {
	Width     uint8 // NOTE(review): presumably one of the Width* constants — confirm
	Cat       uint8 // NOTE(review): presumably one of the Cat* constants — confirm
	Codepoint uint32
	Name      string
}

Codepoint is a single codepoint.

func FindCodepoint

func FindCodepoint(c rune) (Codepoint, bool)

FindCodepoint finds the Codepoint for the rune c.

type Emoji

// Emoji describes an emoji sequence: the codepoints that make it up, its
// name, and the group and subgroup it is filed under.
type Emoji struct {
	Codepoints []uint32
	Name       string
	Group      string
	Subgroup   string
	SkinTones  bool
	Genders    int // NOTE(review): presumably one of the Gender* constants — confirm
}

Emoji is an emoji sequence.

func (Emoji) String

func (e Emoji) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL