termdash/internal/wrap/wrap.go

410 lines
11 KiB
Go

// Copyright 2018 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package wrap implements line wrapping at character or word boundaries.
package wrap
import (
"errors"
"fmt"
"strings"
"unicode"
"github.com/mum4k/termdash/internal/canvas/buffer"
"github.com/mum4k/termdash/internal/runewidth"
)
// Mode sets the wrapping mode.
type Mode int
// String implements fmt.Stringer()
func (m Mode) String() string {
if n, ok := modeNames[m]; ok {
return n
}
return "ModeUnknown"
}
// modeNames maps Mode values to human readable names.
var modeNames = map[Mode]string{
Never: "WrapModeNever",
AtRunes: "WrapModeAtRunes",
AtWords: "WrapModeAtWords",
}
const (
// Never is the default wrapping mode, which disables line wrapping.
Never Mode = iota
// AtRunes is a wrapping mode where if the width of the text crosses the
// width of the canvas, wrapping is performed at rune boundaries.
AtRunes
// AtWords is a wrapping mode where if the width of the text crosses the
// width of the canvas, wrapping is performed at word boundaries. The
// wrapping still switches back to the AtRunes mode for any words that are
// longer than the width.
AtWords
)
// ValidText validates the provided text for wrapping.
// The text must not be empty, contain any control or
// space characters other than '\n' and ' '.
func ValidText(text string) error {
if text == "" {
return errors.New("the text cannot be empty")
}
for _, c := range text {
if c == ' ' || c == '\n' { // Allowed space and control runes.
continue
}
if unicode.IsControl(c) {
return fmt.Errorf("the provided text %q cannot contain control characters, found: %q", text, c)
}
if unicode.IsSpace(c) {
return fmt.Errorf("the provided text %q cannot contain space character %q", text, c)
}
}
return nil
}
// ValidCells validates the provided cells for wrapping.
// The text in the cells must follow the same rules as described for ValidText.
func ValidCells(cells []*buffer.Cell) error {
var b strings.Builder
for _, c := range cells {
b.WriteRune(c.Rune)
}
return ValidText(b.String())
}
// Cells returns the cells wrapped into individual lines according to the
// specified width and wrapping mode.
//
// This function consumes any cells that contain newline characters and uses
// them to start new lines.
//
// If the mode is AtWords, this function also drops cells with leading space
// character before a word at which the wrap occurs.
func Cells(cells []*buffer.Cell, width int, m Mode) ([][]*buffer.Cell, error) {
if err := ValidCells(cells); err != nil {
return nil, err
}
switch m {
case Never:
case AtRunes:
case AtWords:
default:
return nil, fmt.Errorf("unsupported wrapping mode %v(%d)", m, m)
}
if width <= 0 {
return nil, nil
}
cs := newCellScanner(cells, width, m)
for state := scanCellRunes; state != nil; state = state(cs) {
}
return cs.lines, nil
}
// cellScannerState is a state in the FSM that scans the input text and identifies
// newlines.
type cellScannerState func(*cellScanner) cellScannerState
// cellScanner tracks the progress of scanning the input cells when finding
// lines.
type cellScanner struct {
// cells are the cells being scanned.
cells []*buffer.Cell
// nextIdx is the index of the cell that will be returned by next.
nextIdx int
// wordStartIdx stores the starting index of the current word.
// A starting position of a word includes any leading space characters.
// E.g.: hello world
// ^
// lastWordIdx
wordStartIdx int
// wordEndIdx stores the ending index of the current word.
// The word consists of all indexes that are
// wordStartIdx <= idx < wordEndIdx.
// A word also includes any punctuation after it.
wordEndIdx int
// width is the width of the canvas the text will be drawn on.
width int
// posX tracks the horizontal position of the current cell on the canvas.
posX int
// mode is the wrapping mode.
mode Mode
// atRunesInWord overrides the mode back to AtRunes.
atRunesInWord bool
// lines are the identified lines.
lines [][]*buffer.Cell
// line is the current line.
line []*buffer.Cell
}
// newCellScanner returns a scanner of the provided cells.
func newCellScanner(cells []*buffer.Cell, width int, m Mode) *cellScanner {
return &cellScanner{
cells: cells,
width: width,
mode: m,
}
}
// next returns the next cell and advances the scanner.
// Returns nil when there are no more cells to scan.
func (cs *cellScanner) next() *buffer.Cell {
c := cs.peek()
if c != nil {
cs.nextIdx++
}
return c
}
// peek returns the next cell without advancing the scanner's position.
// Returns nil when there are no more cells to peek at.
func (cs *cellScanner) peek() *buffer.Cell {
if cs.nextIdx >= len(cs.cells) {
return nil
}
return cs.cells[cs.nextIdx]
}
// peekPrev returns the previous cell without changing the scanner's position.
// Returns nil if the scanner is at the first cell.
func (cs *cellScanner) peekPrev() *buffer.Cell {
if cs.nextIdx == 0 {
return nil
}
return cs.cells[cs.nextIdx-1]
}
// wordCells returns all the cells that belong to the current word.
func (cs *cellScanner) wordCells() []*buffer.Cell {
return cs.cells[cs.wordStartIdx:cs.wordEndIdx]
}
// wordWidth returns the width of the current word in cells when printed on the
// terminal.
func (cs *cellScanner) wordWidth() int {
var b strings.Builder
for _, wc := range cs.wordCells() {
b.WriteRune(wc.Rune)
}
return runewidth.StringWidth(b.String())
}
// isWordStart determines if the scanner is at the beginning of a word.
func (cs *cellScanner) isWordStart() bool {
if cs.mode != AtWords {
return false
}
current := cs.peekPrev()
next := cs.peek()
if current == nil || next == nil {
return false
}
switch nr := next.Rune; {
case nr == '\n':
case nr == ' ':
default:
return true
}
return false
}
// scanCellRunes scans the cells a rune at a time.
func scanCellRunes(cs *cellScanner) cellScannerState {
for {
cell := cs.next()
if cell == nil {
return scanEOF
}
r := cell.Rune
if r == '\n' {
return newLineForLineBreak
}
if cs.mode == Never {
return runeToCurrentLine
}
if cs.atRunesInWord && !isWordCell(cell) {
cs.atRunesInWord = false
}
if !cs.atRunesInWord && cs.isWordStart() {
return markWordStart
}
if runeWrapNeeded(r, cs.posX, cs.width) {
return newLineForAtRunes
}
return runeToCurrentLine
}
}
// runeToCurrentLine scans a single cell rune onto the current line.
func runeToCurrentLine(cs *cellScanner) cellScannerState {
cell := cs.peekPrev()
// Move horizontally within the line for each scanned cell.
cs.posX += runewidth.RuneWidth(cell.Rune)
// Copy the cell into the current line.
cs.line = append(cs.line, cell)
return scanCellRunes
}
// newLineForLineBreak processes a newline character cell.
func newLineForLineBreak(cs *cellScanner) cellScannerState {
cs.lines = append(cs.lines, cs.line)
cs.posX = 0
cs.line = nil
return scanCellRunes
}
// newLineForAtRunes processes a line wrap at rune boundaries due to canvas width.
func newLineForAtRunes(cs *cellScanner) cellScannerState {
// The character on which we wrapped will be printed and is the start of
// new line.
cs.lines = append(cs.lines, cs.line)
cs.posX = runewidth.RuneWidth(cs.peekPrev().Rune)
cs.line = []*buffer.Cell{cs.peekPrev()}
return scanCellRunes
}
// scanEOF terminates the scanning.
func scanEOF(cs *cellScanner) cellScannerState {
// Need to add the current line if it isn't empty, or if the previous rune
// was a newline.
// Newlines aren't copied onto the lines so just checking for emptiness
// isn't enough. We still want to include trailing empty newlines if
// they are in the input text.
if len(cs.line) > 0 || cs.peekPrev().Rune == '\n' {
cs.lines = append(cs.lines, cs.line)
}
return nil
}
// markWordStart stores the starting position of the current word.
func markWordStart(cs *cellScanner) cellScannerState {
cs.wordStartIdx = cs.nextIdx - 1
cs.wordEndIdx = cs.nextIdx
return scanWord
}
// scanWord scans the entire word until it finds its end.
func scanWord(cs *cellScanner) cellScannerState {
for {
if isWordCell(cs.peek()) {
cs.next()
cs.wordEndIdx++
continue
}
return wordToCurrentLine
}
}
// wordToCurrentLine decides how to place the word into the output.
func wordToCurrentLine(cs *cellScanner) cellScannerState {
wordCells := cs.wordCells()
wordWidth := cs.wordWidth()
if cs.posX+wordWidth <= cs.width {
// Place the word onto the current line.
cs.posX += wordWidth
cs.line = append(cs.line, wordCells...)
return scanCellRunes
}
return wrapWord
}
// wrapWord wraps the word onto the next line or lines.
func wrapWord(cs *cellScanner) cellScannerState {
// Edge-case - the word starts the line and immediately doesn't fit.
if cs.posX > 0 {
cs.lines = append(cs.lines, cs.line)
cs.posX = 0
cs.line = nil
}
for i, wc := range cs.wordCells() {
if i == 0 && wc.Rune == ' ' {
// Skip the leading space when word wrapping.
continue
}
if !runeWrapNeeded(wc.Rune, cs.posX, cs.width) {
cs.posX += runewidth.RuneWidth(wc.Rune)
cs.line = append(cs.line, wc)
continue
}
// Replace the last placed rune with a dash indicating we wrapped the
// word. Only do this for half-width runes.
lastIdx := len(cs.line) - 1
last := cs.line[lastIdx]
lastRW := runewidth.RuneWidth(last.Rune)
if cs.width > 1 && lastRW == 1 {
cs.line[lastIdx] = buffer.NewCell('-', last.Opts)
// Reset the scanner's position back to start scanning at the first
// rune of this word that wasn't placed.
cs.nextIdx = cs.wordStartIdx + i - 1
} else {
// Edge-case width is one, no space to put the dash rune.
cs.nextIdx = cs.wordStartIdx + i
}
cs.atRunesInWord = true
return scanCellRunes
}
cs.nextIdx = cs.wordEndIdx
return scanCellRunes
}
// isWordCell determines if the cell contains a rune that belongs to a word.
func isWordCell(c *buffer.Cell) bool {
if c == nil {
return false
}
switch r := c.Rune; {
case r == '\n':
case r == ' ':
default:
return true
}
return false
}
// runeWrapNeeded returns true if wrapping is needed for the rune at the horizontal
// position on the canvas that has the specified width.
func runeWrapNeeded(r rune, posX, width int) bool {
rw := runewidth.RuneWidth(r)
return posX > width-rw
}