9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

commit 78a6c092e93295e60ed4cba344117a34b586d7da
parent e70375948b9e9afebde07fb294abf5f039f19d44
Author: anselm@garbe.us <unknown>
Date:   Thu, 27 May 2010 13:02:29 +0100

added fmt as well
Diffstat:
Makefile | 4++--
fmt/Makefile | 10++++++++++
fmt/fmt.1 | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
fmt/fmt.c | 241+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 343 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile @@ -3,8 +3,8 @@ include config.mk SUBDIRS = lib9 yacc awk basename bc cal cat cleanname date dc du echo ed \ - factor fortune freq getflags grep hoc ls mk mkdir mtime primes rc read \ - sha1sum sed seq sleep sort tail tee test touch tr troff uniq + factor fortune fmt freq getflags grep hoc ls mk mkdir mtime primes \ + rc read sha1sum sed seq sleep sort tail tee test touch tr troff uniq all: @echo 9base build options: diff --git a/fmt/Makefile b/fmt/Makefile @@ -0,0 +1,10 @@ +# fmt - fmt unix port from plan9 +# Depends on ../lib9 + +TARG = fmt + +include ../std.mk + +pre-uninstall: + +post-install: diff --git a/fmt/fmt.1 b/fmt/fmt.1 @@ -0,0 +1,90 @@ +.TH FMT 1 +.SH NAME +fmt, htmlfmt \- simple text formatters +.SH SYNOPSIS +.B fmt +[ +.I option ... +] +[ +.I file ... +] +.PP +.B htmlfmt +[ +.B -a +] [ +.B -c +.I charset +] [ +.B -u +.I url +] [ +.I file ... +] +.SH DESCRIPTION +.I Fmt +copies the given +.I files +(standard input by default) +to its standard output, filling and indenting lines. +The options are +.TP +.BI -l " n +Output line length is +.IR n , +including indent (default 70). +.TP +.BI -w " n +A synonym for +.BR -l . +.TP +.BI -i " n +Indent +.I n +spaces (default 0). +.TP +.BI -j +Do not join short lines: only fold long lines. +.PP +Empty lines and initial white space in input lines are preserved. +Empty lines are inserted between input files. +.PP +.I Fmt +is idempotent: it leaves already formatted text unchanged. +.PP +.I Htmlfmt +performs a similar service, but accepts as input text formatted with +HTML tags. +It accepts +.IR fmt 's +.B -l +and +.B -w +flags and also: +.TP +.BI -a +Normally +.I htmlfmt +suppresses the contents of form fields and anchors (URLs and image files); this flag +causes it to print them, in square brackets. +.TP +.BI -c " charset +change the default character set from iso-8859-1 to +.IR charset . +This is the character set assumed if there isn't one +specified by the html itself in a <meta> directive. +.TP +.BI -u " url +Use +.I url +as the base URL for the document when displaying anchors; sets +.BI -a . +.SH SOURCE +.B \*9/src/cmd/fmt.c +.PP +.B \*9/src/cmd/htmlfmt +.SH BUGS +.I Htmlfmt +makes no attempt to render the two-dimensional geometry of tables; +it just treats the table entries as plain, to-be-formatted text. diff --git a/fmt/fmt.c b/fmt/fmt.c @@ -0,0 +1,241 @@ +#include <u.h> +#include <libc.h> +#include <bio.h> +#include <ctype.h> + +/* + * block up paragraphs, possibly with indentation + */ + +int extraindent = 0; /* how many spaces to indent all lines */ +int indent = 0; /* current value of indent, before extra indent */ +int length = 70; /* how many columns per output line */ +int join = 1; /* can lines be joined? */ +int maxtab = 8; +Biobuf bin; +Biobuf bout; + +typedef struct Word Word; +struct Word{ + int bol; + int indent; + char text[1]; +}; + +void fmt(void); + +void +usage(void) +{ + fprint(2, "usage: %s [-j] [-i indent] [-l length] [file...]\n", argv0); + exits("usage"); +} + +void +main(int argc, char **argv) +{ + int i, f; + char *s, *err; + + ARGBEGIN{ + case 'i': + extraindent = atoi(EARGF(usage())); + break; + case 'j': + join = 0; + break; + case 'w': + case 'l': + length = atoi(EARGF(usage())); + break; + default: + usage(); + }ARGEND + + if(length <= indent){ + fprint(2, "%s: line length<=indentation\n", argv0); + exits("length"); + } + + s=getenv("tabstop"); + if(s!=nil && atoi(s)>0) + maxtab=atoi(s); + err = nil; + Binit(&bout, 1, OWRITE); + if(argc <= 0){ + Binit(&bin, 0, OREAD); + fmt(); + }else{ + for(i=0; i<argc; i++){ + f = open(argv[i], OREAD); + if(f < 0){ + fprint(2, "%s: can't open %s: %r\n", argv0, argv[i]); + err = "open"; + }else{ + Binit(&bin, f, OREAD); + fmt(); + Bterm(&bin); + if(i != argc-1) + Bputc(&bout, '\n'); + } + } + } + exits(err); +} + +int +indentof(char **linep) +{ + int i, ind; + char *line; + + ind = 0; + line = *linep; + for(i=0; line[i]; i++) + switch(line[i]){ + default: + *linep = line; + return ind; + case ' ': + ind++; + break; + case '\t': + ind += maxtab; + ind -= ind%maxtab; + break; + } + + /* plain white space doesn't change the indent */ + *linep = ""; + return indent; +} + +Word** +addword(Word **words, int *nwordp, char *s, int l, int indent, int bol) +{ + Word *w; + + w = malloc(sizeof(Word)+l+1); + memmove(w->text, s, l); + w->text[l] = '\0'; + w->indent = indent; + w->bol = bol; + words = realloc(words, (*nwordp+1)*sizeof(Word*)); + words[(*nwordp)++] = w; + return words; +} + +Word** +parseline(char *line, Word **words, int *nwordp) +{ + int ind, l, bol; + + ind = indentof(&line); + indent = ind; + bol = 1; + for(;;){ + /* find next word */ + while(*line==' ' || *line=='\t') + line++; + if(*line == '\0'){ + if(bol) + return addword(words, nwordp, "", 0, -1, bol); + break; + } + /* how long is this word? */ + for(l=0; line[l]; l++) + if(line[l]==' ' || line[l]=='\t') + break; + words = addword(words, nwordp, line, l, indent, bol); + bol = 0; + line += l; + } + return words; +} + +void +printindent(int w) +{ + while(w >= maxtab){ + Bputc(&bout, '\t'); + w -= maxtab; + } + while(w > 0){ + Bputc(&bout, ' '); + w--; + } +} + +/* give extra space if word ends with period, etc. */ +int +nspaceafter(char *s) +{ + int n; + + n = strlen(s); + if(n < 2) + return 1; + if(isupper((uchar)s[0]) && n < 4) + return 1; + if(strchr(".!?", s[n-1]) != nil) + return 2; + return 1; +} + + +void +printwords(Word **w, int nw) +{ + int i, j, n, col, nsp; + + /* one output line per loop */ + for(i=0; i<nw; ){ + /* if it's a blank line, print it */ + if(w[i]->indent == -1){ + Bputc(&bout, '\n'); + if(++i == nw) /* out of words */ + break; + } + /* emit leading indent */ + col = extraindent+w[i]->indent; + printindent(col); + /* emit words until overflow; always emit at least one word */ + for(n=0;; n++){ + Bprint(&bout, "%s", w[i]->text); + col += utflen(w[i]->text); + if(++i == nw) + break; /* out of words */ + if(w[i]->indent != w[i-1]->indent) + break; /* indent change */ + nsp = nspaceafter(w[i-1]->text); + if(col+nsp+utflen(w[i]->text) > extraindent+length) + break; /* fold line */ + if(!join && w[i]->bol) + break; + for(j=0; j<nsp; j++) + Bputc(&bout, ' '); /* emit space; another word will follow */ + col += nsp; + } + /* emit newline */ + Bputc(&bout, '\n'); + } +} + +void +fmt(void) +{ + char *s; + int i, nw; + Word **w; + + nw = 0; + w = nil; + while((s = Brdstr(&bin, '\n', 1)) != nil){ + w = parseline(s, w, &nw); + free(s); + } + printwords(w, nw); + for(i=0; i<nw; i++) + free(w[i]); + free(w); +}