// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package pipeline import ( "bytes" "errors" "fmt" "go/ast" "go/constant" "go/format" "go/token" "go/types" "path/filepath" "strings" "unicode" "unicode/utf8" fmtparser "golang.org/x/text/internal/format" "golang.org/x/tools/go/callgraph" "golang.org/x/tools/go/callgraph/cha" "golang.org/x/tools/go/loader" "golang.org/x/tools/go/ssa" "golang.org/x/tools/go/ssa/ssautil" ) const debug = false // TODO: // - merge information into existing files // - handle different file formats (PO, XLIFF) // - handle features (gender, plural) // - message rewriting // - `msg:"etc"` tags // Extract extracts all strings form the package defined in Config. func Extract(c *Config) (*State, error) { x, err := newExtracter(c) if err != nil { return nil, wrap(err, "") } if err := x.seedEndpoints(); err != nil { return nil, err } x.extractMessages() return &State{ Config: *c, program: x.iprog, Extracted: Messages{ Language: c.SourceLanguage, Messages: x.messages, }, }, nil } type extracter struct { conf loader.Config iprog *loader.Program prog *ssa.Program callGraph *callgraph.Graph // Calls and other expressions to collect. globals map[token.Pos]*constData funcs map[token.Pos]*callData messages []Message } func newExtracter(c *Config) (x *extracter, err error) { x = &extracter{ conf: loader.Config{}, globals: map[token.Pos]*constData{}, funcs: map[token.Pos]*callData{}, } x.iprog, err = loadPackages(&x.conf, c.Packages) if err != nil { return nil, wrap(err, "") } x.prog = ssautil.CreateProgram(x.iprog, ssa.GlobalDebug|ssa.BareInits) x.prog.Build() x.callGraph = cha.CallGraph(x.prog) return x, nil } func (x *extracter) globalData(pos token.Pos) *constData { cd := x.globals[pos] if cd == nil { cd = &constData{} x.globals[pos] = cd } return cd } func (x *extracter) seedEndpoints() error { pkgInfo := x.iprog.Package("golang.org/x/text/message") if pkgInfo == nil { return errors.New("pipeline: golang.org/x/text/message is not imported") } pkg := x.prog.Package(pkgInfo.Pkg) typ := types.NewPointer(pkg.Type("Printer").Type()) x.processGlobalVars() x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Printf"), &callData{ formatPos: 1, argPos: 2, isMethod: true, }) x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Sprintf"), &callData{ formatPos: 1, argPos: 2, isMethod: true, }) x.handleFunc(x.prog.LookupMethod(typ, pkg.Pkg, "Fprintf"), &callData{ formatPos: 2, argPos: 3, isMethod: true, }) return nil } // processGlobalVars finds string constants that are assigned to global // variables. func (x *extracter) processGlobalVars() { for _, p := range x.prog.AllPackages() { m, ok := p.Members["init"] if !ok { continue } for _, b := range m.(*ssa.Function).Blocks { for _, i := range b.Instrs { s, ok := i.(*ssa.Store) if !ok { continue } a, ok := s.Addr.(*ssa.Global) if !ok { continue } t := a.Type() for { p, ok := t.(*types.Pointer) if !ok { break } t = p.Elem() } if b, ok := t.(*types.Basic); !ok || b.Kind() != types.String { continue } x.visitInit(a, s.Val) } } } } type constData struct { call *callData // to provide a signature for the constants values []constVal others []token.Pos // Assigned to other global data. } func (d *constData) visit(x *extracter, f func(c constant.Value)) { for _, v := range d.values { f(v.value) } for _, p := range d.others { if od, ok := x.globals[p]; ok { od.visit(x, f) } } } type constVal struct { value constant.Value pos token.Pos } type callData struct { call ssa.CallInstruction expr *ast.CallExpr formats []constant.Value callee *callData isMethod bool formatPos int argPos int // varargs at this position in the call argTypes []int // arguments extractable from this position } func (c *callData) callFormatPos() int { c = c.callee if c.isMethod { return c.formatPos - 1 } return c.formatPos } func (c *callData) callArgsStart() int { c = c.callee if c.isMethod { return c.argPos - 1 } return c.argPos } func (c *callData) Pos() token.Pos { return c.call.Pos() } func (c *callData) Pkg() *types.Package { return c.call.Parent().Pkg.Pkg } func (x *extracter) handleFunc(f *ssa.Function, fd *callData) { for _, e := range x.callGraph.Nodes[f].In { if e.Pos() == 0 { continue } call := e.Site caller := x.funcs[call.Pos()] if caller != nil { // TODO: theoretically a format string could be passed to multiple // arguments of a function. Support this eventually. continue } x.debug(call, "CALL", f.String()) caller = &callData{ call: call, callee: fd, formatPos: -1, argPos: -1, } // Offset by one if we are invoking an interface method. offset := 0 if call.Common().IsInvoke() { offset = -1 } x.funcs[call.Pos()] = caller if fd.argPos >= 0 { x.visitArgs(caller, call.Common().Args[fd.argPos+offset]) } x.visitFormats(caller, call.Common().Args[fd.formatPos+offset]) } } type posser interface { Pos() token.Pos Parent() *ssa.Function } func (x *extracter) debug(v posser, header string, args ...interface{}) { if debug { pos := "" if p := v.Parent(); p != nil { pos = posString(&x.conf, p.Package().Pkg, v.Pos()) } if header != "CALL" && header != "INSERT" { header = " " + header } fmt.Printf("%-32s%-10s%-15T ", pos+fmt.Sprintf("@%d", v.Pos()), header, v) for _, a := range args { fmt.Printf(" %v", a) } fmt.Println() } } // visitInit evaluates and collects values assigned to global variables in an // init function. func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) { if v == nil { return } x.debug(v, "GLOBAL", v) switch v := v.(type) { case *ssa.Phi: for _, e := range v.Edges { x.visitInit(global, e) } case *ssa.Const: // Only record strings with letters. if str := constant.StringVal(v.Value); isMsg(str) { cd := x.globalData(global.Pos()) cd.values = append(cd.values, constVal{v.Value, v.Pos()}) } // TODO: handle %m-directive. case *ssa.Global: cd := x.globalData(global.Pos()) cd.others = append(cd.others, v.Pos()) case *ssa.FieldAddr, *ssa.Field: // TODO: mark field index v.Field of v.X.Type() for extraction. extract // an example args as to give parameters for the translator. case *ssa.Slice: if v.Low == nil && v.High == nil && v.Max == nil { x.visitInit(global, v.X) } case *ssa.Alloc: if ref := v.Referrers(); ref == nil { for _, r := range *ref { values := []ssa.Value{} for _, o := range r.Operands(nil) { if o == nil || *o == v { continue } values = append(values, *o) } // TODO: return something different if we care about multiple // values as well. if len(values) == 1 { x.visitInit(global, values[0]) } } } case ssa.Instruction: rands := v.Operands(nil) if len(rands) == 1 && rands[0] != nil { x.visitInit(global, *rands[0]) } } return } // visitFormats finds the original source of the value. The returned index is // position of the argument if originated from a function argument or -1 // otherwise. func (x *extracter) visitFormats(call *callData, v ssa.Value) { if v == nil { return } x.debug(v, "VALUE", v) switch v := v.(type) { case *ssa.Phi: for _, e := range v.Edges { x.visitFormats(call, e) } case *ssa.Const: // Only record strings with letters. if isMsg(constant.StringVal(v.Value)) { x.debug(call.call, "FORMAT", v.Value.ExactString()) call.formats = append(call.formats, v.Value) } // TODO: handle %m-directive. case *ssa.Global: x.globalData(v.Pos()).call = call case *ssa.FieldAddr, *ssa.Field: // TODO: mark field index v.Field of v.X.Type() for extraction. extract // an example args as to give parameters for the translator. case *ssa.Slice: if v.Low == nil && v.High == nil && v.Max == nil { x.visitFormats(call, v.X) } case *ssa.Parameter: // TODO: handle the function for the index parameter. f := v.Parent() for i, p := range f.Params { if p == v { if call.formatPos < 0 { call.formatPos = i // TODO: is there a better way to detect this is calling // a method rather than a function? call.isMethod = len(f.Params) > f.Signature.Params().Len() x.handleFunc(v.Parent(), call) } else if debug && i != call.formatPos { // TODO: support this. fmt.Printf("WARNING:%s: format string passed to arg %d and %d\n", posString(&x.conf, call.Pkg(), call.Pos()), call.formatPos, i) } } } case *ssa.Alloc: if ref := v.Referrers(); ref == nil { for _, r := range *ref { values := []ssa.Value{} for _, o := range r.Operands(nil) { if o == nil || *o == v { continue } values = append(values, *o) } // TODO: return something different if we care about multiple // values as well. if len(values) == 1 { x.visitFormats(call, values[0]) } } } // TODO: // case *ssa.Index: // // Get all values in the array if applicable // case *ssa.IndexAddr: // // Get all values in the slice or *array if applicable. // case *ssa.Lookup: // // Get all values in the map if applicable. case *ssa.FreeVar: // TODO: find the link between free variables and parameters: // // func freeVar(p *message.Printer, str string) { // fn := func(p *message.Printer) { // p.Printf(str) // } // fn(p) // } case *ssa.Call: case ssa.Instruction: rands := v.Operands(nil) if len(rands) == 1 && rands[0] != nil { x.visitFormats(call, *rands[0]) } } } // Note: a function may have an argument marked as both format and passthrough. // visitArgs collects information on arguments. For wrapped functions it will // just determine the position of the variable args slice. func (x *extracter) visitArgs(fd *callData, v ssa.Value) { if v == nil { return } x.debug(v, "ARGV", v) switch v := v.(type) { case *ssa.Slice: if v.Low == nil && v.High == nil && v.Max == nil { x.visitArgs(fd, v.X) } case *ssa.Parameter: // TODO: handle the function for the index parameter. f := v.Parent() for i, p := range f.Params { if p == v { fd.argPos = i } } case *ssa.Alloc: if ref := v.Referrers(); ref == nil { for _, r := range *ref { values := []ssa.Value{} for _, o := range r.Operands(nil) { if o == nil || *o == v { continue } values = append(values, *o) } // TODO: return something different if we care about // multiple values as well. if len(values) == 1 { x.visitArgs(fd, values[0]) } } } case ssa.Instruction: rands := v.Operands(nil) if len(rands) == 1 && rands[0] != nil { x.visitArgs(fd, *rands[0]) } } } // print returns Go syntax for the specified node. func (x *extracter) print(n ast.Node) string { var buf bytes.Buffer format.Node(&buf, x.conf.Fset, n) return buf.String() } type packageExtracter struct { f *ast.File x *extracter info *loader.PackageInfo cmap ast.CommentMap } func (px packageExtracter) getComment(n ast.Node) string { cs := px.cmap.Filter(n).Comments() if len(cs) > 0 { return strings.TrimSpace(cs[0].Text()) } return "" } func (x *extracter) extractMessages() { prog := x.iprog files := []packageExtracter{} for _, info := range x.iprog.AllPackages { for _, f := range info.Files { // Associate comments with nodes. px := packageExtracter{ f, x, info, ast.NewCommentMap(prog.Fset, f, f.Comments), } files = append(files, px) } } for _, px := range files { ast.Inspect(px.f, func(n ast.Node) bool { switch v := n.(type) { case *ast.CallExpr: if d := x.funcs[v.Lparen]; d != nil { d.expr = v } } return true }) } for _, px := range files { ast.Inspect(px.f, func(n ast.Node) bool { switch v := n.(type) { case *ast.CallExpr: return px.handleCall(v) case *ast.ValueSpec: return px.handleGlobal(v) } return true }) } } func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool { comment := px.getComment(spec) for _, ident := range spec.Names { data, ok := px.x.globals[ident.Pos()] if !ok { continue } name := ident.Name var arguments []argument if data.call != nil { arguments = px.getArguments(data.call) } else if !strings.HasPrefix(name, "msg") && !strings.HasPrefix(name, "Msg") { continue } data.visit(px.x, func(c constant.Value) { px.addMessage(spec.Pos(), []string{name}, c, comment, arguments) }) } return true } func (px packageExtracter) handleCall(call *ast.CallExpr) bool { x := px.x data := x.funcs[call.Lparen] if data == nil || len(data.formats) == 0 { return true } if data.expr != call { panic("invariant `data.call != call` failed") } x.debug(data.call, "INSERT", data.formats) argn := data.callFormatPos() if argn >= len(call.Args) { return true } format := call.Args[argn] arguments := px.getArguments(data) comment := "" key := []string{} if ident, ok := format.(*ast.Ident); ok { key = append(key, ident.Name) if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil { // TODO: get comment above ValueSpec as well comment = v.Comment.Text() } } if c := px.getComment(call.Args[0]); c != "" { comment = c } formats := data.formats for _, c := range formats { px.addMessage(call.Lparen, key, c, comment, arguments) } return true } func (px packageExtracter) getArguments(data *callData) []argument { arguments := []argument{} x := px.x info := px.info if data.callArgsStart() >= 0 { args := data.expr.Args[data.callArgsStart():] for i, arg := range args { expr := x.print(arg) val := "" if v := info.Types[arg].Value; v != nil { val = v.ExactString() switch arg.(type) { case *ast.BinaryExpr, *ast.UnaryExpr: expr = val } } arguments = append(arguments, argument{ ArgNum: i + 1, Type: info.Types[arg].Type.String(), UnderlyingType: info.Types[arg].Type.Underlying().String(), Expr: expr, Value: val, Comment: px.getComment(arg), Position: posString(&x.conf, info.Pkg, arg.Pos()), // TODO report whether it implements // interfaces plural.Interface, // gender.Interface. }) } } return arguments } func (px packageExtracter) addMessage( pos token.Pos, key []string, c constant.Value, comment string, arguments []argument) { x := px.x fmtMsg := constant.StringVal(c) ph := placeholders{index: map[string]string{}} trimmed, _, _ := trimWS(fmtMsg) p := fmtparser.Parser{} simArgs := make([]interface{}, len(arguments)) for i, v := range arguments { simArgs[i] = v } msg := "" p.Reset(simArgs) for p.SetFormat(trimmed); p.Scan(); { name := "" var arg *argument switch p.Status { case fmtparser.StatusText: msg += p.Text() continue case fmtparser.StatusSubstitution, fmtparser.StatusBadWidthSubstitution, fmtparser.StatusBadPrecSubstitution: arguments[p.ArgNum-1].used = true arg = &arguments[p.ArgNum-1] name = getID(arg) case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg: arg = &argument{ ArgNum: p.ArgNum, Position: posString(&x.conf, px.info.Pkg, pos), } name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum) } sub := p.Text() if !p.HasIndex { r, sz := utf8.DecodeLastRuneInString(sub) sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r) } msg += fmt.Sprintf("{%s}", ph.addArg(arg, name, sub)) } key = append(key, msg) // Add additional Placeholders that can be used in translations // that are not present in the string. for _, arg := range arguments { if arg.used { continue } ph.addArg(&arg, getID(&arg), fmt.Sprintf("%%[%d]v", arg.ArgNum)) } x.messages = append(x.messages, Message{ ID: key, Key: fmtMsg, Message: Text{Msg: msg}, // TODO(fix): this doesn't get the before comment. Comment: comment, Placeholders: ph.slice, Position: posString(&x.conf, px.info.Pkg, pos), }) } func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string { p := conf.Fset.Position(pos) file := fmt.Sprintf("%s:%d:%d", filepath.Base(p.Filename), p.Line, p.Column) return filepath.Join(pkg.Path(), file) } func getID(arg *argument) string { s := getLastComponent(arg.Expr) s = strip(s) s = strings.Replace(s, " ", "", -1) // For small variable names, use user-defined types for more info. if len(s) <= 2 && arg.UnderlyingType != arg.Type { s = getLastComponent(arg.Type) } return strings.Title(s) } // strip is a dirty hack to convert function calls to placeholder IDs. func strip(s string) string { s = strings.Map(func(r rune) rune { if unicode.IsSpace(r) || r == '-' { return '_' } if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) { return -1 } return r }, s) // Strip "Get" from getter functions. if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") { if len(s) > len("get") { r, _ := utf8.DecodeRuneInString(s) if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark s = s[len("get"):] } } } return s } // verbToPlaceholder gives a name for a placeholder based on the substitution // verb. This is only to be used if there is otherwise no other type information // available. func verbToPlaceholder(sub string, pos int) (name, underlying string) { r, _ := utf8.DecodeLastRuneInString(sub) name = fmt.Sprintf("Arg_%d", pos) switch r { case 's', 'q': underlying = "string" case 'd': name = "Integer" underlying = "int" case 'e', 'f', 'g': name = "Number" underlying = "float64" case 'm': name = "Message" underlying = "string" default: underlying = "interface{}" } return name, underlying } type placeholders struct { index map[string]string slice []Placeholder } func (p *placeholders) addArg(arg *argument, name, sub string) (id string) { id = name alt, ok := p.index[id] for i := 1; ok && alt != sub; i++ { id = fmt.Sprintf("%s_%d", name, i) alt, ok = p.index[id] } p.index[id] = sub p.slice = append(p.slice, Placeholder{ ID: id, String: sub, Type: arg.Type, UnderlyingType: arg.UnderlyingType, ArgNum: arg.ArgNum, Expr: arg.Expr, Comment: arg.Comment, }) return id } func getLastComponent(s string) string { return s[1+strings.LastIndexByte(s, '.'):] } // isMsg returns whether s should be translated. func isMsg(s string) bool { // TODO: parse as format string and omit strings that contain letters // coming from format verbs. for _, r := range s { if unicode.In(r, unicode.L) { return true } } return false }