Commit 4bdaf59c authored by Robert Griesemer's avatar Robert Griesemer

godoc: support for regular expression full text search

Regular expressions may now be used in conjunction with full text
search. Godoc will show the first 10000 occurrences in the source
code and highlight the respective text segments.

- added new flag -testDir to specify a small directory for testing
  (fast index creation; default = "")

- use new FormatText function to format text and Go source
  code in HTML (supporting multiple kinds of text selections
  simultaneously); this replaces the uses of go/printer
  Stylers

- for now removed currently unused mechanism for identifier-
  specific JS popups (will come back in some form once we
  have type or other useful information)

- various typo fixes and minor cleanups throughout

Missing:
- indexing of non-.go files

R=r, r2
CC=golang-dev, rsc
https://golang.org/cl/3699041
parent dd916be3
......@@ -161,9 +161,30 @@ span.comment {
color: #002090;
}
span.highlight {
background: #FFFF90;
background: #FF9900;
font-weight: bold;
}
span.highlight-comment {
background: #FF9900;
font-weight: bold;
color: #002090;
}
span.selection {
background: #FFFF00
}
span.selection-comment {
color: #002090;
background: #FFFF00
}
span.selection-highlight {
background: #FF9900;
font-weight: bold;
}
span.selection-highlight-comment {
background: #FF9900;
font-weight: bold;
color: #002090;
}
span.alert {
color: #D00000;
}
......
......@@ -4,10 +4,9 @@
license that can be found in the LICENSE file.
-->
{.section Accurate}
{.or}
{.section Alert}
<p>
<span class="alert" style="font-size:120%">Indexing in progress - result may be inaccurate.</span>
<span class="alert" style="font-size:120%">{@}</span>
</p>
{.end}
{.section Alt}
......@@ -27,7 +26,7 @@
{.repeated section Groups}
{.repeated section Infos}
<a href="/{File.Path|url-src}?h={Query|urlquery-esc}#L{@|infoLine}">{File.Path|url-src}:{@|infoLine}</a>
<pre>{@|infoSnippet}</pre>
{@|infoSnippet}
{.end}
{.end}
{.end}
......@@ -59,11 +58,11 @@
{.end}
{.section Textual}
{.section Complete}
<h2 id="Textual">{Found|html-esc} textual occurences</h2>
<h2 id="Textual">{Found|html-esc} textual occurrences</h2>
{.or}
<h2 id="Textual">More than {Found|html-esc} textual occurences</h2>
<h2 id="Textual">More than {Found|html-esc} textual occurrences</h2>
<p>
<span class="alert" style="font-size:120%">Not all files or lines containing {Query|html-esc} are shown.</span>
<span class="alert" style="font-size:120%">Not all files or lines containing "{Query|html-esc}" are shown.</span>
</p>
{.end}
<p>
......@@ -71,12 +70,16 @@
{.repeated section @}
<tr>
<td align="left" valign="top">
<a href="/{Filename|url-src}?g={Query|urlquery-esc}">{Filename|url-src}</a>:
<a href="/{Filename|url-src}?h={Query|urlquery-esc}">{Filename|url-src}</a>:
</td>
<td align="left" width="4"></td>
<th align="left" valign="top">{Lines|numlines}</th>
<td align="left" width="4"></td>
<td align="left">{Lines Complete|linelist}</td>
<td align="left">
{.repeated section Lines}
<a href="/{Filename|url-src}?h={Query|urlquery-esc}#L{@|html-esc}">{@|html-esc}</a>
{.end}
</td>
</tr>
{.end}
{.section Complete}
......
QUERY
{Query}
{.section Accurate}
{.or}
INDEXING IN PROGRESS - RESULT MAY BE INACCURATE
{.section Alert}
{@}
{.end}
{.section Alt}
......
<!--
Copyright 2009 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
-->
<script src="http://www.google.com/jsapi"></script>
<script src="/doc/popups.js"></script>
<script>
{# IdList is HTML-escaped by godoc}
var popup_data = {IdList}
google.load("jquery", "1");
google.setOnLoadCallback(function() {.meta-left}
godocs_bindPopups(popup_data);
{.meta-right});
</script>
{# Source is HTML-escaped by godoc}
<pre>{Source}</pre>
......@@ -8,6 +8,7 @@ TARG=godoc
GOFILES=\
codewalk.go\
dirtrees.go\
format.go\
godoc.go\
index.go\
main.go\
......
......@@ -48,7 +48,7 @@ The flags are:
-timestamps=true
show timestamps with directory listings
-fulltext=false
build full text index for string search results
build full text index for regular expression queries
-path=""
additional package directories (colon-separated)
-html
......
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements FormatSelections and FormatText.
// FormatText is used to HTML-format Go and non-Go source
// text with line numbers and highlighted sections. It is
// built on top of FormatSelections, a generic formatter
// for "selected" text.
package main
import (
"bytes"
"fmt"
"go/scanner"
"go/token"
"io"
"regexp"
"strconv"
"template"
)
// ----------------------------------------------------------------------------
// Implementation of FormatSelections
// A Selection is a function returning offset pairs []int{a, b}
// describing consecutive non-overlapping text segments [a, b).
// Each call yields the next segment; if there are no more
// segments, a Selection must return nil.
//
// The merger used by FormatSelections marks offsets as consumed
// by overwriting them inside the returned slice, so a Selection
// must not rely on the slices it hands out staying unchanged.
//
// TODO It's more efficient to return a pair (a, b int) instead
// of creating lots of slices. Need to determine how to
// indicate the end of a Selection.
//
type Selection func() []int
// A LinkWriter writes some start or end "tag" to w for the text offset offs.
// It is called by FormatSelections at the start (start == true) or at the
// end (start == false) of each link segment.
//
type LinkWriter func(w io.Writer, offs int, start bool)
// A SegmentWriter formats a text according to selections and writes it to w.
// The selections parameter is a bit set indicating which selections provided
// to FormatSelections overlap with the text segment: If the n'th bit is set
// in selections, the n'th selection provided to FormatSelections is overlapping
// with the text. FormatSelections may invoke a SegmentWriter with an empty
// text (e.g. when two segment changes occur at the same offset).
//
type SegmentWriter func(w io.Writer, text []byte, selections int)
// FormatSelections takes a text and writes it to w using link and segment
// writers lw and sw as follows: lw is invoked for consecutive segment starts
// and ends as specified through the links selection, and sw is invoked for
// consecutive segments of text overlapped by the same selections as specified
// by selections. The link writer lw may be nil, in which case the links
// Selection is ignored and every entry of selections is treated as an
// ordinary selection.
//
// Segment changes at offsets past len(text) terminate the formatting; the
// remaining text is written with the selection bits active at that point.
//
func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
	// If we have a link writer, the links selection becomes the last
	// entry of the merged selections. Without a link writer there is
	// no link entry at all: linkIndex stays negative so that no regular
	// selection can be mistaken for it (which would call a nil lw).
	linkIndex := -1
	if lw != nil {
		linkIndex = len(selections)
		selections = append(selections, links)
	}

	// compute the sequence of consecutive segment changes
	changes := newMerger(selections)

	// The i'th bit in bitset indicates that the text
	// at the current offset is covered by selections[i].
	bitset := 0
	lastOffs := 0
	for {
		// get the next segment change
		index, offs, start := changes.next()
		if index < 0 || offs > len(text) {
			// no more segment changes or the next change
			// is past the end of the text - we're done
			break
		}

		// Any change ends the current segment: format the text
		// accumulated so far with the selections active until now.
		sw(w, text[lastOffs:offs], bitset)
		lastOffs = offs

		if index == linkIndex {
			// link segment change: emit the link tag; links do
			// not contribute to the selection bitset
			lw(w, offs, start)
			continue
		}

		// selection change: update the bitset for the next segment
		mask := 1 << uint(index)
		if start {
			bitset |= mask
		} else {
			bitset &^= mask
		}
	}

	// write whatever text is left after the last segment change
	sw(w, text[lastOffs:], bitset)
}
// A merger merges a slice of Selections and produces a sequence of
// consecutive segment change events through repeated next() calls.
// Offsets that have already been reported are overwritten with
// infinity inside segments.
//
type merger struct {
selections []Selection // the Selections being merged
segments [][]int // segments[i] is the next segment of selections[i]
}
// infinity is the sentinel offset used by the merger: it marks segment
// offsets that are exhausted or already consumed and serves as the
// initial "no offset found" value in next. It is presumably chosen to
// exceed any real text offset while still fitting into a 32-bit int.
const infinity int = 2e9
// newMerger returns a merger for selections that is primed with the
// first segment of each Selection. A nil Selection, or one that has
// no segments at all, is represented by an {infinity, infinity}
// segment so that it never produces a change event.
func newMerger(selections []Selection) *merger {
	first := make([][]int, len(selections))
	for i := range selections {
		var seg []int
		if sel := selections[i]; sel != nil {
			seg = sel()
		}
		if seg == nil {
			seg = []int{infinity, infinity}
		}
		first[i] = seg
	}
	return &merger{selections: selections, segments: first}
}
// next reports the next segment change event. The result index identifies
// the Selection the change belongs to, offs is the text offset of the
// change, and start tells whether a segment starts (true) or ends (false)
// at offs. When all selections are exhausted, the returned index is < 0.
//
// If several changes share the smallest offset, the one belonging to the
// selection with the lowest index is reported first.
//
func (m *merger) next() (index, offs int, start bool) {
	// look for the smallest pending start or end offset
	index, offs = -1, infinity
	for i, seg := range m.segments {
		if from := seg[0]; from < offs {
			index, offs, start = i, from, true
		} else if to := seg[1]; to < offs {
			index, offs, start = i, to, false
		}
	}
	if index < 0 {
		// nothing pending => all selections are merged
		return
	}

	// Whether we report the start or the end of the segment, its start
	// offset is done with; infinity keeps it out of future searches.
	cur := m.segments[index]
	cur[0] = infinity
	if !start {
		// the segment is complete: retire its end offset as well and
		// move on to the selection's next segment, if there is one
		cur[1] = infinity
		if following := m.selections[index](); following != nil {
			m.segments[index] = following
		}
	}
	return
}
// ----------------------------------------------------------------------------
// Implementation of FormatText
// lineSelection returns a Selection that yields one segment per line of
// text. A line extends up to and including its terminating newline; a
// final line without a newline extends to the end of text.
func lineSelection(text []byte) Selection {
	begin := 0 // offset at which the next line starts
	return func() []int {
		if begin >= len(text) {
			// all lines have been reported
			return nil
		}
		// advance end just past the next newline, or to the end of text
		end := begin
		for end < len(text) {
			c := text[end]
			end++
			if c == '\n' {
				break
			}
		}
		seg := []int{begin, end}
		begin = end
		return seg
	}
}
// commentSelection returns a Selection that yields the extent of each
// comment in the Go source src, in source order. The source is scanned
// lazily: every call resumes scanning after the previously reported
// comment and stops at the next comment or at the end of the file.
//
func commentSelection(src []byte) Selection {
	var s scanner.Scanner
	file := s.Init(token.NewFileSet(), "", src, nil, scanner.ScanComments+scanner.InsertSemis)
	return func() []int {
		for {
			pos, tok, lit := s.Scan()
			switch tok {
			case token.EOF:
				// no more comments
				return nil
			case token.COMMENT:
				from := file.Offset(pos)
				return []int{from, from + len(lit)}
			}
			// any other token: keep scanning
		}
	}
}
// makeSelection turns a slice of offset pairs into a Selection that
// yields the pairs one by one, in order, and nil once they are used up.
// The pairs are handed out as is (they are not copied).
func makeSelection(matches [][]int) Selection {
	pending := matches
	return func() []int {
		if len(pending) == 0 {
			return nil
		}
		seg := pending[0]
		pending = pending[1:]
		return seg
	}
}
// regexpSelection returns a Selection yielding all matches of the regular
// expression expr in text. If expr does not compile, the error is dropped
// and the resulting Selection is empty.
func regexpSelection(text []byte, expr string) Selection {
	rx, err := regexp.Compile(expr)
	if err != nil {
		// invalid pattern: nothing to select
		return makeSelection(nil)
	}
	return makeSelection(rx.FindAllIndex(text, -1))
}
// selRx matches a leading "from:to" pair of decimal numbers; the two
// numbers are captured as submatches 1 and 2.
var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
// rangeSelection returns a Selection consisting of the single segment
// [from, to) described by str, which must start with "from:to" as
// matched by selRx. The result is nil if str does not match or if
// the range is empty or reversed (from >= to).
//
func rangeSelection(str string) Selection {
	m := selRx.FindStringSubmatch(str)
	if len(m) < 2 {
		// str is not a range description
		return nil
	}
	// conversion errors are deliberately ignored
	// (the submatches consist of digits only)
	from, _ := strconv.Atoi(m[1])
	to, _ := strconv.Atoi(m[2])
	if from >= to {
		return nil
	}
	return makeSelection([][]int{[]int{from, to}})
}
// Span tags for all the possible selection combinations that may
// be generated by FormatText. Selections are indicated by a bitset,
// and the value of the bitset specifies the tag to be used, i.e.
// startTags[bitset] is the opening tag for text covered by exactly
// the selections in bitset. The entry for the empty bitset is empty:
// such text is written without a surrounding span.
//
// bit 0: comments
// bit 1: highlights
// bit 2: selections
//
// NOTE(review): the 001 entry is written `class ="comment"` with a
// space before '=' unlike all other entries - presumably unintended.
//
var startTags = [][]byte{
/* 000 */ []byte(``),
/* 001 */ []byte(`<span class ="comment">`),
/* 010 */ []byte(`<span class="highlight">`),
/* 011 */ []byte(`<span class="highlight-comment">`),
/* 100 */ []byte(`<span class="selection">`),
/* 101 */ []byte(`<span class="selection-comment">`),
/* 110 */ []byte(`<span class="selection-highlight">`),
/* 111 */ []byte(`<span class="selection-highlight-comment">`),
}
// endTag closes any span opened with a non-empty startTags entry.
var endTag = []byte(`</span>`)
// selectionTag is the SegmentWriter used by FormatText: it writes the
// HTML-escaped text to w, wrapped in the span selected by the selections
// bitset (see startTags). Text is written without a span if the bitset
// has no (non-empty) start tag; empty text produces no output at all.
func selectionTag(w io.Writer, text []byte, selections int) {
	if len(text) == 0 {
		return
	}

	// look up the opening tag, if any, for this combination of selections
	var open []byte
	if selections < len(startTags) {
		open = startTags[selections]
	}
	if len(open) == 0 {
		template.HTMLEscape(w, text)
		return
	}

	w.Write(open)
	template.HTMLEscape(w, text)
	w.Write(endTag)
}
// FormatText HTML-escapes text and returns it wrapped in <pre> tags.
// Consecutive text segments are wrapped in HTML spans (with tags as
// defined by startTags and endTag) as follows:
//
// - if line >= 0, line numbers are printed before each line, starting
// with the value of line
// - if the text is Go source, comments get the "comment" span class
// - each occurrence of the regular expression pattern gets the "highlight"
// span class
// - text segments covered by selection get the "selection" span class
//
// Comments, highlights, and selections may overlap arbitrarily; the respective
// HTML span classes are specified in the startTags variable.
//
func FormatText(text []byte, line int, goSource bool, pattern string, selection Selection) []byte {
	var buf bytes.Buffer
	buf.WriteString("<pre>\n")

	var comments, highlights Selection
	if goSource {
		comments = commentSelection(text)
	}
	if pattern != "" {
		highlights = regexpSelection(text, pattern)
	}

	// The text must go through FormatSelections whenever there is any
	// per-segment work to do. That includes line numbering: without the
	// line >= 0 test, plain text with no pattern and no selection would
	// be emitted without the line numbers promised above.
	if line >= 0 || comments != nil || highlights != nil || selection != nil {
		var lineTag LinkWriter
		if line >= 0 {
			// emit an anchor and the line number at the start of each
			// line; line is advanced as a side effect of each call
			lineTag = func(w io.Writer, _ int, start bool) {
				if start {
					fmt.Fprintf(w, "<a id=\"L%d\"></a>%5d\t", line, line)
					line++
				}
			}
		}
		FormatSelections(&buf, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
	} else {
		// nothing to decorate: escaping the text is all that is needed
		template.HTMLEscape(&buf, text)
	}

	buf.WriteString("</pre>\n")
	return buf.Bytes()
}
This diff is collapsed.
......@@ -7,7 +7,7 @@
//
// Algorithm for identifier index:
// - traverse all .go files of the file tree specified by root
// - for each word (identifier) encountered, collect all occurences (spots)
// - for each word (identifier) encountered, collect all occurrences (spots)
// into a list; this produces a list of spots for each word
// - reduce the lists: from a list of spots to a list of FileRuns,
// and from a list of FileRuns into a list of PakRuns
......@@ -48,6 +48,7 @@ import (
"io/ioutil"
"os"
pathutil "path"
"regexp"
"sort"
"strings"
)
......@@ -247,7 +248,7 @@ type File struct {
}
// A Spot describes a single occurence of a word.
// A Spot describes a single occurrence of a word.
type Spot struct {
File *File
Info SpotInfo
......@@ -435,7 +436,7 @@ const excludeTestFiles = false
type IndexResult struct {
Decls RunList // package-level declarations (with snippets)
Others RunList // all other occurences
Others RunList // all other occurrences
}
......@@ -445,7 +446,7 @@ type Statistics struct {
Files int // number of indexed source files
Lines int // number of lines (all files)
Words int // number of different identifiers
Spots int // number of identifier occurences
Spots int // number of identifier occurrences
}
......@@ -709,7 +710,7 @@ func (x *Indexer) visitFile(dirname string, f *os.FileInfo) {
type LookupResult struct {
Decls HitList // package-level declarations (with snippets)
Others HitList // all other occurences
Others HitList // all other occurrences
}
......@@ -833,14 +834,14 @@ func isIdentifier(s string) bool {
// For a given query, which is either a single identifier or a qualified
// identifier, Lookup returns a LookupResult, and a list of alternative
// spellings, if any. If the query syntax is wrong, illegal is set.
func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, illegal bool) {
// spellings, if any. If the query syntax is wrong, an error is reported.
func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, err os.Error) {
ss := strings.Split(query, ".", -1)
// check query syntax
for _, s := range ss {
if !isIdentifier(s) {
illegal = true
err = os.NewError("all query parts must be identifiers")
return
}
}
......@@ -860,7 +861,7 @@ func (x *Index) Lookup(query string) (match *LookupResult, alt *AltWords, illega
}
default:
illegal = true
err = os.NewError("query is not a (qualified) identifier")
}
return
......@@ -886,60 +887,91 @@ func (list positionList) Less(i, j int) bool { return list[i].filename < list[j]
func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] }
// A Positions value specifies a file and line numbers within that file.
type Positions struct {
// unique sorts list in place and returns its prefix holding the
// distinct values in increasing order. The argument's underlying
// array is reused (and thus overwritten) for the result.
func unique(list []int) []int {
	sort.SortInts(list)
	n := 0 // number of distinct values kept so far
	for _, x := range list {
		if n > 0 && list[n-1] == x {
			// same as the value kept last - skip the duplicate
			continue
		}
		list[n] = x
		n++
	}
	return list[0:n]
}
// A FileLines value specifies a file and line numbers within that file.
// LookupRegexp reports its matches as a list of FileLines, one per file.
type FileLines struct {
Filename string
Lines []int
}
// LookupString returns the number and list of positions where a string
// s is found in the full text index and whether the result is complete
// or not. At most n positions (filename and line) are returned (and thus
// found <= n). The result is incomplete if the index is not present or
// if there are more than n occurrences of s.
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n).
//
func (x *Index) LookupString(s string, n int) (found int, result []Positions, complete bool) {
if x.suffixes == nil {
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
if x.suffixes == nil || n <= 0 {
return
}
offsets := x.suffixes.Lookup([]byte(s), n+1)
if len(offsets) <= n {
complete = true
} else {
offsets = offsets[0:n]
}
found = len(offsets)
// compute file names and lines and sort the list by filename
list := make(positionList, len(offsets))
for i, offs := range offsets {
// by construction, an offs corresponds to
// the Pos value for the file set - use it
// to get full Position information
pos := x.fset.Position(token.Pos(offs))
list[i].filename = pos.Filename
list[i].line = pos.Line
// n > 0
var list positionList
// FindAllIndex may return matches that span across file boundaries.
// Such matches are unlikely, but after eliminating them we may end up
// with fewer than n matches. If we don't have enough at the end, redo
// the search with an increased value n1, but only if FindAllIndex
// returned all the requested matches in the first place (if it
// returned fewer than that there cannot be more).
for n1 := n; found < n; n1 += n - found {
found = 0
matches := x.suffixes.FindAllIndex(r, n1)
// compute files, exclude matches that span file boundaries,
// and map offsets to file-local offsets
list = make(positionList, len(matches))
for _, m := range matches {
// by construction, an offset corresponds to the Pos value
// for the file set - use it to get the file and line
p := token.Pos(m[0])
if file := x.fset.File(p); file != nil {
if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
// match [m[0], m[1]) is within the file boundaries
list[found].filename = file.Name()
list[found].line = file.Line(p)
found++
}
}
}
if found == n || len(matches) < n1 {
// found all matches or there's no chance to find more
break
}
}
sort.Sort(list)
list = list[0:found]
sort.Sort(list) // sort by filename
// compact positions with equal file names
// collect matches belonging to the same file
var last string
var lines []int
for _, pos := range list {
if pos.filename != last {
if len(lines) > 0 {
result = append(result, Positions{last, lines})
lines = nil
}
last = pos.filename
addLines := func() {
if len(lines) > 0 {
// remove duplicate lines
result = append(result, FileLines{last, unique(lines)})
lines = nil
}
lines = append(lines, pos.line)
}
if len(lines) > 0 {
result = append(result, Positions{last, lines})
for _, m := range list {
if m.filename != last {
addLines()
last = m.filename
}
lines = append(lines, m.line)
}
addLines()
return
}
......@@ -74,7 +74,7 @@ func serveError(w http.ResponseWriter, r *http.Request, relpath string, err os.E
func exec(rw http.ResponseWriter, args []string) (status int) {
r, w, err := os.Pipe()
if err != nil {
log.Printf("os.Pipe(): %v\n", err)
log.Printf("os.Pipe(): %v", err)
return 2
}
......@@ -87,7 +87,7 @@ func exec(rw http.ResponseWriter, args []string) (status int) {
defer r.Close()
w.Close()
if err != nil {
log.Printf("os.ForkExec(%q): %v\n", bin, err)
log.Printf("os.ForkExec(%q): %v", bin, err)
return 2
}
......@@ -96,7 +96,7 @@ func exec(rw http.ResponseWriter, args []string) (status int) {
wait, err := os.Wait(pid, 0)
if err != nil {
os.Stderr.Write(buf.Bytes())
log.Printf("os.Wait(%d, 0): %v\n", pid, err)
log.Printf("os.Wait(%d, 0): %v", pid, err)
return 2
}
status = wait.ExitStatus()
......@@ -127,8 +127,7 @@ func dosync(w http.ResponseWriter, r *http.Request) {
// TODO(gri): The directory tree may be temporarily out-of-sync.
// Consider keeping separate time stamps so the web-
// page can indicate this discrepancy.
fsTree.set(newDirectory(*goroot, nil, -1))
invalidateIndex()
initFSTree()
fallthrough
case 1:
// sync failed because no files changed;
......@@ -238,11 +237,14 @@ func main() {
// HTTP server mode.
var handler http.Handler = http.DefaultServeMux
if *verbose {
log.Printf("Go Documentation Server\n")
log.Printf("version = %s\n", runtime.Version())
log.Printf("address = %s\n", *httpAddr)
log.Printf("goroot = %s\n", *goroot)
log.Printf("tabwidth = %d\n", *tabwidth)
log.Printf("Go Documentation Server")
log.Printf("version = %s", runtime.Version())
log.Printf("address = %s", *httpAddr)
log.Printf("goroot = %s", *goroot)
log.Printf("tabwidth = %d", *tabwidth)
if *fulltextIndex {
log.Print("full text index enabled")
}
if !fsMap.IsEmpty() {
log.Print("user-defined mapping:")
fsMap.Fprint(os.Stderr)
......@@ -257,10 +259,7 @@ func main() {
// Initialize default directory tree with corresponding timestamp.
// (Do it in a goroutine so that launch is quick.)
go func() {
fsTree.set(newDirectory(*goroot, nil, -1))
invalidateIndex()
}()
go initFSTree()
// Initialize directory trees for user-defined file systems (-path flag).
initDirTrees()
......
......@@ -13,41 +13,21 @@ import (
"bytes"
"go/ast"
"go/token"
"go/printer"
"fmt"
)
type Snippet struct {
Line int
Text string
}
type snippetStyler struct {
Styler // defined in godoc.go
highlight *ast.Ident // identifier to highlight
}
func (s *snippetStyler) LineTag(line int) (text []uint8, tag printer.HTMLTag) {
return // no LineTag for snippets
}
func (s *snippetStyler) Ident(id *ast.Ident) (text []byte, tag printer.HTMLTag) {
text = []byte(id.Name)
if s.highlight == id {
tag = printer.HTMLTag{"<span class=highlight>", "</span>"}
}
return
Text []byte
}
func newSnippet(fset *token.FileSet, decl ast.Decl, id *ast.Ident) *Snippet {
// TODO instead of pretty-printing the node, should use the original source instead
var buf bytes.Buffer
writeNode(&buf, fset, decl, true, &snippetStyler{highlight: id})
return &Snippet{fset.Position(id.Pos()).Line, buf.String()}
writeNode(&buf, fset, decl, true)
return &Snippet{fset.Position(id.Pos()).Line, FormatText(buf.Bytes(), -1, true, id.Name, nil)}
}
......@@ -113,9 +93,11 @@ func NewSnippet(fset *token.FileSet, decl ast.Decl, id *ast.Ident) (s *Snippet)
// handle failure gracefully
if s == nil {
var buf bytes.Buffer
fmt.Fprintf(&buf, `<span class="alert">could not generate a snippet for <span class="highlight">%s</span></span>`, id.Name)
s = &Snippet{
fset.Position(id.Pos()).Line,
fmt.Sprintf(`could not generate a snippet for <span class="highlight">%s</span>`, id.Name),
buf.Bytes(),
}
}
return
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment