Reimplement the comment extractor with CSP (communicating sequential processes: goroutines and channels)

This commit is contained in:
Sameer Rahmani 2022-07-18 19:04:40 +01:00
parent 49b9afbc83
commit 2f15bfdfd3
4 changed files with 173 additions and 104 deletions

View File

@ -27,13 +27,15 @@ var listCmd = &cobra.Command{
Short: "Lists all the TODOs and annotations in the project", Short: "Lists all the TODOs and annotations in the project",
Long: `Lists all the TODOs and annotations in the project`, Long: `Lists all the TODOs and annotations in the project`,
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
files, err := core.FindAllFiles(state) ch := make(chan string, 10)
if err != nil { state.WaitGroup.Add(2)
panic(err) go core.FindAllFiles(state, ch)
} go core.ProcessFiles(state, ch)
go state.LogErrors()
core.ProcessFiles(state, *files) state.WaitGroup.Wait()
close(state.ErrChannel)
}, },
} }

View File

@ -20,14 +20,14 @@ package core
import ( import (
"bufio" "bufio"
"fmt" "fmt"
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
"sync"
"time"
"strings" "strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
) )
type Location struct { type Location struct {
@ -35,15 +35,16 @@ type Location struct {
End int End int
} }
type RawCommetBlock struct { type RawCommentBlock struct {
LineNo int LineNo int
Lines []string Lines []string
File *File
} }
type FileRawBlocks struct { type File struct {
Path *string Path string
Blocks []RawCommetBlock Blocks []RawCommentBlock
lines *[]string Lines []string
} }
type Block struct { type Block struct {
@ -54,6 +55,10 @@ type Block struct {
WrappedCode string WrappedCode string
Loc Location Loc Location
} }
// FileRawBlock is the message sent from FindCommentBlocks to
// processCommentBlocks: one raw comment block together with the language
// definition that was resolved for the file it came from.
type FileRawBlock struct {
// Block is the raw comment block as collected by FindCommentBlocks.
Block RawCommentBlock
// Lang is the language returned by State.GetLangForExt for the file's
// extension; its CommentBlockMarker is used when processing the block.
Lang *Lang
}
type CommentStack struct { type CommentStack struct {
storage []*Block storage []*Block
@ -73,41 +78,53 @@ func (s *CommentStack) Pop() *Block {
return item return item
} }
func (b *RawCommetBlock) append(txt string) { func (b *RawCommentBlock) append(txt string) {
b.Lines = append(b.Lines, txt) b.Lines = append(b.Lines, txt)
} }
func (b *RawCommetBlock) empty() bool { func (b *RawCommentBlock) empty() bool {
return len(b.Lines) == 0 return len(b.Lines) == 0
} }
func FindCommentBlocks(s *State, file *string) (*FileRawBlocks, error) { func FindCommentBlocks(s *State, file string, output chan FileRawBlock, w *sync.WaitGroup) {
fullPath := path.Join(s.ProjectRoot, *file) fullPath := path.Join(s.ProjectRoot, file)
readFile, err := os.Open(fullPath) readFile, err := os.Open(fullPath)
if err != nil { if err != nil {
return nil, err s.ErrChannel <- err
close(output)
w.Done()
return
} }
defer readFile.Close()
ext := filepath.Ext(*file) ext := filepath.Ext(file)
lang := s.GetLangForExt(ext) lang := s.GetLangForExt(ext)
if lang == nil { if lang == nil {
log.Warnf("don't know about the language for extension '%s'", ext) close(output)
return nil, nil w.Done()
// log.Warnf("don't know about the language for extension '%s'", ext)
return
} }
scanner := bufio.NewScanner(readFile) scanner := bufio.NewScanner(readFile)
scanner.Split(bufio.ScanLines) scanner.Split(bufio.ScanLines)
var commentblocks []RawCommetBlock curFile := File{
Path: file,
Blocks: []RawCommentBlock{},
Lines: []string{},
}
var line = 1 var line = 1
var block RawCommetBlock block := RawCommentBlock{0, []string{}, &curFile}
var lines []string
for scanner.Scan() { for scanner.Scan() {
rawText := scanner.Text() rawText := scanner.Text()
lines = append(lines, rawText) curFile.Lines = append(curFile.Lines, rawText)
txt := strings.TrimSpace(rawText) txt := strings.TrimSpace(rawText)
if strings.HasPrefix(txt, *lang.CommentBlockMarker) { if strings.HasPrefix(txt, *lang.CommentBlockMarker) {
@ -117,37 +134,44 @@ func FindCommentBlocks(s *State, file *string) (*FileRawBlocks, error) {
block.append(txt) block.append(txt)
} else { } else {
if !block.empty() { if !block.empty() {
commentblocks = append(commentblocks, block) curFile.Blocks = append(curFile.Blocks, block)
output <- FileRawBlock{block, lang}
} }
block = RawCommetBlock{} block = RawCommentBlock{0, []string{}, &curFile}
} }
line++ line++
} }
return &FileRawBlocks{ if err = readFile.Close(); err != nil {
file,
commentblocks, close(output)
&lines, w.Done()
}, nil s.ErrChannel <- err
return
}
w.Done()
close(output)
} }
func getBlockFirstLine(lang *Lang, block RawCommetBlock) *string { func getBlockFirstLine(lang *Lang, block RawCommentBlock) *string {
txt := strings.TrimLeft(block.Lines[0], *lang.CommentBlockMarker) txt := strings.TrimLeft(block.Lines[0], *lang.CommentBlockMarker)
txt = strings.TrimSpace(txt) txt = strings.TrimSpace(txt)
return &txt return &txt
} }
func JoinComments(state *State, commentMarker string, commentLines *[]string) *string { func JoinComments(state *State, commentMarker string, b RawCommentBlock) *string {
firstLine := state.TrimTypeAndTags(&(*commentLines)[0]) commentLines := b.Lines
firstLine := state.TrimTypeAndTags(&commentLines[0])
result := []string{} result := []string{}
if firstLine != nil { if firstLine != nil {
result = append(result, *firstLine) result = append(result, *firstLine)
} }
if len(*commentLines) > 1 { if len(commentLines) > 1 {
for _, line := range (*commentLines)[1:] { for _, line := range (commentLines)[1:] {
trimmed := strings.TrimSpace(strings.TrimLeft(line, commentMarker)) trimmed := strings.TrimSpace(strings.TrimLeft(line, commentMarker))
result = append(result, trimmed) result = append(result, trimmed)
@ -158,20 +182,18 @@ func JoinComments(state *State, commentMarker string, commentLines *[]string) *s
return &txt return &txt
} }
func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) { func processCommentBlocks(s *State, file chan FileRawBlock, output chan Block, w *sync.WaitGroup) {
ext := filepath.Ext(*file.Path) defer close(output)
lang := s.GetLangForExt(ext) defer w.Done()
if lang == nil {
log.Warnf("don't know about the language for extension '%s'", ext)
return nil, nil
}
// To keep track of opening and closing tags // To keep track of opening and closing tags
var stack CommentStack var stack CommentStack
var blocks []Block
for _, block := range file.Blocks { for fileBlock := range file {
block := fileBlock.Block
lang := fileBlock.Lang
if block.empty() { if block.empty() {
continue continue
} }
@ -187,18 +209,21 @@ func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) {
} }
lastOpenBlock := stack.Pop() lastOpenBlock := stack.Pop()
if lastOpenBlock == nil { if lastOpenBlock == nil {
e := fmt.Errorf( // e := fmt.Errorf(
"closing comment with no Opening at '%s:%d'", // "closing comment with no Opening at '%s:%d'",
*file.Path, block.LineNo, // block.File.Path, block.LineNo,
) // )
return nil, e // s.ErrChannel <- e
return
} }
if type_ == &lastOpenBlock.BlockType { if type_ == &lastOpenBlock.BlockType {
wrappedCode := []string{} wrappedCode := []string{}
for i := lastOpenBlock.Loc.End; i < block.LineNo; i++ { for i := lastOpenBlock.Loc.End; i < block.LineNo; i++ {
wrappedCode = append(wrappedCode, (*file.lines)[i]) ithLine := (block.File.Lines)[i]
wrappedCode = append(wrappedCode, ithLine)
} }
lastOpenBlock.WrappedCode = strings.Join(wrappedCode, "\n") lastOpenBlock.WrappedCode = strings.Join(wrappedCode, "\n")
} }
@ -211,43 +236,59 @@ func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) {
processedBlock := Block{ processedBlock := Block{
BlockType: *type_, BlockType: *type_,
FilePath: file.Path, FilePath: &block.File.Path,
Tags: tags, Tags: tags,
Desc: *JoinComments(s, *lang.CommentBlockMarker, &block.Lines), Desc: *JoinComments(s, *lang.CommentBlockMarker, block),
WrappedCode: "", WrappedCode: "",
Loc: Location{block.LineNo, len(block.Lines) + block.LineNo}, Loc: Location{block.LineNo, len(block.Lines) + block.LineNo},
} }
stack.Push(&processedBlock) stack.Push(&processedBlock)
blocks = append(blocks, processedBlock) output <- processedBlock
} }
return &blocks, nil
} }
func ProcessFiles(state *State, files []string) error { func PrintResult(state *State, res chan Block, w *sync.WaitGroup) {
for _, file := range files { for b := range res {
blocks, err := FindCommentBlocks(state, &file) tags := strings.Join(b.Tags, ",")
cobra.CheckErr(err) fmt.Printf("* %s at %s:%d | Tags: %s\n%s\n%s\n\n",
b.BlockType, *b.FilePath, b.Loc.Start,
if blocks == nil { tags,
continue b.Desc,
} b.WrappedCode)
processedBlocks, err := processCommentBlocks(state, blocks)
cobra.CheckErr(err)
for _, b := range *processedBlocks {
tags := strings.Join(b.Tags, ",")
fmt.Printf("* %s at %s:%d | Tags: %s\n%s\n%s\n\n",
b.BlockType, *b.FilePath, b.Loc.Start,
tags,
b.Desc,
b.WrappedCode)
}
} }
w.Done()
}
func ProcessFiles(state *State, files chan string) error {
var workers sync.WaitGroup
for file := range files {
fileBlocks := make(chan FileRawBlock, 10)
processed := make(chan Block, 100)
if file == "utils/bazel/zlib.bzl" {
println("file: ", file)
}
workers.Add(3)
go FindCommentBlocks(state, file, fileBlocks, &workers)
go processCommentBlocks(state, fileBlocks, processed, &workers)
go PrintResult(state, processed, &workers)
}
controller := make(chan bool)
go func() {
workers.Wait()
close(controller)
}()
select {
case <-controller:
println("Wait done")
case <-time.After(time.Second * 20):
println("timeout")
}
state.WaitGroup.Done()
return nil return nil
} }

View File

@ -18,30 +18,19 @@
package core package core
import ( import (
"github.com/go-git/go-git/v5/plumbing/object" "fmt"
"io"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
func FilterTargetFiles(state *State, storage *[]string) func(f *object.File) error { func FindAllFiles(state *State, files chan string) {
return func(f *object.File) error {
isbin, err := f.IsBinary()
if err != nil {
return err
}
if !isbin && f.Mode.IsFile() {
*storage = append(*storage, f.Name)
}
return nil
}
}
func FindAllFiles(state *State) (*[]string, error) {
if state == nil { if state == nil {
panic("state is nil") panic("state is nil")
} }
defer state.WaitGroup.Done()
defer close(files)
repo := state.Repo repo := state.Repo
head, err := repo.Head() head, err := repo.Head()
@ -53,10 +42,37 @@ func FindAllFiles(state *State) (*[]string, error) {
tree, err := commit.Tree() tree, err := commit.Tree()
cobra.CheckErr(err) cobra.CheckErr(err)
var files []string mm := 0
filter := FilterTargetFiles(state, &files) filesiter := tree.Files()
err = tree.Files().ForEach(filter) for {
cobra.CheckErr(err) f, err := filesiter.Next()
if err == io.EOF {
break
}
if err != nil {
state.ErrChannel <- err
continue
}
if f == nil {
state.ErrChannel <- fmt.Errorf("'f' is nil")
continue
}
isbin, err := f.IsBinary()
if err != nil {
state.ErrChannel <- err
continue
}
if !isbin && f.Mode.IsFile() {
mm++
files <- f.Name
}
}
cobra.CheckErr(err)
println("closing files", mm)
return &files, nil
} }

View File

@ -21,6 +21,7 @@ import (
"path" "path"
"regexp" "regexp"
"strings" "strings"
"sync"
git "github.com/go-git/go-git/v5" git "github.com/go-git/go-git/v5"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
@ -38,6 +39,8 @@ type State struct {
TagPattern *regexp.Regexp TagPattern *regexp.Regexp
FirstLineContent *regexp.Regexp FirstLineContent *regexp.Regexp
DB *DB DB *DB
WaitGroup sync.WaitGroup
ErrChannel chan error
} }
func CreateState(projectRoot string, debug bool) (*State, error) { func CreateState(projectRoot string, debug bool) (*State, error) {
@ -77,9 +80,16 @@ func CreateState(projectRoot string, debug bool) (*State, error) {
state.FirstLineContent = regexp.MustCompile(`[A-Z]+:\s+(?:\[[^]]+\])*\s?(?P<text>.*)`) state.FirstLineContent = regexp.MustCompile(`[A-Z]+:\s+(?:\[[^]]+\])*\s?(?P<text>.*)`)
state.EnabledLangs = ExtsToLang state.EnabledLangs = ExtsToLang
state.ErrChannel = make(chan error, 10000)
return &state, nil return &state, nil
} }
// LogErrors receives errors from s.ErrChannel and logs each one with
// log.Error. The range loop ends, and the method returns, only once
// s.ErrChannel has been closed and drained, so it is meant to be run in its
// own goroutine.
//
// NOTE(review): the list command starts this with `go state.LogErrors()` and
// closes the channel right before returning, without waiting for this
// goroutine; errors still buffered at that point may never be logged —
// confirm whether a done signal is needed.
func (s *State) LogErrors() {
for err := range s.ErrChannel {
log.Error(err)
}
}
func (s *State) GetLangForExt(ext string) *Lang { func (s *State) GetLangForExt(ext string) *Lang {
// If the extension is the filename itself. // If the extension is the filename itself.