From 2f15bfdfd3065a0aa331f489d203dc463e8bfc27 Mon Sep 17 00:00:00 2001 From: Sameer Rahmani Date: Mon, 18 Jul 2022 19:04:40 +0100 Subject: [PATCH] Reimplement the comment extractor with CSP --- cmd/list.go | 12 +-- pkg/core/blocks.go | 193 +++++++++++++++++++++++++++------------------ pkg/core/core.go | 62 +++++++++------ pkg/core/state.go | 10 +++ 4 files changed, 173 insertions(+), 104 deletions(-) diff --git a/cmd/list.go b/cmd/list.go index 3aa1db5..9c10239 100644 --- a/cmd/list.go +++ b/cmd/list.go @@ -27,13 +27,15 @@ var listCmd = &cobra.Command{ Short: "Lists all the TODOs and annotations in the project", Long: `Lists all the TODOs and annotations in the project`, Run: func(cmd *cobra.Command, args []string) { - files, err := core.FindAllFiles(state) + ch := make(chan string, 10) - if err != nil { - panic(err) - } + state.WaitGroup.Add(2) + go core.FindAllFiles(state, ch) + go core.ProcessFiles(state, ch) + go state.LogErrors() - core.ProcessFiles(state, *files) + state.WaitGroup.Wait() + close(state.ErrChannel) }, } diff --git a/pkg/core/blocks.go b/pkg/core/blocks.go index db24b06..4e202a1 100644 --- a/pkg/core/blocks.go +++ b/pkg/core/blocks.go @@ -20,14 +20,14 @@ package core import ( "bufio" "fmt" + "os" "path" "path/filepath" + "sync" + "time" "strings" - - log "github.com/sirupsen/logrus" - "github.com/spf13/cobra" ) type Location struct { @@ -35,15 +35,16 @@ type Location struct { End int } -type RawCommetBlock struct { +type RawCommentBlock struct { LineNo int Lines []string + File *File } -type FileRawBlocks struct { - Path *string - Blocks []RawCommetBlock - lines *[]string +type File struct { + Path string + Blocks []RawCommentBlock + Lines []string } type Block struct { @@ -54,6 +55,10 @@ type Block struct { WrappedCode string Loc Location } +type FileRawBlock struct { + Block RawCommentBlock + Lang *Lang +} type CommentStack struct { storage []*Block @@ -73,41 +78,53 @@ func (s *CommentStack) Pop() *Block { return item } -func (b *RawCommetBlock) append(txt string) { +func (b *RawCommentBlock) append(txt string) { b.Lines = append(b.Lines, txt) } -func (b *RawCommetBlock) empty() bool { +func (b *RawCommentBlock) empty() bool { return len(b.Lines) == 0 } -func FindCommentBlocks(s *State, file *string) (*FileRawBlocks, error) { - fullPath := path.Join(s.ProjectRoot, *file) +func FindCommentBlocks(s *State, file string, output chan FileRawBlock, w *sync.WaitGroup) { + fullPath := path.Join(s.ProjectRoot, file) readFile, err := os.Open(fullPath) if err != nil { - return nil, err + s.ErrChannel <- err + close(output) + w.Done() + + return } - defer readFile.Close() - ext := filepath.Ext(*file) + + ext := filepath.Ext(file) lang := s.GetLangForExt(ext) if lang == nil { - log.Warnf("don't know about the language for extension '%s'", ext) - return nil, nil + close(output) + w.Done() + // log.Warnf("don't know about the language for extension '%s'", ext) + return } scanner := bufio.NewScanner(readFile) scanner.Split(bufio.ScanLines) - var commentblocks []RawCommetBlock + curFile := File{ + Path: file, + Blocks: []RawCommentBlock{}, + Lines: []string{}, + } + var line = 1 - var block RawCommetBlock - var lines []string + block := RawCommentBlock{0, []string{}, &curFile} + for scanner.Scan() { rawText := scanner.Text() - lines = append(lines, rawText) + curFile.Lines = append(curFile.Lines, rawText) + txt := strings.TrimSpace(rawText) if strings.HasPrefix(txt, *lang.CommentBlockMarker) { @@ -117,37 +134,44 @@ func FindCommentBlocks(s *State, file *string) (*FileRawBlocks, error) { block.append(txt) } else { if !block.empty() { - commentblocks = append(commentblocks, block) + curFile.Blocks = append(curFile.Blocks, block) + output <- FileRawBlock{block, lang} } - block = RawCommetBlock{} + block = RawCommentBlock{0, []string{}, &curFile} } line++ } - return &FileRawBlocks{ - file, - commentblocks, - &lines, - }, nil + if err = readFile.Close(); err != nil { + + close(output) + w.Done() + s.ErrChannel <- err + return + } + + w.Done() + close(output) } -func getBlockFirstLine(lang *Lang, block RawCommetBlock) *string { +func getBlockFirstLine(lang *Lang, block RawCommentBlock) *string { txt := strings.TrimLeft(block.Lines[0], *lang.CommentBlockMarker) txt = strings.TrimSpace(txt) return &txt } -func JoinComments(state *State, commentMarker string, commentLines *[]string) *string { - firstLine := state.TrimTypeAndTags(&(*commentLines)[0]) +func JoinComments(state *State, commentMarker string, b RawCommentBlock) *string { + commentLines := b.Lines + firstLine := state.TrimTypeAndTags(&commentLines[0]) result := []string{} if firstLine != nil { result = append(result, *firstLine) } - if len(*commentLines) > 1 { - for _, line := range (*commentLines)[1:] { + if len(commentLines) > 1 { + for _, line := range (commentLines)[1:] { trimmed := strings.TrimSpace(strings.TrimLeft(line, commentMarker)) result = append(result, trimmed) @@ -158,20 +182,18 @@ func JoinComments(state *State, commentMarker string, commentLines *[]string) *s return &txt } -func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) { - ext := filepath.Ext(*file.Path) - lang := s.GetLangForExt(ext) - - if lang == nil { - log.Warnf("don't know about the language for extension '%s'", ext) - return nil, nil - } +func processCommentBlocks(s *State, file chan FileRawBlock, output chan Block, w *sync.WaitGroup) { + defer close(output) + defer w.Done() // To keep track of opening and closing tags var stack CommentStack - var blocks []Block - for _, block := range file.Blocks { + for fileBlock := range file { + + block := fileBlock.Block + lang := fileBlock.Lang + if block.empty() { continue } @@ -187,18 +209,21 @@ func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) { } lastOpenBlock := stack.Pop() + if lastOpenBlock == nil { - e := fmt.Errorf( - "closing comment with no Opening at '%s:%d'", - *file.Path, block.LineNo, - ) - return nil, e + // e := fmt.Errorf( + // "closing comment with no Opening at '%s:%d'", + // block.File.Path, block.LineNo, + // ) + // s.ErrChannel <- e + return } if type_ == &lastOpenBlock.BlockType { wrappedCode := []string{} for i := lastOpenBlock.Loc.End; i < block.LineNo; i++ { - wrappedCode = append(wrappedCode, (*file.lines)[i]) + ithLine := (block.File.Lines)[i] + wrappedCode = append(wrappedCode, ithLine) } lastOpenBlock.WrappedCode = strings.Join(wrappedCode, "\n") } @@ -211,43 +236,59 @@ func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) { processedBlock := Block{ BlockType: *type_, - FilePath: file.Path, + FilePath: &block.File.Path, Tags: tags, - Desc: *JoinComments(s, *lang.CommentBlockMarker, &block.Lines), + Desc: *JoinComments(s, *lang.CommentBlockMarker, block), WrappedCode: "", Loc: Location{block.LineNo, len(block.Lines) + block.LineNo}, } stack.Push(&processedBlock) - blocks = append(blocks, processedBlock) + output <- processedBlock } - - return &blocks, nil } -func ProcessFiles(state *State, files []string) error { - for _, file := range files { - blocks, err := FindCommentBlocks(state, &file) - cobra.CheckErr(err) - - if blocks == nil { - continue - } - - processedBlocks, err := processCommentBlocks(state, blocks) - - cobra.CheckErr(err) - - for _, b := range *processedBlocks { - tags := strings.Join(b.Tags, ",") - fmt.Printf("* %s at %s:%d | Tags: %s\n%s\n%s\n\n", - b.BlockType, *b.FilePath, b.Loc.Start, - tags, - b.Desc, - b.WrappedCode) - } - +func PrintResult(state *State, res chan Block, w *sync.WaitGroup) { + for b := range res { + tags := strings.Join(b.Tags, ",") + fmt.Printf("* %s at %s:%d | Tags: %s\n%s\n%s\n\n", + b.BlockType, *b.FilePath, b.Loc.Start, + tags, + b.Desc, + b.WrappedCode) } + w.Done() +} + +func ProcessFiles(state *State, files chan string) error { + var workers sync.WaitGroup + + for file := range files { + fileBlocks := make(chan FileRawBlock, 10) + processed := make(chan Block, 100) + + if file == "utils/bazel/zlib.bzl" { + println("file: ", file) + } + workers.Add(3) + go FindCommentBlocks(state, file, fileBlocks, &workers) + go processCommentBlocks(state, fileBlocks, processed, &workers) + go PrintResult(state, processed, &workers) + } + + controller := make(chan bool) + go func() { + workers.Wait() + close(controller) + }() + + select { + case <-controller: + println("Wait done") + case <-time.After(time.Second * 20): + println("timeout") + } + state.WaitGroup.Done() return nil } diff --git a/pkg/core/core.go b/pkg/core/core.go index c291cdf..998fdb2 100644 --- a/pkg/core/core.go +++ b/pkg/core/core.go @@ -18,30 +18,19 @@ package core import ( - "github.com/go-git/go-git/v5/plumbing/object" + "fmt" + "io" + "github.com/spf13/cobra" ) -func FilterTargetFiles(state *State, storage *[]string) func(f *object.File) error { - return func(f *object.File) error { - isbin, err := f.IsBinary() - if err != nil { - return err - } - - if !isbin && f.Mode.IsFile() { - - *storage = append(*storage, f.Name) - } - - return nil - } -} - -func FindAllFiles(state *State) (*[]string, error) { +func FindAllFiles(state *State, files chan string) { if state == nil { panic("state is nil") } + defer state.WaitGroup.Done() + defer close(files) + repo := state.Repo head, err := repo.Head() @@ -53,10 +42,37 @@ func FindAllFiles(state *State) (*[]string, error) { tree, err := commit.Tree() cobra.CheckErr(err) - var files []string - filter := FilterTargetFiles(state, &files) - err = tree.Files().ForEach(filter) - cobra.CheckErr(err) + mm := 0 + filesiter := tree.Files() + for { + f, err := filesiter.Next() + if err == io.EOF { + break + } + if err != nil { + state.ErrChannel <- err + continue + } + + if f == nil { + state.ErrChannel <- fmt.Errorf("'f' is nil") + continue + } + + isbin, err := f.IsBinary() + if err != nil { + state.ErrChannel <- err + continue + } + + if !isbin && f.Mode.IsFile() { + mm++ + files <- f.Name + + } + } + + cobra.CheckErr(err) + println("closing files", mm) - return &files, nil } diff --git a/pkg/core/state.go b/pkg/core/state.go index c30bd6c..aa43574 100644 --- a/pkg/core/state.go +++ b/pkg/core/state.go @@ -21,6 +21,7 @@ import ( "path" "regexp" "strings" + "sync" git "github.com/go-git/go-git/v5" log "github.com/sirupsen/logrus" @@ -38,6 +39,8 @@ type State struct { TagPattern *regexp.Regexp FirstLineContent *regexp.Regexp DB *DB + WaitGroup sync.WaitGroup + ErrChannel chan error } func CreateState(projectRoot string, debug bool) (*State, error) { @@ -77,9 +80,16 @@ func CreateState(projectRoot string, debug bool) (*State, error) { state.FirstLineContent = regexp.MustCompile(`[A-Z]+:\s+(?:\[[^]]+\])*\s?(?P.*)`) state.EnabledLangs = ExtsToLang + state.ErrChannel = make(chan error, 10000) return &state, nil } +func (s *State) LogErrors() { + for err := range s.ErrChannel { + log.Error(err) + } +} + func (s *State) GetLangForExt(ext string) *Lang { // If the extension is the filename itself.