Reimplement the comment extractor with CSP

This commit is contained in:
Sameer Rahmani 2022-07-18 19:04:40 +01:00
parent 49b9afbc83
commit 2f15bfdfd3
4 changed files with 173 additions and 104 deletions

View File

@ -27,13 +27,15 @@ var listCmd = &cobra.Command{
Short: "Lists all the TODOs and annotations in the project",
Long: `Lists all the TODOs and annotations in the project`,
Run: func(cmd *cobra.Command, args []string) {
files, err := core.FindAllFiles(state)
ch := make(chan string, 10)
if err != nil {
panic(err)
}
state.WaitGroup.Add(2)
go core.FindAllFiles(state, ch)
go core.ProcessFiles(state, ch)
go state.LogErrors()
core.ProcessFiles(state, *files)
state.WaitGroup.Wait()
close(state.ErrChannel)
},
}

View File

@ -20,14 +20,14 @@ package core
import (
"bufio"
"fmt"
"os"
"path"
"path/filepath"
"sync"
"time"
"strings"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
type Location struct {
@ -35,15 +35,16 @@ type Location struct {
End int
}
type RawCommetBlock struct {
// RawCommentBlock is a group of comment lines collected from one file while
// scanning it, before any type or tag parsing has been applied.
//
// Field order matters: the scanner builds values with positional literals.
type RawCommentBlock struct {
// LineNo is the line number recorded for the block; it is used as the
// start of the block's Location (presumably the block's first line;
// confirm against the scanner).
LineNo int
// Lines holds the whitespace-trimmed comment lines in the order they were
// appended.
Lines []string
// File points at the File record the block was read from.
File *File
}
type FileRawBlocks struct {
Path *string
Blocks []RawCommetBlock
lines *[]string
// File is the per-file record the comment scanner fills in while reading a
// source file line by line.
type File struct {
// Path is the file path as handed to the scanner; the scanner joins it
// onto the project root before opening the file.
Path string
// Blocks accumulates every non-empty raw comment block found in the file.
Blocks []RawCommentBlock
// Lines holds the untrimmed text of each line read from the file.
Lines []string
}
type Block struct {
@ -54,6 +55,10 @@ type Block struct {
WrappedCode string
Loc Location
}
// FileRawBlock is the message passed from the comment scanner to the block
// processor: one raw comment block together with the language resolved from
// the file's extension.
//
// Field order matters: values are built with a positional literal.
type FileRawBlock struct {
// Block is the raw comment block that was found.
Block RawCommentBlock
// Lang is the language definition used to interpret the block, e.g. its
// comment marker.
Lang *Lang
}
type CommentStack struct {
storage []*Block
@ -73,41 +78,53 @@ func (s *CommentStack) Pop() *Block {
return item
}
func (b *RawCommetBlock) append(txt string) {
// append adds txt as the new last line of the block.
func (b *RawCommentBlock) append(txt string) {
	extended := append(b.Lines, txt)
	b.Lines = extended
}
func (b *RawCommetBlock) empty() bool {
// empty reports whether the block has not collected any comment line yet.
func (b *RawCommentBlock) empty() bool {
	count := len(b.Lines)
	return count == 0
}
func FindCommentBlocks(s *State, file *string) (*FileRawBlocks, error) {
fullPath := path.Join(s.ProjectRoot, *file)
func FindCommentBlocks(s *State, file string, output chan FileRawBlock, w *sync.WaitGroup) {
fullPath := path.Join(s.ProjectRoot, file)
readFile, err := os.Open(fullPath)
if err != nil {
return nil, err
s.ErrChannel <- err
close(output)
w.Done()
return
}
defer readFile.Close()
ext := filepath.Ext(*file)
ext := filepath.Ext(file)
lang := s.GetLangForExt(ext)
if lang == nil {
log.Warnf("don't know about the language for extension '%s'", ext)
return nil, nil
close(output)
w.Done()
// log.Warnf("don't know about the language for extension '%s'", ext)
return
}
scanner := bufio.NewScanner(readFile)
scanner.Split(bufio.ScanLines)
var commentblocks []RawCommetBlock
curFile := File{
Path: file,
Blocks: []RawCommentBlock{},
Lines: []string{},
}
var line = 1
var block RawCommetBlock
var lines []string
block := RawCommentBlock{0, []string{}, &curFile}
for scanner.Scan() {
rawText := scanner.Text()
lines = append(lines, rawText)
curFile.Lines = append(curFile.Lines, rawText)
txt := strings.TrimSpace(rawText)
if strings.HasPrefix(txt, *lang.CommentBlockMarker) {
@ -117,37 +134,44 @@ func FindCommentBlocks(s *State, file *string) (*FileRawBlocks, error) {
block.append(txt)
} else {
if !block.empty() {
commentblocks = append(commentblocks, block)
curFile.Blocks = append(curFile.Blocks, block)
output <- FileRawBlock{block, lang}
}
block = RawCommetBlock{}
block = RawCommentBlock{0, []string{}, &curFile}
}
line++
}
return &FileRawBlocks{
file,
commentblocks,
&lines,
}, nil
if err = readFile.Close(); err != nil {
close(output)
w.Done()
s.ErrChannel <- err
return
}
w.Done()
close(output)
}
func getBlockFirstLine(lang *Lang, block RawCommetBlock) *string {
// getBlockFirstLine returns the first line of block with the leading comment
// marker characters and the surrounding whitespace removed.
//
// The marker is used as a cutset (strings.TrimLeft), so every leading
// character that occurs in the marker is stripped, not just one literal
// prefix. The block must contain at least one line.
func getBlockFirstLine(lang *Lang, block RawCommentBlock) *string {
	first := strings.TrimSpace(
		strings.TrimLeft(block.Lines[0], *lang.CommentBlockMarker),
	)
	return &first
}
func JoinComments(state *State, commentMarker string, commentLines *[]string) *string {
firstLine := state.TrimTypeAndTags(&(*commentLines)[0])
func JoinComments(state *State, commentMarker string, b RawCommentBlock) *string {
commentLines := b.Lines
firstLine := state.TrimTypeAndTags(&commentLines[0])
result := []string{}
if firstLine != nil {
result = append(result, *firstLine)
}
if len(*commentLines) > 1 {
for _, line := range (*commentLines)[1:] {
if len(commentLines) > 1 {
for _, line := range (commentLines)[1:] {
trimmed := strings.TrimSpace(strings.TrimLeft(line, commentMarker))
result = append(result, trimmed)
@ -158,20 +182,18 @@ func JoinComments(state *State, commentMarker string, commentLines *[]string) *s
return &txt
}
func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) {
ext := filepath.Ext(*file.Path)
lang := s.GetLangForExt(ext)
if lang == nil {
log.Warnf("don't know about the language for extension '%s'", ext)
return nil, nil
}
func processCommentBlocks(s *State, file chan FileRawBlock, output chan Block, w *sync.WaitGroup) {
defer close(output)
defer w.Done()
// To keep track of opening and closing tags
var stack CommentStack
var blocks []Block
for _, block := range file.Blocks {
for fileBlock := range file {
block := fileBlock.Block
lang := fileBlock.Lang
if block.empty() {
continue
}
@ -187,18 +209,21 @@ func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) {
}
lastOpenBlock := stack.Pop()
if lastOpenBlock == nil {
e := fmt.Errorf(
"closing comment with no Opening at '%s:%d'",
*file.Path, block.LineNo,
)
return nil, e
// e := fmt.Errorf(
// "closing comment with no Opening at '%s:%d'",
// block.File.Path, block.LineNo,
// )
// s.ErrChannel <- e
return
}
if type_ == &lastOpenBlock.BlockType {
wrappedCode := []string{}
for i := lastOpenBlock.Loc.End; i < block.LineNo; i++ {
wrappedCode = append(wrappedCode, (*file.lines)[i])
ithLine := (block.File.Lines)[i]
wrappedCode = append(wrappedCode, ithLine)
}
lastOpenBlock.WrappedCode = strings.Join(wrappedCode, "\n")
}
@ -211,43 +236,59 @@ func processCommentBlocks(s *State, file *FileRawBlocks) (*[]Block, error) {
processedBlock := Block{
BlockType: *type_,
FilePath: file.Path,
FilePath: &block.File.Path,
Tags: tags,
Desc: *JoinComments(s, *lang.CommentBlockMarker, &block.Lines),
Desc: *JoinComments(s, *lang.CommentBlockMarker, block),
WrappedCode: "",
Loc: Location{block.LineNo, len(block.Lines) + block.LineNo},
}
stack.Push(&processedBlock)
blocks = append(blocks, processedBlock)
output <- processedBlock
}
return &blocks, nil
}
func ProcessFiles(state *State, files []string) error {
for _, file := range files {
blocks, err := FindCommentBlocks(state, &file)
cobra.CheckErr(err)
if blocks == nil {
continue
}
processedBlocks, err := processCommentBlocks(state, blocks)
cobra.CheckErr(err)
for _, b := range *processedBlocks {
tags := strings.Join(b.Tags, ",")
fmt.Printf("* %s at %s:%d | Tags: %s\n%s\n%s\n\n",
b.BlockType, *b.FilePath, b.Loc.Start,
tags,
b.Desc,
b.WrappedCode)
}
// PrintResult drains res and writes one report entry per processed block to
// standard output: block type, file path, start line, comma-joined tags,
// description and the wrapped code. Once res is closed and fully drained it
// marks itself as finished on w. The state argument is currently unused.
func PrintResult(state *State, res chan Block, w *sync.WaitGroup) {
	const layout = "* %s at %s:%d | Tags: %s\n%s\n%s\n\n"

	for {
		blk, open := <-res
		if !open {
			break
		}

		fmt.Printf(layout,
			blk.BlockType, *blk.FilePath, blk.Loc.Start,
			strings.Join(blk.Tags, ","),
			blk.Desc,
			blk.WrappedCode)
	}

	w.Done()
}
// ProcessFiles reads file paths from files until the channel is closed and
// starts a three-stage pipeline for each path:
//
//	FindCommentBlocks -> processCommentBlocks -> PrintResult
//
// The stages of one file are connected by their own buffered channels. After
// the last path has been received, ProcessFiles waits for every stage to
// finish, but no longer than a fixed drain timeout.
//
// state.WaitGroup is signalled exactly once on every exit path, so the caller
// must have registered this call with state.WaitGroup.Add beforehand.
//
// It returns nil when all workers finished in time. On timeout the error is
// both sent to state.ErrChannel (the usual caller starts this function with
// `go` and never sees the return value) and returned; workers that are still
// running at that point are left behind.
func ProcessFiles(state *State, files chan string) error {
	// Upper bound for draining the pipelines once the input is exhausted.
	const drainTimeout = 20 * time.Second

	// Deferred first so it runs last: the timeout error below is queued on
	// state.ErrChannel before the caller is released and may close it.
	defer state.WaitGroup.Done()

	var workers sync.WaitGroup
	for file := range files {
		fileBlocks := make(chan FileRawBlock, 10)
		processed := make(chan Block, 100)

		workers.Add(3)
		go FindCommentBlocks(state, file, fileBlocks, &workers)
		go processCommentBlocks(state, fileBlocks, processed, &workers)
		go PrintResult(state, processed, &workers)
	}

	// Turn the blocking Wait into a channel so it can be raced against the
	// timer.
	done := make(chan struct{})
	go func() {
		workers.Wait()
		close(done)
	}()

	timer := time.NewTimer(drainTimeout)
	defer timer.Stop()

	select {
	case <-done:
		return nil
	case <-timer.C:
		err := fmt.Errorf("timed out after %s waiting for comment workers", drainTimeout)
		state.ErrChannel <- err
		return err
	}
}

View File

@ -18,30 +18,19 @@
package core
import (
"github.com/go-git/go-git/v5/plumbing/object"
"fmt"
"io"
"github.com/spf13/cobra"
)
// FilterTargetFiles builds a per-file callback that appends the name of every
// non-binary regular file to storage. The callback returns the error from the
// binary check unchanged and nil otherwise. The state argument is currently
// unused.
func FilterTargetFiles(state *State, storage *[]string) func(f *object.File) error {
	collect := func(f *object.File) error {
		binary, err := f.IsBinary()
		switch {
		case err != nil:
			return err
		case binary:
			// Binary content is never scanned for comments.
			return nil
		case !f.Mode.IsFile():
			// Only regular files are collected.
			return nil
		}

		*storage = append(*storage, f.Name)
		return nil
	}

	return collect
}
func FindAllFiles(state *State) (*[]string, error) {
func FindAllFiles(state *State, files chan string) {
if state == nil {
panic("state is nil")
}
defer state.WaitGroup.Done()
defer close(files)
repo := state.Repo
head, err := repo.Head()
@ -53,10 +42,37 @@ func FindAllFiles(state *State) (*[]string, error) {
tree, err := commit.Tree()
cobra.CheckErr(err)
var files []string
filter := FilterTargetFiles(state, &files)
err = tree.Files().ForEach(filter)
cobra.CheckErr(err)
mm := 0
filesiter := tree.Files()
for {
f, err := filesiter.Next()
if err == io.EOF {
break
}
if err != nil {
state.ErrChannel <- err
continue
}
if f == nil {
state.ErrChannel <- fmt.Errorf("'f' is nil")
continue
}
isbin, err := f.IsBinary()
if err != nil {
state.ErrChannel <- err
continue
}
if !isbin && f.Mode.IsFile() {
mm++
files <- f.Name
}
}
cobra.CheckErr(err)
println("closing files", mm)
return &files, nil
}

View File

@ -21,6 +21,7 @@ import (
"path"
"regexp"
"strings"
"sync"
git "github.com/go-git/go-git/v5"
log "github.com/sirupsen/logrus"
@ -38,6 +39,8 @@ type State struct {
TagPattern *regexp.Regexp
FirstLineContent *regexp.Regexp
DB *DB
WaitGroup sync.WaitGroup
ErrChannel chan error
}
func CreateState(projectRoot string, debug bool) (*State, error) {
@ -77,9 +80,16 @@ func CreateState(projectRoot string, debug bool) (*State, error) {
state.FirstLineContent = regexp.MustCompile(`[A-Z]+:\s+(?:\[[^]]+\])*\s?(?P<text>.*)`)
state.EnabledLangs = ExtsToLang
state.ErrChannel = make(chan error, 10000)
return &state, nil
}
// LogErrors logs every error received on s.ErrChannel and returns once the
// channel has been closed and drained. It blocks while the channel is open,
// so it is meant to run in its own goroutine.
func (s *State) LogErrors() {
	for {
		err, open := <-s.ErrChannel
		if !open {
			return
		}
		log.Error(err)
	}
}
func (s *State) GetLangForExt(ext string) *Lang {
// If the extension is the filename itself.