Finish up the basic bot behavior and the general speech recognition
This commit is contained in:
parent
b1f2020eaf
commit
c7b918ddae
|
@ -5,4 +5,5 @@ coqui/
|
|||
.env
|
||||
nc.tar.xz
|
||||
*.ogg
|
||||
*.wav
|
||||
*.wav
|
||||
models
|
10
builder
10
builder
|
@ -52,11 +52,6 @@ export CGO_LDFLAGS=-L$ME/coqui/
|
|||
export CGO_CXXFLAGS=-I$ME/coqui/
|
||||
export LD_LIBRARY_PATH=$ME/coqui/:$LD_LIBRARY_PATH
|
||||
|
||||
# The `builder` script is supposed to be run from the
|
||||
# root of the source tree
|
||||
ROOT_DIR=$ME
|
||||
BUILD_DIR=$ROOT_DIR/build
|
||||
|
||||
default_model="$ME/models/default.tflite"
|
||||
default_scorer="$ME/models/default.scorer"
|
||||
|
||||
|
@ -105,13 +100,14 @@ function setup() { ## Setup the working directory and make it ready for developm
|
|||
tar -Jxvf nc.tar.xz -C "$ME/coqui"
|
||||
|
||||
info "Installing coqui go binding..."
|
||||
go get -u github.com/asticode/go-asticoqui/...
|
||||
go mod download
|
||||
|
||||
info "Downloading the model..."
|
||||
wget "$MODEL" -O "$default_model$ME/models/default.tflite"
|
||||
mkdir -p "$(dirname $default_model)"
|
||||
wget "$MODEL" -O "$default_model"
|
||||
|
||||
info "Downloading the scorer..."
|
||||
mkdir -p "$(dirname $default_scorer)"
|
||||
wget "$SCORER" -O "$default_scorer"
|
||||
deactivate
|
||||
}
|
||||
|
|
84
main.go
84
main.go
|
@ -1,84 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"time"
|
||||
"github.com/joho/godotenv"
|
||||
tele "gopkg.in/telebot.v3"
|
||||
)
|
||||
|
||||
const user = "lxsameer"
|
||||
const version = "0.1.0"
|
||||
|
||||
func main1() {
|
||||
err := godotenv.Load(".env")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
pref := tele.Settings{
|
||||
Token: os.Getenv("BOT_TOKEN"),
|
||||
Poller: &tele.LongPoller{Timeout: 10 * time.Second},
|
||||
}
|
||||
|
||||
b, err := tele.NewBot(pref)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
b.Handle(tele.OnText, func(c tele.Context) error {
|
||||
me := c.Sender().Username
|
||||
if (me != user) {
|
||||
return c.Send("Get lost!")
|
||||
}
|
||||
return c.Send("Get lost!")
|
||||
})
|
||||
|
||||
b.Handle("/version", func(c tele.Context) error {
|
||||
me := c.Sender().Username
|
||||
if (me != user) {
|
||||
return c.Send("Get lost!")
|
||||
}
|
||||
|
||||
return c.Send(version)
|
||||
})
|
||||
|
||||
b.Handle(tele.OnVoice, func(c tele.Context) error {
|
||||
me := c.Sender().Username
|
||||
if (me != user) {
|
||||
return c.Send("Get lost!")
|
||||
}
|
||||
|
||||
fmt.Println("Got a voice!")
|
||||
audio := c.Message().Voice.File
|
||||
time := c.Message().Time().Format("2006-01-02_15:04:05")
|
||||
c.Bot().Download(&audio, time + ".ogg")
|
||||
fmt.Printf("Got ahthnthnt voice!\n")
|
||||
|
||||
// _, err = b.FileReader.Read(content)
|
||||
|
||||
// if (err != nil) {
|
||||
// fmt.Printf("Error: %s\n", err)
|
||||
// return c.Send("Error!")
|
||||
// }
|
||||
|
||||
// f, err := os.Create("/tmp/f.mp3")
|
||||
|
||||
// if (err != nil) {
|
||||
// fmt.Printf("Error: %s\n", err)
|
||||
// return c.Send("Error!")
|
||||
// }
|
||||
// defer f.Close()
|
||||
|
||||
// f.Write(content)
|
||||
|
||||
fmt.Println("user:", me)
|
||||
return c.Send("Got it!")
|
||||
})
|
||||
|
||||
b.Start()
|
||||
}
|
59
orion.go
59
orion.go
|
@ -21,38 +21,73 @@ import (
|
|||
"flag"
|
||||
"log"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"lxsameer.com/go/orion/pkg/core"
|
||||
"github.com/asticode/go-asticoqui"
|
||||
"github.com/joho/godotenv"
|
||||
|
||||
)
|
||||
|
||||
var model = flag.String("model", "", "Path to the model (protocol buffer binary file)")
|
||||
var scorer = flag.String("scorer", "", "Path to the external scorer")
|
||||
var model = flag.String("model", "models/default.tflite", "Path to the model (protocol buffer binary file)")
|
||||
var scorer = flag.String("scorer", "models/default.scorer", "Path to the external scorer")
|
||||
var owner = flag.String("owner", "", "Telegram user id that is allowed to use this bot")
|
||||
var storage = flag.String("voice-storage", "", "Where to store the voices")
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
log.SetFlags(0)
|
||||
|
||||
if *model == "" {
|
||||
err := godotenv.Load(".env")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
if *model == "" || *scorer == "" {
|
||||
// In case of error print error and print usage
|
||||
// This can also be done by passing -h or --help flags
|
||||
fmt.Fprintf(flag.CommandLine.Output(), "Usage of %s:\n", os.Args[0])
|
||||
flag.PrintDefaults()
|
||||
return
|
||||
}
|
||||
// // Initialize Coqui
|
||||
// m, err := asticoqui.New(*model)
|
||||
// if err != nil {
|
||||
// log.Fatal("Failed initializing model: ", err)
|
||||
// }
|
||||
// defer m.Close()
|
||||
a1 := "/home/lxsameer/src/orion/2022-04-09_11:48:57.ogg"
|
||||
a2 := "/home/lxsameer/src/orion/blah.wav"
|
||||
err := core.ConvertOggtoWav(&a1, &a2)
|
||||
|
||||
if *storage == "" {
|
||||
dir, err := os.UserHomeDir()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return
|
||||
}
|
||||
*storage = filepath.Join(dir, ".orion", "storage")
|
||||
}
|
||||
|
||||
bot, err := core.CreateBot(storage)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
bot.Owner = *owner
|
||||
|
||||
// Initialize Coqui
|
||||
m, err := asticoqui.New(*model)
|
||||
if err != nil {
|
||||
log.Fatal("Failed initializing model: ", err)
|
||||
}
|
||||
defer m.Close()
|
||||
|
||||
if err := m.EnableExternalScorer(*scorer); err != nil {
|
||||
log.Fatal("Failed enabling external scorer: ", err)
|
||||
return
|
||||
}
|
||||
|
||||
bot.Model = m
|
||||
|
||||
bot.StartBot()
|
||||
|
||||
// a1 := "/home/lxsameer/src/orion/2022-04-09_11:48:57.ogg"
|
||||
// a2 := "/home/lxsameer/src/orion/blah.wav"
|
||||
// err = core.ConvertOggtoWav(&a1, &a2)
|
||||
|
||||
log.Println("done!")
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ import (
|
|||
)
|
||||
|
||||
func ConvertOggtoWav(inputFile *string, outputFile *string) error {
|
||||
cmd := exec.Command("ffmpeg", "-i", *inputFile, *outputFile)
|
||||
cmd := exec.Command("ffmpeg", "-i", *inputFile, "-ar", "16000", *outputFile)
|
||||
if err := cmd.Run(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
216
pkg/core/core.go
216
pkg/core/core.go
|
@ -1,12 +1,210 @@
|
|||
/*
|
||||
Orion --- Speech to text bot
|
||||
|
||||
Copyright (c) 2022 Sameer Rahmani <lxsameer@gnu.org>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License.
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
package core
|
||||
|
||||
// import (
|
||||
// "flag"
|
||||
// "fmt"
|
||||
// "io"
|
||||
// "log"
|
||||
// "os"
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
// "github.com/asticode/go-asticoqui"
|
||||
// "github.com/cryptix/wav"
|
||||
// )
|
||||
"github.com/asticode/go-asticoqui"
|
||||
"github.com/cryptix/wav"
|
||||
tele "gopkg.in/telebot.v3"
|
||||
)
|
||||
|
||||
type Bot struct {
|
||||
Owner string
|
||||
StoragePath string
|
||||
VoiceStorage string
|
||||
Token string
|
||||
Model *asticoqui.Model
|
||||
ExtendedMetaData bool
|
||||
MaxResults uint
|
||||
}
|
||||
|
||||
func CreateBot(storage *string) (*Bot,error) {
|
||||
voiceStorage := filepath.Join(*storage, "voices")
|
||||
err := os.MkdirAll(voiceStorage, 0750)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &Bot{
|
||||
Token: os.Getenv("BOT_TOKEN"),
|
||||
MaxResults: 5,
|
||||
ExtendedMetaData: false,
|
||||
StoragePath: *storage,
|
||||
VoiceStorage: voiceStorage,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (bot *Bot) StartBot() {
|
||||
pref := tele.Settings{
|
||||
Token: bot.Token,
|
||||
// TODO: Move this to the config
|
||||
Poller: &tele.LongPoller{Timeout: 10 * time.Second},
|
||||
}
|
||||
|
||||
b, err := tele.NewBot(pref)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
// b.Handle(tele.OnText, func(c tele.Context) error {
|
||||
// if !bot.isOwner(c) {
|
||||
// return nil
|
||||
// }
|
||||
// })
|
||||
|
||||
b.Handle(tele.OnVoice, func(c tele.Context) error {
|
||||
if !bot.isOwner(c) {
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Println("Got a voice!")
|
||||
filepath, err := bot.StoreVoice(c)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
text, err := bot.ConvertToText(filepath)
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
s := strings.Join(*text, "**")
|
||||
|
||||
c.Send(s)
|
||||
fmt.Println(s)
|
||||
|
||||
return c.Send("Got it!")
|
||||
})
|
||||
|
||||
b.Start()
|
||||
}
|
||||
|
||||
func (bot *Bot) isOwner(c tele.Context) bool {
|
||||
me := c.Sender().Username
|
||||
log.Printf("[Info] User '%s' is trying to connect!", me)
|
||||
|
||||
if me != bot.Owner {
|
||||
c.Send("Get lost!")
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (bot *Bot) StoreVoice(c tele.Context) (*string, error) {
|
||||
v := c.Message().Voice.File
|
||||
time := c.Message().Time().Format("2006-01-02_15:04:05")
|
||||
|
||||
path := filepath.Join(bot.VoiceStorage, time + ".ogg")
|
||||
wavePath := filepath.Join(bot.VoiceStorage, time + ".wav")
|
||||
|
||||
if err := c.Bot().Download(&v, path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := ConvertOggtoWav(&path, &wavePath); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := os.Remove(path); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &wavePath, nil
|
||||
}
|
||||
|
||||
|
||||
func (bot *Bot) ConvertToText(voice *string) (*[]string, error) {
|
||||
// Stat audio
|
||||
i, err := os.Stat(*voice)
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("stating %s failed: %w", *voice, err)
|
||||
}
|
||||
|
||||
// Open audio
|
||||
f, err := os.Open(*voice)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening %s failed: %w", *voice, err)
|
||||
}
|
||||
|
||||
// Create reader
|
||||
r, err := wav.NewReader(f, i.Size())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating new reader failed: %w", err)
|
||||
}
|
||||
|
||||
// Read
|
||||
var d []int16
|
||||
for {
|
||||
// Read sample
|
||||
s, err := r.ReadSample()
|
||||
if err == io.EOF {
|
||||
break
|
||||
} else if err != nil {
|
||||
return nil, fmt.Errorf("reading sample failed: %w", err)
|
||||
}
|
||||
|
||||
// Append
|
||||
d = append(d, int16(s))
|
||||
}
|
||||
|
||||
// Speech to text
|
||||
var results []string
|
||||
|
||||
if bot.ExtendedMetaData {
|
||||
metadata, err := bot.Model.SpeechToTextWithMetadata(d, bot.MaxResults)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed converting speech to text: ", err)
|
||||
}
|
||||
defer metadata.Close()
|
||||
results = metadataToStrings(metadata)
|
||||
} else {
|
||||
res, err := bot.Model.SpeechToText(d)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed converting speech to text: ", err)
|
||||
}
|
||||
results = []string{res}
|
||||
}
|
||||
|
||||
// for _, res := range results {
|
||||
// fmt.Println("Text:", res)
|
||||
// }
|
||||
return &results, nil
|
||||
}
|
||||
|
||||
|
||||
func metadataToStrings(m *asticoqui.Metadata) []string {
|
||||
results := make([]string, 0, m.NumTranscripts())
|
||||
for _, tr := range m.Transcripts() {
|
||||
var res string
|
||||
for _, tok := range tr.Tokens() {
|
||||
res += tok.Text()
|
||||
}
|
||||
res += fmt.Sprintf(" [%0.3f]", tr.Confidence())
|
||||
results = append(results, res)
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue