238 lines
4.8 KiB
Go
238 lines
4.8 KiB
Go
/*
|
|
Orion --- Speech to text bot
|
|
|
|
Copyright (c) 2022 Sameer Rahmani <lxsameer@gnu.org>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 2 of the License.
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
package core
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"io/ioutil"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/asticode/go-asticoqui"
|
|
"github.com/cryptix/wav"
|
|
tele "gopkg.in/telebot.v3"
|
|
)
|
|
|
|
type Bot struct {
|
|
Owner string
|
|
StoragePath string
|
|
VoiceStorage string
|
|
Token string
|
|
Model *asticoqui.Model
|
|
ExtendedMetaData bool
|
|
MaxResults uint
|
|
db *DB
|
|
}
|
|
|
|
// Entry is a record tying a file to its transcript.
//
// NOTE(review): nothing in the visible part of this file reads or writes an
// Entry, so the field descriptions below are inferred from the names only —
// confirm against the code that uses them.
type Entry struct {
	// FilePath presumably locates the recording the entry belongs to.
	FilePath string
	// Transcript presumably holds the recognised text.
	Transcript string
	// Visited is a flag whose exact meaning is not visible from here.
	Visited bool
}
|
|
|
|
func CreateBot(storage *string) (*Bot,error) {
|
|
voiceStorage := filepath.Join(*storage, "voices")
|
|
dbpath := filepath.Join(*storage, "db")
|
|
err := os.MkdirAll(voiceStorage, 0750)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
db, err := CreateDB(&dbpath)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &Bot{
|
|
Token: os.Getenv("BOT_TOKEN"),
|
|
MaxResults: 5,
|
|
ExtendedMetaData: false,
|
|
StoragePath: *storage,
|
|
VoiceStorage: voiceStorage,
|
|
db: db,
|
|
}, nil
|
|
}
|
|
|
|
func (bot *Bot) ShutDown() error {
|
|
log.Println("[info] Shutting down...")
|
|
bot.db.Close()
|
|
bot.Model.Close()
|
|
return nil
|
|
}
|
|
|
|
func (bot *Bot) StartBot() {
|
|
pref := tele.Settings{
|
|
Token: bot.Token,
|
|
// TODO: Move this to the config
|
|
Poller: &tele.LongPoller{Timeout: 10 * time.Second},
|
|
}
|
|
|
|
b, err := tele.NewBot(pref)
|
|
if err != nil {
|
|
log.Fatal(err)
|
|
return
|
|
}
|
|
|
|
b.Handle(tele.OnVoice, func(c tele.Context) error {
|
|
if !bot.isOwner(c) {
|
|
return nil
|
|
}
|
|
c.Send("...")
|
|
log.Println("Got a voice!")
|
|
filepath, err := bot.StoreVoice(c)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
text, err := bot.ConvertToText(filepath)
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
s := strings.Join(*text, " ")
|
|
|
|
err = bot.SaveTranscript(filepath, &s)
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
c.Send(s)
|
|
fmt.Println(s)
|
|
|
|
return nil
|
|
})
|
|
|
|
b.Start()
|
|
}
|
|
|
|
func (bot *Bot) SaveTranscript(filepath *string, text *string) error {
|
|
return ioutil.WriteFile(*filepath + ".txt", []byte(*text), 0600)
|
|
}
|
|
|
|
func (bot *Bot) isOwner(c tele.Context) bool {
|
|
me := c.Sender().Username
|
|
log.Printf("[Info] User '%s' is trying to connect!", me)
|
|
|
|
if me != bot.Owner {
|
|
c.Send("Get lost!")
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (bot *Bot) StoreVoice(c tele.Context) (*string, error) {
|
|
v := c.Message().Voice.File
|
|
time := c.Message().Time().Format("2006-01-02_15:04:05")
|
|
|
|
path := filepath.Join(bot.VoiceStorage, time + ".ogg")
|
|
wavePath := filepath.Join(bot.VoiceStorage, time + ".wav")
|
|
|
|
if err := c.Bot().Download(&v, path); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := ConvertOggtoWav(&path, &wavePath); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := os.Remove(path); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &wavePath, nil
|
|
}
|
|
|
|
|
|
func (bot *Bot) ConvertToText(voice *string) (*[]string, error) {
|
|
// Stat audio
|
|
i, err := os.Stat(*voice)
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("stating %s failed: %w", *voice, err)
|
|
}
|
|
|
|
// Open audio
|
|
f, err := os.Open(*voice)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("opening %s failed: %w", *voice, err)
|
|
}
|
|
|
|
// Create reader
|
|
r, err := wav.NewReader(f, i.Size())
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating new reader failed: %w", err)
|
|
}
|
|
|
|
// Read
|
|
var d []int16
|
|
for {
|
|
// Read sample
|
|
s, err := r.ReadSample()
|
|
if err == io.EOF {
|
|
break
|
|
} else if err != nil {
|
|
return nil, fmt.Errorf("reading sample failed: %w", err)
|
|
}
|
|
|
|
// Append
|
|
d = append(d, int16(s))
|
|
}
|
|
|
|
// Speech to text
|
|
var results []string
|
|
|
|
if bot.ExtendedMetaData {
|
|
metadata, err := bot.Model.SpeechToTextWithMetadata(d, bot.MaxResults)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed converting speech to text: ", err)
|
|
}
|
|
defer metadata.Close()
|
|
results = metadataToStrings(metadata)
|
|
} else {
|
|
res, err := bot.Model.SpeechToText(d)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed converting speech to text: ", err)
|
|
}
|
|
results = []string{res}
|
|
}
|
|
|
|
// for _, res := range results {
|
|
// fmt.Println("Text:", res)
|
|
// }
|
|
return &results, nil
|
|
}
|
|
|
|
|
|
func metadataToStrings(m *asticoqui.Metadata) []string {
|
|
results := make([]string, 0, m.NumTranscripts())
|
|
for _, tr := range m.Transcripts() {
|
|
var res string
|
|
for _, tok := range tr.Tokens() {
|
|
res += tok.Text()
|
|
}
|
|
res += fmt.Sprintf(" [%0.3f]", tr.Confidence())
|
|
results = append(results, res)
|
|
}
|
|
return results
|
|
}
|