Orion/pkg/core/core.go

238 lines
4.8 KiB
Go

/*
Orion --- Speech to text bot
Copyright (c) 2022 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package core
import (
"fmt"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"strings"
"time"
"github.com/asticode/go-asticoqui"
"github.com/cryptix/wav"
tele "gopkg.in/telebot.v3"
)
// Bot holds the configuration and the runtime handles of the speech-to-text
// Telegram bot.
type Bot struct {
// Owner is the Telegram username that is allowed to use the bot; isOwner
// compares the username of every sender against it.
Owner string
// StoragePath is the root storage directory that was passed to CreateBot.
StoragePath string
// VoiceStorage is the "voices" directory below StoragePath; StoreVoice
// places the downloaded and converted audio files there.
VoiceStorage string
// Token is the Telegram bot token. CreateBot reads it from the BOT_TOKEN
// environment variable.
Token string
// Model is the speech-to-text model used by ConvertToText and closed by
// ShutDown. CreateBot leaves it nil, so it has to be assigned separately.
Model *asticoqui.Model
// ExtendedMetaData selects SpeechToTextWithMetadata instead of SpeechToText
// in ConvertToText. CreateBot sets it to false.
ExtendedMetaData bool
// MaxResults is handed to SpeechToTextWithMetadata and is therefore only
// consulted when ExtendedMetaData is true. CreateBot sets it to 5.
MaxResults uint
// db is the handle returned by CreateDB; ShutDown closes it.
db *DB
}
// Entry groups a file path, a transcript and a visited flag.
// NOTE(review): nothing in the visible part of this file uses Entry;
// presumably it is the record type kept in DB - confirm against the DB code.
type Entry struct {
// FilePath is presumably the path of the stored audio file - TODO confirm.
FilePath string
// Transcript is presumably the recognized text for FilePath - TODO confirm.
Transcript string
// Visited: the meaning of this flag is not visible from here - TODO confirm.
Visited bool
}
// CreateBot prepares the storage layout below *storage and returns a Bot
// bound to it.
//
// It creates the "voices" sub-directory (mode 0750, parents included), opens
// the database located at "<storage>/db" through CreateDB and reads the
// Telegram token from the BOT_TOKEN environment variable. MaxResults is set
// to 5 and ExtendedMetaData to false; Owner and Model keep their zero values.
//
// The error of the directory creation or of CreateDB is returned unchanged,
// together with a nil Bot.
func CreateBot(storage *string) (*Bot, error) {
	root := *storage
	voicesDir := filepath.Join(root, "voices")
	dbFile := filepath.Join(root, "db")

	if mkErr := os.MkdirAll(voicesDir, 0750); mkErr != nil {
		return nil, mkErr
	}

	store, dbErr := CreateDB(&dbFile)
	if dbErr != nil {
		return nil, dbErr
	}

	created := &Bot{
		StoragePath:      root,
		VoiceStorage:     voicesDir,
		Token:            os.Getenv("BOT_TOKEN"),
		ExtendedMetaData: false,
		MaxResults:       5,
		db:               store,
	}
	return created, nil
}
// ShutDown releases the resources held by the bot: the database handle and
// the speech-to-text model.
//
// Either of them may be missing (CreateBot, for instance, does not load a
// model); a missing resource is skipped instead of being dereferenced, so
// ShutDown is safe to call on a partially initialised Bot. It always returns
// nil.
func (bot *Bot) ShutDown() error {
	log.Println("[info] Shutting down...")

	if bot.db != nil {
		bot.db.Close()
	}
	if bot.Model != nil {
		bot.Model.Close()
	}
	return nil
}
// StartBot creates the telebot instance from bot.Token (long polling with a
// 10 second timeout), registers the voice handler and then calls Start on
// the instance.
//
// Only voice messages are handled. A voice accepted by isOwner is stored with
// StoreVoice, transcribed with ConvertToText, the joined transcript is
// written next to the audio file with SaveTranscript, sent back to the chat
// and printed to standard output. The first failing step ends the handler and
// its error is returned to telebot, including a failure to deliver the
// transcript.
//
// If the telebot instance cannot be created the process is terminated through
// log.Fatal.
func (bot *Bot) StartBot() {
	pref := tele.Settings{
		Token: bot.Token,
		// TODO: Move this to the config
		Poller: &tele.LongPoller{Timeout: 10 * time.Second},
	}

	b, err := tele.NewBot(pref)
	if err != nil {
		// log.Fatal exits the process; nothing after it would ever run.
		log.Fatal(err)
	}

	b.Handle(tele.OnVoice, func(c tele.Context) error {
		if !bot.isOwner(c) {
			return nil
		}

		// The acknowledgement is best effort: failing to deliver it must not
		// prevent the transcription itself.
		if err := c.Send("..."); err != nil {
			log.Printf("[warn] sending the acknowledgement failed: %v", err)
		}
		log.Println("Got a voice!")

		voicePath, err := bot.StoreVoice(c)
		if err != nil {
			return err
		}

		text, err := bot.ConvertToText(voicePath)
		if err != nil {
			return err
		}

		s := strings.Join(*text, " ")
		if err := bot.SaveTranscript(voicePath, &s); err != nil {
			return err
		}

		// Report a failed delivery to telebot instead of dropping it, but
		// still print the transcript locally first.
		sendErr := c.Send(s)
		fmt.Println(s)
		return sendErr
	})

	b.Start()
}
// SaveTranscript stores *text in a file named after the audio file: the path
// in *filepath with ".txt" appended. The file is written with mode 0600 and
// the error of the write, if any, is returned unchanged.
func (bot *Bot) SaveTranscript(filepath *string, text *string) error {
	target := *filepath + ".txt"
	payload := []byte(*text)
	return ioutil.WriteFile(target, payload, 0600)
}
// isOwner reports whether the update in c was sent by the configured owner,
// i.e. whether the sender's Telegram username equals bot.Owner.
//
// Updates without a sender are rejected silently. Senders without a username
// are always rejected: otherwise an unset Owner ("") would match every
// account that has no username and open the bot to strangers. A rejected
// sender is answered with "Get lost!"; a failure to deliver that answer is
// only logged because the caller needs nothing but the verdict.
func (bot *Bot) isOwner(c tele.Context) bool {
	sender := c.Sender()
	if sender == nil {
		log.Println("[Info] Ignoring an update without a sender")
		return false
	}

	me := sender.Username
	log.Printf("[Info] User '%s' is trying to connect!", me)

	if me == "" || me != bot.Owner {
		if err := c.Send("Get lost!"); err != nil {
			log.Printf("[warn] sending the rejection failed: %v", err)
		}
		return false
	}
	return true
}
// StoreVoice downloads the voice attached to the message in c and converts
// it to a WAV file.
//
// Both files are placed in bot.VoiceStorage and named after the message time
// formatted as "2006-01-02_15:04:05", with the extensions ".ogg" and ".wav".
// The downloaded ".ogg" file is removed after ConvertOggtoWav succeeded and a
// pointer to the ".wav" path is returned. An error from the download, the
// conversion or the removal is returned unchanged with a nil path.
func (bot *Bot) StoreVoice(c tele.Context) (*string, error) {
	voiceFile := c.Message().Voice.File
	stamp := c.Message().Time().Format("2006-01-02_15:04:05")

	oggPath := filepath.Join(bot.VoiceStorage, stamp+".ogg")
	wavPath := filepath.Join(bot.VoiceStorage, stamp+".wav")

	err := c.Bot().Download(&voiceFile, oggPath)
	if err != nil {
		return nil, err
	}

	err = ConvertOggtoWav(&oggPath, &wavPath)
	if err != nil {
		return nil, err
	}

	// The OGG file is only an intermediate artifact.
	err = os.Remove(oggPath)
	if err != nil {
		return nil, err
	}

	return &wavPath, nil
}
// ConvertToText runs speech recognition on the WAV file at *voice and
// returns the recognized text.
//
// The whole file is read sample by sample, every sample is narrowed to int16
// and the resulting buffer is handed to bot.Model. With ExtendedMetaData set,
// SpeechToTextWithMetadata is asked for up to MaxResults candidates and each
// one is rendered by metadataToStrings; otherwise the result holds the single
// string returned by SpeechToText.
//
// An error is returned when no model is loaded, when the file cannot be
// stat'ed, opened or parsed, or when the model fails; file and model errors
// wrap the underlying cause. The audio file is closed before returning.
func (bot *Bot) ConvertToText(voice *string) (*[]string, error) {
	if bot.Model == nil {
		// Calling into a nil model would panic; report it instead.
		return nil, fmt.Errorf("speech model is not loaded")
	}

	// Stat audio: the WAV reader needs the total size up front.
	info, err := os.Stat(*voice)
	if err != nil {
		return nil, fmt.Errorf("stating %s failed: %w", *voice, err)
	}

	// Open audio
	f, err := os.Open(*voice)
	if err != nil {
		return nil, fmt.Errorf("opening %s failed: %w", *voice, err)
	}
	// The file is only read, so a failing Close cannot lose data.
	defer f.Close()

	// Create reader
	r, err := wav.NewReader(f, info.Size())
	if err != nil {
		return nil, fmt.Errorf("creating new reader failed: %w", err)
	}

	// Read every sample until the reader signals the end of the data.
	var samples []int16
	for {
		s, err := r.ReadSample()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("reading sample failed: %w", err)
		}
		samples = append(samples, int16(s))
	}

	// Speech to text
	if bot.ExtendedMetaData {
		metadata, err := bot.Model.SpeechToTextWithMetadata(samples, bot.MaxResults)
		if err != nil {
			return nil, fmt.Errorf("failed converting speech to text: %w", err)
		}
		defer metadata.Close()

		results := metadataToStrings(metadata)
		return &results, nil
	}

	res, err := bot.Model.SpeechToText(samples)
	if err != nil {
		return nil, fmt.Errorf("failed converting speech to text: %w", err)
	}
	results := []string{res}
	return &results, nil
}
// metadataToStrings renders every candidate transcript in m as one string:
// the texts of its tokens concatenated without a separator, followed by the
// confidence of the transcript formatted as " [%0.3f]".
//
// The result has one element per transcript, in the order in which
// m.Transcripts() yields them.
func metadataToStrings(m *asticoqui.Metadata) []string {
	results := make([]string, 0, m.NumTranscripts())
	for _, tr := range m.Transcripts() {
		// A builder avoids re-copying the growing string for every token.
		var line strings.Builder
		for _, tok := range tr.Tokens() {
			line.WriteString(tok.Text())
		}
		fmt.Fprintf(&line, " [%0.3f]", tr.Confidence())
		results = append(results, line.String())
	}
	return results
}