/* Orion --- Speech to text bot Copyright (c) 2022 Sameer Rahmani This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package core import ( "fmt" "io" "io/ioutil" "log" "os" "path/filepath" "strings" "time" "github.com/asticode/go-asticoqui" "github.com/cryptix/wav" tele "gopkg.in/telebot.v3" ) type Bot struct { Owner string StoragePath string VoiceStorage string Token string Model *asticoqui.Model ExtendedMetaData bool MaxResults uint db *DB } type Entry struct { FilePath string Transcript string Visited bool } func CreateBot(storage *string) (*Bot,error) { voiceStorage := filepath.Join(*storage, "voices") dbpath := filepath.Join(*storage, "db") err := os.MkdirAll(voiceStorage, 0750) if err != nil { return nil, err } db, err := CreateDB(&dbpath) if err != nil { return nil, err } return &Bot{ Token: os.Getenv("BOT_TOKEN"), MaxResults: 5, ExtendedMetaData: false, StoragePath: *storage, VoiceStorage: voiceStorage, db: db, }, nil } func (bot *Bot) ShutDown() error { log.Println("[info] Shutting down...") bot.db.Close() bot.Model.Close() return nil } func (bot *Bot) StartBot() { pref := tele.Settings{ Token: bot.Token, // TODO: Move this to the config Poller: &tele.LongPoller{Timeout: 10 * time.Second}, } b, err := tele.NewBot(pref) if err != nil { log.Fatal(err) return } b.Handle(tele.OnVoice, func(c tele.Context) error { if !bot.isOwner(c) { return nil } c.Send("...") log.Println("Got a voice!") filepath, err := bot.StoreVoice(c) if err != nil { return err } text, err := bot.ConvertToText(filepath) if err != nil { return err } s := strings.Join(*text, " ") err = bot.SaveTranscript(filepath, &s) if err != nil { return err } c.Send(s) fmt.Println(s) return nil }) b.Start() } func (bot *Bot) SaveTranscript(filepath *string, text *string) error { return ioutil.WriteFile(*filepath + ".txt", []byte(*text), 0600) } func (bot *Bot) isOwner(c tele.Context) bool { me := c.Sender().Username log.Printf("[Info] User '%s' is trying to connect!", me) if me != bot.Owner { c.Send("Get lost!") return false } return true } func (bot *Bot) StoreVoice(c tele.Context) (*string, error) { v := c.Message().Voice.File time := c.Message().Time().Format("2006-01-02_15:04:05") path := filepath.Join(bot.VoiceStorage, time + ".ogg") wavePath := filepath.Join(bot.VoiceStorage, time + ".wav") if err := c.Bot().Download(&v, path); err != nil { return nil, err } if err := ConvertOggtoWav(&path, &wavePath); err != nil { return nil, err } if err := os.Remove(path); err != nil { return nil, err } return &wavePath, nil } func (bot *Bot) ConvertToText(voice *string) (*[]string, error) { // Stat audio i, err := os.Stat(*voice) if err != nil { return nil, fmt.Errorf("stating %s failed: %w", *voice, err) } // Open audio f, err := os.Open(*voice) if err != nil { return nil, fmt.Errorf("opening %s failed: %w", *voice, err) } // Create reader r, err := wav.NewReader(f, i.Size()) if err != nil { return nil, fmt.Errorf("creating new reader failed: %w", err) } // Read var d []int16 for { // Read sample s, err := r.ReadSample() if err == io.EOF { break } else if err != nil { return nil, fmt.Errorf("reading sample failed: %w", err) } // Append d = append(d, int16(s)) } // Speech to text var results []string if bot.ExtendedMetaData { metadata, err := bot.Model.SpeechToTextWithMetadata(d, bot.MaxResults) if err != nil { return nil, fmt.Errorf("failed converting speech to text: ", err) } defer metadata.Close() results = metadataToStrings(metadata) } else { res, err := bot.Model.SpeechToText(d) if err != nil { return nil, fmt.Errorf("failed converting speech to text: ", err) } results = []string{res} } // for _, res := range results { // fmt.Println("Text:", res) // } return &results, nil } func metadataToStrings(m *asticoqui.Metadata) []string { results := make([]string, 0, m.NumTranscripts()) for _, tr := range m.Transcripts() { var res string for _, tok := range tr.Tokens() { res += tok.Text() } res += fmt.Sprintf(" [%0.3f]", tr.Confidence()) results = append(results, res) } return results }