I have an ExampleStreaming class, which I got from the IBM Watson SDK on GitHub (the speech-to-text service demo). Here it is:
/// <summary>
/// Records audio from the first available microphone into a looping clip and
/// hands it, half a buffer at a time, to the speech-to-text service; recognition
/// results are written to the log.
/// </summary>
public class ExampleStreaming : MonoBehaviour
{
    // ID returned by Runnable.Run for the recording coroutine; 0 means "not recording".
    private int m_RecordingRoutine = 0;
    // Name of the microphone device in use; null until a device has been selected.
    private string m_MicrophoneID = null;
    // Looping clip that Microphone.Start records into.
    private AudioClip m_Recording = null;
    // Length of the looping recording clip, in seconds.
    private int m_RecordingBufferSize = 5;
    // Sample rate requested from the microphone, in Hz.
    private int m_RecordingHZ = 22050;
    private SpeechToText m_SpeechToText = new SpeechToText();

    /// <summary>
    /// Installs the default log reactors, starts listening and kicks off recording.
    /// </summary>
    void Start()
    {
        LogSystem.InstallDefaultReactors();
        Log.Debug("ExampleStreaming", "Start();");
        Active = true;
        Debug.Log("start");
        StartRecording();
    }

    /// <summary>
    /// Logs the current listening state every frame.
    /// </summary>
    public void Update()
    {
        Debug.Log(m_SpeechToText.IsListening);
    }

    /// <summary>
    /// Gets whether the speech-to-text service is listening. Setting it to true
    /// configures the service and starts listening (if not already listening);
    /// setting it to false stops listening (if currently listening).
    /// </summary>
    public bool Active
    {
        get { return m_SpeechToText.IsListening; }
        set
        {
            if (value && !m_SpeechToText.IsListening)
            {
                m_SpeechToText.DetectSilence = true;
                m_SpeechToText.EnableWordConfidence = false;
                m_SpeechToText.EnableTimestamps = false;
                m_SpeechToText.SilenceThreshold = 0.03f;
                m_SpeechToText.MaxAlternatives = 1;
                m_SpeechToText.EnableContinousRecognition = true;
                m_SpeechToText.EnableInterimResults = true;
                m_SpeechToText.OnError = OnError;
                m_SpeechToText.StartListening(OnRecognize);
            }
            else if (!value && m_SpeechToText.IsListening)
            {
                m_SpeechToText.StopListening();
            }
        }
    }

    /// <summary>
    /// Starts the recording coroutine unless one is already running.
    /// </summary>
    private void StartRecording()
    {
        if (m_RecordingRoutine == 0)
        {
            Debug.Log("m_RecordingRoutine");
            UnityObjectUtil.StartDestroyQueue();
            m_RecordingRoutine = Runnable.Run(RecordingHandler());
        }
    }

    /// <summary>
    /// Stops the microphone and the recording coroutine, if one is running.
    /// </summary>
    private void StopRecording()
    {
        if (m_RecordingRoutine != 0)
        {
            Microphone.End(m_MicrophoneID);
            Runnable.Stop(m_RecordingRoutine);
            m_RecordingRoutine = 0;
        }
    }

    /// <summary>
    /// Error callback for the speech-to-text service: stops listening and logs the error.
    /// </summary>
    /// <param name="error">Error text reported by the service.</param>
    private void OnError(string error)
    {
        Active = false;
        Log.Debug("ExampleStreaming", "Error! {0}", error);
    }

    /// <summary>
    /// Coroutine that records from the first microphone device into a looping clip
    /// and forwards each completed half of that clip to the speech-to-text service.
    /// Ends when no microphone is available, recording cannot be started, the
    /// microphone stops recording, or the routine is stopped.
    /// </summary>
    private IEnumerator RecordingHandler()
    {
        string[] devices = Microphone.devices;

        // Join the names into one string so the whole list is logged as a single
        // format argument, regardless of how Log.Debug treats array arguments.
        Log.Debug("ExampleStreaming", "devices: {0}", string.Join(", ", devices));

        m_Recording = null;
        if (devices.Length > 0)
        {
            m_MicrophoneID = devices[0];
            Debug.Log("m_MicrophoneID : " + m_MicrophoneID);
            m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);
        }
        else
        {
            // No device to index into; fall through to the null-recording path below.
            Log.Error("ExampleStreaming", "No microphone devices found.");
        }

        yield return null; // let m_RecordingRoutine get set..

        // Check for null BEFORE touching the clip; Microphone.Start may have failed.
        if (m_Recording == null)
        {
            Debug.Log("m_Recording is null");
            StopRecording();
            yield break;
        }

        Debug.Log("m_Recording : " + m_Recording.length);

        bool bFirstBlock = true;
        int midPoint = m_Recording.samples / 2;
        float[] samples = null;

        while (m_RecordingRoutine != 0 && m_Recording != null)
        {
            int writePos = Microphone.GetPosition(m_MicrophoneID);
            if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
            {
                Log.Error("MicrophoneWidget", "Microphone disconnected.");
                StopRecording();
                yield break;
            }

            if ((bFirstBlock && writePos >= midPoint)
                || (!bFirstBlock && writePos < midPoint))
            {
                // The half we are waiting for is fully recorded: copy it into its
                // own clip and pass it on to the speech-to-text service.
                samples = new float[midPoint];
                m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);

                AudioData record = new AudioData();
                record.MaxLevel = Mathf.Max(samples);
                record.Clip = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
                record.Clip.SetData(samples, 0);

                m_SpeechToText.OnListen(record);

                bFirstBlock = !bFirstBlock;
            }
            else
            {
                // Calculate the number of samples remaining until the next block of
                // audio is ready, and wait the amount of time it takes to record them.
                int remaining = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
                float timeRemaining = (float)remaining / (float)m_RecordingHZ;
                yield return new WaitForSeconds(timeRemaining);
            }
        }
    }

    /// <summary>
    /// Recognition callback: logs every alternative transcript of every result,
    /// tagged as final or interim together with its confidence.
    /// </summary>
    /// <param name="result">Recognition event delivered by the service; may be null.</param>
    private void OnRecognize(SpeechRecognitionEvent result)
    {
        Debug.Log("OnRecognize");
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = alt.transcript;
                    Debug.Log(text);
                    Log.Debug("ExampleStreaming", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
                }
            }
        }
    }
}
And this is the line I added to get the microphone. I edited it to use the microphone device at index zero, which was originally null (I don't know whether that was left intentionally or is an error), in the function RecordingHandler:
m_MicrophoneID = Microphone.devices[0];
Unfortunately, it does not show any log output from the OnRecognize event handler, which I think should execute.
Instead, it displays these logs after a few seconds (I set the audio length to 5). What am I doing wrong? I am unable to understand how speech to text works.
[DEBUG] OnListenClosed(), State = DISCONNECTED
[DEBUG] KeepAlive exited.
I have also tried the IBM Watson Speech to Text scene; it also does not show anything.