Incorporating speech in a Swing application

1) First you need to download a speech engine. For example, you can download for free the Microsoft’s Speech SDK here. (Beware: version 5.1 is 68MB). Use the tools available to make sure the SDK is properly installed and trained to your voice.

2) Then get an implementation of Sun’s Java Speech API (JSAPI). The one I used in this example is from the CloudGarden. Follow the instructions on this page to find out how to install: copy the files jsapi.dll and jsapi.jar to JRE_HOME/lib/ext and adjust your classpath so that it includes jsapi.jar, necessary to compile following sample program.

3) This sample program was written by looking at the Cloud Garden examples. I dictated the following text to the app: “Last night, after a productive day of work, I joined my girlfriend in bed and quoted a very famous poet, who’s name escapes me. I said: ‘I feel great sexual desire for you right now.’. She said she’s not into poetry.”. The result (see window) was better than I expected.

Main.java:

import javax.speech.recognition.*;
import javax.speech.synthesis.*;
import javax.speech.*;
 
import javax.swing.text.*;
import java.awt.event.*;
import javax.swing.*;
import java.awt.*;
 
public class Main extends JFrame
{ 
   TextComponentSpeechEnabler tcse;
 
   public Main() {
      addWindowListener(new WindowAdapter() {
         public void windowClosing(WindowEvent we) {
            tcse.deallocate();
            System.exit(1);
         }
      });
 
      getContentPane().setLayout(new BorderLayout(10, 10));
      JTextArea textArea = new JTextArea();
      textArea.setLineWrap(true);
 
      try {
         tcse = new TextComponentSpeechEnabler(textArea);
         getContentPane().add(BorderLayout.CENTER, tcse.getPanel());
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   }
 
   public static void main(String []args) {
      Main main = new Main();
      main.setSize(500, 500);
      main.setVisible(true);
   }
}
 
class TextComponentSpeechEnabler
{
   static Recognizer recognizer;
 
   JPanel         textComponentPanel;
   JTextComponent textComponent;
   JTextField     speakerName;
   JTextArea      statusArea;
 
   public TextComponentSpeechEnabler(JTextComponent textComponent) throws Exception {
      this.textComponent = textComponent;
      
      createPanel();
      initializeRecognizer();
   }
  
   public void initializeRecognizer() throws Exception {
      recognizer = Central.createRecognizer(null);
      recognizer.addResultListener(new MyResultListener());
      recognizer.allocate();
      recognizer.waitEngineState(Recognizer.ALLOCATED);
 
      SpeakerManager speakerManager = recognizer.getSpeakerManager();
      SpeakerProfile[] speakers = speakerManager.listKnownSpeakers();
      for (int i=0; i<speakers.length; i++) {
         addStatusText("Found profile of " + speakers[i].getName());
      }
 
      addStatusText("Current profile is " +
                                   speakerManager.getCurrentSpeaker().getName());
      speakerName.setText(speakerManager.getCurrentSpeaker().getName());
      speakerManager.setCurrentSpeaker(speakers[0]);
 
      DictationGrammar dictation = recognizer.getDictationGrammar("dictation");
      dictation.setEnabled(true);
 
      recognizer.commitChanges();
 
      recognizer.requestFocus();
      recognizer.resume(); 
   }
 
   public void deallocate() {
      System.out.println("deallocated!");
      try {
         recognizer.deallocate();
      }
      catch(Exception e) {
         e.printStackTrace();
      }
   }
 
   private void createPanel() {
      textComponentPanel = new JPanel(new BorderLayout(10, 10));
      JPanel northPanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
      northPanel.add(new JLabel("Speaker name:  "));
      speakerName = new JTextField(25);
      northPanel.add(speakerName);
      textComponentPanel.add(BorderLayout.NORTH, northPanel);
      textComponentPanel.add(BorderLayout.CENTER, new JScrollPane(textComponent));
 
      statusArea = new JTextArea(10, 50);
      textComponentPanel.add(BorderLayout.SOUTH, new JScrollPane(statusArea));   
   }
  
   public JPanel getPanel() {
      return textComponentPanel;
   }
 
   public void addStatusText(final String s) {
      SwingUtilities.invokeLater(new Runnable() {
         public void run() {
            statusArea.append(s + "n");
            statusArea.setCaretPosition(statusArea.getDocument().getLength());
         }
      });
   }
 
   public void addText(final String s) {
      SwingUtilities.invokeLater(new Runnable() {
         public void run() {
            textComponent.setText(textComponent.getText() + s);
            textComponent.setCaretPosition(textComponent.getDocument().getLength());
         }
      });
   }
  
   class MyResultListener extends ResultAdapter {
      public void resultRejected(ResultEvent e) {
      }
 
      public void resultCreated(ResultEvent e) {
      }
 
      public void resultUpdated(ResultEvent e) {
      }
 
      public void resultAccepted(ResultEvent e) {
         FinalResult finalResult = (FinalResult)(e.getSource());
         ResultToken tokens[] = null;
         tokens = finalResult.getBestTokens();
 
         StringBuffer sb = new StringBuffer();
         for (int i=0; i<tokens.length; i++) {
            sb.append(tokens[i].getSpokenText() + " ");  
         }
         addText(sb.toString());
         addStatusText("" + finalResult);
      }
   }
}

Feel free to mail me any improvements!