From 45515b8760785d487f43da084cbc5f36c4ea01fb Mon Sep 17 00:00:00 2001
From: kridneb <109475719+nebkrid@users.noreply.github.com>
Date: Mon, 23 Jan 2023 23:31:50 +0100
Subject: [PATCH 1/5] First implementation of RecognitionService for testing
 and reviewing so that dicio / vosk is registered in system as speech
 recognition service which can be queried by other apps without any dicio UI.
 - split VoskInputDevice.java into 3 parts: the Dicio recognition service
 SttService.java using Vosk, SpeechRecogServiceInputDevice.java as a more
 generalized input device for Dicio, and VoskInputDevice.java, which handles
 downloading of Vosk models - added a preference option to use the
 system-provided STT service for Dicio instead of Vosk

---
 app/src/main/AndroidManifest.xml              |  59 ++-
 .../java/org/stypox/dicio/MainActivity.java   |  36 +-
 .../input/SpeechRecogServiceInputDevice.java  | 248 +++++++++++
 .../stypox/dicio/input/VoskInputDevice.java   | 301 +++----------
 .../dicio/input/stt_service/SttService.java   | 394 ++++++++++++++++++
 .../org/stypox/dicio/settings/IOFragment.java |  15 +-
 app/src/main/res/values/arrays.xml            |   2 +
 app/src/main/res/values/strings.xml           |   1 +
 app/src/main/res/values/strings_keys.xml      |   1 +
 app/src/main/res/xml/stt_service_metadata.xml |   5 +
 10 files changed, 782 insertions(+), 280 deletions(-)
 create mode 100644 app/src/main/java/org/stypox/dicio/input/SpeechRecogServiceInputDevice.java
 create mode 100644 app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
 create mode 100644 app/src/main/res/xml/stt_service_metadata.xml
diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index bce8401cc..c88bf7958 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -1,47 +1,49 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
     xmlns:tools="http://schemas.android.com/tools"
-    android:installLocation="auto"> <!-- Allow installing also on external storage -->
+    android:installLocation="auto" >
 
+    <!-- Allow installing also on external storage -->
     <uses-permission android:name="android.permission.INTERNET" />
-    <uses-permission android:name="android.permission.RECORD_AUDIO" />
-
-    <!-- required by the download manager for APIs < Q -->
+    <uses-permission android:name="android.permission.RECORD_AUDIO" /> <!-- required by the download manager for APIs < Q -->
     <uses-permission
         android:name="android.permission.WRITE_EXTERNAL_STORAGE"
-        android:maxSdkVersion="28" />
-
-    <!-- the open skill needs to query all apps -->
+        android:maxSdkVersion="28" /> <!-- the open skill needs to query all apps -->
     <uses-permission
         android:name="android.permission.QUERY_ALL_PACKAGES"
-        tools:ignore="QueryAllPackagesPermission" />
-
-    <!-- the telephone skill needs to query contacts and call them -->
+        tools:ignore="QueryAllPackagesPermission" /> <!-- the telephone skill needs to query contacts and call them -->
     <uses-permission android:name="android.permission.READ_CONTACTS" />
     <uses-permission android:name="android.permission.CALL_PHONE" />
 
+    <queries><!--To access Speech recognizer via system interface on Android 11+
+     https://developer.android.com/reference/android/speech/SpeechRecognizer#createSpeechRecognizer(android.content.Context,%20android.content.ComponentName)-->
+        <intent>
+            <action
+                android:name="android.speech.RecognitionService" />
+        </intent>
+    </queries>
+
     <application
         android:name=".App"
         android:allowBackup="true"
+        android:dataExtractionRules="@xml/data_extraction_rules"
         android:fullBackupContent="@xml/full_backup_content"
         android:icon="@mipmap/ic_launcher"
         android:label="@string/app_name"
         android:roundIcon="@mipmap/ic_launcher_round"
         android:supportsRtl="true"
         android:theme="@style/DarkAppTheme"
-        tools:ignore="GoogleAppIndexingWarning"
-        android:dataExtractionRules="@xml/data_extraction_rules">
+        tools:ignore="GoogleAppIndexingWarning" >
 
         <activity
             android:name=".MainActivity"
             android:exported="true"
             android:theme="@style/SplashScreenTheme"
-            android:windowSoftInputMode="stateUnspecified|adjustResize">
+            android:windowSoftInputMode="stateUnspecified|adjustResize" >
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
                 <category android:name="android.intent.category.LAUNCHER" />
             </intent-filter>
-
             <intent-filter>
                 <action android:name="android.intent.action.ASSIST" />
                 <category android:name="android.intent.category.DEFAULT" />
@@ -51,28 +53,45 @@
                 android:name="com.android.systemui.action_assist_icon"
                 android:resource="@mipmap/ic_launcher" />
         </activity>
-
         <activity
             android:name=".settings.SettingsActivity"
             android:exported="false" />
-
         <activity
             android:name=".error.ErrorActivity"
             android:exported="false" />
-
         <activity
             android:name=".input.stt_service.SttServiceActivity"
-            android:taskAffinity=""
             android:excludeFromRecents="true"
             android:exported="true"
+            android:taskAffinity=""
             android:theme="@style/SttServiceDarkAppTheme"
-            android:windowSoftInputMode="adjustResize">
-
+            android:windowSoftInputMode="adjustResize" >
             <intent-filter>
                 <category android:name="android.intent.category.DEFAULT" />
                 <action android:name="android.speech.action.RECOGNIZE_SPEECH" />
             </intent-filter>
         </activity>
 
+        <service
+            android:name=".input.stt_service.SttService"
+            android:enabled="true"
+            android:exported="true"
+            android:description="@string/pref_input_method_vosk"
+            android:icon="@mipmap/ic_launcher"
+            android:permission="android.permission.RECORD_AUDIO"
+        >
+        <!-- TODO check whether the following attributes would be helpful too: -->
+        <!--            android:directBootAware=["true" | "false"]-->
+        <!--        android:foregroundServiceType="microphone" -->
+        <!--        android:label="string resource"-->
+            <intent-filter>
+                <action android:name="android.speech.RecognitionService"/>
+                <category android:name="android.intent.category.DEFAULT" />
+            </intent-filter>
+            <meta-data
+                android:name="android.speech"
+                android:resource="@xml/stt_service_metadata" />
+        </service>
     </application>
+
 </manifest>
\ No newline at end of file
diff --git a/app/src/main/java/org/stypox/dicio/MainActivity.java b/app/src/main/java/org/stypox/dicio/MainActivity.java
index 0c1fe170e..90bc56c4d 100644
--- a/app/src/main/java/org/stypox/dicio/MainActivity.java
+++ b/app/src/main/java/org/stypox/dicio/MainActivity.java
@@ -1,8 +1,5 @@
 package org.stypox.dicio;
 
-import static android.Manifest.permission.RECORD_AUDIO;
-import static android.content.pm.PackageManager.PERMISSION_GRANTED;
-
 import android.content.Intent;
 import android.content.SharedPreferences;
 import android.os.Bundle;
@@ -13,23 +10,16 @@
 import android.widget.ProgressBar;
 import android.widget.ScrollView;
 
-import androidx.annotation.NonNull;
-import androidx.annotation.Nullable;
-import androidx.appcompat.app.ActionBarDrawerToggle;
-import androidx.appcompat.widget.SearchView;
-import androidx.appcompat.widget.Toolbar;
-import androidx.core.app.ActivityCompat;
-import androidx.core.view.GravityCompat;
-import androidx.drawerlayout.widget.DrawerLayout;
-import androidx.preference.PreferenceManager;
-
 import com.google.android.material.floatingactionbutton.ExtendedFloatingActionButton;
 import com.google.android.material.navigation.NavigationView;
 
+import org.dicio.skill.output.GraphicalOutputDevice;
+import org.dicio.skill.output.SpeechOutputDevice;
 import org.stypox.dicio.eval.SkillEvaluator;
 import org.stypox.dicio.eval.SkillRanker;
 import org.stypox.dicio.input.InputDevice;
 import org.stypox.dicio.input.SpeechInputDevice;
+import org.stypox.dicio.input.SpeechRecogServiceInputDevice;
 import org.stypox.dicio.input.ToolbarInputDevice;
 import org.stypox.dicio.input.VoskInputDevice;
 import org.stypox.dicio.input.stt_service.SttServiceActivity;
@@ -42,8 +32,19 @@
 import org.stypox.dicio.skills.SkillHandler;
 import org.stypox.dicio.util.BaseActivity;
 import org.stypox.dicio.util.PermissionUtils;
-import org.dicio.skill.output.GraphicalOutputDevice;
-import org.dicio.skill.output.SpeechOutputDevice;
+
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import androidx.appcompat.app.ActionBarDrawerToggle;
+import androidx.appcompat.widget.SearchView;
+import androidx.appcompat.widget.Toolbar;
+import androidx.core.app.ActivityCompat;
+import androidx.core.view.GravityCompat;
+import androidx.drawerlayout.widget.DrawerLayout;
+import androidx.preference.PreferenceManager;
+
+import static android.Manifest.permission.RECORD_AUDIO;
+import static android.content.pm.PackageManager.PERMISSION_GRANTED;
 
 public class MainActivity extends BaseActivity
         implements NavigationView.OnNavigationItemSelectedListener {
@@ -304,6 +305,11 @@ private InputDevice buildPrimaryInputDevice() {
                 .getString(getString(R.string.pref_key_input_method), "");
         if (preference.equals(getString(R.string.pref_val_input_method_text))) {
             return new ToolbarInputDevice();
+        } else if (preference.equals(getString(R.string.pref_val_input_method_systemStt))) {
+            //TODO show a hint/data privacy warning in the preferences when this option is
+            // chosen, since the speech recorded by Dicio is handed over to the third-party
+            // app configured as speech recognizer in the system settings
+            return new SpeechRecogServiceInputDevice(this);
         } else { // default
             return new VoskInputDevice(this);
         }
diff --git a/app/src/main/java/org/stypox/dicio/input/SpeechRecogServiceInputDevice.java b/app/src/main/java/org/stypox/dicio/input/SpeechRecogServiceInputDevice.java
new file mode 100644
index 000000000..13dbd7742
--- /dev/null
+++ b/app/src/main/java/org/stypox/dicio/input/SpeechRecogServiceInputDevice.java
@@ -0,0 +1,248 @@
+package org.stypox.dicio.input;
+
+import android.app.Activity;
+import android.content.Intent;
+import android.os.Bundle;
+import android.speech.RecognizerIntent;
+import android.speech.SpeechRecognizer;
+import android.util.Log;
+import android.widget.Toast;
+
+import org.stypox.dicio.R;
+
+import java.util.ArrayList;
+
+import androidx.annotation.StringRes;
+import androidx.preference.PreferenceManager;
+
+import static org.stypox.dicio.util.StringUtils.isNullOrEmpty;
+
+/**
+ * Speech input device backed by an Android {@link SpeechRecognizer}. Uses the recognizer
+ * returned by {@link #getRecognizer()}, by default the one provided by the system.
+ */
+public class SpeechRecogServiceInputDevice extends SpeechInputDevice
+        implements android.speech.RecognitionListener {
+
+    public static final String TAG = SpeechRecogServiceInputDevice.class.getSimpleName();
+    private Activity activity;
+
+    private boolean startListeningOnLoaded = false;
+
+    private SpeechRecognizer speechRecognizer;
+    private boolean currentlyListening = false;
+
+    /////////////////////
+    // Exposed methods //
+    /////////////////////
+
+    public SpeechRecogServiceInputDevice(final Activity activity) {
+        this.activity = activity;
+    }
+
+    @Override
+    public void load() {
+        load(false); // the user did not press on a button, so manual=false
+    }
+
+    /**
+     * Creates the recognizer, if there is none yet, and registers this object as its listener.
+     * @param manual only passed on to {@link #tryToGetInput(boolean)}, see there.
+     */
+    protected void load(final boolean manual) {
+        if (speechRecognizer == null) {
+            onLoading();
+            speechRecognizer = getRecognizer();
+            speechRecognizer.setRecognitionListener(this);
+
+            if (startListeningOnLoaded) {
+                startListeningOnLoaded = false;
+                tryToGetInput(manual);
+            } else {
+                onInactive();
+            }
+        }
+    }
+
+    /**
+     * Initializes the recognizer by calling the appropriate
+     * {@link SpeechRecognizer}.createSpeechRecognizer(). Default is the system provided
+     * recognizer. Override this in case you want to use another one.
+     * @return the {@link SpeechRecognizer}
+     */
+    protected SpeechRecognizer getRecognizer() {
+        return SpeechRecognizer.createSpeechRecognizer(activity);
+    }
+
+    /**
+     * Override this to specify which Intent shall be used in
+     * {@link SpeechRecognizer}.startListening()
+     * @return the {@link Intent} according to {@link RecognizerIntent}
+     */
+    protected Intent getRecognizerIntent() {
+        final Intent i = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
+        i.putExtra(RecognizerIntent.EXTRA_LANGUAGE, PreferenceManager
+                .getDefaultSharedPreferences(activity)
+                .getString(activity.getString(R.string.pref_key_language), "en"));
+        // recognizers only report partial results if they are explicitly requested
+        i.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
+        return i;
+    }
+
+    @Override
+    public void cleanup() {
+        super.cleanup();
+        cancelGettingInput();
+        if (speechRecognizer != null) {
+            // release the connection to the recognition service, it would leak otherwise
+            speechRecognizer.destroy();
+            speechRecognizer = null;
+        }
+        activity = null;
+    }
+
+    @Override
+    public synchronized void tryToGetInput(final boolean manual) {
+        if (speechRecognizer == null) {
+            startListeningOnLoaded = true;
+            load(manual); // not loaded before, retry
+            return; // recognizer not ready
+        }
+
+        super.tryToGetInput(manual);
+
+        Log.d(TAG, "starting recognizer");
+
+        onLoading();
+        speechRecognizer.startListening(getRecognizerIntent());
+        currentlyListening = true;
+    }
+
+    @Override
+    public void cancelGettingInput() {
+        if (speechRecognizer != null && currentlyListening) {
+            // cancel only while a session is running, otherwise ERROR_CLIENT will be reported
+            speechRecognizer.cancel();
+            currentlyListening = false; // the session is over, do not cancel it a second time
+        }
+        startListeningOnLoaded = false;
+    }
+
+    /////////////////////
+    // Other utilities //
+    /////////////////////
+
+    protected void asyncMakeToast(@StringRes final int message) {
+        activity.runOnUiThread(() ->
+                Toast.makeText(activity, activity.getString(message), Toast.LENGTH_SHORT).show());
+    }
+
+    ///////////////////////////
+    // Recognition Callbacks //
+    ///////////////////////////
+
+    @Override
+    public void onReadyForSpeech(final Bundle bundle) {
+        Log.d(TAG, "onReadyForSpeech");
+        onListening();
+        currentlyListening = true;
+    }
+
+    @Override
+    public void onBeginningOfSpeech() {
+        //no usecase for dicio
+        Log.d(TAG, "onBeginningOfSpeech");
+    }
+
+    @Override
+    public void onRmsChanged(final float v) {
+        // no usecase for dicio; not logged, as level updates may arrive many times per second
+    }
+
+    @Override
+    public void onBufferReceived(final byte[] bytes) {
+        //no usecase for dicio
+        Log.d(TAG, "onBufferReceived");
+    }
+
+    @Override
+    public void onEndOfSpeech() {
+        Log.d(TAG, "onEndOfSpeech");
+        currentlyListening = false;
+        onInactive();
+    }
+
+    @Override
+    public void onError(final int i) {
+        Log.d(TAG, "onError called with error code = " + i);
+        if (i == SpeechRecognizer.ERROR_NO_MATCH || i == SpeechRecognizer.ERROR_SPEECH_TIMEOUT) {
+            // nothing (understandable) was said: that is missing input, not a failure
+            notifyNoInputReceived();
+        } else {
+            notifyError(new Throwable(getErrorName(i)));
+        }
+        // reset views, e.g. Google does not send onEndOfSpeech after errors like No_Match
+        onEndOfSpeech();
+    }
+
+    /** Maps an error code of {@link SpeechRecognizer} to a text usable as error message. */
+    private static String getErrorName(final int errorCode) {
+        switch (errorCode) {
+            case SpeechRecognizer.ERROR_AUDIO:
+                return "ERROR_AUDIO";
+            case SpeechRecognizer.ERROR_CLIENT:
+                return "ERROR_CLIENT";
+            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
+                return "ERROR_INSUFFICIENT_PERMISSIONS";
+            case SpeechRecognizer.ERROR_LANGUAGE_NOT_SUPPORTED:
+                return "ERROR_LANGUAGE_NOT_SUPPORTED";
+            case SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE:
+                return "ERROR_LANGUAGE_UNAVAILABLE";
+            case SpeechRecognizer.ERROR_NETWORK:
+                return "ERROR_NETWORK";
+            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
+                return "ERROR_NETWORK_TIMEOUT";
+            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
+                return "ERROR_RECOGNIZER_BUSY";
+            case SpeechRecognizer.ERROR_SERVER:
+                return "ERROR_SERVER";
+            case SpeechRecognizer.ERROR_SERVER_DISCONNECTED:
+                return "ERROR_SERVER_DISCONNECTED";
+            case SpeechRecognizer.ERROR_TOO_MANY_REQUESTS:
+                return "ERROR_TOO_MANY_REQUESTS";
+            default:
+                Log.w(TAG, "onError called with unexpected error code = " + errorCode);
+                return "Unexpected error code = " + errorCode;
+        }
+    }
+
+    @Override
+    public void onResults(final Bundle bundle) {
+        final ArrayList<String> results = bundle.getStringArrayList(
+                SpeechRecognizer.RESULTS_RECOGNITION);
+        Log.d(TAG, "onResult called with s = " + results);
+        if (results == null || results.isEmpty()) {
+            // no usable result in the bundle: handle it like silence instead of crashing
+            notifyNoInputReceived();
+        } else {
+            notifyInputReceived(results);
+        }
+    }
+
+    @Override
+    public void onPartialResults(final Bundle bundle) {
+        final ArrayList<String> results = bundle.getStringArrayList(
+                SpeechRecognizer.RESULTS_RECOGNITION);
+        Log.d(TAG, "onPartialResult called with s = " + results);
+        final String partialInput = results == null || results.isEmpty() ? null : results.get(0);
+        if (!isNullOrEmpty(partialInput)) {
+            notifyPartialInputReceived(partialInput);
+        }
+    }
+
+    @Override
+    public void onEvent(final int i, final Bundle bundle) {
+        //android docs: "Reserved for adding future events"
+        Log.d(TAG, "onEvent");
+    }
+}
diff --git a/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java b/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
index eb5011fba..0c9089694 100644
--- a/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
+++ b/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
@@ -1,60 +1,48 @@
 package org.stypox.dicio.input;
 
-import static org.stypox.dicio.util.LocaleUtils.LocaleResolutionResult;
-import static org.stypox.dicio.util.LocaleUtils.UnsupportedLocaleException;
-import static org.stypox.dicio.util.LocaleUtils.resolveSupportedLocale;
-import static org.stypox.dicio.util.StringUtils.isNullOrEmpty;
-
 import android.app.Activity;
 import android.app.DownloadManager;
 import android.content.BroadcastReceiver;
+import android.content.ComponentName;
 import android.content.Context;
 import android.content.Intent;
 import android.content.IntentFilter;
 import android.content.SharedPreferences;
 import android.net.Uri;
+import android.speech.SpeechRecognizer;
 import android.util.Log;
-import android.widget.Toast;
-
-import androidx.annotation.Nullable;
-import androidx.annotation.StringRes;
-import androidx.core.os.LocaleListCompat;
-import androidx.preference.PreferenceManager;
 
-import org.stypox.dicio.BuildConfig;
 import org.stypox.dicio.R;
 import org.stypox.dicio.Sections;
-import org.json.JSONException;
-import org.json.JSONObject;
-import org.vosk.LibVosk;
-import org.vosk.LogLevel;
-import org.vosk.Model;
-import org.vosk.Recognizer;
-import org.vosk.android.RecognitionListener;
-import org.vosk.android.SpeechService;
+import org.stypox.dicio.input.stt_service.SttService;
 
 import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
 
+import androidx.annotation.Nullable;
+import androidx.core.os.LocaleListCompat;
+import androidx.preference.PreferenceManager;
 import io.reactivex.rxjava3.android.schedulers.AndroidSchedulers;
 import io.reactivex.rxjava3.core.Completable;
 import io.reactivex.rxjava3.disposables.CompositeDisposable;
 import io.reactivex.rxjava3.schedulers.Schedulers;
 
-public class VoskInputDevice extends SpeechInputDevice {
+import static org.stypox.dicio.util.LocaleUtils.LocaleResolutionResult;
+import static org.stypox.dicio.util.LocaleUtils.UnsupportedLocaleException;
+import static org.stypox.dicio.util.LocaleUtils.resolveSupportedLocale;
+
+public class VoskInputDevice extends SpeechRecogServiceInputDevice  {
 
     public static final String TAG = VoskInputDevice.class.getSimpleName();
     public static final String MODEL_PATH = "/vosk-model";
     public static final String MODEL_ZIP_FILENAME = "model.zip";
-    public static final float SAMPLE_RATE = 44100.0f;
 
     /**
      * All small models from <a href="https://alphacephei.com/vosk/models">Vosk</a>
@@ -92,18 +80,13 @@ public class VoskInputDevice extends SpeechInputDevice {
     private final CompositeDisposable disposables = new CompositeDisposable();
     @Nullable private BroadcastReceiver downloadingBroadcastReceiver = null;
     private Long currentModelDownloadId = null;
-    @Nullable private SpeechService speechService = null;
-
-    private boolean currentlyInitializingRecognizer = false;
-    private boolean startListeningOnLoaded = false;
-    private boolean currentlyListening = false;
-
 
     /////////////////////
     // Exposed methods //
     /////////////////////
 
     public VoskInputDevice(final Activity activity) {
+        super(activity);
         this.activity = activity;
     }
 
@@ -116,84 +99,71 @@ public void load() {
      * @param manual if this is true and the model is not already downloaded, do not start
      *               downloading it. See {@link #tryToGetInput(boolean)}.
      */
-    private void load(final boolean manual) {
-        if (speechService == null && !currentlyInitializingRecognizer) {
-            if (new File(getModelDirectory(), "ivector").exists()) {
-                // one directory is in the correct place, so everything should be ok
-                Log.d(TAG, "Vosk model in place");
-
-                currentlyInitializingRecognizer = true;
-                onLoading();
-
-                disposables.add(Completable.fromAction(this::initializeRecognizer)
-                        .subscribeOn(Schedulers.io())
-                        .observeOn(AndroidSchedulers.mainThread())
-                        .subscribe(() -> {
-                            currentlyInitializingRecognizer = false;
-                            if (startListeningOnLoaded) {
-                                startListeningOnLoaded = false;
-                                tryToGetInput(manual);
-                            } else {
-                                onInactive();
-                            }
-                        }, throwable -> {
-                            currentlyInitializingRecognizer = false;
-                            if ("Failed to initialize recorder. Microphone might be already in use."
-                                    .equals(throwable.getMessage())) {
-                                notifyError(new UnableToAccessMicrophoneException());
-                            } else {
-                                notifyError(throwable);
-                            }
-                            onInactive();
-                        }));
-
-            } else {
-                Log.d(TAG, "Vosk model not in place");
-                final DownloadManager downloadManager =
-                        (DownloadManager) activity.getSystemService(Context.DOWNLOAD_SERVICE);
-
-                if (currentModelDownloadId == null) {
-                    Log.d(TAG, "Vosk model is not already being downloaded");
-
-                    if (manual) {
-                        // the model needs to be downloaded and no download has already started;
-                        // the user manually triggered the input device, so he surely wants the
-                        // model to be downloaded, so we can proceed
-                        onLoading();
-                        try {
-                            final LocaleResolutionResult result = resolveSupportedLocale(
-                                    LocaleListCompat.create(Sections.getCurrentLocale()),
-                                    MODEL_URLS.keySet());
-                            startDownloadingModel(downloadManager, result.supportedLocaleString);
-                        } catch (final UnsupportedLocaleException e) {
-                            asyncMakeToast(R.string.vosk_model_unsupported_language);
-                            e.printStackTrace();
-                            onRequiresDownload();
-                        }
+    protected void load(final boolean manual) {
+        if (new File(getModelDirectory(), "ivector").exists()) {
+            // one directory is in the correct place, so everything should be ok
+            Log.d(TAG, "Vosk model in place");
+            super.load(manual);
+        } else {
+            Log.d(TAG, "Vosk model not in place");
+            final DownloadManager downloadManager =
+                    (DownloadManager) activity.getSystemService(Context.DOWNLOAD_SERVICE);
 
-                    } else {
-                        // loading the model would require downloading it, but the user didn't
-                        // explicitly tell the voice recognizer to download files, so notify them
-                        // that a download is required
+            if (currentModelDownloadId == null) {
+                Log.d(TAG, "Vosk model is not already being downloaded");
+
+                if (manual) {
+                    // the model needs to be downloaded and no download has already started;
+                    // the user manually triggered the input device, so he surely wants the
+                    // model to be downloaded, so we can proceed
+                    onLoading();
+                    try {
+                        final LocaleResolutionResult result = resolveSupportedLocale(
+                                LocaleListCompat.create(Sections.getCurrentLocale()),
+                                MODEL_URLS.keySet());
+                        startDownloadingModel(downloadManager, result.supportedLocaleString);
+                    } catch (final UnsupportedLocaleException e) {
+                        asyncMakeToast(R.string.vosk_model_unsupported_language);
+                        e.printStackTrace();
                         onRequiresDownload();
                     }
 
                 } else {
-                    Log.d(TAG, "Vosk model already being downloaded: " + currentModelDownloadId);
+                    // loading the model would require downloading it, but the user didn't
+                    // explicitly tell the voice recognizer to download files, so notify them
+                    // that a download is required
+                    onRequiresDownload();
                 }
+
+            } else {
+                Log.d(TAG, "Vosk model already being downloaded: " + currentModelDownloadId);
             }
         }
     }
 
+    @Override
+    protected SpeechRecognizer getRecognizer() {
+        final SpeechRecognizer recognizer = SpeechRecognizer.createSpeechRecognizer(activity,
+                new ComponentName(activity, SttService.class));
+        // Additionally start the service explicitly, so that it is not destroyed as soon as
+        // the speech recognizer unbinds from it (especially important if SttServiceActivity
+        // is only called from other apps). This avoids re-initializing the SpeechService.
+        // If the Dicio app is closed, the service is destroyed anyway (observed when closed
+        // manually - check whether this also happens when closed by the system due to
+        // inactivity).
+        // Works also when battery optimization is enabled.
+        //TODO check long term behaviour with and without battery optimization
+        //TODO check how to call startService if neither Dicio Main app nor
+        // Dicios SttServiceActivity is called but directly
+        // SpeechRecognizer.createSpeechRecognizer by a 3rd party app
+        activity.startService(new Intent(activity, SttService.class));
+        return recognizer;
+    }
+
     @Override
     public void cleanup() {
         super.cleanup();
         disposables.clear();
-        if (speechService != null) {
-            speechService.stop();
-            speechService.shutdown();
-            speechService = null;
-        }
 
         if (currentModelDownloadId != null) {
             final DownloadManager downloadManager =
@@ -209,117 +179,6 @@ public void cleanup() {
         activity = null;
     }
 
-    @Override
-    public synchronized void tryToGetInput(final boolean manual) {
-        if (currentlyInitializingRecognizer) {
-            startListeningOnLoaded = true;
-            return;
-        } else if (speechService == null) {
-            startListeningOnLoaded = true;
-            load(manual); // not loaded before, retry
-            return; // recognizer not ready
-        }
-
-        if (currentlyListening) {
-            return;
-        }
-        currentlyListening = true;
-        super.tryToGetInput(manual);
-
-        Log.d(TAG, "starting recognizer");
-
-        speechService.startListening(new RecognitionListener() {
-
-            @Override
-            public void onPartialResult(final String s) {
-                Log.d(TAG, "onPartialResult called with s = " + s);
-                if (!currentlyListening) {
-                    return;
-                }
-
-                String partialInput = null;
-                try {
-                    partialInput = new JSONObject(s).getString("partial");
-                } catch (final JSONException e) {
-                    e.printStackTrace();
-                }
-
-                if (!isNullOrEmpty(partialInput)) {
-                    notifyPartialInputReceived(partialInput);
-                }
-            }
-
-            @Override
-            public void onResult(final String s) {
-                Log.d(TAG, "onResult called with s = " + s);
-                if (!currentlyListening) {
-                    return;
-                }
-
-                stopRecognizer();
-
-                final ArrayList<String> inputs = new ArrayList<>();
-                try {
-                    final JSONObject jsonResult = new JSONObject(s);
-                    final int size = jsonResult.getJSONArray("alternatives").length();
-                    for (int i = 0; i < size; i++) {
-                        final String text = jsonResult.getJSONArray("alternatives")
-                                .getJSONObject(i).getString("text");
-                        if (!isNullOrEmpty(text)) {
-                            inputs.add(text);
-                        }
-                    }
-                } catch (final JSONException e) {
-                    e.printStackTrace();
-                }
-
-                if (inputs.isEmpty()) {
-                    notifyNoInputReceived();
-                } else {
-                    notifyInputReceived(inputs);
-                }
-            }
-
-            @Override
-            public void onFinalResult(final String s) {
-                Log.d(TAG, "onFinalResult called with s = " + s);
-                // TODO
-            }
-
-            @Override
-            public void onError(final Exception e) {
-                Log.d(TAG, "onError called");
-                stopRecognizer();
-                notifyError(e);
-            }
-
-            @Override
-            public void onTimeout() {
-                Log.d(TAG, "onTimeout called");
-                stopRecognizer();
-                notifyNoInputReceived();
-            }
-        });
-        onListening();
-    }
-
-    @Override
-    public void cancelGettingInput() {
-        if (currentlyListening) {
-            if (speechService != null) {
-                speechService.stop();
-            }
-            notifyNoInputReceived();
-
-            // call onInactive() only if we really were listening, so that the SpeechInputDevice
-            // state icon is preserved if something different from "microphone on" was being shown
-            onInactive();
-        }
-
-        startListeningOnLoaded = false;
-        currentlyListening = false;
-    }
-
     /**
      * Deletes the Vosk model downloaded in the {@link Context#getFilesDir()} if it exists. It also
      * stops any Vosk model download currently in progress based on the id stored in settings.
@@ -337,31 +196,6 @@ public static void deleteCurrentModel(final Context context) {
     }
 
 
-    ////////////////////
-    // Initialization //
-    ////////////////////
-
-    private synchronized void initializeRecognizer() throws IOException {
-        Log.d(TAG, "initializing recognizer");
-
-        LibVosk.setLogLevel(BuildConfig.DEBUG ? LogLevel.DEBUG : LogLevel.WARNINGS);
-        final Model model = new Model(getModelDirectory().getAbsolutePath());
-        final Recognizer recognizer = new Recognizer(model, SAMPLE_RATE);
-        recognizer.setMaxAlternatives(5);
-        this.speechService = new SpeechService(recognizer, SAMPLE_RATE);
-    }
-
-    private void stopRecognizer() {
-        currentlyListening = false;
-
-        if (speechService != null) {
-            speechService.stop();
-        }
-
-        onInactive();
-    }
-
-
     ////////////////////
     // Model download //
     ////////////////////
@@ -560,13 +394,4 @@ private void updateCurrentDownloadId(final Context context, final Long id) {
         }
     }
 
-
-    /////////////////////
-    // Other utilities //
-    /////////////////////
-
-    private void asyncMakeToast(@StringRes final int message) {
-        activity.runOnUiThread(() ->
-                Toast.makeText(activity, activity.getString(message), Toast.LENGTH_SHORT).show());
-    }
 }
diff --git a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
new file mode 100644
index 000000000..244fb75d2
--- /dev/null
+++ b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
@@ -0,0 +1,394 @@
+package org.stypox.dicio.input.stt_service;
+
+import android.content.Intent;
+import android.os.Build;
+import android.os.Bundle;
+import android.os.RemoteException;
+import android.speech.RecognitionService;
+import android.speech.RecognizerIntent;
+import android.speech.SpeechRecognizer;
+import android.util.Log;
+import android.widget.Toast;
+
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.stypox.dicio.BuildConfig;
+import org.stypox.dicio.R;
+import org.vosk.LibVosk;
+import org.vosk.LogLevel;
+import org.vosk.Model;
+import org.vosk.Recognizer;
+import org.vosk.android.SpeechService;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import androidx.annotation.Nullable;
+import io.reactivex.rxjava3.android.schedulers.AndroidSchedulers;
+import io.reactivex.rxjava3.core.Completable;
+import io.reactivex.rxjava3.disposables.CompositeDisposable;
+import io.reactivex.rxjava3.schedulers.Schedulers;
+
+import static org.stypox.dicio.util.StringUtils.isNullOrEmpty;
+
+public class SttService extends RecognitionService {
+    protected class RecognitionListener implements org.vosk.android.RecognitionListener {
+        private boolean firstPartialResultReceived = false;
+
+        @Override
+        public void onPartialResult(final String s) {
+            Log.d(TAG, "onPartialResult called with s = " + s);
+
+            String partialInput = null;
+            try {
+                partialInput = new JSONObject(s).getString("partial");
+            } catch (final JSONException e) {
+                e.printStackTrace();
+            }
+
+            if (!isNullOrEmpty(partialInput)) {
+                if (!firstPartialResultReceived) {
+                    firstPartialResultReceived = true;
+                    try {
+                        callback.beginningOfSpeech();
+                    } catch (final RemoteException e) {
+                        logRemoteException(e);
+                    }
+                }
+                final String[] partialInputArray = {partialInput};
+                final Bundle partResult = new Bundle();
+                partResult.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION,
+                        new ArrayList<>(Arrays.asList(partialInputArray)));
+                try {
+                    callback.partialResults(partResult);
+                } catch (final RemoteException e) {
+                    logRemoteException(e);
+                }
+            }
+        }
+
+        @Override
+        public void onResult(final String s) {
+            Log.d(TAG, "onResult called with s = " + s);
+
+            stopRecognizer();
+            // collect every non-empty alternative together with its own confidence score
+            final ArrayList<String> inputs = new ArrayList<>();
+            float[] confidences = null;
+            try {
+                final JSONObject jsonResult = new JSONObject(s);
+                final JSONArray alternatives = jsonResult.getJSONArray("alternatives");
+                final int size = alternatives.length();
+                // sized for the worst case, trimmed below once empty entries are dropped
+                final float[] allConfidences = new float[size];
+                for (int i = 0; i < size; i++) {
+                    final JSONObject alternative = alternatives.getJSONObject(i);
+                    final String text = alternative.getString("text");
+                    if (!isNullOrEmpty(text)) {
+                        // keep the score at the same index as its text; -1 = not available
+                        allConfidences[inputs.size()] = (float) alternative
+                                .optDouble("confidence", -1);
+                        inputs.add(text);
+                    }
+                }
+                confidences = Arrays.copyOf(allConfidences, inputs.size());
+
+            } catch (final JSONException e) {
+                e.printStackTrace();
+            }
+
+            if (inputs.isEmpty()) {
+                try {
+                    callback.error(SpeechRecognizer.ERROR_NO_MATCH);
+                } catch (final RemoteException e) {
+                    logRemoteException(e);
+                }
+            } else {
+                final Bundle results = new Bundle();
+                results.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, inputs);
+                results.putFloatArray(SpeechRecognizer.CONFIDENCE_SCORES, confidences);
+                try {
+                    callback.results(results);
+                } catch (final RemoteException e) {
+                    logRemoteException(e);
+                }
+            }
+        }
+
+        @Override
+        public void onFinalResult(final String s) {
+            Log.d(TAG, "onFinalResult called with s = " + s);
+            firstPartialResultReceived = false; //reset for next input
+            try {
+                //only notify endOfSpeech because s is currently always empty - even if onResult
+                // was not empty before
+                callback.endOfSpeech();
+            } catch (final RemoteException e) {
+                logRemoteException(e);
+            }
+        }
+
+        @Override
+        public void onError(final Exception e) {
+            Log.e(TAG, "onError", e);
+            stopRecognizer();
+            try {
+                //The Error message is quite general because there is no "generic error code"
+                callback.error(SpeechRecognizer.ERROR_SERVER);
+            } catch (final RemoteException ex) {
+                Log.e(TAG, "onError", e);
+            }
+        }
+
+        @Override
+        public void onTimeout() {
+            Log.d(TAG, "onTimeout called");
+            stopRecognizer();
+            try {
+                callback.error(SpeechRecognizer.ERROR_SPEECH_TIMEOUT);
+            } catch (final RemoteException e) {
+                logRemoteException(e);
+            }
+        }
+    }
+
+    /**
+     docs of SpeechService
+     <a href="https://github.com/alphacep/vosk-api/blob/master/android/lib/src/main/java/org/vosk/
+     android/SpeechService.java">...</a>
+     */
+    @Nullable
+    private SpeechService speechService = null;
+    private boolean currentlyInitializingRecognizer = false;
+    public static final String MODEL_PATH = "/vosk-model";
+    public static final String TAG = SttService.class.getSimpleName();
+    private final CompositeDisposable disposables = new CompositeDisposable();
+    public static final float SAMPLE_RATE = 44100.0f;
+    private boolean currentlyListening = false;
+    private boolean startListeningOnLoaded = false;
+    private boolean onStartCommandCalled = false;
+    private Intent lastRequestedIntent = null;
+    Callback callback;
+
+//TODO support onCheckRecognitionSupport
+//TODO support onTriggerModelDownload
+
+    @Override
+    public void onCreate() {
+        super.onCreate();
+        load();
+        Log.d(TAG, "onCreate");
+    }
+
+    @Override
+    public int onStartCommand(final Intent intent, final int flags, final int startId) {
+        Log.d(TAG, "onStartCommand");
+        onStartCommandCalled = true;
+        return super.onStartCommand(intent, flags, startId);
+    }
+
+    @Override
+    public boolean onUnbind(final Intent intent) {
+        Log.d(TAG, "onUnbind");
+        return super.onUnbind(intent);
+    }
+
+
+    @Override
+    public void onRebind(final Intent intent) {
+        Log.d(TAG, "onRebind");
+        super.onRebind(intent);
+    }
+
+
+    @Override
+    public void onDestroy() {
+        Log.d(TAG, "onDestroy");
+        disposables.clear();
+        if (speechService != null) {
+            stopRecognizer();
+            speechService.shutdown();
+            speechService = null;
+        }
+        super.onDestroy();
+    }
+
+    @Override
+    protected void onStartListening(final Intent intent, final Callback newCallback) {
+        Log.d(TAG, "onStartListening");
+        Log.d(TAG, "onStartCommand called is " + onStartCommandCalled);
+        //TODO remove toast or make different type of speech recognition hint or a preference option
+        // to disable
+        Toast.makeText(this, this.getString(R.string.pref_input_method_vosk),
+                Toast.LENGTH_SHORT).show();
+        //TODO maybe check here for audio permission of the caller (but already in manifest of this
+        // service declared => should not happen?): Need a test app without permission
+        // https://developer.android.com/reference/android/speech/RecognitionService#
+        // onStartListening(android.content.Intent,%20android.speech.RecognitionService.Callback)
+        this.callback = newCallback;
+        lastRequestedIntent = intent;
+        tryToGetInput();
+
+        //TODO support Intent Extras if possible with vosk
+        // EXTRA_LANGUAGE / EXTRA_LANGUAGE_PREFERENCE / EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE
+        // Further Extras which may be interesting
+        // EXTRA_LANGUAGE_MODEL / LANGUAGE_MODEL_FREE_FORM /   LANGUAGE_MODEL_WEB_SEARCH
+        // EXTRA_SEGMENTED_SESSION
+        // EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS /
+        // EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS
+        // EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS
+        // EXTRA_AUDIO_SOURCE / EXTRA_AUDIO_SOURCE_CHANNEL_COUNT /
+        // EXTRA_AUDIO_SOURCE_ENCODING / EXTRA_AUDIO_SOURCE_SAMPLING_RATE
+        // EXTRA_BIASING_STRINGS
+        // EXTRA_ENABLE_BIASING_DEVICE_CONTEXT
+
+    }
+
+    @Override
+    protected void onCancel(final Callback newCallback) {
+        Log.d(TAG, "onCancel");
+        stopRecognizer();
+    }
+
+    @Override
+    protected void onStopListening(final Callback newCallback) {
+        Log.d(TAG, "onStopListening");
+        if (currentlyListening) {
+            stopRecognizer();
+        }
+    }
+
+
+
+
+
+    private void load() {
+        if (speechService == null && !currentlyInitializingRecognizer) {
+            if (new File(getModelDirectory(), "ivector").exists()) {
+                // one directory is in the correct place, so everything should be ok
+                Log.d(TAG, "Vosk model in place");
+
+                currentlyInitializingRecognizer = true;
+
+                disposables.add(Completable.fromAction(this::initializeRecognizer)
+                        .subscribeOn(Schedulers.io())
+                        .observeOn(AndroidSchedulers.mainThread())
+                        .subscribe(() -> {
+                            currentlyInitializingRecognizer = false;
+                            if (startListeningOnLoaded) {
+                                startListeningOnLoaded = false;
+                                tryToGetInput();
+                            }
+                        }, throwable -> {
+                            currentlyInitializingRecognizer = false;
+                            if ("Failed to initialize recorder. Microphone might be already in use."
+                                    .equals(throwable.getMessage())) {
+                                callback.error(SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS);
+                            } else {
+                                Log.e(TAG, "load()->initializeRecognizer", throwable);
+                                callback.error(SpeechRecognizer.ERROR_SERVER);
+                            }
+                        }));
+
+            } else {
+                try {
+                    if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
+                        callback.error(SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE);
+                    } else {
+                        callback.error(SpeechRecognizer.ERROR_SERVER);
+                    }
+                } catch (final RemoteException e) {
+                    logRemoteException(e);
+                }
+            }
+        }
+    }
+    public synchronized void tryToGetInput() {
+        if (currentlyInitializingRecognizer) {
+            startListeningOnLoaded = true;
+            return;
+        } else if (speechService == null) {
+            try {
+                callback.error(SpeechRecognizer.ERROR_SERVER);
+            } catch (final RemoteException e) {
+                logRemoteException(e);
+            }
+            return; // recognizer not ready
+        }
+        //(only one client can be connected via system to speech recognizer (otherwise
+        // ERROR_BUSY seems to be reported) - check whether currently listening checks are
+        // necessary at all) - on the other hand they do not harm
+        if (currentlyListening) {
+            try {
+                if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
+                    callback.error(SpeechRecognizer.ERROR_TOO_MANY_REQUESTS);
+                } else {
+                    //more generic
+                    callback.error(SpeechRecognizer.ERROR_SERVER);
+                }
+            } catch (final RemoteException e) {
+                logRemoteException(e);
+            }
+            return;
+        }
+
+        currentlyListening = true;
+        Log.d(TAG, "starting recognizer");
+
+        speechService.startListening(new RecognitionListener());
+
+        try {
+            callback.readyForSpeech(null);
+        } catch (final RemoteException e) {
+            logRemoteException(e);
+        }
+    }
+
+    private void logRemoteException(final RemoteException e) {
+        Log.e(TAG, "Remote exception on callback information", e);
+    }
+
+    private File getModelDirectory() {
+        return new File(this.getFilesDir(), MODEL_PATH);
+    }
+
+
+    ////////////////////
+    // Vosk Initialization //
+    ////////////////////
+
+    private synchronized void initializeRecognizer() throws IOException {
+        Log.d(TAG, "initializing recognizer");
+
+        LibVosk.setLogLevel(BuildConfig.DEBUG ? LogLevel.DEBUG : LogLevel.WARNINGS);
+        final Model model = new Model(getModelDirectory().getAbsolutePath());
+        final Recognizer recognizer = new Recognizer(model, SAMPLE_RATE);
+        recognizer.setMaxAlternatives(
+                lastRequestedIntent.getIntExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 5));
+        this.speechService = new SpeechService(recognizer, SAMPLE_RATE);
+    }
+
+    /**
+     * safe to call even if the speech service is not initialized or recognition is not active
+     */
+    private void stopRecognizer() {
+        if (speechService != null) {
+            speechService.stop(); //does nothing if recognition is not active.
+        } else if (currentlyListening) {
+            //(actually currentlyListening should never be true at this point-however does not harm)
+            //means SpeechRecognizer.startListening was called, but endOfSpeech not yet
+            // make sure to free resources so that speech recognizer is not supposed to be busy
+            try {
+                callback.endOfSpeech();
+            } catch (final RemoteException e) {
+                logRemoteException(e);
+            }
+        }
+        currentlyListening = false;
+
+
+    }
+}
diff --git a/app/src/main/java/org/stypox/dicio/settings/IOFragment.java b/app/src/main/java/org/stypox/dicio/settings/IOFragment.java
index b0c5e19b5..2f50cc16d 100644
--- a/app/src/main/java/org/stypox/dicio/settings/IOFragment.java
+++ b/app/src/main/java/org/stypox/dicio/settings/IOFragment.java
@@ -2,11 +2,11 @@
 
 import android.os.Bundle;
 
-import androidx.preference.PreferenceFragmentCompat;
-
 import org.stypox.dicio.R;
 import org.stypox.dicio.input.VoskInputDevice;
 
+import androidx.preference.PreferenceFragmentCompat;
+
 public class IOFragment extends PreferenceFragmentCompat {
     @Override
     public void onCreatePreferences(final Bundle savedInstanceState, final String rootKey) {
@@ -20,11 +20,12 @@ public void onCreatePreferences(final Bundle savedInstanceState, final String ro
                     }
                     return true;
                 });
-        findPreference(getString(R.string.pref_key_input_method))
-                .setOnPreferenceChangeListener((preference, newValue) -> {
-                    VoskInputDevice.deleteCurrentModel(requireContext());
-                    return true;
-                });
+//TODO Discuss whether this is needed. At least for debugging commented
+//        findPreference(getString(R.string.pref_key_input_method))
+//                .setOnPreferenceChangeListener((preference, newValue) -> {
+//                    VoskInputDevice.deleteCurrentModel(requireContext());
+//                    return true;
+//                });
     }
 
     @Override
diff --git a/app/src/main/res/values/arrays.xml b/app/src/main/res/values/arrays.xml
index 3fc982afc..c220794a8 100644
--- a/app/src/main/res/values/arrays.xml
+++ b/app/src/main/res/values/arrays.xml
@@ -39,10 +39,12 @@
     <string-array name="pref_input_method_entries">
         <item>@string/pref_input_method_vosk</item>
         <item>@string/pref_input_method_text</item>
+        <item>@string/pref_input_method_systemStt</item>
     </string-array>
     <string-array name="pref_input_method_entry_values">
         <item>@string/pref_val_input_method_vosk</item>
         <item>@string/pref_val_input_method_text</item>
+        <item>@string/pref_val_input_method_systemStt</item>
     </string-array>
 
     <string-array name="pref_speech_output_method_entries">
diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml
index a25938f2e..2f35808f8 100644
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
@@ -53,6 +53,7 @@
     <string name="pref_input_method">Input method</string>
     <string name="pref_input_method_summary">Choose the service to use to talk to Dicio - %1$s</string>
     <string name="pref_input_method_text">Text box</string>
+    <string name="pref_input_method_systemStt">System provided speech-to-text service (speech is handled outside Dicio)</string>
     <string name="pref_input_method_vosk">Vosk offline speech recognition</string>
     <string name="pref_speech_output_method">Speech output method</string>
     <string name="pref_speech_output_method_summary">Choose the service Dicio should use to talk to you - %1$s</string>
diff --git a/app/src/main/res/values/strings_keys.xml b/app/src/main/res/values/strings_keys.xml
index dcbab3204..03bb7adfd 100644
--- a/app/src/main/res/values/strings_keys.xml
+++ b/app/src/main/res/values/strings_keys.xml
@@ -9,6 +9,7 @@
     <string name="pref_key_input_method" translatable="false">input_method</string>
     <string name="pref_val_input_method_text" translatable="false">text</string>
     <string name="pref_val_input_method_vosk" translatable="false">vosk</string>
+    <string name="pref_val_input_method_systemStt" translatable="false">sytemStt</string>
 
     <string name="pref_key_speech_output_method" translatable="false">speech_output_method</string>
     <string name="pref_val_speech_output_method_android" translatable="false">android</string>
diff --git a/app/src/main/res/xml/stt_service_metadata.xml b/app/src/main/res/xml/stt_service_metadata.xml
new file mode 100644
index 000000000..776b6335d
--- /dev/null
+++ b/app/src/main/res/xml/stt_service_metadata.xml
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<recognition-service xmlns:android="http://schemas.android.com/apk/res/android"
+    android:settingsActivity="org.stypox.dicio.settings.SettingsActivity"
+    >
+</recognition-service>
\ No newline at end of file

From 2eb87ebb8a0509efb522fdf6892099cdaf1d3aaa Mon Sep 17 00:00:00 2001
From: kridneb <109475719+nebkrid@users.noreply.github.com>
Date: Sun, 5 Feb 2023 10:36:25 +0100
Subject: [PATCH 2/5] - Bugfix: Breakdowns in background - Bugfix: Load new
 model when language changed - Bugfix: Breakdown when no model is downloaded -
 Implemented error message notifications for analyzing errors when in
 background - Audio Permission requirement in manifest declaration of the STT
 service removed, since it may cause breakdowns in calling app instead of
 reporting ERROR_INSUFFICIENT_PERMISSION

---
 app/src/main/AndroidManifest.xml              |  10 +-
 .../dicio/input/stt_service/SttService.java   | 246 +++++++++++++-----
 2 files changed, 187 insertions(+), 69 deletions(-)

diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index dfc685c0c..82fc06f32 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -5,13 +5,16 @@
 
     <!-- Allow installing also on external storage -->
     <uses-permission android:name="android.permission.INTERNET" />
-    <uses-permission android:name="android.permission.RECORD_AUDIO" /> <!-- required by the download manager for APIs < Q -->
+    <uses-permission android:name="android.permission.RECORD_AUDIO" />
+    <!-- required by the download manager for APIs < Q -->
     <uses-permission
         android:name="android.permission.WRITE_EXTERNAL_STORAGE"
-        android:maxSdkVersion="28" /> <!-- the open skill needs to query all apps -->
+        android:maxSdkVersion="28" />
+    <!-- the open skill needs to query all apps -->
     <uses-permission
         android:name="android.permission.QUERY_ALL_PACKAGES"
-        tools:ignore="QueryAllPackagesPermission" /> <!-- the telephone skill needs to query contacts and call them -->
+        tools:ignore="QueryAllPackagesPermission" />
+    <!-- the telephone skill needs to query contacts and call them -->
     <uses-permission android:name="android.permission.READ_CONTACTS" />
     <uses-permission android:name="android.permission.CALL_PHONE" />
 
@@ -77,7 +80,6 @@
             android:exported="true"
             android:description="@string/pref_input_method_vosk"
             android:icon="@mipmap/ic_launcher"
-            android:permission="android.permission.RECORD_AUDIO"
         >
 <!-- TODO check the following tags whether these are helpful too           -->
         <!--            android:directBootAware=["true" | "false"]-->
diff --git a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
index 244fb75d2..b660171b0 100644
--- a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
+++ b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
@@ -15,6 +15,9 @@
 import org.json.JSONObject;
 import org.stypox.dicio.BuildConfig;
 import org.stypox.dicio.R;
+import org.stypox.dicio.error.ErrorInfo;
+import org.stypox.dicio.error.ErrorUtils;
+import org.stypox.dicio.error.UserAction;
 import org.vosk.LibVosk;
 import org.vosk.LogLevel;
 import org.vosk.Model;
@@ -101,11 +104,7 @@ public void onResult(final String s) {
             }
 
             if (inputs.isEmpty()) {
-                try {
-                    callback.error(SpeechRecognizer.ERROR_NO_MATCH);
-                } catch (final RemoteException e) {
-                    logRemoteException(e);
-                }
+                callbackErrorReport(SpeechRecognizer.ERROR_NO_MATCH);
             } else {
                 final Bundle results = new Bundle();
                 results.putStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION, inputs);
@@ -134,24 +133,16 @@ public void onFinalResult(final String s) {
         @Override
         public void onError(final Exception e) {
             Log.e(TAG, "onError", e);
+            showErrorNotification(e);
             stopRecognizer();
-            try {
-                //The Error message is quite general because there is no "generic error code"
-                callback.error(SpeechRecognizer.ERROR_SERVER);
-            } catch (final RemoteException ex) {
-                Log.e(TAG, "onError", e);
-            }
+            callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
         }
 
         @Override
         public void onTimeout() {
             Log.d(TAG, "onTimeout called");
             stopRecognizer();
-            try {
-                callback.error(SpeechRecognizer.ERROR_SPEECH_TIMEOUT);
-            } catch (final RemoteException e) {
-                logRemoteException(e);
-            }
+            callbackErrorReport(SpeechRecognizer.ERROR_SPEECH_TIMEOUT);
         }
     }
 
@@ -162,6 +153,8 @@ public void onTimeout() {
      */
     @Nullable
     private SpeechService speechService = null;
+    private Model model;
+    private long modelDownloadDate;
     private boolean currentlyInitializingRecognizer = false;
     public static final String MODEL_PATH = "/vosk-model";
     public static final String TAG = SttService.class.getSimpleName();
@@ -173,13 +166,11 @@ public void onTimeout() {
     private Intent lastRequestedIntent = null;
     Callback callback;
 
-//TODO support onCheckRecognitionSupport
-//TODO support onTriggerModelDownload
-
     @Override
     public void onCreate() {
         super.onCreate();
-        load();
+        LibVosk.setLogLevel(BuildConfig.DEBUG ? LogLevel.DEBUG : LogLevel.WARNINGS);
+        initialize();
         Log.d(TAG, "onCreate");
     }
 
@@ -208,11 +199,7 @@ public void onRebind(final Intent intent) {
     public void onDestroy() {
         Log.d(TAG, "onDestroy");
         disposables.clear();
-        if (speechService != null) {
-            stopRecognizer();
-            speechService.shutdown();
-            speechService = null;
-        }
+        shutdownSpeechService();
         super.onDestroy();
     }
 
@@ -220,18 +207,80 @@ public void onDestroy() {
     protected void onStartListening(final Intent intent, final Callback newCallback) {
         Log.d(TAG, "onStartListening");
         Log.d(TAG, "onStartCommand called is " + onStartCommandCalled);
+        this.callback = newCallback;
+        //TODO check permission. Actually it seems this is already done by the system interface
+        // (reports SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) , but it is
+        // explicitly recommended in the SpeechRecognizer documentation. However the way it is in
+        // the docs does not work here due to API Level for requested calls (and since Audio
+        // Recorder is not directly implemented here but by vosk library)
+        // https://developer.android.com/reference/android/speech/RecognitionService
+        // However even if there is a way for app without permission, not a security issue since
+        // stt service notifies user when speech input is started
+        if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.M) {
+            final String callingPackageName = getPackageManager().getPackagesForUid(
+                    newCallback.getCallingUid())[0];
+//Not working this way - check fails even for dicio
+//            int permissionState = PermissionChecker.checkCallingPermission(this,
+//                    "android.permission.RECORD_AUDIO", callingPackageName);
+//            if (permissionState != PermissionChecker.PERMISSION_GRANTED){
+//                callbackErrorReport(SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS);
+//                return;
+//            }
+        }
+        if (speechService != null && !recogIntentExtrasEquals(lastRequestedIntent, intent)) {
+            shutdownSpeechService();
+            if (intent.hasExtra(RecognizerIntent.EXTRA_LANGUAGE)) {
+                //check if language change is the reason
+                Log.d(TAG, "requested language = "
+                        + intent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE));
+                if (!lastRequestedIntent.hasExtra(RecognizerIntent.EXTRA_LANGUAGE)
+                        || !lastRequestedIntent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE)
+                        .equals(intent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE))) {
+                    //Since at the moment only one language at the time is supported, just check
+                    // whether the downloaded model has changed. Otherwise use the language which
+                    // is installed anyway
+                    if (getModelDirectory().lastModified() != modelDownloadDate) {
+                        Log.d(TAG, "model last modified " + getModelDirectory().lastModified());
+                        Log.d(TAG, "model_download_date " + modelDownloadDate);
+                        model = null; //forces reloading
+                        shutdownSpeechService(); //forces reloading
+                    }
+                }
+            }
+        }
+        lastRequestedIntent = intent;
+
         //TODO remove toast or make different type of speech recognition hint or a preference option
         // to disable
         Toast.makeText(this, this.getString(R.string.pref_input_method_vosk),
                 Toast.LENGTH_SHORT).show();
-        //TODO maybe check here for audio permission of the caller (but already in manifest of this
-        // service declared => should not happen?): Need a test app without permission
-        // https://developer.android.com/reference/android/speech/RecognitionService#
-        // onStartListening(android.content.Intent,%20android.speech.RecognitionService.Callback)
-        this.callback = newCallback;
-        lastRequestedIntent = intent;
         tryToGetInput();
 
+    }
+
+    /**
+     * Compares the supported extras of two intents; a missing extras bundle counts as empty.
+     * @return true if all Extras, which are supported by this STT service, are equal
+     */
+    protected boolean recogIntentExtrasEquals(final Intent i1, final Intent i2) {
+        final Bundle ie1 = i1.getExtras();
+        final Bundle ie2 = i2.getExtras();
+        final String[] supportedExtras = {RecognizerIntent.EXTRA_LANGUAGE,
+                RecognizerIntent.EXTRA_MAX_RESULTS};
+        for (final String key: supportedExtras) {
+            final Object extra1 = ie1 == null ? null : ie1.get(key);
+            final Object extra2 = ie2 == null ? null : ie2.get(key);
+            //return false if they are not equal or one (but not both) is null
+            if (extra1 != null) {
+                if (!extra1.equals(extra2)) {
+                    return false;
+                }
+            } else if (extra2 != null) {
+                return false;
+            }
+        }
+        return true;
+
         //TODO support Intent Extras if possible with vosk
         // EXTRA_LANGUAGE / EXTRA_LANGUAGE_PREFERENCE / EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE
         // Further Extras which may be interesting
@@ -244,9 +293,9 @@ protected void onStartListening(final Intent intent, final Callback newCallback)
         // EXTRA_AUDIO_SOURCE_ENCODING / EXTRA_AUDIO_SOURCE_SAMPLING_RATE
         // EXTRA_BIASING_STRINGS
         // EXTRA_ENABLE_BIASING_DEVICE_CONTEXT
-
     }
 
+
     @Override
     protected void onCancel(final Callback newCallback) {
         Log.d(TAG, "onCancel");
@@ -265,7 +314,7 @@ protected void onStopListening(final Callback newCallback) {
 
 
 
-    private void load() {
+    private void initialize() {
         if (speechService == null && !currentlyInitializingRecognizer) {
             if (new File(getModelDirectory(), "ivector").exists()) {
                 // one directory is in the correct place, so everything should be ok
@@ -273,7 +322,7 @@ private void load() {
 
                 currentlyInitializingRecognizer = true;
 
-                disposables.add(Completable.fromAction(this::initializeRecognizer)
+                disposables.add(Completable.fromAction(this::loadModel)
                         .subscribeOn(Schedulers.io())
                         .observeOn(AndroidSchedulers.mainThread())
                         .subscribe(() -> {
@@ -284,25 +333,19 @@ private void load() {
                             }
                         }, throwable -> {
                             currentlyInitializingRecognizer = false;
-                            if ("Failed to initialize recorder. Microphone might be already in use."
-                                    .equals(throwable.getMessage())) {
-                                callback.error(SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS);
-                            } else {
-                                Log.e(TAG, "load()->initializeRecognizer", throwable);
-                                callback.error(SpeechRecognizer.ERROR_SERVER);
-                            }
+                            showErrorNotification(throwable);
                         }));
 
             } else {
-                try {
+                if (callback != null) {
                     if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
-                        callback.error(SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE);
+                        callbackErrorReport(SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE);
                     } else {
-                        callback.error(SpeechRecognizer.ERROR_SERVER);
+                        callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
                     }
-                } catch (final RemoteException e) {
-                    logRemoteException(e);
                 }
+                showErrorNotification(
+                        new Throwable(getString(R.string.vosk_model_unsupported_language)));
             }
         }
     }
@@ -310,27 +353,45 @@ public synchronized void tryToGetInput() {
         if (currentlyInitializingRecognizer) {
             startListeningOnLoaded = true;
             return;
+        } else if (model == null) {
+            Log.w(TAG, "tryToGetInput model==null");
+            initialize(); //try to load anew
+            startListeningOnLoaded = true;
+            return; // recognizer not ready
+        } else if (getModelDirectory().lastModified() != modelDownloadDate) {
+            //if model has changed / updated / etc...
+            Log.i(TAG, "model directory modified date changed - load it anew");
+            Log.d(TAG, "model last modified " + getModelDirectory().lastModified());
+            Log.d(TAG, "model_download_date " + modelDownloadDate);
+            model = null; //reset
+            shutdownSpeechService();
+            initialize(); //load new one
+            startListeningOnLoaded = true;
+            return; // recognizer not ready
         } else if (speechService == null) {
             try {
-                callback.error(SpeechRecognizer.ERROR_SERVER);
-            } catch (final RemoteException e) {
-                logRemoteException(e);
+                loadSpeechService();
+            } catch (final IOException e) {
+                if ("Failed to initialize recorder. Microphone might be already in use."
+                        .equals(e.getMessage())) {
+                    callbackErrorReport(SpeechRecognizer.ERROR_AUDIO);
+                } else {
+                    Log.e(TAG, "load()->initializeRecognizer", e);
+                    showErrorNotification(e);
+                    callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
+                }
+                return;
             }
-            return; // recognizer not ready
         }
         //(only one client can be connected via system to speech recognizer (otherwise
         // ERROR_BUSY seems to be reported) - check whether currently listening checks are
         // necessary at all) - on the other hand they do not harm
         if (currentlyListening) {
-            try {
-                if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
-                    callback.error(SpeechRecognizer.ERROR_TOO_MANY_REQUESTS);
-                } else {
-                    //more generic
-                    callback.error(SpeechRecognizer.ERROR_SERVER);
-                }
-            } catch (final RemoteException e) {
-                logRemoteException(e);
+            if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
+                callbackErrorReport(SpeechRecognizer.ERROR_TOO_MANY_REQUESTS);
+            } else {
+                //more generic
+                callbackErrorReport(SpeechRecognizer.ERROR_SERVER);
             }
             return;
         }
@@ -349,26 +410,81 @@ public synchronized void tryToGetInput() {
 
     private void logRemoteException(final RemoteException e) {
         Log.e(TAG, "Remote exception on callback information", e);
+        showErrorNotification(e);
+    }
+
+    /**
+     * wrapper which calls {@link RecognitionService.Callback#error(int)} and catches the
+     * resulting remote exception (as well as a NullPointerException, e.g. if no callback is set)
+     * @param errorType see {@link RecognitionService.Callback#error(int)}
+     */
+    protected void callbackErrorReport(final int errorType) {
+        try {
+            callback.error(errorType);
+        } catch (final RemoteException e) {
+            logRemoteException(e);
+        } catch (final NullPointerException e) {
+            showErrorNotification(e);
+        }
     }
 
     private File getModelDirectory() {
         return new File(this.getFilesDir(), MODEL_PATH);
     }
 
+    protected void showErrorNotification(final Throwable t) {
+        final ErrorInfo ei = new ErrorInfo(t, UserAction.STT_SERVICE_SPEECH_TO_TEXT);
+        ErrorUtils.createNotification(this, ei);
+    }
+
 
     ////////////////////
     // Vosk Initialization //
     ////////////////////
 
-    private synchronized void initializeRecognizer() throws IOException {
-        Log.d(TAG, "initializing recognizer");
+    /**
+     * Load the vosk model. This is the most time-consuming step of recognizer initialization.
+     */
+    private synchronized void loadModel() {
+        Log.d(TAG, "load Model");
+        final long t0 = System.currentTimeMillis();
+        model = new Model(getModelDirectory().getAbsolutePath());
+        modelDownloadDate = getModelDirectory().lastModified();
+        final long t1 = (System.currentTimeMillis() - t0);
+        Log.i(TAG, "Loading Model takes " + t1 + " ms");
+    }
 
-        LibVosk.setLogLevel(BuildConfig.DEBUG ? LogLevel.DEBUG : LogLevel.WARNINGS);
-        final Model model = new Model(getModelDirectory().getAbsolutePath());
+    /**
+     * load the recognizer. call this if an intent with new parameters (compared to last one) is
+     * received
+     */
+    private void loadSpeechService() throws IOException {
+        if (speechService != null) {
+            //first shutdown the old one, if a new one is requested
+            shutdownSpeechService();
+        }
+
+        final long t0 = System.currentTimeMillis();
         final Recognizer recognizer = new Recognizer(model, SAMPLE_RATE);
-        recognizer.setMaxAlternatives(
-                lastRequestedIntent.getIntExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 5));
+        if (lastRequestedIntent != null) {
+            recognizer.setMaxAlternatives(
+                    lastRequestedIntent.getIntExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 5));
+
+        }
         this.speechService = new SpeechService(recognizer, SAMPLE_RATE);
+        Log.i(TAG, "Loading SpeechService takes " + (System.currentTimeMillis() - t0)  + " ms");
+    }
+
+    /**
+     * only shut down speech service
+     * this still keeps the language model in cache for faster start of speech service
+     */
+    protected void shutdownSpeechService() {
+        if (speechService != null) {
+            stopRecognizer();
+            speechService.shutdown();
+            speechService = null;
+        }
     }
 
     /**

From 6cb85c58b3e8378f174446ce0566f957ea6fda2d Mon Sep 17 00:00:00 2001
From: kridneb <109475719+nebkrid@users.noreply.github.com>
Date: Tue, 7 Feb 2023 20:42:29 +0100
Subject: [PATCH 3/5] - Added Sound Preference (choosable per package, for data
 privacy security in order to notify user that speech input is started from
 background).

---
 .../stt_service/MakeSoundPreference.java      | 94 +++++++++++++++++++
 .../dicio/input/stt_service/SttService.java   | 47 +++++++---
 app/src/main/res/values/strings.xml           | 10 +-
 app/src/main/res/values/strings_keys.xml      |  5 +
 app/src/main/res/xml/pref_io.xml              | 35 +++++--
 5 files changed, 168 insertions(+), 23 deletions(-)
 create mode 100644 app/src/main/java/org/stypox/dicio/input/stt_service/MakeSoundPreference.java

diff --git a/app/src/main/java/org/stypox/dicio/input/stt_service/MakeSoundPreference.java b/app/src/main/java/org/stypox/dicio/input/stt_service/MakeSoundPreference.java
new file mode 100644
index 000000000..49799a1c0
--- /dev/null
+++ b/app/src/main/java/org/stypox/dicio/input/stt_service/MakeSoundPreference.java
@@ -0,0 +1,94 @@
+package org.stypox.dicio.input.stt_service;
+
+import android.content.Context;
+import android.content.SharedPreferences;
+import android.preference.PreferenceManager;
+import android.util.AttributeSet;
+
+import org.stypox.dicio.R;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import androidx.annotation.NonNull;
+import androidx.annotation.Nullable;
+import androidx.preference.MultiSelectListPreference;
+
+/**
+ * a MultiSelectListPreference which uses R.string.pref_key_stt_onlisten_sound_entries as entries
+ * and entry values
+ */
+public class MakeSoundPreference extends MultiSelectListPreference {
+    final SharedPreferences preferences;
+    final String helperPrefKey;
+    final String[] ownPackageName = new String[1];
+
+    public MakeSoundPreference(@NonNull final Context context, @Nullable final AttributeSet attrs,
+                               final int defStyleAttr, final int defStyleRes) {
+        super(context, attrs, defStyleAttr, defStyleRes);
+        preferences = PreferenceManager.getDefaultSharedPreferences(context);
+        helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+        ownPackageName[0] = context.getPackageName();
+    }
+
+    public MakeSoundPreference(@NonNull final Context context, @Nullable final AttributeSet attrs,
+                               final int defStyleAttr) {
+        super(context, attrs, defStyleAttr);
+        preferences = PreferenceManager.getDefaultSharedPreferences(context);
+        helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+        ownPackageName[0] = context.getPackageName();
+    }
+
+    public MakeSoundPreference(@NonNull final Context context, @Nullable final AttributeSet attrs) {
+        super(context, attrs);
+        preferences = PreferenceManager.getDefaultSharedPreferences(context);
+        helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+        ownPackageName[0] = context.getPackageName();
+    }
+
+    public MakeSoundPreference(@NonNull final Context context) {
+        super(context);
+        preferences = PreferenceManager.getDefaultSharedPreferences(context);
+        helperPrefKey = context.getString(R.string.pref_key_stt_onlisten_sound_entries);
+        ownPackageName[0] = context.getPackageName();
+    }
+
+    @Override
+    public CharSequence[] getEntries() {
+        final Set<String> entries = preferences.getStringSet(helperPrefKey,
+                new HashSet<>(Arrays.asList(ownPackageName)));
+        final String[] back = new String[entries.size()];
+        int i = 0;
+        for (final String e: entries) {
+            back[i] = e;
+            i++;
+        }
+        return back;
+    }
+
+    @Override
+    public CharSequence[] getEntryValues() {
+        return getEntries();
+    }
+    //
+//    protected void runtimePopulateEntries(Context context){
+//        final SharedPreferences settings = PreferenceManager.getDefaultSharedPreferences(context);
+//        settings.getStringSet()
+//        final List<CharSequence> entries = new ArrayList<>(Arrays.asList(getEntries()));
+//        final List<CharSequence> entriesValues = new ArrayList<>(Arrays.asList(getEntries()));
+//        setEntries(entries.toArray(new CharSequence[]{}));
+//        setEntryValues(entriesValues.toArray(new CharSequence[]{}));
+//    }
+//
+//    public void addEntry(CharSequence newEntry) {
+//        final Set<CharSequence> entries = new HashSet<>(Arrays.asList(getEntries()));
+//        entries.add(newEntry);
+//        setEntries(entries.toArray(new CharSequence[]{}));
+//    }
+//    public void addEntryValue(CharSequence newEntry) {
+//        final List<CharSequence> entryValues = new ArrayList<>(Arrays.asList(getEntries()));
+//        entryValues.add(newEntry);
+//        setEntryValues(entryValues.toArray(new CharSequence[]{}));
+//    }
+}
diff --git a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
index b660171b0..4936e720c 100644
--- a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
+++ b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
@@ -1,6 +1,10 @@
 package org.stypox.dicio.input.stt_service;
 
 import android.content.Intent;
+import android.content.SharedPreferences;
+import android.media.Ringtone;
+import android.media.RingtoneManager;
+import android.net.Uri;
 import android.os.Build;
 import android.os.Bundle;
 import android.os.RemoteException;
@@ -8,7 +12,6 @@
 import android.speech.RecognizerIntent;
 import android.speech.SpeechRecognizer;
 import android.util.Log;
-import android.widget.Toast;
 
 import org.json.JSONArray;
 import org.json.JSONException;
@@ -28,8 +31,11 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 
 import androidx.annotation.Nullable;
+import androidx.preference.PreferenceManager;
 import io.reactivex.rxjava3.android.schedulers.AndroidSchedulers;
 import io.reactivex.rxjava3.core.Completable;
 import io.reactivex.rxjava3.disposables.CompositeDisposable;
@@ -208,7 +214,7 @@ protected void onStartListening(final Intent intent, final Callback newCallback)
         Log.d(TAG, "onStartListening");
         Log.d(TAG, "onStartCommand called is " + onStartCommandCalled);
         this.callback = newCallback;
-        //TODO check permission. Actually it seems this is already done by the system interface
+        //Regarding check permission: Actually it seems this is already done by the system interface
         // (reports SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS) , but it is
         // explicitly recommended in the SpeechRecognizer documentation. However the way it is in
         // the docs does not work here due to API Level for requested calls (and since Audio
@@ -216,16 +222,33 @@ protected void onStartListening(final Intent intent, final Callback newCallback)
         // https://developer.android.com/reference/android/speech/RecognitionService
         // However even if there is a way for app without permission, not a security issue since
         // stt service notifies user when speech input is started
-        if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.M) {
+        final SharedPreferences preferences = PreferenceManager.getDefaultSharedPreferences(this);
+        final boolean makeSound = preferences.getBoolean(
+                getString(R.string.pref_key_stt_onlisten_sound), true);
+        if (makeSound && android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.M) {
             final String callingPackageName = getPackageManager().getPackagesForUid(
                     newCallback.getCallingUid())[0];
-//Not working this way - check fails even for dicio
-//            int permissionState = PermissionChecker.checkCallingPermission(this,
-//                    "android.permission.RECORD_AUDIO", callingPackageName);
-//            if (permissionState != PermissionChecker.PERMISSION_GRANTED){
-//                callbackErrorReport(SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS);
-//                return;
-//            }
+            final Set<String> exceptedPackages = preferences.getStringSet(
+                    getString(R.string.pref_key_stt_sound_onlisten), new HashSet<>());
+            if (exceptedPackages.contains(callingPackageName)) {
+                Log.i(TAG, "Suppressed stt onbegin sound for package " + callingPackageName);
+            } else {
+                final Uri notification = RingtoneManager.getDefaultUri(
+                        RingtoneManager.TYPE_NOTIFICATION);
+                final Ringtone r = RingtoneManager.getRingtone(this, notification);
+                r.play();
+                final Set<String> knownPackages = preferences.getStringSet(
+                        getString(R.string.pref_key_stt_onlisten_sound_entries), new HashSet<>());
+                if (!knownPackages.contains(callingPackageName)) {
+                    //add to preference entries to offer to user whether it shall be excepted
+                    final HashSet<String> extendedKnownPackages = new HashSet<>(knownPackages);
+                    extendedKnownPackages.add(callingPackageName);
+                    preferences.edit().putStringSet(
+                            getString(R.string.pref_key_stt_onlisten_sound_entries),
+                                    extendedKnownPackages)
+                            .apply();
+                }
+            }
         }
         if (speechService != null && !recogIntentExtrasEquals(lastRequestedIntent, intent)) {
             shutdownSpeechService();
@@ -250,10 +273,6 @@ protected void onStartListening(final Intent intent, final Callback newCallback)
         }
         lastRequestedIntent = intent;
 
-        //TODO remove toast or make different type of speech recognition hint or a preference option
-        // to disable
-        Toast.makeText(this, this.getString(R.string.pref_input_method_vosk),
-                Toast.LENGTH_SHORT).show();
         tryToGetInput();
 
     }
diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml
index 2f35808f8..2557f4295 100644
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
@@ -69,9 +69,17 @@
     <string name="pref_search_engine_duckduckgo" translatable="false">DuckDuckGo</string>
     <string name="pref_weather_default_city">Default city</string>
     <string name="pref_weather_default_city_using_ip_info">Set the city to use for weather when you do not explicitly say one. The current behaviour is to get the location from IP info.</string>
-    <string name="pref_stt_auto_finish_title">Directly send result of speech to text service</string>
+    <string name="pref_stt_auto_finish_title">Directly send result (for STT with dicio UI)</string>
     <string name="pref_stt_auto_finish_summary_on">Automatically send speech result to requesting app when listening finishes</string>
     <string name="pref_stt_auto_finish_summary_off">Wait for manual confirmation before sending speech result to requesting app</string>
+    <string name="pref_stt_onlisten_sound_title">Sound</string>
+    <string name="pref_stt_onlisten_sound_summary_on">Play a sound when speech input starts</string>
+    <string name="pref_stt_onlisten_sound_summary_off">Sound on speech input start is disabled</string>
+    <string name="pref_stt_sound_onlisten_title">Exceptions from sound notification</string>
+    <string name="pref_stt_sound_onlisten_title_dialog">Disable sound per known application</string>
+    <string name="pref_stt_sound_onlisten_title_summary">Choose which applications can request speech input without sound notifications. Requires at least Android 6 (Marshmallow).</string>
+
+
     <string name="eval_missing_permissions">The skill \"%1$s\" needs these permissions to work: %2$s</string>
     <string name="eval_fatal_error">Could not evaluate your request</string>
     <string name="eval_network_error">Network error</string>
diff --git a/app/src/main/res/values/strings_keys.xml b/app/src/main/res/values/strings_keys.xml
index 03bb7adfd..b18008f2c 100644
--- a/app/src/main/res/values/strings_keys.xml
+++ b/app/src/main/res/values/strings_keys.xml
@@ -26,4 +26,9 @@
     <string name="pref_key_weather_default_city" translatable="false">weather_default_city</string>
 
     <string name="pref_key_stt_auto_finish" translatable="false">stt_auto_finish</string>
+
+    <string name="pref_key_stt_onlisten_sound" translatable="false">pref_key_stt_onlisten_sound</string>
+    <string name="pref_key_stt_sound_onlisten" translatable="false">pref_key_stt_sound_onlisten</string>
+    <string name="pref_key_stt_onlisten_sound_entries" translatable="false">pref_key_stt_onlisten_sound_entries</string>
+    <string-array name="pref_stt_empty_array"/>
 </resources>
\ No newline at end of file
diff --git a/app/src/main/res/xml/pref_io.xml b/app/src/main/res/xml/pref_io.xml
index 66ea4efc0..55a32408e 100644
--- a/app/src/main/res/xml/pref_io.xml
+++ b/app/src/main/res/xml/pref_io.xml
@@ -27,13 +27,32 @@
         android:key="@string/pref_key_speech_output_method"
         android:summary="@string/pref_speech_output_method_summary"
         android:title="@string/pref_speech_output_method" />
+    <PreferenceCategory android:title="@string/stt_service">
+        <SwitchPreference
+            android:defaultValue="true"
+            android:key="@string/pref_key_stt_auto_finish"
+            android:title="@string/pref_stt_auto_finish_title"
+            android:summaryOn="@string/pref_stt_auto_finish_summary_on"
+            android:summaryOff="@string/pref_stt_auto_finish_summary_off"
+            android:icon="?attr/iconRecordVoiceOver"
+            />
+        <SwitchPreference
+            android:defaultValue="true"
+            android:key="@string/pref_key_stt_onlisten_sound"
+            android:title="@string/pref_stt_onlisten_sound_title"
+            android:summaryOn="@string/pref_stt_onlisten_sound_summary_on"
+            android:summaryOff="@string/pref_stt_onlisten_sound_summary_off"
+            />
 
-    <SwitchPreference
-        android:defaultValue="true"
-        android:key="@string/pref_key_stt_auto_finish"
-        android:title="@string/pref_stt_auto_finish_title"
-        android:summaryOn="@string/pref_stt_auto_finish_summary_on"
-        android:summaryOff="@string/pref_stt_auto_finish_summary_off"
-        android:icon="?attr/iconRecordVoiceOver"
-        />
+        <org.stypox.dicio.input.stt_service.MakeSoundPreference
+            android:key="@string/pref_key_stt_sound_onlisten"
+            android:title="@string/pref_stt_sound_onlisten_title"
+            android:dialogTitle="@string/pref_stt_sound_onlisten_title_dialog"
+            android:summary="@string/pref_stt_sound_onlisten_title_summary"
+            android:defaultValue="@array/pref_stt_empty_array"
+            android:entries="@array/pref_stt_empty_array"
+            android:entryValues="@array/pref_stt_empty_array"
+            android:dependency="@string/pref_key_stt_onlisten_sound"
+            />
+    </PreferenceCategory>
 </PreferenceScreen>
\ No newline at end of file

From a730a870ef9cdb12b08d6e7aac61d75f8371a863 Mon Sep 17 00:00:00 2001
From: kridneb <109475719+nebkrid@users.noreply.github.com>
Date: Thu, 9 Feb 2023 22:25:38 +0100
Subject: [PATCH 4/5] Renamed SpeechRecogServiceInputDevice

---
 app/src/main/java/org/stypox/dicio/MainActivity.java        | 4 ++--
 ...ceInputDevice.java => AndroidSttServiceInputDevice.java} | 6 +++---
 .../main/java/org/stypox/dicio/input/VoskInputDevice.java   | 2 +-
 .../java/org/stypox/dicio/input/stt_service/SttService.java | 3 +++
 4 files changed, 9 insertions(+), 6 deletions(-)
 rename app/src/main/java/org/stypox/dicio/input/{SpeechRecogServiceInputDevice.java => AndroidSttServiceInputDevice.java} (97%)

diff --git a/app/src/main/java/org/stypox/dicio/MainActivity.java b/app/src/main/java/org/stypox/dicio/MainActivity.java
index 90bc56c4d..1f1b40b38 100644
--- a/app/src/main/java/org/stypox/dicio/MainActivity.java
+++ b/app/src/main/java/org/stypox/dicio/MainActivity.java
@@ -19,7 +19,7 @@
 import org.stypox.dicio.eval.SkillRanker;
 import org.stypox.dicio.input.InputDevice;
 import org.stypox.dicio.input.SpeechInputDevice;
-import org.stypox.dicio.input.SpeechRecogServiceInputDevice;
+import org.stypox.dicio.input.AndroidSttServiceInputDevice;
 import org.stypox.dicio.input.ToolbarInputDevice;
 import org.stypox.dicio.input.VoskInputDevice;
 import org.stypox.dicio.input.stt_service.SttServiceActivity;
@@ -309,7 +309,7 @@ private InputDevice buildPrimaryInputDevice() {
             //TODO make a hint/data privacy warning etc. in preference when this one is chosen that
             // the speech dicio records is given to a third party app according to system
             // settings
-            return new SpeechRecogServiceInputDevice(this);
+            return new AndroidSttServiceInputDevice(this);
         } else { // default
             return new VoskInputDevice(this);
         }
diff --git a/app/src/main/java/org/stypox/dicio/input/SpeechRecogServiceInputDevice.java b/app/src/main/java/org/stypox/dicio/input/AndroidSttServiceInputDevice.java
similarity index 97%
rename from app/src/main/java/org/stypox/dicio/input/SpeechRecogServiceInputDevice.java
rename to app/src/main/java/org/stypox/dicio/input/AndroidSttServiceInputDevice.java
index 13dbd7742..8a15a55b1 100644
--- a/app/src/main/java/org/stypox/dicio/input/SpeechRecogServiceInputDevice.java
+++ b/app/src/main/java/org/stypox/dicio/input/AndroidSttServiceInputDevice.java
@@ -17,10 +17,10 @@
 
 import static org.stypox.dicio.util.StringUtils.isNullOrEmpty;
 
-public class SpeechRecogServiceInputDevice extends SpeechInputDevice
+public class AndroidSttServiceInputDevice extends SpeechInputDevice
         implements android.speech.RecognitionListener {
 
-    public static final String TAG = SpeechRecogServiceInputDevice.class.getSimpleName();
+    public static final String TAG = AndroidSttServiceInputDevice.class.getSimpleName();
     private Activity activity;
 
     private boolean startListeningOnLoaded = false;
@@ -33,7 +33,7 @@ public class SpeechRecogServiceInputDevice extends SpeechInputDevice
     // Exposed methods //
     /////////////////////
 
-    public SpeechRecogServiceInputDevice(final Activity activity) {
+    public AndroidSttServiceInputDevice(final Activity activity) {
         this.activity = activity;
     }
 
diff --git a/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java b/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
index 0c9089694..877a1648a 100644
--- a/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
+++ b/app/src/main/java/org/stypox/dicio/input/VoskInputDevice.java
@@ -38,7 +38,7 @@
 import static org.stypox.dicio.util.LocaleUtils.UnsupportedLocaleException;
 import static org.stypox.dicio.util.LocaleUtils.resolveSupportedLocale;
 
-public class VoskInputDevice extends SpeechRecogServiceInputDevice  {
+public class VoskInputDevice extends AndroidSttServiceInputDevice {
 
     public static final String TAG = VoskInputDevice.class.getSimpleName();
     public static final String MODEL_PATH = "/vosk-model";
diff --git a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
index 4936e720c..60a65f6b3 100644
--- a/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
+++ b/app/src/main/java/org/stypox/dicio/input/stt_service/SttService.java
@@ -512,6 +512,9 @@ protected void shutdownSpeechService() {
     private void stopRecognizer() {
         if (speechService != null) {
             speechService.stop(); //does nothing if recognition is not active.
+//TODO test whether some devices need the shutdown call every time (may hurt performance if so)
+//            speechService.shutdown();
+//            speechService = null;
         } else if (currentlyListening) {
             //(actually currentlyListening should never be true at this point-however does not harm)
             //means SpeechRecognizer.startListening was called, but endOfSpeech not yet

From 9e46d32b92065195a4677e663ed3619715951e0c Mon Sep 17 00:00:00 2001
From: kridneb <109475719+nebkrid@users.noreply.github.com>
Date: Fri, 10 Feb 2023 23:49:10 +0100
Subject: [PATCH 5/5] Updated README.md for clarify different ways of STT
 service in android

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index ec207b1a9..3acb4ceb3 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,10 @@ Currently Dicio answers questions about:
 ## Speech to text
 
 Dicio uses [Vosk](https://github.com/alphacep/vosk-api/) as its speech to text (`STT`) engine. In order to be able to run on every phone small models are employed, weighing `~50MB`. The download from [here](https://alphacephei.com/vosk/models) starts automatically whenever needed, so the app language can be changed seamlessly.
+Dicio also exposes Vosk as a speech-to-text service to the Android system. Other apps can use it in different ways:
+- [Via an intent](https://developer.android.com/reference/android/speech/RecognizerIntent), which brings up a Dicio UI for speech input. The result is then provided to the requesting app (automatically or after the user has confirmed, as set in the Dicio settings).
+- [From background](https://developer.android.com/reference/android/speech/SpeechRecognizer), if the requesting app has the record audio permission and dicio is set as speech input within settings -> apps -> default apps -> assistant (the exact path may vary depending on the Android version)
+- If you want to use it as a "speech keyboard" (IME), you currently still need an app which uses the Android speech-to-text service and provides an IME (e.g. [this one](https://github.com/Kaljurand/K6nele))
 
 ## Contributing
 
@@ -57,7 +61,6 @@ When contributing keep in mind that other people may have **needs** and **views
 If you want to translate Dicio to a new language you have to follow these **steps**:
 <ul><li>
   Translate the <b>strings used inside the app</b> via <a href="https://hosted.weblate.org/engage/dicio-android/">Weblate</a>. If your language isn't already there, add it with <a href="https://hosted.weblate.org/new-lang/dicio-android/strings/">tool -> start new translation</a>.
-  </br>
   <a href="https://hosted.weblate.org/engage/dicio-android/">
   <img src="https://hosted.weblate.org/widgets/dicio-android/-/287x66-grey.png" alt="Translation status" />
   </a>