From cc36ba74a786a643b75ef92d0f49f173ed64490e Mon Sep 17 00:00:00 2001 From: azayz Date: Tue, 4 Apr 2023 17:24:16 +0100 Subject: [PATCH 1/3] docs: add audio2text showcase Signed-off-by: azayz --- docs/tutorial/audio2text/audio2text.md | 77 +++++++++++++++++++++++ docs/tutorial/audio2text/requirements.txt | 1 + 2 files changed, 78 insertions(+) create mode 100644 docs/tutorial/audio2text/audio2text.md create mode 100644 docs/tutorial/audio2text/requirements.txt diff --git a/docs/tutorial/audio2text/audio2text.md b/docs/tutorial/audio2text/audio2text.md new file mode 100644 index 00000000000..fd913785aa1 --- /dev/null +++ b/docs/tutorial/audio2text/audio2text.md @@ -0,0 +1,77 @@ +# Creating an Audio to Text App with Jina and DocArray V2 + +This is how you can build an Audio to Text app using both Jina and DocarrayV2 + +We will use: + +* DocarrayV2: Helps us to load and preprocess multimodal data such as image, text and audio in our case +* Jina: Helps us serve the model quickly and create a client + +First let's install requirements + +## 💾 Installation + +```bash +pip install -r requirments.txt +``` + +Now let's import necessary libraries + + +```python +import whisper +from jina import Executor, requests, Deployment +from docarray import BaseDoc, DocArray +from docarray.typing import AudioUrl +``` + +Now we need to create the schema of our input and output documents. 
Since our input is an audio file, +our input schema should contain an AudioUrl like the following: + +```python +class AudioURL(BaseDoc): + audio: AudioUrl +``` + +As for the output schema we would like to receive the transcribed text so we use the following: + +```python +class Response(BaseDoc): + text: str +``` + +Now it's time to create our model. We wrap our model in a Jina Executor; this allows us to serve the model +later on and expose its /transcribe endpoint: + +```python +class WhisperExecutor(Executor): + def __init__(self, device: str, *args, **kwargs): + super().__init__(*args, **kwargs) + self.model = whisper.load_model("medium.en", device=device) + + @requests + def transcribe(self, docs: DocArray[AudioURL], **kwargs) -> DocArray[Response]: + response_docs = DocArray[Response]() + for doc in docs: + transcribed_text = self.model.transcribe(str(doc.audio))['text'] + response_docs.append(Response(text=transcribed_text)) + return response_docs +``` + +Now we can leverage the Deployment object provided by Jina to use this executor, +then we send a request to the /transcribe endpoint. Here we are using an audio file previously recorded +that says, "A man reading a book", saved under resources/audio.mp3, but feel free to use your own audio. + +```python +with Deployment( + uses=WhisperExecutor, uses_with={'device': "cpu"}, port=12349, timeout_ready=-1 +) as d: + docs = d.post( + on='/transcribe', + inputs=[AudioURL(audio='resources/audio.mp3')], + return_type=DocArray[Response], + ) + print(docs[0].text) +``` + +And we get the transcribed result! 
\ No newline at end of file diff --git a/docs/tutorial/audio2text/requirements.txt b/docs/tutorial/audio2text/requirements.txt new file mode 100644 index 00000000000..03e394c0d5f --- /dev/null +++ b/docs/tutorial/audio2text/requirements.txt @@ -0,0 +1 @@ +openai-whisper==20230308 \ No newline at end of file From ace14239ab867ce8b001b7f333d2e6c28aa1ff2a Mon Sep 17 00:00:00 2001 From: azayz Date: Wed, 5 Apr 2023 08:07:27 +0100 Subject: [PATCH 2/3] refactor: move to how to Signed-off-by: azayz --- docs/{tutorial/audio2text => how_to}/audio2text.md | 12 +++++++----- docs/tutorial/audio2text/requirements.txt | 1 - 2 files changed, 7 insertions(+), 6 deletions(-) rename docs/{tutorial/audio2text => how_to}/audio2text.md (88%) delete mode 100644 docs/tutorial/audio2text/requirements.txt diff --git a/docs/tutorial/audio2text/audio2text.md b/docs/how_to/audio2text.md similarity index 88% rename from docs/tutorial/audio2text/audio2text.md rename to docs/how_to/audio2text.md index fd913785aa1..5d7a03b5efa 100644 --- a/docs/tutorial/audio2text/audio2text.md +++ b/docs/how_to/audio2text.md @@ -12,7 +12,9 @@ First let's install requirements ## 💾 Installation ```bash -pip install -r requirments.txt +pip install transformers +pip install openai-whisper +pip install jina ``` Now let's import necessary libraries @@ -21,7 +23,7 @@ Now let's import necessary libraries ```python import whisper from jina import Executor, requests, Deployment -from docarray import BaseDoc, DocArray +from docarray import BaseDoc, DocList from docarray.typing import AudioUrl ``` @@ -50,8 +52,8 @@ class WhisperExecutor(Executor): self.model = whisper.load_model("medium.en", device=device) @requests - def transcribe(self, docs: DocArray[AudioURL], **kwargs) -> DocArray[Response]: - response_docs = DocArray[Response]() + def transcribe(self, docs: DocList[AudioURL], **kwargs) -> DocList[Response]: + response_docs = DocList[Response]() for doc in docs: transcribed_text = 
self.model.transcribe(str(doc.audio))['text'] response_docs.append(Response(text=transcribed_text)) @@ -69,7 +71,7 @@ with Deployment( docs = d.post( on='/transcribe', inputs=[AudioURL(audio='resources/audio.mp3')], - return_type=DocArray[Response], + return_type=DocList[Response], ) print(docs[0].text) ``` diff --git a/docs/tutorial/audio2text/requirements.txt b/docs/tutorial/audio2text/requirements.txt deleted file mode 100644 index 03e394c0d5f..00000000000 --- a/docs/tutorial/audio2text/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -openai-whisper==20230308 \ No newline at end of file From 5509cfcf03e0c80a7f6297b86f357631ba8b8017 Mon Sep 17 00:00:00 2001 From: azayz Date: Wed, 5 Apr 2023 09:10:03 +0100 Subject: [PATCH 3/3] fix: add line to mkdocs Signed-off-by: azayz --- docs/how_to/audio2text.md | 2 +- mkdocs.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/how_to/audio2text.md b/docs/how_to/audio2text.md index 5d7a03b5efa..fcec869ce0f 100644 --- a/docs/how_to/audio2text.md +++ b/docs/how_to/audio2text.md @@ -1,6 +1,6 @@ # Creating an Audio to Text App with Jina and DocArray V2 -This is how you can build an Audio to Text app using both Jina and DocarrayV2 +This is how you can build an Audio to Text app using Jina, Docarray and Whisper We will use: diff --git a/mkdocs.yml b/mkdocs.yml index 9e4209520ef..ca72a966197 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,5 +82,6 @@ nav: - how_to/add_doc_index.md - how_to/multimodal_training_and_serving.md - how_to/optimize_performance_with_id_generation.md + - how_to/audio2text.md - ... - Contributing: CONTRIBUTING.md