Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 5 additions & 14 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,22 +228,13 @@ static int init_audio_converter(struct XavReader *xav_reader) {
}

enum AVSampleFormat out_sample_fmt;
if (strcmp(xav_reader->out_format, "u8") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_U8;
} else if (strcmp(xav_reader->out_format, "s16") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S16;
} else if (strcmp(xav_reader->out_format, "s32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S32;
} else if (strcmp(xav_reader->out_format, "s64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S64;
} else if (strcmp(xav_reader->out_format, "f32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_FLT;
} else if (strcmp(xav_reader->out_format, "f64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_DBL;
} else if (strcmp(xav_reader->out_format, "nil") == 0) {
if (strcmp(xav_reader->out_format, "nil") == 0) {
out_sample_fmt = av_get_alt_sample_fmt(xav_reader->reader->c->sample_fmt, 0);
} else {
return -1;
out_sample_fmt = av_get_sample_fmt(xav_reader->out_format);
if (out_sample_fmt == AV_SAMPLE_FMT_NONE) {
return -1;
}
}

struct ChannelLayout in_chlayout, out_chlayout;
Expand Down
7 changes: 0 additions & 7 deletions lib/xav/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,13 @@ defmodule Xav.Decoder do
:ok

{:ok, {data, format, width, height, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, width, height, pts)}

# Sometimes, audio converter might not return data immediately.
{:ok, {"", _format, _samples, _pts}} ->
:ok

{:ok, {data, format, samples, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, samples, pts)}

{:error, _reason} = error ->
Expand Down Expand Up @@ -151,9 +149,4 @@ defmodule Xav.Decoder do
{:error, reason} -> raise "Failed to flush decoder: #{inspect(reason)}"
end
end

# Use the same formats as Nx
defp normalize_format(:flt), do: :f32
defp normalize_format(:dbl), do: :f64
defp normalize_format(other), do: other
end
29 changes: 21 additions & 8 deletions lib/xav/frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@ defmodule Xav.Frame do

@typedoc """
Possible audio sample formats.

To get the complete list of sample formats, check `Xav.sample_formats/0`.
"""
@type audio_format() :: :u8 | :s16 | :s32 | :s64 | :f32 | :f64
@type audio_format() :: atom()

@typedoc """
Possible video frame formats.

The accepted formats are all `ffmpeg` pixel formats. For a complete list run:

```sh
ffmpeg -pix_fmts
```
To get the complete list of pixel formats, check `Xav.pixel_formats/0`.

An example of a pixel format is `:rgb24`.
"""
Expand Down Expand Up @@ -77,16 +75,31 @@ defmodule Xav.Frame do
Converts a frame to an Nx tensor.

In case of a video frame, dimension names of the newly created tensor are `[:height, :width, :channels]`.

For video frames, the only supported pixel formats are:
* `:rgb24`
* `:bgr24`
"""
@spec to_nx(t()) :: Nx.Tensor.t()
def to_nx(%__MODULE__{type: :video} = frame) do
def to_nx(%__MODULE__{type: :video, format: format} = frame)
when format in [:rgb24, :bgr24] do
frame.data
|> Nx.from_binary(:u8)
|> Nx.reshape({frame.height, frame.width, 3}, names: [:height, :width, :channels])
end

def to_nx(%__MODULE__{type: :audio} = frame) do
Nx.from_binary(frame.data, frame.format)
Nx.from_binary(frame.data, normalize_format(frame.format))
end

# Translates a frame's sample-format atom into the type atom handed to
# `Nx.from_binary/2` by the audio clause of `to_nx/1`.
#
# `:flt`/`:dbl` and the `p`-suffixed names appear to follow FFmpeg's sample
# format naming (the `p` variants presumably denote planar layouts - TODO confirm).
# Only the atom is rewritten here; the frame data itself is not touched.
defp normalize_format(:flt), do: :f32
defp normalize_format(:fltp), do: :f32
defp normalize_format(:dbl), do: :f64
defp normalize_format(:dblp), do: :f64
defp normalize_format(:u8p), do: :u8
defp normalize_format(:s16p), do: :s16
defp normalize_format(:s32p), do: :s32
defp normalize_format(:s64p), do: :s64
# Any other atom is passed through unchanged.
defp normalize_format(format), do: format
end
end
8 changes: 3 additions & 5 deletions lib/xav/reader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ defmodule Xav.Reader do
Audio/video file reader.
"""

@audio_out_formats [:u8, :s16, :s32, :s64, :f32, :f64]

@reader_options_schema [
read: [
type: {:in, [:audio, :video]},
Expand All @@ -17,10 +15,10 @@ defmodule Xav.Reader do
doc: "Whether the path points to the camera"
],
out_format: [
type: {:in, @audio_out_formats},
type: :atom,
doc: """
The output format of the audio samples. It should be one of
the following values: `#{Enum.join(@audio_out_formats, ", ")}`.
The output format of the audio samples. For a list of available
sample formats check `Xav.sample_formats/0`.

For video samples, it is always `:rgb24`.
"""
Expand Down
2 changes: 1 addition & 1 deletion test/decoder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ defmodule Xav.DecoderTest do
test "audio" do
decoder = Xav.Decoder.new(:opus)

assert {:ok, %Xav.Frame{data: data, samples: 960, pts: 0, format: :f32}} =
assert {:ok, %Xav.Frame{data: data, samples: 960, pts: 0, format: :flt}} =
Xav.Decoder.decode(decoder, @opus_frame)

assert byte_size(data) == 7680
Expand Down
2 changes: 1 addition & 1 deletion test/reader_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ defmodule Xav.ReaderTest do
Xav.Reader.stream!(path,
read: :audio,
out_channels: 1,
out_format: :f32,
out_format: :flt,
out_sample_rate: 16_000
)
|> Enum.map(&Xav.Frame.to_nx(&1))
Expand Down