Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions _modules/QEfficient/cloud/execute.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@
<p class="caption" role="heading"><span class="caption-text">QAIC Finetune</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html">Finetune Infra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#expose-qaic-accelerator-devices">Expose QAIC accelerator devices</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#start-docker-container">Start Docker container</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Blogs</span></p>
<ul>
Expand Down Expand Up @@ -283,16 +281,20 @@ <h1>Source code for QEfficient.cloud.execute</h1><div class="highlight"><pre>
</div>
</section>
</div>
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<span class="rst-current-version" data-toggle="rst-current-version">
Version: Main
<span class="fa fa-caret-down"></span>
</span>
<div class="rst-other-versions">
Versions
<dl>
<dd><a href="../index.html">main</a></dd>
<dd><a href="release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../index.html">main</a></dd>
<dd><a href="../../../source/release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../source/release/v1.19/index.html">release/v1.19</a></dd>
<dd><a href="../../../source/release/v1.20/index.html">release/v1.20</a></dd>
<dd><a href="../../../source/release/v1.21/index.html">release/v1.21</a></dd>
<dd><a href="../../../source/release/v1.21.6/index.html">release/v1.21.6</a></dd>
</dl>
</div>
</div><script>
Expand Down
12 changes: 7 additions & 5 deletions _modules/QEfficient/cloud/export.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@
<p class="caption" role="heading"><span class="caption-text">QAIC Finetune</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html">Finetune Infra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#expose-qaic-accelerator-devices">Expose QAIC accelerator devices</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#start-docker-container">Start Docker container</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Blogs</span></p>
<ul>
Expand Down Expand Up @@ -295,16 +293,20 @@ <h1>Source code for QEfficient.cloud.export</h1><div class="highlight"><pre>
</div>
</section>
</div>
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<span class="rst-current-version" data-toggle="rst-current-version">
Version: Main
<span class="fa fa-caret-down"></span>
</span>
<div class="rst-other-versions">
Versions
<dl>
<dd><a href="../index.html">main</a></dd>
<dd><a href="release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../index.html">main</a></dd>
<dd><a href="../../../source/release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../source/release/v1.19/index.html">release/v1.19</a></dd>
<dd><a href="../../../source/release/v1.20/index.html">release/v1.20</a></dd>
<dd><a href="../../../source/release/v1.21/index.html">release/v1.21</a></dd>
<dd><a href="../../../source/release/v1.21.6/index.html">release/v1.21.6</a></dd>
</dl>
</div>
</div><script>
Expand Down
12 changes: 7 additions & 5 deletions _modules/QEfficient/cloud/finetune.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@
<p class="caption" role="heading"><span class="caption-text">QAIC Finetune</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html">Finetune Infra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#expose-qaic-accelerator-devices">Expose QAIC accelerator devices</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#start-docker-container">Start Docker container</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Blogs</span></p>
<ul>
Expand Down Expand Up @@ -578,16 +576,20 @@ <h1>Source code for QEfficient.cloud.finetune</h1><div class="highlight"><pre>
</div>
</section>
</div>
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<span class="rst-current-version" data-toggle="rst-current-version">
Version: Main
<span class="fa fa-caret-down"></span>
</span>
<div class="rst-other-versions">
Versions
<dl>
<dd><a href="../index.html">main</a></dd>
<dd><a href="release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../index.html">main</a></dd>
<dd><a href="../../../source/release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../source/release/v1.19/index.html">release/v1.19</a></dd>
<dd><a href="../../../source/release/v1.20/index.html">release/v1.20</a></dd>
<dd><a href="../../../source/release/v1.21/index.html">release/v1.21</a></dd>
<dd><a href="../../../source/release/v1.21.6/index.html">release/v1.21.6</a></dd>
</dl>
</div>
</div><script>
Expand Down
34 changes: 23 additions & 11 deletions _modules/QEfficient/cloud/infer.html
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@
<p class="caption" role="heading"><span class="caption-text">QAIC Finetune</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html">Finetune Infra</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#expose-qaic-accelerator-devices">Expose QAIC accelerator devices</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../source/finetune.html#start-docker-container">Start Docker container</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Blogs</span></p>
<ul>
Expand Down Expand Up @@ -260,6 +258,7 @@ <h1>Source code for QEfficient.cloud.infer</h1><div class="highlight"><pre>
<span class="n">qnn_config</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
<span class="n">trust_remote_code</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">ccl_enabled</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">bool</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="n">use_onnx_subfunctions</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">False</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
Expand Down Expand Up @@ -326,6 +325,8 @@ <h1>Source code for QEfficient.cloud.infer</h1><div class="highlight"><pre>
<span class="sd"> Path of the QNN Config parameters file. Default is None.</span>
<span class="sd"> trust_remote_code : bool, optional</span>
<span class="sd"> If True, trusts remote code when loading models from HuggingFace. Default is False.</span>
<span class="sd"> use_onnx_subfunctions : bool, optional</span>
<span class="sd"> Enables ONNX subfunctions during export and compile. Default is False.</span>
<span class="sd"> **kwargs :</span>
<span class="sd"> Additional compiler options passed directly to `qaic-compile`. Any flag supported by</span>
<span class="sd"> `qaic-compile` can be passed. Parameters are converted to flags as follows:</span>
Expand All @@ -352,12 +353,10 @@ <h1>Source code for QEfficient.cloud.infer</h1><div class="highlight"><pre>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">cache_dir</span> <span class="o">=</span> <span class="n">check_and_assign_cache_dir</span><span class="p">(</span><span class="n">local_model_dir</span><span class="p">,</span> <span class="n">cache_dir</span><span class="p">)</span>

<span class="k">if</span> <span class="s2">&quot;--mxfp6&quot;</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">:</span>
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">mxfp6</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;mxfp6 is going to be deprecated in a future release, use -mxfp6_matmul instead.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;--mxint8&quot;</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span><span class="p">:</span>
<span class="k">if</span> <span class="n">args</span><span class="o">.</span><span class="n">mxint8</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;mxint8 is going to be deprecated in a future release, use -mxint8_kv_cache instead.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;--mxfp6&quot;</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span> <span class="ow">and</span> <span class="n">mxfp6</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;mxfp6 is going to be deprecated in a future release, use -mxfp6_matmul instead.&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="s2">&quot;--mxint8&quot;</span> <span class="ow">in</span> <span class="n">sys</span><span class="o">.</span><span class="n">argv</span> <span class="ow">and</span> <span class="n">mxint8</span><span class="p">:</span>
<span class="n">logger</span><span class="o">.</span><span class="n">warning</span><span class="p">(</span><span class="s2">&quot;mxint8 is going to be deprecated in a future release, use -mxint8_kv_cache instead.&quot;</span><span class="p">)</span>

<span class="n">qaic_config</span> <span class="o">=</span> <span class="p">{</span><span class="s2">&quot;ccl_enabled&quot;</span><span class="p">:</span> <span class="kc">True</span><span class="p">}</span> <span class="k">if</span> <span class="n">ccl_enabled</span> <span class="k">else</span> <span class="kc">None</span>

Expand Down Expand Up @@ -401,6 +400,7 @@ <h1>Source code for QEfficient.cloud.infer</h1><div class="highlight"><pre>
<span class="n">allow_mxint8_mdp_io</span><span class="o">=</span><span class="n">allow_mxint8_mdp_io</span><span class="p">,</span>
<span class="n">enable_qnn</span><span class="o">=</span><span class="n">enable_qnn</span><span class="p">,</span>
<span class="n">qnn_config</span><span class="o">=</span><span class="n">qnn_config</span><span class="p">,</span>
<span class="n">use_onnx_subfunctions</span><span class="o">=</span><span class="n">use_onnx_subfunctions</span><span class="p">,</span>
<span class="o">**</span><span class="n">kwargs</span><span class="p">,</span>
<span class="p">)</span>

Expand Down Expand Up @@ -503,6 +503,14 @@ <h1>Source code for QEfficient.cloud.infer</h1><div class="highlight"><pre>
<span class="n">action</span><span class="o">=</span><span class="s2">&quot;store_true&quot;</span><span class="p">,</span>
<span class="n">help</span><span class="o">=</span><span class="s2">&quot;Compress Present/Past KV to MXINT8 using CustomIO config, default is False&quot;</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span>
<span class="s2">&quot;--use-onnx-subfunctions&quot;</span><span class="p">,</span>
<span class="s2">&quot;--use_onnx_subfunctions&quot;</span><span class="p">,</span>
<span class="n">dest</span><span class="o">=</span><span class="s2">&quot;use_onnx_subfunctions&quot;</span><span class="p">,</span>
<span class="n">action</span><span class="o">=</span><span class="s2">&quot;store_true&quot;</span><span class="p">,</span>
<span class="n">default</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span>
<span class="n">help</span><span class="o">=</span><span class="s2">&quot;Enable ONNX subfunctions during export/compile.&quot;</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">parser</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span>
<span class="s2">&quot;--num_cores&quot;</span><span class="p">,</span> <span class="s2">&quot;--num-cores&quot;</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">,</span> <span class="n">help</span><span class="o">=</span><span class="s2">&quot;Number of cores to compile on Cloud AI 100&quot;</span>
<span class="p">)</span>
Expand Down Expand Up @@ -610,16 +618,20 @@ <h1>Source code for QEfficient.cloud.infer</h1><div class="highlight"><pre>
</div>
</section>
</div>
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<div class="rst-versions" data-toggle="rst-versions" role="note" aria-label="versions">
<span class="rst-current-version" data-toggle="rst-current-version">
Version: Main
<span class="fa fa-caret-down"></span>
</span>
<div class="rst-other-versions">
Versions
<dl>
<dd><a href="../index.html">main</a></dd>
<dd><a href="release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../index.html">main</a></dd>
<dd><a href="../../../source/release/v1.18/index.html">release/v1.18</a></dd>
<dd><a href="../../../source/release/v1.19/index.html">release/v1.19</a></dd>
<dd><a href="../../../source/release/v1.20/index.html">release/v1.20</a></dd>
<dd><a href="../../../source/release/v1.21/index.html">release/v1.21</a></dd>
<dd><a href="../../../source/release/v1.21.6/index.html">release/v1.21.6</a></dd>
</dl>
</div>
</div><script>
Expand Down
Loading