Skip to content

Commit 1052903

Browse files
Merge branch 'main' into export-D83859440
2 parents 7ff6e63 + 1a8acf6 commit 1052903

162 files changed

Lines changed: 7601 additions & 958 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
53a2908a10f414a2f85caa06703a26a40e873869
1+
cf9d09490c7f6685ec68d5db3acf2e0d73c54d00

.ci/scripts/setup-samsung-linux-deps.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ download_ai_lite_core() {
1313
API_BASE="https://soc-developer.semiconductor.samsung.com/api/v1/resource/ai-litecore/download"
1414
API_KEY=$SAMSUNG_AI_LITECORE_KEY
1515

16-
VERSION="0.5"
16+
VERSION="0.7"
1717
OS_NAME="Ubuntu 22.04"
1818
OUT_FILE="/tmp/exynos-ai-litecore-v${VERSION}.tar.gz"
1919
TARGET_PATH="/tmp/exynos_ai_lite_core"
@@ -62,7 +62,7 @@ install_enn_backend() {
6262
export PYTHONPATH=${PYTHONPATH:-}:${EXECUTORCH_ROOT}/..
6363
}
6464

65-
AI_LITE_CORE_VERSION=0.5.0
65+
AI_LITE_CORE_VERSION=0.7.0
6666

6767
download_ai_lite_core ${AI_LITE_CORE_VERSION}
6868
install_enn_backend

.ci/scripts/test_llava.sh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ EXECUTORCH_COMMON_CMAKE_ARGS=" \
3838
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
3939
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
4040
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
41+
-DEXECUTORCH_BUILD_EXTENSION_NAMED_DATA_MAP=ON \
4142
-DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
4243
-DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
4344
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \

.github/workflows/cuda.yml

Lines changed: 130 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -87,8 +87,8 @@ jobs:
8787
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
8888
PYTHON_EXECUTABLE=python source .ci/scripts/test_model.sh "${{ matrix.model }}" cmake cuda
8989
90-
test-voxtral-cuda-e2e:
91-
name: test-voxtral-cuda-e2e
90+
export-voxtral-cuda-artifact:
91+
name: export-voxtral-cuda-artifact
9292
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
9393
permissions:
9494
id-token: write
@@ -104,6 +104,7 @@ jobs:
104104
gpu-arch-version: 12.6
105105
use-custom-docker-registry: false
106106
submodules: recursive
107+
upload-artifact: voxtral-cuda-export
107108
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
108109
script: |
109110
set -eux
@@ -118,6 +119,7 @@ jobs:
118119
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
119120
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
120121
pip install mistral-common librosa
122+
pip list
121123
echo "::endgroup::"
122124
123125
echo "::group::Export Voxtral"
@@ -129,9 +131,58 @@ jobs:
129131
--device cuda \
130132
--max_seq_len 1024 \
131133
--output_dir ./
134+
python -m executorch.extension.audio.mel_spectrogram \
135+
--feature_size 128 \
136+
--stack_output \
137+
--max_audio_len 300 \
138+
--output_file voxtral_preprocessor.pte
139+
140+
test -f model.pte
141+
test -f aoti_cuda_blob.ptd
142+
test -f voxtral_preprocessor.pte
132143
echo "::endgroup::"
133144
134-
echo "::group::Build Voxtral Runner"
145+
echo "::group::Store Voxtral Artifacts"
146+
mkdir -p "${RUNNER_ARTIFACT_DIR}"
147+
cp model.pte "${RUNNER_ARTIFACT_DIR}/"
148+
cp aoti_cuda_blob.ptd "${RUNNER_ARTIFACT_DIR}/"
149+
cp voxtral_preprocessor.pte "${RUNNER_ARTIFACT_DIR}/"
150+
ls -al "${RUNNER_ARTIFACT_DIR}"
151+
echo "::endgroup::"
152+
153+
benchmark-voxtral-cuda:
154+
name: benchmark-voxtral-cuda
155+
needs: export-voxtral-cuda-artifact
156+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
157+
permissions:
158+
id-token: write
159+
contents: read
160+
strategy:
161+
fail-fast: false
162+
with:
163+
timeout: 90
164+
runner: linux.g5.4xlarge.nvidia.gpu
165+
gpu-arch-type: cuda
166+
gpu-arch-version: 12.6
167+
use-custom-docker-registry: false
168+
submodules: recursive
169+
download-artifact: voxtral-cuda-export
170+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
171+
script: |
172+
set -eux
173+
174+
echo "::group::Setup ExecuTorch Requirements"
175+
CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
176+
pip list
177+
echo "::endgroup::"
178+
179+
echo "::group::Prepare Voxtral Artifacts"
180+
cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
181+
cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
182+
ls -al model.pte aoti_cuda_blob.ptd
183+
echo "::endgroup::"
184+
185+
echo "::group::Build Voxtral Benchmark"
135186
cmake -DCMAKE_BUILD_TYPE=Release \
136187
-DEXECUTORCH_BUILD_CUDA=ON \
137188
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
@@ -142,31 +193,90 @@ jobs:
142193
cmake --build cmake-out -j$(( $(nproc) - 1 )) --target voxtral_runner
143194
echo "::endgroup::"
144195
196+
echo "::group::Run Voxtral Benchmark"
197+
198+
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
199+
cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd
200+
201+
echo "::endgroup::"
202+
203+
test-voxtral-cuda-e2e:
204+
name: test-voxtral-cuda-e2e
205+
needs: export-voxtral-cuda-artifact
206+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
207+
permissions:
208+
id-token: write
209+
contents: read
210+
strategy:
211+
fail-fast: false
212+
with:
213+
timeout: 90
214+
runner: linux.g5.4xlarge.nvidia.gpu
215+
gpu-arch-type: cuda
216+
gpu-arch-version: 12.6
217+
use-custom-docker-registry: false
218+
submodules: recursive
219+
download-artifact: voxtral-cuda-export
220+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
221+
script: |
222+
set -eux
223+
224+
echo "::group::Setup ExecuTorch Requirements"
225+
CMAKE_ARGS="-DEXECUTORCH_BUILD_CUDA=ON" ./install_requirements.sh
226+
pip list
227+
echo "::endgroup::"
228+
229+
echo "::group::Prepare Voxtral Artifacts"
230+
cp "${RUNNER_ARTIFACT_DIR}/model.pte" .
231+
cp "${RUNNER_ARTIFACT_DIR}/aoti_cuda_blob.ptd" .
232+
cp "${RUNNER_ARTIFACT_DIR}/voxtral_preprocessor.pte" .
233+
TOKENIZER_URL="https://huggingface.co/mistralai/Voxtral-Mini-3B-2507/resolve/main/tekken.json"
234+
curl -L $TOKENIZER_URL -o tekken.json
235+
ls -al model.pte aoti_cuda_blob.ptd voxtral_preprocessor.pte tekken.json
236+
echo "::endgroup::"
237+
238+
echo "::group::Download Test Audio File"
239+
AUDIO_URL="https://github.com/voxserv/audio_quality_testing_samples/raw/refs/heads/master/testaudio/16000/test01_20s.wav"
240+
curl -L $AUDIO_URL -o poem.wav
241+
echo "::endgroup::"
242+
243+
echo "::group::Build Voxtral Runner"
244+
cmake --preset llm \
245+
-DEXECUTORCH_BUILD_CUDA=ON \
246+
-DCMAKE_INSTALL_PREFIX=cmake-out \
247+
-DCMAKE_BUILD_TYPE=Release \
248+
-Bcmake-out -S.
249+
cmake --build cmake-out -j$(( $(nproc) - 1 )) --target install --config Release
250+
251+
cmake -DEXECUTORCH_BUILD_CUDA=ON \
252+
-DCMAKE_BUILD_TYPE=Release \
253+
-Sexamples/models/voxtral \
254+
-Bcmake-out/examples/models/voxtral/
255+
cmake --build cmake-out/examples/models/voxtral --target voxtral_runner --config Release
256+
echo "::endgroup::"
257+
145258
echo "::group::Run Voxtral Runner"
146-
# Capture output and allow exit code 139 if we have the expected printout
147259
set +e
148260
export LD_LIBRARY_PATH=/opt/conda/lib:$LD_LIBRARY_PATH
149-
OUTPUT=$(cmake-out/backends/cuda/voxtral_runner model.pte aoti_cuda_blob.ptd 2>&1)
261+
OUTPUT=$(cmake-out/examples/models/voxtral/voxtral_runner \
262+
--model_path model.pte \
263+
--data_path aoti_cuda_blob.ptd \
264+
--tokenizer_path tekken.json \
265+
--audio_path poem.wav \
266+
--processor_path voxtral_preprocessor.pte \
267+
--temperature 0 2>&1)
150268
EXIT_CODE=$?
151269
set -e
152270
153271
echo "$OUTPUT"
154272
155-
# Check if the output contains "Run latency (ms):"
156-
if echo "$OUTPUT" | grep -q "Run latency (ms):"; then
157-
echo "Found expected output: 'Run latency (ms):'"
158-
if [ $EXIT_CODE -eq 139 ]; then
159-
echo "Exit code 139 (segfault) detected, but passing since we have the expected output"
160-
exit 0
161-
elif [ $EXIT_CODE -ne 0 ]; then
162-
echo "Unexpected exit code: $EXIT_CODE"
163-
exit $EXIT_CODE
164-
else
165-
echo "Command succeeded with exit code 0"
166-
exit 0
167-
fi
168-
else
169-
echo "Expected output 'Run latency (ms):' not found in output"
273+
if ! echo "$OUTPUT" | grep -iq "poem"; then
274+
echo "Expected output 'poem' not found in output"
170275
exit 1
171276
fi
277+
278+
if [ $EXIT_CODE -ne 0 ]; then
279+
echo "Unexpected exit code: $EXIT_CODE"
280+
exit $EXIT_CODE
281+
fi
172282
echo "::endgroup::"

.github/workflows/pull.yml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -935,6 +935,12 @@ jobs:
935935
python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
936936
done
937937
938+
# Test quant models
939+
model_scripts="deeplab_v3 edsr inception_v3 inception_v4 mobilenet_v2 mobilenet_v3 resnet18 resnet50 vit wav2letter"
940+
for m_script in $model_scripts; do
941+
python -m executorch.examples.samsung.scripts.${m_script} -c e9955 -p A8W8
942+
done
943+
938944
# Test ops
939945
python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
940946

0 commit comments

Comments
 (0)