diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
new file mode 100644
index 0000000..1d888b8
--- /dev/null
+++ b/.github/workflows/lint.yml
@@ -0,0 +1,68 @@
+name: Lint
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+  # Allow triggering the workflow manually
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: "${{ github.workflow }}-${{ github.ref }}"
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+env:
+  CUDA_VERSION: "11.7"
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          submodules: "recursive"
+          fetch-depth: 1
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+          update-environment: true
+
+      - name: Upgrade pip
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+
+      - name: Install DeepSeek-VL
+        env:
+          USE_FP16: "OFF"
+          TORCH_CUDA_ARCH_LIST: "Auto"
+        run: |
+          python -m pip install torch numpy pybind11
+          python -m pip install -vvv --no-build-isolation --editable '.[lint]'
+
+      - name: pre-commit
+        run: |
+          make pre-commit
+
+      - name: ruff
+        run: |
+          make ruff
+
+      - name: flake8
+        run: |
+          make flake8
+
+      - name: isort and black
+        run: |
+          make py-format
+
+      - name: addlicense
+        run: |
+          make addlicense
\ No newline at end of file
diff --git a/README.md b/README.md
index 40e8a43..98b60a1 100644
--- a/README.md
+++ b/README.md
@@ -132,18 +132,34 @@ tokenizer = vl_chat_processor.tokenizer
 vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
 vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
 
+## single image conversation example
 conversation = [
     {
         "role": "User",
         "content": "<image_placeholder>Describe each stage of this image.",
-        "images": ["./images/training_pipelines.jpg"]
+        "images": ["./images/training_pipelines.jpg"],
     },
-    {
-        "role": "Assistant",
-        "content": ""
-    }
+    {"role": "Assistant", "content": ""},
 ]
 
+## multiple images (or in-context learning) conversation example
+# conversation = [
+#     {
+#         "role": "User",
+#         "content": "<image_placeholder>A dog wearing nothing in the foreground, "
+#         "<image_placeholder>a dog wearing a santa hat, "
+#         "<image_placeholder>a dog wearing a wizard outfit, and "
+#         "<image_placeholder>what's the dog wearing?",
+#         "images": [
+#             "images/dog_a.png",
+#             "images/dog_b.png",
+#             "images/dog_c.png",
+#             "images/dog_d.png",
+#         ],
+#     },
+#     {"role": "Assistant", "content": ""}
+# ]
+
 # load images and prepare for inputs
 pil_images = load_pil_images(conversation)
 prepare_inputs = vl_chat_processor(
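A note on the multi-image example added to README.md above: the processor matches each entry in `images` to an `<image_placeholder>` tag in the user turn's `content`, so the two counts must agree. A minimal sketch of a sanity check one could run before preprocessing — `check_placeholders` is a hypothetical helper, not part of this repo or PR:

```python
# Hypothetical helper (not in this repo): verify each user turn carries
# one <image_placeholder> tag per attached image before preprocessing.
def check_placeholders(conversation: list[dict]) -> None:
    for turn in conversation:
        n_images = len(turn.get("images", []))
        n_tags = turn.get("content", "").count("<image_placeholder>")
        if n_tags != n_images:
            raise ValueError(
                f"{turn['role']} turn has {n_tags} placeholder tag(s) "
                f"but {n_images} image(s)"
            )
```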
diff --git a/images/dog_a.png b/images/dog_a.png
new file mode 100644
index 0000000..956caab
Binary files /dev/null and b/images/dog_a.png differ
diff --git a/images/dog_b.png b/images/dog_b.png
new file mode 100644
index 0000000..221f1d1
Binary files /dev/null and b/images/dog_b.png differ
diff --git a/images/dog_c.png b/images/dog_c.png
new file mode 100644
index 0000000..283a182
Binary files /dev/null and b/images/dog_c.png differ
diff --git a/images/dog_d.png b/images/dog_d.png
new file mode 100644
index 0000000..d9ff5d6
Binary files /dev/null and b/images/dog_d.png differ
diff --git a/inference.py b/inference.py
index 94da96e..d51ea3a 100644
--- a/inference.py
+++ b/inference.py
@@ -33,6 +33,7 @@ vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
 )
 vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()
 
+# single image conversation example
 conversation = [
     {
         "role": "User",
@@ -42,6 +43,23 @@ conversation = [
     {"role": "Assistant", "content": ""},
 ]
 
+# multiple images (or in-context learning) conversation example
+# conversation = [
+#     {
+#         "role": "User",
+#         "content": "<image_placeholder>A dog wearing nothing in the foreground, "
+#         "<image_placeholder>a dog wearing a santa hat, "
+#         "<image_placeholder>a dog wearing a wizard outfit, and "
+#         "<image_placeholder>what's the dog wearing?",
+#         "images": [
+#             "images/dog_a.png",
+#             "images/dog_b.png",
+#             "images/dog_c.png",
+#             "images/dog_d.png",
+#         ],
+#     },
+#     {"role": "Assistant", "content": ""}
+# ]
 
 # load images and prepare for inputs
 pil_images = load_pil_images(conversation)
diff --git a/pyproject.toml b/pyproject.toml
index ad5905b..876d479 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,20 @@ gradio = [
     "markdown==3.4.1",
     "SentencePiece==0.1.96"
 ]
+lint = [
+    "isort",
+    "black[jupyter] >= 22.6.0",
+    "pylint[spelling] >= 2.15.0",
+    "flake8",
+    "flake8-bugbear",
+    "flake8-comprehensions",
+    "flake8-docstrings",
+    "flake8-pyi",
+    "flake8-simplify",
+    "ruff",
+    "pyenchant",
+    "pre-commit",
+]
 
 [tool.setuptools]
 packages = {find = {exclude = ["images"]}}
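For reviewers who want to exercise the new multi-image path end to end, here is a minimal sketch. The setup and preprocessing lines mirror the snippet the README diff touches; everything from `prepare_inputs_embeds` onward follows the repo's existing single-image inference flow, and the checkpoint name and generation kwargs are illustrative assumptions rather than settings this PR pins down:

```python
import torch
from transformers import AutoModelForCausalLM

from deepseek_vl.models import MultiModalityCausalLM, VLChatProcessor
from deepseek_vl.utils.io import load_pil_images

model_path = "deepseek-ai/deepseek-vl-7b-chat"  # assumed checkpoint
vl_chat_processor: VLChatProcessor = VLChatProcessor.from_pretrained(model_path)
tokenizer = vl_chat_processor.tokenizer

vl_gpt: MultiModalityCausalLM = AutoModelForCausalLM.from_pretrained(
    model_path, trust_remote_code=True
)
vl_gpt = vl_gpt.to(torch.bfloat16).cuda().eval()

# multi-image turn: one <image_placeholder> tag per attached image
conversation = [
    {
        "role": "User",
        "content": "<image_placeholder>A dog wearing nothing in the foreground, "
        "<image_placeholder>a dog wearing a santa hat, "
        "<image_placeholder>a dog wearing a wizard outfit, and "
        "<image_placeholder>what's the dog wearing?",
        "images": [
            "images/dog_a.png",
            "images/dog_b.png",
            "images/dog_c.png",
            "images/dog_d.png",
        ],
    },
    {"role": "Assistant", "content": ""},
]

# load images and batch them together with the text
pil_images = load_pil_images(conversation)
prepare_inputs = vl_chat_processor(
    conversations=conversation, images=pil_images, force_batchify=True
).to(vl_gpt.device)

# embed the images into the language model's input space, then decode
inputs_embeds = vl_gpt.prepare_inputs_embeds(**prepare_inputs)
outputs = vl_gpt.language_model.generate(
    inputs_embeds=inputs_embeds,
    attention_mask=prepare_inputs.attention_mask,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=512,
    do_sample=False,
    use_cache=True,
)
print(tokenizer.decode(outputs[0].cpu().tolist(), skip_special_tokens=True))
```

The in-context-learning variant uses the same call pattern; only the conversation content and the image list change.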