Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ TELEMETRY_ENABLED=true

# Logging
LOG_LEVEL=INFO
# STARFISH_LOCAL_STORAGE_DIR=
# STARFISH_LOCAL_STORAGE_DIR=
JINA_AI_API_KEY=jina_api_key
10 changes: 6 additions & 4 deletions .github/workflows/lint-and-test.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
name: Starfish testing workflow

on:
push:
branches:
- main
- dev
# push:
# branches:
# - main
# - dev
pull_request:
branches:
- main
- dev
- '!f/pypi_release'

jobs:
test-integration:
if: github.event.pull_request.head.ref != 'f/pypi_release'
runs-on: ubuntu-latest

steps:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ on:
push:
tags:
- 'v*'
# branches:
# - 'main'

jobs:
deploy:
Expand All @@ -12,6 +14,14 @@ jobs:
- uses: actions/checkout@v3
with:
fetch-depth: 0
# Refuse to publish unless the pushed tag points at a commit that is already
# part of main. actions/checkout leaves a tag checkout in detached-HEAD state
# with no local branches, so the ancestry check must be made against the
# remote-tracking ref (origin/main); this relies on the fetch-depth: 0
# checkout above having fetched full history.
- name: Verify tag is on main branch
  run: |
    TAG_NAME="${GITHUB_REF#refs/tags/}"
    # ^{commit} peels annotated tags down to the commit they point at.
    COMMIT="$(git rev-parse "refs/tags/${TAG_NAME}^{commit}")"
    if ! git merge-base --is-ancestor "$COMMIT" origin/main; then
      echo "::error::Tag $TAG_NAME must be created from main branch"
      exit 1
    fi
- name: Set up Python
uses: actions/setup-python@v4
with:
Expand Down
107 changes: 107 additions & 0 deletions .github/workflows/publish_testpypi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Builds the package and uploads it to Test PyPI, then smoke-tests the
# release-check notebook inside a public Jupyter container. Runs for
# `test-v*` tags and for pushes to the f/pypi_release branch; a triggering
# tag is deleted again once the notebook test has passed.
name: Publish to Test PyPI

on:
  push:
    tags:
      - 'test-v*'
    branches:
      - 'f/pypi_release'

jobs:
  deploy_testpypi:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Required for full commit history check
      # Only tag pushes carry a tag to validate; on f/pypi_release branch
      # pushes GITHUB_REF is refs/heads/..., so the step is skipped there.
      # A tag checkout is a detached HEAD with no local branches, so the
      # ancestry check is made against the remote-tracking ref origin/dev.
      - name: Verify tag is on dev branch
        if: startsWith(github.ref, 'refs/tags/')
        run: |
          TAG_NAME="${GITHUB_REF#refs/tags/}"
          # ^{commit} peels annotated tags down to the commit they point at.
          COMMIT="$(git rev-parse "refs/tags/${TAG_NAME}^{commit}")"
          if ! git merge-base --is-ancestor "$COMMIT" origin/dev; then
            echo "::error::Tag $TAG_NAME must be created from dev branch"
            exit 1
          fi
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine
      - name: Build and publish
        env:
          # API-token authentication: the username is the literal string
          # __token__ and the password is the token itself.
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_PASSWORD }}
        run: |
          # Never echo TWINE_PASSWORD (or slices of it) for debugging:
          # partial values are not reliably masked in the job log.
          python -m build
          twine upload --verbose --repository-url https://test.pypi.org/legacy/ dist/*

  test-colab:
    needs: deploy_testpypi
    runs-on: ubuntu-latest
    # A public "Colab-like" image
    container:
      image: jupyter/minimal-notebook:latest
      options: --user root  # Run as root to avoid permission issues
    permissions:
      contents: read  # This job only checks out files
    steps:
      - uses: actions/checkout@v3
        with:
          sparse-checkout: |
            tests/*
            examples/data_factory_release_check.ipynb
          sparse-checkout-cone-mode: false
      - name: Update system packages
        run: |
          apt-get update
          apt-get install -y libssl3  # No sudo needed: already running as root
      - name: Print Python and Jupyter versions
        run: |
          python --version
          pip list | grep -E 'jupyter|ipykernel|nbconvert|notebook'
      # Disabled: pulling the real Colab base image from GCR.
      # - name: Authenticate to GCP
      #   uses: google-github-actions/auth@v1
      #   with:
      #     credentials_json: ${{ secrets.GCP_SA_KEY }}
      #
      # # Configure Docker to use GCR credentials
      # - name: Configure Docker for GCR
      #   uses: google-github-actions/docker-auth@v1
      #
      # # Now you can pull the image
      # - name: Use Colab base image
      #   run: docker pull gcr.io/colab-images/base:latest

      # Executes the release-check notebook and fails the job if any cell
      # errors or exceeds the 120 s per-cell timeout. --no-prompt/--no-input
      # are meant to keep prompts and cell inputs out of the output.
      # NOTE(review): --stdout is combined with --inplace here; confirm
      # which writer nbconvert actually uses when both flags are given.
      - name: Run Colab-style tests
        run: |
          if ! jupyter nbconvert --execute --to notebook --inplace \
            --ExecutePreprocessor.kernel_name=python3 \
            --ExecutePreprocessor.timeout=120 \
            --no-prompt --no-input \
            --stdout \
            examples/data_factory_release_check.ipynb; then
            echo "::error::Notebook execution failed"
            exit 1
          fi
          # Print only the notebook's markdown headings as a short summary;
          # `|| true` keeps a heading-free notebook from failing the step.
          echo "Notebook executed successfully. Summary:" && \
          jupyter nbconvert --to markdown --stdout \
            examples/data_factory_release_check.ipynb | \
            grep -E '^#' || true

  # Runs on the hosted runner rather than inside the notebook container so
  # the preinstalled GitHub CLI is available. `needs: test-colab` means the
  # tag is only removed after the notebook test has succeeded.
  cleanup-tag:
    needs: test-colab
    if: startsWith(github.ref, 'refs/tags/test-v')
    runs-on: ubuntu-latest
    permissions:
      contents: write  # Required to delete the tag ref
    steps:
      - name: Delete triggering tag after successful test
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          gh api -X DELETE "/repos/${GITHUB_REPOSITORY}/git/refs/tags/${GITHUB_REF#refs/tags/}"
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# Adhoc stuff
input.json
output.json
.serena/
docs/
/vibe_coding/response.md
/dev/
todo
.local/
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "internal"]
path = internal
url = https://github.com/starfishdata/starfish_internal.git
[submodule "docs_mintlify"]
path = docs_mintlify
url = https://github.com/starfishdata/docs.git
20 changes: 16 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,21 @@ docstring:
test:
poetry run pytest tests/

install:
@echo "Installing dependencies..."
poetry install
poetry run pre-commit install --install-hooks
install: install-extras

# Example of the underlying command:
#   poetry install --extras "code_execution vllm" --with dev
# Usage:
#   make install EXTRAS="pdf"   # install with specific extras
#   make install EXTRAS="all"   # install all extras
#   make install                # install without extras (default)
install-extras:
@echo "Installing dependencies with extras: $(EXTRAS)"
poetry install $(if $(EXTRAS),--extras "$(EXTRAS)",) --with dev

start-client_claude:
python src/starfish/data_mcp/client_claude.py src/starfish/data_mcp/server.py

start-client_openai:
python src/starfish/data_mcp/client_openai.py
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,22 @@ Key Features:
pip install starfish-core
```

### Optional Dependencies

Starfish supports optional dependencies for specific file parsers. Install only what you need:

```bash
# Install specific parsers
pip install "starfish-core[pdf]" # PDF support
pip install "starfish-core[docx]" # Word document support
pip install "starfish-core[ppt]" # PowerPoint support
pip install "starfish-core[excel]" # Excel support
pip install "starfish-core[youtube]" # YouTube support

# Install all parser dependencies
pip install "starfish-core[all]"
```

## Configuration

Starfish uses environment variables for configuration. We provide a `.env.template` file to help you get started quickly:
Expand Down
1 change: 1 addition & 0 deletions docs_mintlify
Submodule docs_mintlify added at 6ad0ad
4 changes: 2 additions & 2 deletions examples/data_factory.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "starfish-T7IInzTH-py3.11",
"display_name": ".venv",
"language": "python",
"name": "python3"
},
Expand All @@ -673,7 +673,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.11.4"
}
},
"nbformat": 4,
Expand Down
Loading