Merged. Changes from all commits.
2 changes: 1 addition & 1 deletion .github/workflows/run_tests.yml

@@ -27,7 +27,7 @@ jobs:
         uses: actions/cache@v3
         with:
           path: /home/runner/work/syncode/syncode/cache/mask_stores/
-          key: files-${{ hashFiles('syncode/parsers/grammars/python_grammar.lark', 'syncode/dfa_mask_store.py') }}
+          key: files-${{ hashFiles('syncode/parsers/grammars/python.lark', 'syncode/dfa_mask_store.py') }}
      - name: Run Tests
        run: |
          python3 -m unittest tests.test_misc
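The cache key is content-addressed: `hashFiles()` digests the listed files, so pointing it at the renamed `python.lark` both fixes the stale path and invalidates mask-store caches built from the old grammar. A rough Python sketch of the idea (illustrative only; the exact algorithm Actions uses for `hashFiles()` may differ in detail):

import hashlib

def actions_style_cache_key(paths):
    # Hash each file's bytes, then fold the per-file digests into one key,
    # so any change to any listed file produces a different cache key.
    combined = hashlib.sha256()
    for path in paths:
        with open(path, "rb") as f:
            combined.update(hashlib.sha256(f.read()).digest())
    return "files-" + combined.hexdigest()

key = actions_style_cache_key([
    "syncode/parsers/grammars/python.lark",
    "syncode/dfa_mask_store.py",
])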
2 changes: 1 addition & 1 deletion README.md

@@ -69,7 +69,7 @@ SynCode depends on HuggingFace [transformers](https://github.com/huggingface/transformers)

 | SynCode version | Required transformers version | Python version |
 | -------------- | ----------------------------- | -------------- |
-| `v0.4.10` (latest) | `v4.44.0` | 3.6 - 3.12 |
+| `v0.4.11` (latest) | `v4.51.0` | 3.6 - 3.12 |

 **Note:** Python 3.13 is not currently supported due to dependency constraints.
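Because the pin is exact, an environment with a different transformers release can fail in subtle ways. A minimal sanity check (not part of SynCode, shown only to illustrate the table above):

from importlib.metadata import version  # standard library on Python 3.8+

installed = version("transformers")
if installed != "4.51.0":
    raise RuntimeError(
        f"SynCode v0.4.11 is pinned to transformers 4.51.0, found {installed}"
    )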
4 changes: 2 additions & 2 deletions pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "syncode"
-version="0.4.10"
+version="0.4.11"
 requires-python = ">=3.6,<3.13"
 description = "Grammar-guided code generation tool"
 readme = "README.md"
@@ -24,7 +24,7 @@ dependencies = [
     "regex==2023.8.8",
     "torch",
     "tqdm",
-    "transformers==4.44.0",
+    "transformers==4.51.0",
     "datasets",
     "jsonschema",
 ]
3 changes: 2 additions & 1 deletion requirements.txt

@@ -1,8 +1,9 @@
 accelerate
 fire
 interegular
 regex==2023.8.8
 torch
 tqdm
-transformers==4.44.0; python_version < "3.13"
+transformers==4.51.0; python_version < "3.13"
 datasets
+jsonschema
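The `; python_version < "3.13"` suffix is a PEP 508 environment marker: pip installs the pinned transformers only on interpreters older than 3.13, matching the README note. A sketch of how such a marker is evaluated, using the `packaging` library (pip vendors its own copy; install `packaging` separately to run this):

from packaging.markers import Marker

# Evaluate the marker from the requirements line against the current interpreter.
marker = Marker('python_version < "3.13"')
print(marker.evaluate())  # True on Python 3.6-3.12, so the pin applies there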
4 changes: 2 additions & 2 deletions setup.py

@@ -11,14 +11,14 @@
     "regex==2023.8.8",
     "torch",
     "tqdm",
-    "transformers==4.44.0",
+    "transformers==4.51.0",
     "datasets",
     "jsonschema"
 ]

 setuptools.setup(
     name="syncode",
-    version="0.4.10",
+    version="0.4.11",
     author="Shubham Ugare",
     author_email="shubhamugare@gmail.com",
     description="This package provides the tool for grammar augmented LLM generation.",
36 changes: 26 additions & 10 deletions syncode/common.py

@@ -12,21 +12,32 @@


 def load_model(model_name, device, quantize, device_map = None):
+    torch_dtype = torch.bfloat16 if quantize else "auto"
+    device_map = device_map if device_map is not None else "auto"
+
+    attn_implementation = None
+    if "gemma-3" in model_name:
+        # This is due to the gemma-3 issue with SDPA implementation
+        # https://github.com/google-deepmind/gemma/issues/169
+        attn_implementation = "eager"
+        logging.info("Using slower \"eager\" attention implementation for gemma-3 due to issue with SDPA implementation")
+
     if model_name == 'test':
         model = AutoModelForCausalLM.from_pretrained('bigcode/tiny_starcoder_py').to(device)
     elif model_name == 'test-instruct':
         model = AutoModelForCausalLM.from_pretrained("rahuldshetty/tiny-starcoder-instruct")
     else:
-        if device_map is not None:
-            if (quantize):
-                model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, cache_dir=HF_CACHE, token=HF_ACCESS_TOKEN, trust_remote_code=True, device_map = device_map).eval()
-            else:
-                model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=HF_CACHE, token=HF_ACCESS_TOKEN, trust_remote_code=True, device_map = device_map).eval()
-        else:
-            if (quantize):
-                model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, cache_dir=HF_CACHE, token=HF_ACCESS_TOKEN, trust_remote_code=True).eval().to(device)
-            else:
-                model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=HF_CACHE, token=HF_ACCESS_TOKEN, trust_remote_code=True).eval().to(device)
+        logging.info(f"Loading model {model_name} with device:{device}, device_map:{device_map}, torch_dtype:{torch_dtype}")
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch_dtype,
+            cache_dir=HF_CACHE,
+            token=HF_ACCESS_TOKEN,
+            trust_remote_code=True,
+            device_map = device_map,
+            attn_implementation=attn_implementation
+        ).eval()
     return model

 def load_tokenizer(model_name):
@@ -35,7 +46,12 @@ def load_tokenizer(model_name):
     elif model_name == 'test-instruct':
         tokenizer = AutoTokenizer.from_pretrained("rahuldshetty/tiny-starcoder-instruct")
     else:
-        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=HF_CACHE, token=HF_ACCESS_TOKEN, trust_remote_code=True)
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            cache_dir=HF_CACHE,
+            token=HF_ACCESS_TOKEN,
+            trust_remote_code=True
+        )
     return tokenizer

 def get_output_path(model_name, grammar, dataset, num_samples, mode):
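The refactor collapses four near-duplicate `from_pretrained` calls into one: `torch_dtype` is `bfloat16` only when quantizing, `device_map` now defaults to `"auto"` (so `device` is only used for the test models), and gemma-3 models fall back to eager attention because of the upstream SDPA issue linked in the comment. A hypothetical call site under the new signature (the model name is chosen for illustration):

from syncode.common import load_model, load_tokenizer

# Loads in bfloat16 with device_map="auto"; since the name contains
# "gemma-3", attn_implementation="eager" is selected automatically.
model = load_model("google/gemma-3-4b-it", device="cuda", quantize=True)
tokenizer = load_tokenizer("google/gemma-3-4b-it")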
5 changes: 4 additions & 1 deletion syncode/evaluation/json_eval.py

@@ -72,7 +72,10 @@ def run_eval_for_task(syncode, num_samples_per_task, problem, samples, pbar, task_id):
     else:
         problem["prompt"][0]['content'] = f"{problem['prompt'][0]['content']}\nOnly output JSON.\nJSON:\n"

-    prompt = syncode.model.tokenizer.apply_chat_template(problem["prompt"], tokenize = False)
+    if syncode.model.tokenizer.chat_template is not None:
+        prompt = syncode.model.tokenizer.apply_chat_template(problem["prompt"], tokenize = False)
+    else:
+        prompt = problem["prompt"][0]['content']

     batch_completions = syncode.model.generate_grammar_constrained_completion(prompt, num_samples_per_task)
     for completion_id, completion in enumerate(batch_completions):
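Recent transformers releases raise an error from `apply_chat_template` when a tokenizer has no chat template set, and base code models typically ship without one, so the guard above falls back to the raw prompt content. A small sketch of the same pattern in isolation (model name illustrative; it is one of the test models used elsewhere in this PR):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bigcode/tiny_starcoder_py")
messages = [{"role": "user", "content": "Only output JSON.\nJSON:\n"}]

if tokenizer.chat_template is not None:
    prompt = tokenizer.apply_chat_template(messages, tokenize=False)
else:
    prompt = messages[0]["content"]  # base model: no template, use raw text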