diff --git a/notebooks/tests/emoji_test.ipynb b/notebooks/tests/emoji_test.ipynb new file mode 100644 index 00000000..4ef7d3c6 --- /dev/null +++ b/notebooks/tests/emoji_test.ipynb @@ -0,0 +1,653 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 7.98it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2025-03-28 22:42:15,809-syncode.mask_store.mask_store] - Using cache: False and fsm path cache/mask_stores/PreTrainedTokenizerFast/grammar_strict_2646269638_128000.pkl exist: False\n", + "[2025-03-28 22:42:15,809-syncode.mask_store.mask_store] - Creating mask store for PreTrainedTokenizerFast and custom, may take more than 10 minutes. Caching at /home/shubham/syncode/cache/mask_stores/PreTrainedTokenizerFast/grammar_strict_2646269638_128000.pkl.\n", + "[2025-03-28 22:42:16,417-syncode.mask_store.fsm_set] - 4 FSMs with 39 states initialized in 0.00 seconds\n", + "[2025-03-28 22:42:16,418-syncode.mask_store.mask_store] - Ignore whitespace is True\n", + "[2025-03-28 22:42:16,419-syncode.mask_store.mask_store] - Number of 2 length terminal sequences reduced from 16 to 4\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 39/39 [00:06<00:00, 5.83it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2025-03-28 22:42:23,118-syncode.mask_store.mask_store] - Time taken to create mask store: 7.31 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "from syncode.infer import Syncode\n", + "\n", + "grammar = r\"\"\"\n", + " // Lark grammar to validate single emoji output for TweetEval dataset\n", + " start: emoji\n", + "\n", + " // Define the 3 emojis from the TweetEval emoji dataset\n", + " emoji: \"😍\" | \"😂\" | \"😉\" \n", + "\n", + 
" // Ensure there is no whitespace or other characters\n", + " %import common.WS\n", + " %ignore WS\n", + " \"\"\"\n", + "\n", + "syn_llm = Syncode(model=\"meta-llama/Llama-3.1-8B-Instruct\", grammar=grammar, new_mask_store=True, max_new_tokens=5)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n", + "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Testing the model with the first 20 examples:\n", + "Tweet: en Pelham Parkway\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: This little lady is 26 weeks pregnant today! Excited for baby Cam to come! @ Springfield,…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Sunrise. @ Miami South Beach,Florida\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: About to Tumble into Tuesday. #Jeep #JeepWave #jeeplife #Jeepbeef #jeepnation #JeepPorn…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: It took a fall festival, but she's finally behind bars @user @ Duluth Fall Festival\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: These escort cards by heartpapersoul for our lavender theme are so pretty! 
#coastsidecouture…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: It's #MargaritaMonday! Come take a picture in our cute #Loteria frame @ Milpa Kitchen & Cantina\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Can’t get enough of these amazing shelves! Bringing a little bit of local history into this…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: There's nothing like being able to balayage with color •••••••••••#clt #clthair…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: @user @ Maryland\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: SQUAD GOALS ...#torontobride #torontowedding #hellomidge #weddingparty #weddingflorist…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: I think I prefer to be Kansas Senate President, rather than Kansas House Speaker. @ Topeka Toll\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: She got a ButtholeButton @ City of Mount Vernon, NY\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Quality day so far with special guest appearance for the yakayard! 
@user @user\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: She has the cutest poses @ Alpine Loop\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Probably one of my favorite pieces, the Picasso's and Monet's didn't hurt to look at either …\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: At my favorite spot not pictured: veggie dumplings that I devoured @user @user\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: La Tole en #Miami Down Town en el Real Chapeo #LaToleEnUsa #Chapiadora #Florida #USA…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Imagine if this beauty was pulling up your driveway this morning. @ Molly Maid of Greater…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: #mood shit goes up @ Comfort L.A\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: My boys #utahherewecome#brycecanyon#zionationalpark @ Wyoming - Utah Border\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: All of the 14 juillet eve bread bake dans le bistro chez les Biavaschis.... Pictured are…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Can this be my future home?! 
#santamonica #santamonicapier…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: This almost put me in the holiday spirit. ALMOST. lol…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Almost 7am, still dark AF out, but what is this sky?! { #nyc #goodmorning #earlymornings…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: It's a tradition. Have to toe-touch on a cliff #dancer #nyctovegas #summer2017 #takemeback…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: The aftermath! I’m always late posting because I’m greedy! @user eating breakfast…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: the view was gorgeous @ Westerly, Rhode Island\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Toronto sunset views #Toronto #highrise #balconyviews #exploreontario #sky #sunset #pretty…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Somebody's pissed cause he sleepy @ Silver Creek, Austell, Georgia\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: That Christmas concert was L I T @ Lake Highlands High\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Even magenta can be subtle ! 
#mollyatfuse #lanzavibes #historicfrankfort #qualitytouchfoil…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: the fall festivities have begun @ Burt's Pumpkin Farm\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Um do you all see this woman!? Needless to say, we had a blast celebrating the new…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Style #18039 and you look absolutely beautiful faryalmakhdoom#KarishmaCreationsFamily…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Yuh! That neck though #iMeantToHitThat@willdabeast__ @user @ Playground LA\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: All aboard!! The hot mess express Jr.!! Sooo cute though! #baileybird @ Fish Hatchery Park\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Happy birthday to one of the sweetest human beings @user also my crafty friend to many…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: This Gold's has iron plates. I think I'm in love . Deadlifts. 225, 245, 265 & 275. Sets of 3-5.…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Back 2 da Bullshit! @user beno49ers #hahaha #tacomachit #washington @ Tacoma, Washington\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Trying a new place to eat! Also, working on selfies. We pretty much suck. 
@ Schooners\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Don’t know if a gift basket could beat this. @ Cloverdale Historic…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Reminiscing with the #sundaybest #fashiondesign #stagedesign #setdesign #vimvigor #eventdesign…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: My #Daugther and her #Bestfriend at #skyzone @ Sky Zone Virginia Beach\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Throwback Thursday when my husband had too much help from German Shepherds @ The Town Of…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: ...my boi is starting 5... Y'all know y'all can't tell me ish …\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Solo los hairdressers lo entenderán ! Lol Are you looking for a real balayage hair style…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Jesi and Dylan ar Thursday\"s show .#Repost jesiringofire・・・The future is gay af with…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Literally no words needed. 
@ Little Flock Baptist Church\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Makeup & Lash Extensions done by me @user #beatbymir #bbm #phillymua #phillylashtech…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: How I look when I text you \"LMAOOOOO IM CRYING OMG \" @ University of…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: The best way to spend a rainy day at s with you. chezellekc @user\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: #Lailah as Shimmer and Shine...lol her poses had me crying #happyholloween @ Boston,…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: 3 years of our relationship have gone by and I still wonder how I got so lucky Through the good…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: My favorite self-care luxury how do you pamper yourself before the hustle of the work week…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: 2012 #Throwback Model Shoot for #Umidubs@umidubs @user @user\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: “From the cradle to the rave” - @user @ The Woodlawn Cemetery & Conservancy\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Surprise 40th #birthday #celebration for our bestie, Tara! 
We all love you so much Happy…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Glad I got to see my spotted friend on my day off #EDSFTG…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: I seriously have grandma hands! but oh well got my nails done again and I kinda like it #gold…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: This Boutique Here! #Fashion #Style #BoutiqueShopping #MRKTSilverlake @ MRKT\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Woke up to SNOW! Freakin snow! I’m not sure Mina has ever seen this mina_the_boxer…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: .Some days you need comfort food and you need someone else to prepare it for you. -Thank you…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😉\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: @ Cannon Beach, Oregon\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: When you get to be a little kid for one class. Lol #peterpan…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Any game spent with these two is a win in my book @ MO Brew\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Feedback please!???? 
Considering returning to closer to my natural hair color, which is almost…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: He was once a happy kid #cutest #whathappened #loveyou @ Kendall, Florida\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: That time I cheated and choco_la_tr3 said it was ok ​ ​ ​ ​ lol @ Los Angeles, California\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Have some chick with weave complain about cultural appropriation around me.... Lmao\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: en Houston, Texas\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Long Island all the way en Long Island, NY\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Here's a rare photo captured of me locked into my computer editing videos like a machine Do…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Must have Fall pieces coming to the blog soon! (It will get cold and stay cold eventually )…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Woman, they will come and they will go...if you're doing it right #sarahhesterross…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Thanks @user for making my hair gorgeous again! 
@user\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: When you find a bus to accessorize your dress ..#accessories #weekend #roadtrip #florida…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Big Baby Miller saids he can eat 8 Cheeseburgers and still beat Dillian Whyte. #MILLERWHYTE…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: idk what’s cuter...him or the reflection of me in his eyes @ Greene County, Pennsylvania\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: A fun day finding our perfect Christmas tree! Athena approves #christmastree #december…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Got magic? #ido #magic #quotesandsayings #quotestoliveby #quotesdaily #quotesgram @ The…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: I just wanted a closer view @ Hollywood Casino Amphitheatre Chicago\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: The awesome view from school the other day! Forgot I had this. @ Williamson, West Virginia\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Happy #WeddingWednesday! We can’t get over these delicious looking cakes from 2017! 
#wedding…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Had to let the hair down on them last night #TeamJustinLopez rocking my #HusselCollective…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: With my babe (@ Growler & Gill - @user in Nanuet, NY)\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Norel's 35th was a blast ...tees denim and bling theme @ Fleetwood Roller Rink\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: Ain't he just so handsome? I adore and love this man and everything he is and wants to be. He…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Hey there punkin @ Bi-Water Farm & Greenhouse\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😉\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: So only half the team was invited to dinner #awkward (guess what other half showed up at same restaurant?? )\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Non rule following asses, non signal using asses, non talented driving asses #sok #sokshit…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😂\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: It's like a flower delivery. #localherbsdelivered #parsley #beautiful #sohealthy #good4uri…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Tweet: We had a time.. Madevu Ladies Vegas Vacation..... 
love my mumsy…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: Playing in hair . @ I. McCleod Salon\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: I’m just going to show the food. #ricardocordew #food#good#nc#highpoint#family #connection…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Mo Fam! @ New Orleans, Louisiana\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: For your entertainment, I present to you my very musically inclined children Note: Please…\n", + "Expected Emoji: 😂\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: Happy 5 year Anniversary to my Queen @user @ Bahama Breeze…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😍\n", + "Correct!\n", + "--------------------------------------------------\n", + "Tweet: We getting serious tonight! CarPro Cquartz and Gyeon being applied with a follow up of Reload…\n", + "Expected Emoji: 😉\n", + "Generated Emoji: 😍\n", + "--------------------------------------------------\n", + "Tweet: The NEW 2018 #Ford #F150. #fordsofinstagram #fordf150 #fordtrucks #fordsuperduty #fordlover…\n", + "Expected Emoji: 😍\n", + "Generated Emoji: 😂\n", + "--------------------------------------------------\n", + "Total Correct Predictions: 63 out of 100\n" + ] + } + ], + "source": [ + "prompt = \"\"\"You are evaluating tweets to assign the most appropriate emoji. \n", + "\n", + "INSTRUCTIONS:\n", + "1. Read the tweet below carefully.\n", + "2. Select the SINGLE most appropriate emoji that captures the sentiment or topic.\n", + "3. 
Respond with ONLY that emoji - no words, explanations, or other characters.\n", + "\n", + "The emoji must be one of the 3 valid options from this set:\n", + "😍 😂 😉\n", + "\n", + "Example:\n", + "Tweet: \"I love this new movie!\"\n", + "Your response: 😍\n", + "\n", + "Tweet: \"This is so funny!\"\n", + "Your response: 😂\n", + "\n", + "Tweet: \"{tweet_text}\"\n", + "Your response:\n", + "\"\"\"\n", + "\n", + "from datasets import load_dataset\n", + "dataset = load_dataset(\"tweet_eval\", \"emoji\")\n", + "\n", + "# Create mapping from integer labels to emoji characters\n", + "emoji_map = {\n", + " 1: \"😍\", 2: \"😂\", 14: \"😉\"\n", + "}\n", + "\n", + "cnt_correct = 0\n", + "\n", + "# Filter 100 examples from the test set where the label is one of the 3 valid emojis 😍 😂 😉\n", + "examples = []\n", + "total_examples = 100\n", + "\n", + "for i in range(len(dataset['test'])):\n", + " label = dataset['test'][i]['label']\n", + " if label in emoji_map:\n", + " examples.append(dataset['test'][i])\n", + " if len(examples) == total_examples:\n", + " break\n", + "\n", + "# Test the model on every filtered example (total_examples, not 20)\n", + "print(\"\\nTesting the model with the first 20 examples:\")\n", + "\n", + "for i in range(total_examples):\n", + " # Get the tweet text and label from the dataset\n", + " tweet_text = examples[i]['text']\n", + " label = examples[i]['label']\n", + " \n", + " # Map the integer label to the corresponding emoji\n", + " expected_emoji = emoji_map[label]\n", + " \n", + " # Create the prompt with the tweet text\n", + " prompt_with_tweet = prompt.format(tweet_text=tweet_text)\n", + " \n", + " # Generate the response using Syncode\n", + " response = syn_llm.infer(prompt_with_tweet)\n", + " generated_empoji = response[0].strip()[0]\n", + " # except Exception as e:\n", + " # print(f\"Error generating emoji for tweet {i}: {e}\")\n", + " # generated_empoji = None\n", + "\n", + " # Print the results\n", + " print(f\"Tweet: {tweet_text}\")\n", + " print(f\"Expected Emoji: 
{expected_emoji}\")\n", + " print(f\"Generated Emoji: {generated_empoji}\")\n", + "\n", + " if generated_empoji == expected_emoji:\n", + " print(\"Correct!\")\n", + " cnt_correct += 1\n", + "\n", + " print(\"-\" * 50)\n", + "\n", + "# Print the total number of correct predictions\n", + "print(f\"Total Correct Predictions: {cnt_correct} out of {total_examples}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "codex", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/syncode/mask_store/byte_fsm.py b/syncode/mask_store/byte_fsm.py index c50fa265..a00845fd 100644 --- a/syncode/mask_store/byte_fsm.py +++ b/syncode/mask_store/byte_fsm.py @@ -113,10 +113,12 @@ def _build_byte_fsm(self, regex_fsm): # Create intermediate states for the multi-byte character current = state for i, byte in enumerate(char_bytes): - if byte not in self.alphabet: + if byte not in self.byte_to_category: # Add the byte to the alphabet with a new category - byte_category = f"{byte}_{i}" + byte_category = f"b{byte}" self.byte_to_category[byte] = byte_category + else: + byte_category = self.byte_to_category[byte] if i < len(char_bytes) - 1: if byte_category not in self.transitions[current]: diff --git a/tests/mask_store/test_byte_fsm.py b/tests/mask_store/test_byte_fsm.py index 42acd499..186804d7 100644 --- a/tests/mask_store/test_byte_fsm.py +++ b/tests/mask_store/test_byte_fsm.py @@ -123,7 +123,8 @@ def test_consume_prefix(self): ]), ('"[^"”“]+"', [ ('\"key”', (False, None)), - ]) + ]), + ('😘', [(b"\xf0\x9f\x98", (True, b""))]) ] for pattern, test_cases in prefix_test_cases: