import json
import requests
from transformers import pipeline

# Initialize the summarization pipeline with the explicitly specified model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

aigencyBaseUrl = "https://api.aigency.augm.link"


def search_vector_db(context):
    """Return up to 5 contexts from the Aigency vector database that are similar to `context`."""
    url = f"{aigencyBaseUrl}/context/similar"
    headers = {"Content-Type": "application/json"}
    data = {"context": context, "limit": 5}
    # Timeout guards against a hanging request; the 30 s value is an arbitrary safeguard
    response = requests.post(url, headers=headers, data=json.dumps(data), timeout=30)
    response.raise_for_status()
    results = response.json()
    return results["contexts"]


def handle(req):
    """
    Process the input JSON, search for similar documents in the vector database,
    concatenate all similar texts, summarize them using an LLM, and return the result.

    Args:
        req (str): A JSON string with the following structure:
            {
                "context": {
                    "namespace": "dapplets.near/parser/twitter",
                    "contextType": "post",
                    "id": "1234567890123456789",
                    "parsedContext": {
                        "text": "Text to search in the vector database.",
                        "authorFullname": "John Doe",
                        "authorUsername": "john_doe",
                        "authorImg": "https://example.com/image.png",
                        "createdAt": "2025-02-19T20:33:29.000Z",
                        "url": "https://twitter.com/john_doe/status/1234567890123456789"
                    }
                }
            }

    Returns:
        str: A JSON string with the following structure:
            {
                "context": {
                    "namespace": "dapplets.near/agent/associative-summarizer",
                    "contextType": "similarity",
                    "id": "<same as input id>",
                    "parsedContext": {
                        "results": [<list of similar documents>],
                        "summary": "<summarized text>"
                    }
                }
            }
    """
    # Deserialize the input JSON
    data = json.loads(req)

    # Extract the context object describing the post to search for
    context = data["context"]

    # Perform the similarity search in the vector database
    similar_contexts = search_vector_db(context)

    # Concatenate all similar document texts into one continuous string
    combined_text = " ".join([doc["parsedContext"]["text"] for doc in similar_contexts])

    # Use the summarization model (LLM) to summarize the combined text.
    # Adjust max_length and min_length as needed; skip summarization entirely
    # when the search returned no text to avoid passing an empty string to the model.
    summary_text = ""
    if combined_text.strip():
        summary_result = summarizer(
            combined_text, max_length=150, min_length=30, do_sample=False
        )
        summary_text = summary_result[0]["summary_text"] if summary_result else ""

    # Build the output JSON with the search results and the summary
    output = {
        "context": {
            "namespace": "dapplets.near/agent/associative-summarizer",
            "contextType": "similarity",
            "id": data["context"]["id"],
            "parsedContext": {"results": similar_contexts, "summary": summary_text},
        }
    }

    # Serialize and return the output JSON
    return json.dumps(output)


# if __name__ == "__main__":
#     # Test input JSON
#     test_input = {
#         "context": {
#             "namespace": "dapplets.near/parser/twitter",
#             "contextType": "post",
#             "id": "1234567890123456789",
#             "parsedContext": {
#                 "text": "Example text to search for similar associations in the vector database.",
#                 "authorFullname": "John Doe",
#                 "authorUsername": "john_doe",
#                 "authorImg": "https://example.com/image.png",
#                 "createdAt": "2025-02-19T20:33:29.000Z",
#                 "url": "https://twitter.com/john_doe/status/1234567890123456789",
#             },
#         }
#     }

#     # Serialize the test input and call the handle function
#     test_req = json.dumps(test_input)
#     result = handle(test_req)
#     print(result)
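

# --- Optional offline smoke test (illustrative sketch, not part of the agent) ---
# A variant of the commented-out test above that stubs out search_vector_db so
# handle() can be exercised without access to the live Aigency API. The stub
# documents below are fabricated placeholders, not real API responses; the
# summarization model is still loaded from Hugging Face when the module starts.
if __name__ == "__main__":
    def _stub_search_vector_db(context):
        # Two fake "similar" documents shaped like the items handle() expects:
        # each one carries its text under parsedContext.text.
        return [
            {
                "namespace": "dapplets.near/parser/twitter",
                "contextType": "post",
                "id": "111",
                "parsedContext": {"text": "A longer post discussing the same topic in more detail."},
            },
            {
                "namespace": "dapplets.near/parser/twitter",
                "contextType": "post",
                "id": "222",
                "parsedContext": {"text": "Another related post adding extra background and opinions."},
            },
        ]

    # Rebind the module-level name so handle() picks up the stub for this run only
    search_vector_db = _stub_search_vector_db

    stub_input = {
        "context": {
            "namespace": "dapplets.near/parser/twitter",
            "contextType": "post",
            "id": "1234567890123456789",
            "parsedContext": {"text": "Example text to search for similar associations."},
        }
    }
    print(handle(json.dumps(stub_input)))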