Close Menu
    Facebook X (Twitter) Instagram
    Articles Stock
    • Home
    • Technology
    • AI
    • Pages
      • About ArticlesStock — AI & Technology Journalist
      • Contact us
      • Disclaimer For Articles Stock
      • Privacy Policy
      • Terms and Conditions
    Facebook X (Twitter) Instagram
    Articles Stock
    AI

    A Coding Implementation to Build an AI-Powered File Type Detection and Security Analysis Pipeline with Magika and OpenAI

    Naveed Ahmad · By Naveed Ahmad · 19/04/2026 · Updated: 20/04/2026 · No Comments · 3 Mins Read
    blog 1 11


    !pip set up magika openai -q
    
    
    # Standard library
    import os, io, json, zipfile, textwrap, hashlib, tempfile, getpass
    from collections import Counter
    from pathlib import Path

    # Third-party: Google's Magika file-type detector and the OpenAI SDK
    from magika import Magika
    from magika.types import MagikaResult, PredictionMode
    from openai import OpenAI
    
    
    # Prompt for the OpenAI API key without echoing it to the screen,
    # then fail fast with a clear message if the key cannot authenticate.
    print("🔑 Enter your OpenAI API key (input is hidden):")
    api_key = getpass.getpass("OpenAI API Key: ")
    client = OpenAI(api_key=api_key)


    try:
        # Cheap authenticated call to verify the key/network before doing real work.
        client.models.list()
        print("✅ OpenAI connected successfully\n")
    except Exception as e:
        # Abort the whole script — nothing below works without a valid key.
        raise SystemExit(f"❌ OpenAI connection failed: {e}")
    
    
    # Load the Magika model once (downloads/initialises the ONNX model on first use)
    # and print version/capability info for reproducibility.
    m = Magika()
    print("✅ Magika loaded successfully\n")
    print(f"   module version : {m.get_module_version()}")
    print(f"   model name     : {m.get_model_name()}")
    print(f"   output types   : {len(m.get_output_content_types())} supported labels\n")
    
    
    def ask_gpt(system: str, user: str, model: str = "gpt-4o", max_tokens: int = 600) -> str:
        """Send one system+user message pair to the Chat Completions API.

        Args:
            system: System-role instruction that frames the assistant's persona.
            user: The user-role prompt content.
            model: Chat model identifier (default "gpt-4o").
            max_tokens: Upper bound on the completion length.

        Returns:
            The assistant's reply text, stripped of surrounding whitespace.

        Note: relies on the module-level ``client`` created during setup.
        """
        resp = client.chat.completions.create(
            model=model,
            max_tokens=max_tokens,
            messages=[
                {"role": "system", "content": system},
                {"role": "user",   "content": user},
            ],
        )
        return resp.choices[0].message.content.strip()
    
    
    print("=" * 60)
    print("SECTION 1 — Core API + GPT Plain-Language Explanation")
    print("=" * 60)


    # Small in-memory byte samples covering text formats and binary magic numbers,
    # so identify_bytes() can be demonstrated without touching the filesystem.
    samples = {
        "Python":     b'import os\ndef greet(name):\n    print(f"Hello, {name}")\n',
        "JavaScript": b'const fetch = require("node-fetch");\nasync function getData() { return await fetch("/api"); }',
        "CSV":        b'name,age,city\nAlice,30,NYC\nBob,25,LA\n',
        "JSON":       b'{"name": "Alice", "scores": [10, 20, 30], "active": true}',
        "Shell":      b'#!/bin/bash\necho "Hello"\nfor i in $(seq 1 5); do echo $i; done',
        "PDF magic":  b'%PDF-1.4\n1 0 obj\n<< /Type /Catalog >>\nendobj\n',
        # ZIP local-file-header signature PK\x03\x04 padded to a plausible length.
        "ZIP magic":  bytes([0x50, 0x4B, 0x03, 0x04]) + bytes(26),
    }


    # Identify each sample from raw bytes and tabulate label / MIME type / confidence.
    print(f"\n{'Label':<12} {'MIME Type':<30} {'Score':>6}")
    print("-" * 52)
    magika_labels = []
    for name, raw in samples.items():
        res = m.identify_bytes(raw)
        magika_labels.append(res.output.label)
        print(f"{res.output.label:<12} {res.output.mime_type:<30} {res.score:>5.1%}")


    # Ask GPT for a short plain-language explanation of content-based detection.
    explanation = ask_gpt(
        system="You are a concise ML engineer. Explain in 4–5 sentences.",
        user=(
            f"Magika is Google's AI file-type detector. It just identified these types from raw bytes: "
            f"{magika_labels}. Explain how a deep-learning model detects file types from "
            "just bytes, and why this beats relying on file extensions."
        ),
        max_tokens=250,
    )
    print(f"\n💬 GPT on how Magika works:\n{textwrap.fill(explanation, 72)}\n")
    
    
    print("=" * 60)
    print("SECTION 2 — Batch Identification + GPT Summary")
    print("=" * 60)


    # Write a small mixed-type project into a temp directory so the path-based
    # batch API (identify_paths) can be demonstrated end to end.
    tmp_dir = Path(tempfile.mkdtemp())
    file_specs = {
        "code.py":     b"import sys\nprint(sys.version)\n",
        "style.css":   b"body { font-family: Arial; margin: 0; }\n",
        "data.json":   b'[{"id": 1, "val": "foo"}, {"id": 2, "val": "bar"}]',
        "script.sh":   b"#!/bin/sh\necho Hello World\n",
        # NOTE(review): the original HTML literal was stripped out by the page
        # scraper — a minimal equivalent document is reconstructed here.
        "doc.html":    b"<!DOCTYPE html>\n<html><body><h1>Hello</h1></body></html>\n",
        "config.yaml": b"server:\n host: localhost\n port: 8080\n",
        "query.sql":   b"CREATE TABLE t (id INT PRIMARY KEY, name TEXT);\n",
        "notes.md":    b"# Heading\n\n- item one\n- item two\n",
    }
    paths = []
    for fname, content in file_specs.items():
        p = tmp_dir / fname
        p.write_bytes(content)
        paths.append(p)

    # One call identifies the whole batch; results align index-for-index with paths.
    results = m.identify_paths(paths)
    batch_summary = [
        {"file": p.name, "label": r.output.label, "group": r.output.group,
         "score": f"{r.score:.1%}"}
        for p, r in zip(paths, results)
    ]

    print(f"\n{'File':<18} {'Label':<14} {'Group':<12} {'Score':>6}")
    print("-" * 54)
    for row in batch_summary:
        print(f"{row['file']:<18} {row['label']:<14} {row['group']:<12} {row['score']:>6}")

    # Hand the structured detections to GPT for a security-minded project summary.
    gpt_summary = ask_gpt(
        system="You are a DevSecOps expert. Be concise and practical.",
        user=(
            f"A file upload scanner detected these file types in a batch: "
            f"{json.dumps(batch_summary)}. "
            "In 3–4 sentences, summarise what kind of project this looks like "
            "and flag any file types that might warrant extra scrutiny."
        ),
        max_tokens=220,
    )
    print(f"\n💬 GPT project analysis:\n{textwrap.fill(gpt_summary, 72)}\n")



    Source link

    Naveed Ahmad

    Naveed Ahmad is a technology journalist and AI writer at ArticlesStock, covering artificial intelligence, machine learning, and emerging tech policy. Read his latest articles.

    Related Posts

    OpenAI’s existential questions | TechCrunch

    20/04/2026

    Meet OpenMythos: An Open-Source PyTorch Reconstruction of Claude Mythos Where 770M Parameters Match a 1.3B Transformer

    20/04/2026

    The 12-month window | TechCrunch

    20/04/2026
    Leave A Reply Cancel Reply

    Categories
    • AI
    Recent Comments
      Facebook X (Twitter) Instagram Pinterest
      © 2026 ThemeSphere. Designed by ThemeSphere.

      Type above and press Enter to search. Press Esc to cancel.