[ home / overboard ] [ soy / qa / mtv / dem ] [ int / pol ] [ a / asp / biz / fit / k / r9k / sude / tech / tv / v / x ] [ q / news / chive / rules / pass / bans ] [ wiki / booru / irc ]

A banner for soyjak.party

/tech/ - Soyence and Technology

Download more RAM for your Mac here
Catalog
Email
Subject
Comment
File
Password (For file deletion.)

File: chimpy.jpg 📥︎ (58.18 KB, 642x642) ImgOps

 №6697[Quote]

It's not perfect but it's kind of working so far.

// ==UserScript==
// @name Spam Filter for Soyjak.st
// @namespace http://tampermonkey.net/
// @version 2.0
// @description Hide spammy posts automatically
// @match *://soyjak.st/*
// ==/UserScript==

// Tuning values for the spam heuristics; passed to isSpam() for every post.
const thresholds = {
// Minimum number of occurrences of one word before it can count as repeat spam.
repeatThreshold: 3,
// Minimum share of the post's words (count / totalWords) the repeated word must make up.
densityThreshold: 0.22,
// Length of a run of identical consecutive characters that marks a word as elongated.
elongationThreshold: 4
};

// isElongated function
/**
 * Report whether a word contains a run of identical consecutive characters.
 *
 * @param {string} word - Token to inspect.
 * @param {number} threshold - Run length at which the word counts as elongated.
 * @returns {boolean} True as soon as a run reaches `threshold`, otherwise false.
 */
function isElongated(word, threshold) {
  let run = 1;
  let pos = 1;
  while (pos < word.length) {
    const sameAsPrevious = word.charAt(pos) === word.charAt(pos - 1);
    // A differing character starts a fresh run of length one.
    run = sameAsPrevious ? run + 1 : 1;
    if (sameAsPrevious && run >= threshold) {
      return true;
    }
    pos += 1;
  }
  return false;
}

// isSpam function
/**
 * Decide whether a post's text looks like spam.
 *
 * A message is spam when any word is elongated (see isElongated), or when a
 * single word occurs at least `repeatThreshold` times AND makes up at least
 * `densityThreshold` of all words in the message.
 *
 * @param {string} message - Text of the post.
 * @param {{repeatThreshold: number, densityThreshold: number, elongationThreshold: number}} thresholds
 * @returns {boolean} True when the message matches one of the spam heuristics.
 */
function isSpam(message, thresholds) {
  // Drop the empty tokens that split() yields for leading/trailing whitespace
  // so they do not dilute the density calculation below.
  const words = message.toLowerCase().split(/\s+/).filter(Boolean);
  const totalWords = words.length;

  // A Map keeps words such as "constructor" or "__proto__" from colliding
  // with properties inherited from Object.prototype.
  const wordCount = new Map();

  for (const word of words) {
    if (isElongated(word, thresholds.elongationThreshold)) {
      return true;
    }
    wordCount.set(word, (wordCount.get(word) || 0) + 1);
  }

  for (const count of wordCount.values()) {
    const density = count / totalWords;
    if (count >= thresholds.repeatThreshold && density >= thresholds.densityThreshold) {
      return true;
    }
  }

  return false;
}


// Scan every reply body currently on the page and hide the replies flagged as spam.
const postElements = document.querySelectorAll('div.post.reply div.body');

for (const body of postElements) {
  if (!isSpam(body.textContent.trim(), thresholds)) {
    continue;
  }
  const flaggedPost = body.closest('.post');
  if (!flaggedPost) {
    continue;
  }
  flaggedPost.style.display = 'none'; // Hide the whole post
  // Remove the <br> directly after the post so no empty gap is left behind.
  const follower = flaggedPost.nextSibling;
  if (follower && follower.nodeName === 'BR') {
    follower.remove();
  }
}

 â„–6708[Quote]

File: GKXWZQWakAAGmaP.jpeg 📥︎ (213.65 KB, 960x1280) ImgOps

>>6697 (OP)
Thanks a lot

 â„–6709[Quote]

File: ClipboardImage.png 📥︎ (23.27 KB, 495x592) ImgOps

Something you can do about these type of replies?

 â„–6713[Quote]

>>6709
Working on that like rn lol.

 â„–6716[Quote]

not gonna install your chink spyware

 â„–6717[Quote]

>>6716
You don't belong here if you can't read code, baboon

 â„–6719[Quote]

File: 3255235.webm 📥︎ (25.68 MB, 720x820) ImgOps

>4gegs need scripts to handle spam so their clitties don't leak

 â„–6721[Quote]

>>6719
gotta filter out boring niggers somehow

 â„–6724[Quote]

File: 37735.png 📥︎ (60.78 KB, 318x266) ImgOps

>>>6719 (You)
>gotta filter out boring niggers somehow

 №6733[Quote]

File: 1721541607630790.jpg 📥︎ (436.19 KB, 2048x1536) ImgOps

>>6709
Try this: I changed the isElongated function to count how often each letter occurs in a word, instead of just checking in the for loop whether the previous letter is the same as the current one.
I didn't realize 4chan/soyjak posts are just one line with no \n.


// ==UserScript==
// @name Spam Filter for Soyjak.st
// @version 2.1
// @description Hide spammy posts automatically
// @match *://soyjak.st/*
// ==/UserScript==

// Tuning values for the spam heuristics; passed to isSpam() for every post.
const thresholds = {
// Minimum number of occurrences of one word before it can count as repeat spam.
repeatThreshold: 3,
// Minimum share of the counted words (count / totalWords) the repeated word must make up.
densityThreshold: 0.22,
// Per-word character count handed to isElongated(); a word whose character tally reaches it is flagged.
elongationThreshold: 3
};

// isElongated function
/**
 * Report whether any single character occurs at least `threshold` times in a
 * word. Occurrences do not have to be consecutive.
 *
 * The tally starts at the first character (the earlier loop started at index 1
 * and therefore never counted it) and iterates by code point, so two different
 * emoji that share a surrogate half are not mistaken for the same character.
 *
 * @param {string} word - Token to inspect.
 * @param {number} threshold - Occurrence count at which the word is flagged.
 * @returns {boolean} True when some character reaches `threshold` occurrences.
 */
function isElongated(word, threshold) {
  const tally = new Map();
  for (const ch of word) {
    const seen = (tally.get(ch) || 0) + 1;
    if (seen >= threshold) {
      return true;
    }
    tally.set(ch, seen);
  }
  return false;
}

// isSpam function
/**
 * Decide whether a post's text looks like spam.
 *
 * Tokens starting with ">>" (post references) are ignored entirely. A message
 * is spam when any remaining word is flagged by isElongated, or when a single
 * word occurs at least `repeatThreshold` times AND makes up at least
 * `densityThreshold` of the counted words.
 *
 * @param {string} message - Text of the post.
 * @param {{repeatThreshold: number, densityThreshold: number, elongationThreshold: number}} thresholds
 * @returns {boolean} True when the message matches one of the spam heuristics.
 */
function isSpam(message, thresholds) {
  // A Map keeps words such as "constructor" or "__proto__" from colliding
  // with properties inherited from Object.prototype.
  const wordCount = new Map();
  let totalWords = 0;

  for (const word of message.toLowerCase().split(/\s+/)) {
    // split() yields empty tokens for leading/trailing whitespace.
    if (!word) continue;
    if (/^>>/.test(word)) continue;
    totalWords++;

    if (isElongated(word, thresholds.elongationThreshold)) {
      console.log(`${word} is spam!!!`);
      return true;
    }
    wordCount.set(word, (wordCount.get(word) || 0) + 1);
  }

  for (const count of wordCount.values()) {
    const density = count / totalWords;
    if (count >= thresholds.repeatThreshold && density >= thresholds.densityThreshold) {
      return true;
    }
  }

  return false;
}


// Collect the body of every reply on the page, then hide each reply whose text is spam.
const postElements = document.querySelectorAll('div.post.reply div.body');

postElements.forEach((replyBody) => {
  const text = replyBody.textContent.trim();
  if (!isSpam(text, thresholds)) {
    return;
  }

  const wrapper = replyBody.closest('.post');
  if (!wrapper) {
    return;
  }

  wrapper.style.display = 'none'; // Hide the whole post
  // Also drop the <br> that immediately follows the hidden post.
  const after = wrapper.nextSibling;
  const isLineBreak = Boolean(after) && after.nodeName === 'BR';
  if (isLineBreak) {
    after.remove();
  }
});

 â„–6734[Quote]

>>6733
Oh lmfao the script even blocked this post because of this line
*://soyjak.st/*//

 â„–6735[Quote]

>>6709
GEEEEEG
COPE
>COPE
COPE
COPE
>COPE
COPE>COPE
>COPE
COPE
>COPE
COPE
>COPE
COPE

 №6739[Quote]

>>6733
Try mine instead

// Tuning values for the spam heuristics; passed to isSpam() for every post.
const thresholds = {
// Minimum number of occurrences of one word before it can count as repeat spam.
repeatThreshold: 3,
// Minimum share of the post's words (count / totalWords) the repeated word must make up.
densityThreshold: 0.22,
// Length of a run of identical consecutive characters that marks a word as elongated.
elongationThreshold: 4
};

// isElongated function
/**
 * Tell whether a word holds a streak of the same character repeated back to back.
 *
 * @param {string} word - Token to inspect.
 * @param {number} threshold - Streak length at which the word is considered elongated.
 * @returns {boolean} True once any streak reaches `threshold`; false otherwise.
 */
function isElongated(word, threshold) {
  let streak = 1;
  return word.split('').some((unit, idx) => {
    if (idx === 0) {
      return false;
    }
    if (unit !== word[idx - 1]) {
      // Different character: the streak starts over.
      streak = 1;
      return false;
    }
    streak += 1;
    return streak >= threshold;
  });
}

// isSpam function (improved)
/**
 * Decide whether a post's text looks like spam.
 *
 * Three heuristics are applied in order:
 *   1. any word is elongated (see isElongated);
 *   2. a single word occurs at least `repeatThreshold` times AND makes up at
 *      least `densityThreshold` of all words;
 *   3. the message has more than 5 non-empty lines and one line (ignoring
 *      leading ">" characters, surrounding whitespace and case) accounts for
 *      at least half of them.
 *
 * NOTE(review): heuristic 3 only fires when `message` really contains "\n";
 * confirm the caller's text source preserves line breaks.
 *
 * @param {string} message - Text of the post.
 * @param {{repeatThreshold: number, densityThreshold: number, elongationThreshold: number}} thresholds
 * @returns {boolean} True when the message matches one of the spam heuristics.
 */
function isSpam(message, thresholds) {
  // Drop the empty tokens that split() yields for leading/trailing whitespace
  // so they do not dilute the density calculation below.
  const words = message.toLowerCase().split(/\s+/).filter(Boolean);
  const totalWords = words.length;

  // Maps keep keys such as "constructor" or "__proto__" from colliding with
  // properties inherited from Object.prototype.
  const wordCount = new Map();
  for (const word of words) {
    if (isElongated(word, thresholds.elongationThreshold)) {
      return true;
    }
    wordCount.set(word, (wordCount.get(word) || 0) + 1);
  }

  for (const count of wordCount.values()) {
    const density = count / totalWords;
    if (count >= thresholds.repeatThreshold && density >= thresholds.densityThreshold) {
      return true;
    }
  }

  // Normalize lines: remove ">", trim whitespace, lowercase
  const lines = message.split('\n')
    .map((line) => line.replace(/^>+/, '').trim().toLowerCase())
    .filter(Boolean);

  // Short posts are never judged by line repetition.
  if (lines.length <= 5) {
    return false;
  }

  const lineCount = new Map();
  for (const line of lines) {
    lineCount.set(line, (lineCount.get(line) || 0) + 1);
  }

  for (const count of lineCount.values()) {
    if (count / lines.length >= 0.5) {
      return true; // At least 50% of lines are the same
    }
  }

  return false;
}

// Main filtering logic
// Every reply body on the page; each one is tested and its post hidden when it is spam.
const postElements = document.querySelectorAll('div.post.reply div.body');

for (const bodyNode of postElements) {
  const isFlagged = isSpam(bodyNode.textContent.trim(), thresholds);
  const enclosingPost = isFlagged ? bodyNode.closest('.post') : null;
  if (enclosingPost) {
    enclosingPost.style.display = 'none'; // Hide the whole post
    // The <br> right after the post is removed as well.
    const trailing = enclosingPost.nextSibling;
    if (trailing && trailing.nodeName === 'BR') {
      trailing.remove();
    }
  }
}

 â„–6742[Quote]

Make sure the @match line at the top looks like this

// @match *://*soyjak.st/*

 â„–6746[Quote]

>>6697 (OP)
bro i just /.{500}/ and it filters 99% of spam, but long posts are also filtered unadvisedly

 №6759[Quote]

File: 1721330145327533.jpg 📥︎ (664.21 KB, 1007x1007) ImgOps

>>6746
I was bored and I found it fun to work on.

Now handles post references correctly in spam filter,
skip valid >>123456 quotes but check following text for spam.
Only checks for post numbers between 100 and 999999999 inclusive.

// ==UserScript==
// @name Spam Filter for Soyjak.st
// @namespace http://tampermonkey.net/
// @version 2.2
// @description Hide spammy posts automatically
// @match *://*soyjak.st/*
// ==/UserScript==

// Tuning values for the spam heuristics; passed to isSpam() for every post.
const thresholds = {
// Minimum number of occurrences of one word before it can count as repeat spam.
repeatThreshold: 3,
// Minimum share of the counted words (count / totalWords) the repeated word must make up.
densityThreshold: 0.22,
// Per-word character count handed to isElongated(); a word whose character tally reaches it is flagged.
elongationThreshold: 3
};

// isElongated function
/**
 * Report whether any single character occurs at least `threshold` times in a
 * word. Occurrences do not have to be consecutive.
 *
 * The tally starts at the first character (the earlier loop started at index 1
 * and therefore never counted it) and iterates by code point, so two different
 * emoji that share a surrogate half are not mistaken for the same character.
 *
 * @param {string} word - Token to inspect.
 * @param {number} threshold - Occurrence count at which the word is flagged.
 * @returns {boolean} True when some character reaches `threshold` occurrences.
 */
function isElongated(word, threshold) {
  const tally = new Map();
  for (const ch of word) {
    const seen = (tally.get(ch) || 0) + 1;
    if (seen >= threshold) {
      return true;
    }
    tally.set(ch, seen);
  }
  return false;
}

// isSpam function
/**
 * Decide whether a post's text looks like spam.
 *
 * A leading post reference (">>" followed by 3 to 9 digits) is stripped from
 * each token; whatever follows it is still checked. A message is spam when any
 * remaining word is flagged by isElongated, or when a single word occurs at
 * least `repeatThreshold` times AND makes up at least `densityThreshold` of
 * the counted words.
 *
 * @param {string} message - Text of the post.
 * @param {{repeatThreshold: number, densityThreshold: number, elongationThreshold: number}} thresholds
 * @returns {boolean} True when the message matches one of the spam heuristics.
 */
function isSpam(message, thresholds) {
  // A Map keeps words such as "constructor" or "__proto__" from colliding
  // with properties inherited from Object.prototype.
  const wordCount = new Map();
  let totalWords = 0;

  for (const token of message.toLowerCase().split(/\s+/)) {
    // Remove one leading quote reference; a token that was only a quote
    // (or was empty to begin with) is not counted as a word.
    const word = token.replace(/^>>\d{3,9}/, '');
    if (!word) continue;

    totalWords++;

    if (isElongated(word, thresholds.elongationThreshold)) {
      console.log(`${word} is spam!!!`);
      return true;
    }
    wordCount.set(word, (wordCount.get(word) || 0) + 1);
  }

  for (const count of wordCount.values()) {
    const density = count / totalWords;
    if (count >= thresholds.repeatThreshold && density >= thresholds.densityThreshold) {
      return true;
    }
  }

  return false;
}

// One pass over the reply bodies present on the page: spam replies are hidden.
const postElements = document.querySelectorAll('div.post.reply div.body');

for (const candidate of postElements) {
  const content = candidate.textContent.trim();
  if (isSpam(content, thresholds)) {
    const owner = candidate.closest('.post');
    if (owner) {
      owner.style.display = 'none'; // Hide the whole post
      // Take out the <br> that directly follows the hidden post, if there is one.
      const neighbour = owner.nextSibling;
      if (neighbour && neighbour.nodeName === 'BR') {
        neighbour.remove();
      }
    }
  }
}

 №6830[Quote]

/* Render elements with the "heading" class at normal weight.
   The leading dot is required: without it the rule targets a <heading> element. */
.heading {
  font-weight: normal !important;
}

/* Completely remove replies (and the <br> after them) whose .files or .body
   carries the inline style "display: none;", and replies containing a link
   whose href includes "sage". */
.post.reply:has(.files[style="display: none;"]),
.post.reply:has(.files[style="display: none;"]) + br,
.post.reply:has(.body[style="display: none;"]),
.post.reply:has(.body[style="display: none;"]) + br,
div.post.reply:has(a[href*="sage"]),
div.post.reply:has(a[href*="sage"]) + br {
  display: none !important;
}

/* Cap reply bodies at half the viewport height and scroll the overflow. */
div.post.reply div.body {
  overflow-y: auto;
  max-height: 50vh;
}

/* Let replies use almost the full page width. */
div.post.reply {
  max-width: 98% !important;
  width: 98% !important;
}

/* Hide replies (and the following <br>) with a download attribute containing
   "cado", and div.mix entries whose image data-subject contains "coinslot"
   (case-insensitive). */
.post.reply:has([download*="cado"]),
.post.reply:has([download*="cado"]) + br,
div.mix:has(img[data-subject*="coinslot" i]) {
  display: none !important;
}

 №6955[Quote]

File: 1743489895994183.jpg 📥︎ (137.3 KB, 1440x1799) ImgOps

Now it dynamically filters for new posts before you can see how retarded it was.

// ==UserScript==
// @name Spam Filter for Soyjak.st
// @namespace http://tampermonkey.net/
// @version 2.3
// @description Hide spammy posts automatically
// @match *://*soyjak.st/*
// ==/UserScript==

// Tuning values for the spam heuristics; read by processPostElement() and passed to isSpam().
const thresholds = {
// Minimum number of occurrences of one word before it can count as repeat spam.
repeatThreshold: 10,
// Minimum share of the counted words (count / totalWords) the repeated word must make up.
densityThreshold: 0.22,
// Per-word character count handed to isElongated(); a word whose character tally reaches it is flagged.
elongationThreshold: 10
};

// isElongated function
/**
 * Report whether any single character occurs at least `threshold` times in a
 * word. Occurrences do not have to be consecutive.
 *
 * The tally starts at the first character (the earlier loop started at index 1
 * and therefore never counted it) and iterates by code point, so two different
 * emoji that share a surrogate half are not mistaken for the same character.
 *
 * @param {string} word - Token to inspect.
 * @param {number} threshold - Occurrence count at which the word is flagged.
 * @returns {boolean} True when some character reaches `threshold` occurrences.
 */
function isElongated(word, threshold) {
  const tally = new Map();
  for (const ch of word) {
    const seen = (tally.get(ch) || 0) + 1;
    if (seen >= threshold) {
      return true;
    }
    tally.set(ch, seen);
  }
  return false;
}

// isSpam function
/**
 * Decide whether a post's text looks like spam.
 *
 * All leading post references (">>" followed by 1 to 9 digits, possibly
 * chained) are stripped from each token; whatever follows is still checked.
 * A message is spam when any remaining word is flagged by isElongated, or when
 * a single word occurs at least `repeatThreshold` times AND makes up at least
 * `densityThreshold` of the counted words.
 *
 * @param {string} message - Text of the post.
 * @param {{repeatThreshold: number, densityThreshold: number, elongationThreshold: number}} thresholds
 * @returns {boolean} True when the message matches one of the spam heuristics.
 */
function isSpam(message, thresholds) {
  // A Map keeps words such as "constructor" or "__proto__" from colliding
  // with properties inherited from Object.prototype.
  const wordCount = new Map();
  let totalWords = 0;

  for (const token of message.toLowerCase().split(/\s+/)) {
    // Check for quotes and remove them
    const word = token.replace(/^(?:>>\d{1,9})+/, '');
    // Tokens that were empty or consisted only of quotes are not words.
    // (Previously a `continue` inside the stripping loop applied to that inner
    // loop, so such tokens were counted as an empty word.)
    if (!word) continue;

    totalWords++;

    if (isElongated(word, thresholds.elongationThreshold)) {
      console.log(`${word} is spam!!!`);
      return true;
    }
    wordCount.set(word, (wordCount.get(word) || 0) + 1);
  }

  for (const count of wordCount.values()) {
    const density = count / totalWords;
    if (count >= thresholds.repeatThreshold && density >= thresholds.densityThreshold) {
      return true;
    }
  }

  return false;
}
// Function to process a single post element
/**
 * Hide the post that owns the given body element when its text is spam.
 *
 * @param {Element} el - A reply body element; its trimmed textContent is tested with isSpam.
 * @returns {void}
 */
function processPostElement(el) {
  const text = el.textContent.trim();
  if (!isSpam(text, thresholds)) {
    return;
  }

  const container = el.closest('.post');
  if (!container) {
    return;
  }

  container.style.display = 'none'; // Hides the whole post

  // Remove the <br> that directly follows the post, when present.
  const sibling = container.nextSibling;
  if (sibling && sibling.nodeName === 'BR') {
    sibling.remove();
  }
}

// Filter the replies that are already on the page.
const postElements = document.querySelectorAll('div.post.reply div.body');
for (const existingBody of postElements) {
  processPostElement(existingBody);
}

// Watch for posts inserted later and run each new reply body through the same filter.
const observer = new MutationObserver((mutations) => {
  for (const mutation of mutations) {
    if (mutation.type !== 'childList' || mutation.addedNodes.length === 0) {
      continue;
    }
    for (const node of mutation.addedNodes) {
      // Text and comment nodes cannot be or contain posts.
      if (node.nodeType !== Node.ELEMENT_NODE) {
        continue;
      }
      // The added node is either a reply body itself or may contain some.
      const newPostBodies = node.matches('div.post.reply div.body')
        ? [node]
        : node.querySelectorAll('div.post.reply div.body');
      for (const addedBody of newPostBodies) {
        processPostElement(addedBody);
      }
    }
  }
});

// Observe the thread container when one exists, otherwise the whole document body.
const threadContainer = document.querySelector('div.thread') || document.body;
observer.observe(threadContainer, {
  childList: true,
  subtree: true
});

 â„–6961[Quote]

File: GEG.png 📥︎ (12.01 KB, 600x800) ImgOps

raping slitty chinky yellow whores with my BWC

 â„–7029[Quote]

File: catacando.gif 📥︎ (2.75 MB, 1720x1632) ImgOps

spent too much time on this raisin
TY for filters i am codelet

 â„–7051[Quote]

>>6697 (OP)
This script is a spam detection and filtering system for a website (likely a forum or message board). It identifies spam messages by looking for three specific patterns and then hides posts that match these patterns. Here's how it works:

### Spam Detection Logic

The script identifies spam through three criteria:

1. Word elongation - It checks if any word has the same character repeated multiple times in a row (e.g., "hellooooo"). If a character repeats 4 or more times consecutively (the `elongationThreshold`), the message is considered spam.

2. Word repetition - It counts how many times each word appears in a message. If any word appears 3 or more times (the `repeatThreshold`), it might be spam.

3. Word density - For repeated words, it calculates how much of the total message consists of that word. If a word appears frequently enough to make up 22% or more of the total words (the `densityThreshold`) AND it meets the repetition threshold, the message is considered spam.

### Implementation Steps

1. The `isElongated()` function checks for character repetition within words
2. The `isSpam()` function:
- Splits the message into words
- Checks each word for elongation
- Counts word frequency
- Calculates word density
- Returns true if any spam criteria are met

3. The DOM manipulation section:
- Finds all message body elements on the page with `document.querySelectorAll('div.post.reply div.body')`
- Tests each message with the spam detection function
- Hides posts identified as spam by setting `display: 'none'`
- Removes any line break (`<br>`) elements that follow the hidden post

### How It Fights Spam

This targets common spam tactics like:
- Repeated words (e.g., "buy buy buy buy")
- Stretched out text (e.g., "hiiiiiii" or "frreeeeee")
- Messages with extremely low word variety

It's particularly effective against low-effort spam that relies on repetition and unusual text patterns, while being simple enough to run client-side in a browser.

 â„–7083[Quote]

coal

 â„–7091[Quote]

jannies should block these scripts so 4cucks had to adapt to bait geg

 №7467[Quote]

File: 1741119330284628.jpg 📥︎ (703.72 KB, 1440x1797) ImgOps

The spam filter now flags image and quote spam. Try this thread to test it.
>>>/soy/10710928

[code]
// ==UserScript==
// @name Spam Filter for Soyjak.st
// @namespace http://tampermonkey.net/
// @version 3.0
// @description Hide spammy posts automatically
// @match *://*soyjak.st/*
// ==/UserScript==

// Tuning values for the spam heuristics; read by processPostElement() and passed to isSpam().
const thresholds = {
// Minimum occurrences of one word for repeat spam; a post with more quote references than this is also flagged.
repeatThreshold: 10,
// Minimum share of the counted words (count / totalWords) the repeated word must make up.
densityThreshold: 0.22,
// Per-word character count handed to isElongated(); a word whose character tally reaches it is flagged.
elongationThreshold: 10,
// A post body containing more than this many <img> elements is flagged.
maxImagesThreshold: 5,
// A post body in which the same image src appears at least this many times is flagged.
imageSrcRepeatThreshold: 5,
};

// isElongated function
/**
 * Report whether any single character occurs at least `threshold` times in a
 * word. Occurrences do not have to be consecutive.
 *
 * The tally starts at the first character (the earlier loop started at index 1
 * and therefore never counted it) and iterates by code point, so two different
 * emoji that share a surrogate half are not mistaken for the same character.
 *
 * @param {string} word - Token to inspect.
 * @param {number} threshold - Occurrence count at which the word is flagged.
 * @returns {boolean} True when some character reaches `threshold` occurrences.
 */
function isElongated(word, threshold) {
  const tally = new Map();
  for (const ch of word) {
    const seen = (tally.get(ch) || 0) + 1;
    if (seen >= threshold) {
      return true;
    }
    tally.set(ch, seen);
  }
  return false;
}

// Checks for word, quote, and image spam
/**
 * Decide whether a post body looks like spam, based on its text and images.
 *
 * Text: leading post references (">>" followed by 1 to 9 digits, possibly
 * chained) are counted and stripped from each token. The post is spam when it
 * contains more than `repeatThreshold` such references, when any remaining
 * word is flagged by isElongated, or when a single word occurs at least
 * `repeatThreshold` times AND makes up at least `densityThreshold` of the
 * counted words.
 *
 * Images: the post is spam when it contains more than `maxImagesThreshold`
 * <img> elements, or when one `src` value appears at least
 * `imageSrcRepeatThreshold` times.
 *
 * @param {Element} element - Post body; its textContent and descendant <img> elements are read.
 * @param {{repeatThreshold: number, densityThreshold: number, elongationThreshold: number,
 *          maxImagesThreshold: number, imageSrcRepeatThreshold: number}} thresholds
 * @returns {boolean} True when the post matches one of the spam heuristics.
 */
function isSpam(element, thresholds) {
  const leadingQuote = /^>>(\d{1,9})/;

  // Maps keep keys such as "constructor" or "__proto__" from colliding with
  // properties inherited from Object.prototype.
  const wordCount = new Map();
  let totalQuotes = 0;
  let totalWords = 0;

  for (const token of element.textContent.toLowerCase().split(/\s+/)) {
    let word = token;

    // Count and strip every quote reference at the start of the token.
    for (let quoteMatch = word.match(leadingQuote); quoteMatch; quoteMatch = word.match(leadingQuote)) {
      totalQuotes += 1;
      if (totalQuotes > thresholds.repeatThreshold) {
        return true;
      }
      word = word.slice(quoteMatch[0].length);
    }

    // Tokens that were empty or consisted only of quotes are not words.
    // (Previously a `continue` inside the stripping loop applied to that inner
    // loop, so such tokens were counted as an empty word.)
    if (!word) continue;

    totalWords++;

    if (isElongated(word, thresholds.elongationThreshold)) {
      return true;
    }
    wordCount.set(word, (wordCount.get(word) || 0) + 1);
  }

  for (const count of wordCount.values()) {
    const density = count / totalWords;
    if (count >= thresholds.repeatThreshold && density >= thresholds.densityThreshold) {
      return true;
    }
  }

  // Check for <img> tags
  const images = element.querySelectorAll('img');
  if (images.length > thresholds.maxImagesThreshold) {
    return true;
  }

  const imageSrcCount = new Map();
  for (const img of images) {
    const src = img.getAttribute('src') || '';
    const seen = (imageSrcCount.get(src) || 0) + 1;
    if (seen >= thresholds.imageSrcRepeatThreshold) {
      return true;
    }
    imageSrcCount.set(src, seen);
  }

  return false;
}

// Function to process a single post element
/**
 * Hide the post that owns the given body element when isSpam flags it.
 *
 * @param {Element} el - A reply body element; it is handed to isSpam as-is.
 * @returns {void}
 */
function processPostElement(el) {
  // Only look up the enclosing post once the body has been flagged.
  const container = isSpam(el, thresholds) ? el.closest('.post') : null;
  if (container) {
    container.style.display = 'none'; // Hide the whole post
    // Remove the <br> that directly follows the post, when present.
    const follower = container.nextSibling;
    const isLineBreak = Boolean(follower) && follower.nodeName === 'BR';
    if (isLineBreak) {
      follower.remove();
    }
  }
}

// Set up MutationObserver to watch for new posts
const observer = new MutationObserver((mutations) => {
  mutations.forEach((mutation) => {
    // Only child-list records that actually added nodes can carry new posts.
    // (The comparison operators on this line were lost when the script was
    // pasted, which made the whole script fail to parse.)
    if (mutation.type !== 'childList' || mutation.addedNodes.length === 0) return;

    mutation.addedNodes.forEach((node) => {
      // Text and comment nodes cannot be or contain posts.
      if (node.nodeType !== Node.ELEMENT_NODE) return;

      // The added node is either a reply body itself or may contain some.
      const newPostBodies = node.matches('div.post.reply div.body')
        ? [node]
        : node.querySelectorAll('div.post.reply div.body');
      newPostBodies.forEach((body) => processPostElement(body));
    });
  });
});

// Observe the parent container of posts, falling back to the whole body
// when no div.thread element is found.
const threadContainer = document.querySelector('div.thread') || document.body;
observer.observe(threadContainer, {
  childList: true,
  subtree: true
});

// Process existing posts, main logic
const postElements = document.querySelectorAll('div.post.reply div.body');
postElements.forEach((body) => processPostElement(body));
[/code]

 â„–7469[Quote]

if (mutation.type != 'childList' || mutation.addedNodes.length = 0) return;

Dammit, the board markup turns '= = =' into large red text, which is why the operators got mangled. Just copy the code above and replace that line; it should work regardless.

 â„–7524[Quote]

File: ClipboardImage.png 📥︎ (15.91 KB, 807x105) ImgOps

>>7469
Should look like this then?

 â„–7531[Quote]

>>7524
Should be != = without the space

 â„–7533[Quote]

>>7524
And it should be mutation.addedNodes.length == 0

 â„–7942[Quote]

>>6697 (OP)
>not using MutationObserver
ngmi

 â„–7953[Quote]

>>6697 (OP)
dilate

 â„–7954[Quote]

>>6716
based

 â„–8103[Quote]

>>7467
No one fucking cares about your pajeet code nigger

 №8173[Quote]

File: 1678934169050599.jpg 📥︎ (47.76 KB, 1024x796) ImgOps

Forgot the dot at the beginning of the first selector. Corrected custom CSS (adds scroll bars to long posts, fully hides hidden posts instead of just minimizing them, and also hides some nicocado spam):



/* Render elements with the "heading" class at normal weight. */
.heading {
font-weight: normal !important;
}

/* Completely remove replies (and the <br> after them) whose .body carries the
   inline style "display: none;", and replies containing a link whose href
   includes "sage". */
.post.reply:has(.body[style="display: none;"]),
.post.reply:has(.body[style="display: none;"]) + br,
div.post.reply:has(a[href*="sage"]),
div.post.reply:has(a[href*="sage"]) + br {
display: none !important;
}

/* Cap reply bodies at half the viewport height and scroll the overflow. */
div.post.reply div.body {
overflow-y: auto;
max-height: 50vh;
}

/* Let replies use almost the full page width. */
div.post.reply {
max-width: 98% !important;
width: 98% !important;
}

/* Hide div.mix entries whose image data-subject contains "coinslot" (case-insensitive). */
div.mix:has(img[data-subject*="coinslot" i]) {
display: none !important;
}

 â„–8174[Quote]

>>7467
>// Set up MutationObserver to watch for new posts
based. we're evolving

 â„–8425[Quote]

>>7467
What a steaming pile of raisin, you niggers actually think it's fine to write code like this? Holy raisin, the learn 2 code meme is real.

 â„–8551[Quote]

>>8425
give some constructive feedback on what's wrong with it or shut the fuck up nigger

 â„–8552[Quote]

>>8551
I'm not gonna spoonfeed you everything that's wrong with it, retarded nigger.

 â„–8564[Quote]

ITT: yellow fever niggas vibe coding a shoddy userscript spam filter so they can spam their yellow fever threads on a woahjack appreciation forum

 â„–8607[Quote]

>>6697 (OP)
Don't ever attempt to write any piece of code ever again, this is literally embarassing to look at. It's utterly pathetic, kill yourself. It doesn't even work.

 â„–8609[Quote]

>>8607
>It doesn't even work.
works on my machine

 â„–8612[Quote]

>>6697 (OP)
>>6733
>>6739
>>6759
>>6955
Just stop, you can't code.
>>7467

 â„–8613[Quote]

>>8564

u gay nigga

 â„–8631[Quote]

>>8552
>I can't list a single example
the shut the fuck up nigger

 â„–8648[Quote]

>>8631
>needs to be spoonfed
lmfao

 â„–8715[Quote]

File: Screenshot 2025-04-04 2116….png 📥︎ (3.53 MB, 1920x1080) ImgOps

OP, I appreciate the work you've done
>>8173
Yours too

 â„–9154[Quote]

>>8715
based

 â„–9363[Quote]

>>6697 (OP)
Thanks. Still can't get the built in filter to work in the catalog.

 â„–9368[Quote]

>>9363
yea the catalog is its own type of beast

 â„–9387[Quote]

>>8612
>>8607
>>8552
>>8425

As a professional software engineer of 10 years I don't see anything obviously wrong with this code
What are you niggers smoking

 â„–9393[Quote]

File: 1742585916692335.webm 📥︎ (3.32 MB, 1080x1766) ImgOps

came here for chimp

 â„–9460[Quote]

>>6697 (OP)
If you can get filters working in 4chanx, it'll help with filtering immensely

 â„–10621[Quote]

>>6733
>>6733
Thank You Anon

 â„–10678[Quote]

>>6697 (OP)
i just remembered suckless surf supports userscripts in ~/.surf/script.js
who needs an extension?

 â„–12107[Quote]

File: 1746064650596406.jpg 📥︎ (194.55 KB, 1061x1592) ImgOps

bump

 â„–12161[Quote]

this woman attracted my eyes



[Return][Catalog][Go to top][Post a Reply]
Delete Post [ ]
[ home / overboard ] [ soy / qa / mtv / dem ] [ int / pol ] [ a / asp / biz / fit / k / r9k / sude / tech / tv / v / x ] [ q / news / chive / rules / pass / bans ] [ wiki / booru / irc ]