# setup_ubuntu.sh
#!/bin/bash
# Setup script for News Scraper Daemon on Ubuntu.
# Run this script on your Ubuntu machine to set up the scraper.
#
# What it does:
#   - installs system and Python dependencies (sudo required for apt),
#   - prompts for a GitHub fork repo and a Personal Access Token and stores
#     them in ~/.scraper_env (mode 600),
#   - runs one test sync cycle,
#   - optionally installs and starts a systemd service for the daemon.

# Fail fast: abort on command errors, unset variables, and pipeline failures.
set -euo pipefail

echo "========================================"
echo "News Scraper Daemon - Ubuntu Setup"
echo "========================================"

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Get the directory where this script is located and derive the repo root
# (this script lives two levels below the repository root).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(dirname "$(dirname "$SCRIPT_DIR")")"

echo -e "${YELLOW}Repository root: $REPO_ROOT${NC}"

# Refuse to run as root: the venv, config file, and systemd service are all
# set up for the invoking (regular) user.
if [ "$EUID" -eq 0 ]; then
    echo -e "${RED}Please do not run this script as root${NC}"
    exit 1
fi

# Step 1: Install system dependencies
echo ""
echo -e "${GREEN}Step 1: Installing system dependencies...${NC}"
sudo apt-get update
sudo apt-get install -y python3 python3-pip python3-venv git

# Step 2: Create virtual environment
echo ""
echo -e "${GREEN}Step 2: Setting up Python virtual environment...${NC}"
cd "$REPO_ROOT"
python3 -m venv venv
# shellcheck disable=SC1091 — venv is created just above
source venv/bin/activate

# Step 3: Install Python dependencies
echo ""
echo -e "${GREEN}Step 3: Installing Python dependencies...${NC}"
pip install --upgrade pip
pip install -r scripts/scraper/requirements.txt

# Step 4: Install Playwright browsers
echo ""
echo -e "${GREEN}Step 4: Installing Playwright browsers...${NC}"
playwright install chromium
playwright install-deps chromium

# Step 5: Set up logs directory
echo ""
echo -e "${GREEN}Step 5: Setting up logs directory...${NC}"
mkdir -p logs
touch logs/.gitkeep

# Step 6: Configure git identity used for the scraper's automated commits
echo ""
echo -e "${GREEN}Step 6: Configuring git...${NC}"
git config user.name "News Scraper Bot"
git config user.email "scraper@local"

# Step 7: Configuration Setup
echo ""
echo -e "${YELLOW}Step 7: Configuration Setup${NC}"
echo ""

# Get Fork Repo
echo "Enter your GitHub fork repository (e.g., 'username/Hong-Kong-Fire-Documentary')"
read -r -p "Fork repo: " FORK_REPO

if [ -z "$FORK_REPO" ]; then
    echo -e "${RED}Fork repo is required!${NC}"
    exit 1
fi

# Get GitHub Token
echo ""
echo "You need a GitHub Personal Access Token (PAT) with these permissions:"
echo "  - Contents: Read and Write"
echo "  - Pull requests: Read and Write"
echo ""
echo "Generate one at: https://github.com/settings/tokens?type=beta"
echo ""

# -s: do not echo the secret to the terminal (it would otherwise land in
# scrollback and screen recordings); print a newline ourselves afterwards.
read -r -s -p "Enter your GitHub token: " GITHUB_TOKEN
echo ""

if [ -z "$GITHUB_TOKEN" ]; then
    echo -e "${RED}GitHub token is required!${NC}"
    exit 1
fi

# Create environment file with restrictive permissions from the start
# (umask in a subshell avoids a window where the token is world-readable
# before a later chmod could tighten it).
ENV_FILE="$HOME/.scraper_env"
(
    umask 077
    cat > "$ENV_FILE" << EOF
GITHUB_TOKEN=$GITHUB_TOKEN
FORK_REPO=$FORK_REPO
EOF
)
chmod 600 "$ENV_FILE"
echo -e "${GREEN}Configuration saved to $ENV_FILE${NC}"

# Export for current session so the test run below can use them
export GITHUB_TOKEN
export FORK_REPO

# Step 8: Test the daemon with a single sync cycle before installing a service
echo ""
echo -e "${GREEN}Step 8: Testing the daemon...${NC}"
echo "Running a single sync cycle..."

cd "$REPO_ROOT"
# shellcheck disable=SC1091
source venv/bin/activate
python scripts/scraper/daemon.py --once
echo -e "${GREEN}Test completed!${NC}"

# Step 9: Set up systemd service (optional)
echo ""
echo -e "${YELLOW}Step 9: systemd Service Setup${NC}"
echo ""
read -r -p "Do you want to install the systemd service? (y/n): " INSTALL_SERVICE

# Accept both 'y' and 'Y'
if [[ "$INSTALL_SERVICE" =~ ^[Yy]$ ]]; then
    # Create service file from template; $USER/$REPO_ROOT/$HOME are expanded
    # now so the unit file contains absolute, user-specific paths.
    SERVICE_FILE="/tmp/news-scraper.service"

    cat > "$SERVICE_FILE" << EOF
[Unit]
Description=Hong Kong Fire Documentary News Scraper Daemon
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=$USER
Group=$USER
WorkingDirectory=$REPO_ROOT
EnvironmentFile=$HOME/.scraper_env
ExecStart=$REPO_ROOT/venv/bin/python $REPO_ROOT/scripts/scraper/daemon.py
Restart=on-failure
RestartSec=30
StandardOutput=journal
StandardError=journal

[Install]
WantedBy=multi-user.target
EOF

    # Install service
    sudo cp "$SERVICE_FILE" /etc/systemd/system/news-scraper.service
    sudo systemctl daemon-reload
    sudo systemctl enable news-scraper

    echo ""
    read -r -p "Start the service now? (y/n): " START_NOW
    if [[ "$START_NOW" =~ ^[Yy]$ ]]; then
        sudo systemctl start news-scraper
        echo -e "${GREEN}Service started!${NC}"
        echo ""
        echo "View logs with: journalctl -u news-scraper -f"
        echo "Check status with: sudo systemctl status news-scraper"
    fi

    echo -e "${GREEN}Service installed!${NC}"
fi

echo ""
echo "========================================"
echo -e "${GREEN}Setup Complete!${NC}"
echo "========================================"
echo ""
echo "To run the daemon manually:"
echo "  cd $REPO_ROOT"
echo "  source venv/bin/activate"
echo "  source ~/.scraper_env"
echo "  python scripts/scraper/daemon.py"
echo ""
echo "To check service status:"
echo "  sudo systemctl status news-scraper"
echo ""
echo "To view logs:"
echo "  journalctl -u news-scraper -f"
echo "  # or"
echo "  tail -f $REPO_ROOT/logs/scraper.log"
echo ""