Compare commits

..

6 Commits

Author SHA1 Message Date
qpismont d4666fb36e Merge pull request 'prepare first release with graceful shutdown + containerfile + push to' (#4) from 1.0 into main
ci/woodpecker/push/tests Pipeline was successful
ci/woodpecker/tag/release Pipeline was successful
Reviewed-on: #4
2026-06-12 22:38:25 +02:00
qpismont cf59455d4a Fix release job
ci/woodpecker/push/tests Pipeline was successful
2026-06-12 20:15:20 +00:00
qpismont c7387a3b28 Fix tests job
ci/woodpecker/push/tests Pipeline was successful
2026-06-12 19:57:28 +00:00
qpismont 433021d607 Add webhook already handled check
ci/woodpecker/push/tests Pipeline failed
ci/woodpecker/pr/tests Pipeline failed
Fix all tests
Add woodpecker ci (tests + release)
2026-06-12 19:56:32 +00:00
qpismont 3c32cd20b6 Readme :) 2026-06-10 20:01:21 +00:00
qpismont a30d7c5d90 prepare first release with graceful shutdown + containerfile + push to
hub script
2026-06-10 19:23:17 +00:00
20 changed files with 382 additions and 25 deletions
+4
View File
@@ -0,0 +1,4 @@
target/
.env
.devcontainer/
docs/
+18
View File
@@ -0,0 +1,18 @@
HTTP_PORT=3000
BOT_NAME=Herald
WEBHOOK_SIG_HEADER_SECRET=
OPEN_ROUTER_API_KEY=
OPEN_ROUTER_MODEL=deepseek/deepseek-v4-flash
OPEN_ROUTER_TIMEOUT=120
BOT_MAX_CONCURRENT=5
GITEA_URL=https://gitea.example.com
GITEA_TOKEN=
GITEA_TIMEOUT=30
# Optional
SENTRY_DSN=
RUST_LOG=info
+19
View File
@@ -0,0 +1,19 @@
# Release pipeline: runs only for tag events and builds/pushes the container image.
when:
  event:
    - tag

steps:
  - name: release-docker
    image: quay.io/buildah/stable
    privileged: true
    volumes:
      - /data/woodpecker-builds:/data
    commands:
      # Quote the secrets so whitespace or glob characters in them are passed
      # through unchanged; printf avoids echo's option/backslash handling.
      - printf '%s' "$DOCKER_PASSWORD" | buildah login docker.io -u "$DOCKER_USERNAME" --password-stdin
      - chmod +x scripts/build.sh
      - bash scripts/build.sh
    environment:
      # Registry credentials are injected from the pipeline secrets.
      DOCKER_USERNAME:
        from_secret: docker_username
      DOCKER_PASSWORD:
        from_secret: docker_password
+15
View File
@@ -0,0 +1,15 @@
# Test pipeline: runs for push events.
when:
event:
- push
steps:
# Lint step: installs the clippy component, then runs it over the crate.
# NOTE(review): plain `cargo clippy` does not fail the step on warn-level lints;
# confirm whether `cargo clippy -- -D warnings` is wanted here.
- name: clippy
image: rust:1.96
commands:
- rustup component add clippy
- cargo clippy
# Test step: runs the crate's test suite.
- name: test
image: rust:1.96
commands:
- cargo test
+11
View File
@@ -0,0 +1,11 @@
{
"lsp": {
"rust-analyzer": {
"initialization_options": {
"check": {
"command": "clippy"
}
}
}
}
}
Generated
+1 -1
View File
@@ -786,7 +786,7 @@ checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
[[package]]
name = "herald"
version = "0.1.0"
version = "1.0.0"
dependencies = [
"anyhow",
"axum",
+1 -1
View File
@@ -1,6 +1,6 @@
[package]
name = "herald"
version = "0.1.0"
version = "1.0.0"
edition = "2024"
[dependencies]
+12
View File
@@ -0,0 +1,12 @@
# Build stage: compile the release binary with the Rust toolchain image.
FROM rust:1.96 AS builder
WORKDIR /app
COPY . .
RUN cargo build --release

# Runtime stage: copy only the compiled binary onto a slim Debian base.
FROM debian:trixie-slim
WORKDIR /app
COPY --from=builder /app/target/release/herald .
CMD [ "./herald" ]
+53
View File
@@ -0,0 +1,53 @@
# Herald
Herald is a Gitea bot that performs automated AI-powered code reviews on pull requests using [OpenRouter](https://openrouter.ai/).
## Features
- Listens for Gitea webhook events and triggers code reviews on pull request comments
- Streams reviews back to Gitea as PR comments
- Concurrent review processing with configurable parallelism
- Graceful shutdown — in-progress reviews finish before the process exits
- Error tracking via Sentry
- Tiny memory footprint (~4MB) thanks to Rust
## Installation
**Requirements:** Rust toolchain ([rustup.rs](https://rustup.rs))
```sh
cargo build --release
./target/release/herald
```
Herald reads its configuration from environment variables (a `.env` file is supported):
| Variable | Description |
|---|---|
| `HTTP_PORT` | Port to listen on |
| `BOT_NAME` | The bot's Gitea username (used to detect mentions) |
| `WEBHOOK_SIG_HEADER_SECRET` | Gitea webhook secret for signature verification |
| `OPEN_ROUTER_API_KEY` | OpenRouter API key |
| `OPEN_ROUTER_MODEL` | Model to use (e.g. `deepseek/deepseek-v4-flash`) |
| `OPEN_ROUTER_TIMEOUT` | OpenRouter request timeout in seconds |
| `BOT_MAX_CONCURRENT` | Maximum number of concurrent reviews |
| `GITEA_URL` | Base URL of your Gitea instance |
| `GITEA_TOKEN` | Gitea API token |
| `GITEA_TIMEOUT` | Gitea API request timeout in seconds |
| `SENTRY_DSN` | *(optional)* Sentry DSN for error tracking |
| `RUST_LOG` | *(optional)* Log level, defaults to `info` |
## Development
The easiest way to get started is with the provided [Dev Container](https://containers.dev/) (VS Code or Zed with the dev container extension).
Open the project and reopen it in the container — the Rust toolchain is pre-installed.
**Without Dev Container**, you just need a Rust toolchain:
```sh
rustup toolchain install stable
cargo run
```
Copy `.env.example` to `.env` and fill in your values before running.
View File
+22
View File
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
# Builds the Herald container image for the current tag and pushes it to docker.io.
#
# Environment:
#   CI_COMMIT_TAG  tag to build and publish; the script exits with status 1
#                  when it is unset or empty.
set -euo pipefail

IMAGE="tintounn/herald"

if [ -z "${CI_COMMIT_TAG:-}" ]; then
  echo "Error: CI_COMMIT_TAG is not set" >&2
  exit 1
fi

TAG="${CI_COMMIT_TAG}"
# One fully-qualified reference shared by build and push, so the push does not
# depend on short-name resolution to find the image that was just built.
IMAGE_REF="docker.io/${IMAGE}:${TAG}"

echo "Building ${IMAGE}:${TAG}..."
buildah build \
  --file Containerfile \
  --tag "${IMAGE_REF}" \
  .

echo "Pushing ${IMAGE}:${TAG}..."
buildah push "${IMAGE_REF}" "docker://${IMAGE_REF}"

echo "Done: ${IMAGE}:${TAG}"
+17 -7
View File
@@ -1,5 +1,4 @@
use anyhow::anyhow;
use axum::body::{Body, Bytes, to_bytes};
use axum::body::{Bytes, to_bytes};
use axum::extract::{FromRef, FromRequest, State};
use axum::http::Request;
use axum::response::IntoResponse;
@@ -15,12 +14,14 @@ use tower::ServiceBuilder;
use tower_http::trace::TraceLayer;
use tracing::{info, instrument};
use tokio_util::sync::CancellationToken;
use crate::consts::{GITEA_EVENT_TYPE_HEADER_NAME, GITEA_SIG_HEADER_NAME, MAX_WEBHOOK_BODY_SIZE};
use crate::errors::AppError;
use crate::gitea::WebhookType;
use crate::state::AppState;
pub async fn start(app_state: AppState) -> anyhow::Result<()> {
pub async fn start(app_state: AppState, shutdown: CancellationToken) -> anyhow::Result<()> {
let http_port = app_state.config.http_port;
let app = Router::new()
@@ -38,8 +39,12 @@ pub async fn start(app_state: AppState) -> anyhow::Result<()> {
info!("Listening API on port {}", http_port);
axum::serve(listener, app)
.with_graceful_shutdown(async move { shutdown.cancelled().await })
.await
.map_err(anyhow::Error::from)
.inspect(|_| info!("API shutting down complete"))?;
Ok(())
}
async fn root() -> &'static str {
@@ -53,10 +58,15 @@ async fn webhook(
) -> Result<impl IntoResponse, AppError> {
tracing::Span::current().record("webhook_type", tracing::field::debug(&wb));
app_state
.bot_tx
.try_send(wb)
.map_err(|_| AppError::ChannelFullErr)?;
let event_id = wb.event_id();
if app_state.bot.check_and_mark(event_id).await {
return Err(AppError::AlreadyProcessedErr);
}
if app_state.bot_tx.try_send(wb).is_err() {
app_state.bot.unmark(event_id).await;
return Err(AppError::ChannelFullErr);
}
Ok((StatusCode::CREATED, "Task started"))
}
+100 -6
View File
@@ -4,7 +4,9 @@ use crate::{
open_router::OpenRouterClient,
};
use serde::Deserialize;
use std::{sync::Arc, time::Duration};
use std::{collections::HashSet, sync::Arc, time::Duration};
use tokio::sync::Mutex;
use tokio_util::sync::CancellationToken;
use tracing::{error, info, instrument};
#[derive(Deserialize, Debug)]
@@ -29,6 +31,7 @@ pub struct Bot {
open_router_client: OpenRouterClient,
http_client: reqwest::Client,
max_concurrent: usize,
actions_handled: Arc<Mutex<HashSet<u64>>>,
}
impl Bot {
@@ -45,6 +48,7 @@ impl Bot {
)?,
max_concurrent: config.bot_max_concurrent,
config,
actions_handled: Arc::new(Mutex::new(HashSet::new())),
http_client: reqwest::Client::builder()
.timeout(Duration::from_secs(gitea_timeout))
.build()?,
@@ -54,14 +58,23 @@ impl Bot {
pub async fn start(
&self,
mut rx: tokio::sync::mpsc::Receiver<WebhookType>,
shutdown: CancellationToken,
) -> anyhow::Result<()> {
info!("Bot started");
let sem = Arc::new(tokio::sync::Semaphore::new(self.max_concurrent));
let mut tasks = tokio::task::JoinSet::new();
while let Some(wb) = rx.recv().await {
// Drain completed tasks to avoid the JoinSet growing unbounded
loop {
let wb = tokio::select! {
biased;
_ = shutdown.cancelled() => break,
msg = rx.recv() => match msg {
Some(wb) => wb,
None => break,
},
};
while let Some(res) = tasks.try_join_next() {
if let Err(e) = res {
error!("Task panicked: {e}");
@@ -78,11 +91,9 @@ impl Bot {
});
}
// When all webhook tasks have completed, we can safely exit
// properly before returning
tasks.join_all().await;
info!("Bot shutting down, channel closed");
info!("Bot shutting down complete");
Ok(())
}
@@ -108,4 +119,87 @@ impl Bot {
}
}
}
/// Records `event_id` as handled and reports whether it had already been recorded.
///
/// Returns `true` when the id was already in the handled set (the set is left
/// unchanged) and `false` when this call inserted it.
pub async fn check_and_mark(&self, event_id: u64) -> bool {
    // `HashSet::insert` yields `true` only for a value that was not present yet.
    let newly_inserted = self.actions_handled.lock().await.insert(event_id);
    !newly_inserted
}
/// Removes `event_id` from the handled set so the same id can be accepted again.
///
/// Removing an id that is not in the set does nothing.
pub async fn unmark(&self, event_id: u64) {
    self.actions_handled.lock().await.remove(&event_id);
}
}
// Unit tests for the handled-event bookkeeping (check-and-mark / unmark semantics).
//
// NOTE(review): these tests call local helper functions that duplicate the bodies
// of `Bot::check_and_mark` and `Bot::unmark` on a bare set; they do not call the
// `Bot` methods themselves, so a change to those methods would not be caught here.
#[cfg(test)]
mod tests {
use super::*;
// Builds an empty, shareable set of handled event ids.
fn make_actions_handled() -> Arc<Mutex<HashSet<u64>>> {
Arc::new(Mutex::new(HashSet::new()))
}
// Inserts `event_id`; returns `true` if it was already present.
async fn check_and_mark(actions_handled: &Arc<Mutex<HashSet<u64>>>, event_id: u64) -> bool {
let mut lock = actions_handled.lock().await;
!lock.insert(event_id)
}
// Removes `event_id` from the set.
async fn unmark(actions_handled: &Arc<Mutex<HashSet<u64>>>, event_id: u64) {
let mut lock = actions_handled.lock().await;
lock.remove(&event_id);
}
// The first sighting of an id is reported as not yet handled.
#[tokio::test]
async fn test_check_and_mark_first_call_returns_false() {
let actions_handled = make_actions_handled();
assert!(!check_and_mark(&actions_handled, 1).await);
}
// A repeated id is reported as already handled.
#[tokio::test]
async fn test_check_and_mark_second_call_returns_true() {
let actions_handled = make_actions_handled();
check_and_mark(&actions_handled, 1).await;
assert!(check_and_mark(&actions_handled, 1).await);
}
// Marking one id does not affect a different id.
#[tokio::test]
async fn test_check_and_mark_different_ids_are_independent() {
let actions_handled = make_actions_handled();
check_and_mark(&actions_handled, 1).await;
assert!(!check_and_mark(&actions_handled, 2).await);
}
// After unmarking, the same id is treated as new again.
#[tokio::test]
async fn test_unmark_allows_reprocessing() {
let actions_handled = make_actions_handled();
check_and_mark(&actions_handled, 1).await;
unmark(&actions_handled, 1).await;
assert!(!check_and_mark(&actions_handled, 1).await);
}
// Unmarking an id that was never marked leaves the set usable and the id unmarked.
#[tokio::test]
async fn test_unmark_nonexistent_id_is_noop() {
let actions_handled = make_actions_handled();
unmark(&actions_handled, 99).await;
assert!(!check_and_mark(&actions_handled, 99).await);
}
// Two tasks racing on the same id: only one of them may win the insert.
#[tokio::test]
async fn test_check_and_mark_concurrent_same_id() {
let actions_handled = make_actions_handled();
let actions_handled2 = Arc::clone(&actions_handled);
let t1 = tokio::spawn(async move { check_and_mark(&actions_handled, 42).await });
let t2 = tokio::spawn(async move { check_and_mark(&actions_handled2, 42).await });
let (r1, r2) = tokio::join!(t1, t2);
let results = [r1.unwrap(), r2.unwrap()];
// exactly one of the two calls must return false (not yet handled)
assert_eq!(results.iter().filter(|&&r| !r).count(), 1);
// the other must return true (already handled)
assert_eq!(results.iter().filter(|&&r| r).count(), 1);
}
}
+1 -1
View File
@@ -35,7 +35,7 @@ pub async fn exec_review(
let bot_result: Result<ReviewResult, anyhow::Error> = async {
let git_diff =
download_git_diff(&http_client, &review_payload.pull_request.diff_url).await?;
download_git_diff(http_client, &review_payload.pull_request.diff_url).await?;
let diff_for_llm = format_diff_for_review(&git_diff);
+31
View File
@@ -49,3 +49,34 @@ pub fn try_get_env(key: &str) -> anyhow::Result<String> {
Ok(env)
}
// Unit tests for `try_get_env`.
//
// NOTE(review): every test mutates the process environment inside `unsafe`.
// `std::env::set_var` / `remove_var` are only sound when no other thread reads or
// writes the environment at the same time, and the test harness runs tests on
// several threads by default. Each test uses its own variable name, but that does
// not by itself satisfy the requirement — confirm whether these tests should be
// serialized.
#[cfg(test)]
mod tests {
use super::*;
// A variable that is set to a non-empty value is returned unchanged.
#[test]
fn test_try_get_env_returns_value() {
// SAFETY: assumes no concurrent access to the process environment — TODO confirm.
unsafe { std::env::set_var("TEST_ENV_PRESENT", "hello") };
assert_eq!(try_get_env("TEST_ENV_PRESENT").unwrap(), "hello");
}
// A variable that is not set yields an error.
#[test]
fn test_try_get_env_missing_var_returns_error() {
// SAFETY: assumes no concurrent access to the process environment — TODO confirm.
unsafe { std::env::remove_var("TEST_ENV_MISSING") };
assert!(try_get_env("TEST_ENV_MISSING").is_err());
}
// An empty value yields an error whose message names the variable.
#[test]
fn test_try_get_env_empty_value_returns_error() {
// SAFETY: assumes no concurrent access to the process environment — TODO confirm.
unsafe { std::env::set_var("TEST_ENV_EMPTY", "") };
let err = try_get_env("TEST_ENV_EMPTY").unwrap_err();
assert!(err.to_string().contains("TEST_ENV_EMPTY"));
}
// A whitespace-only value yields an error whose message names the variable.
#[test]
fn test_try_get_env_whitespace_only_returns_error() {
// SAFETY: assumes no concurrent access to the process environment — TODO confirm.
unsafe { std::env::set_var("TEST_ENV_WHITESPACE", " ") };
let err = try_get_env("TEST_ENV_WHITESPACE").unwrap_err();
assert!(err.to_string().contains("TEST_ENV_WHITESPACE"));
}
}
+4
View File
@@ -25,6 +25,9 @@ pub enum AppError {
#[error("Channel full")]
ChannelFullErr,
#[error("Already processed")]
AlreadyProcessedErr,
#[error(transparent)]
BadJsonStructErr(#[from] serde_json::Error),
@@ -58,6 +61,7 @@ impl IntoResponse for AppError {
StatusCode::UNAUTHORIZED,
"WebHook sig header is invalid".to_string(),
),
AppError::AlreadyProcessedErr => (StatusCode::OK, "Already processed".to_string()),
AppError::ChannelFullErr => {
sentry_anyhow::capture_anyhow(&anyhow!("Max concurrent tasks reached"));
(
+38 -5
View File
@@ -158,6 +158,14 @@ pub enum WebhookType {
Review(ReviewPayload),
}
impl WebhookType {
pub fn event_id(&self) -> u64 {
match self {
WebhookType::Review(payload) => payload.comment.id,
}
}
}
#[derive(Deserialize, Debug)]
pub struct ReviewPayload {
pub action: String,
@@ -229,7 +237,12 @@ mod tests {
"action": "created",
"pull_request": {
"id": 42,
"diff_url": "https://mydiff.fr"
"diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
},
"comment": {
"id": 7,
@@ -288,7 +301,12 @@ mod tests {
"action": "edited",
"pull_request": {
"id": 1,
"diff_url": "https://mydiff.fr"
"diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
},
"comment": {
"id": 1,
@@ -314,7 +332,12 @@ mod tests {
"action": "created",
"pull_request": {
"id": 99,
"diff_url": "https://mydiff.fr"
"diff_url": "https://mydiff.fr",
"number": 5,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
},
"comment": {
"id": 12,
@@ -346,7 +369,12 @@ mod tests {
"action": "created",
"pull_request": {
"id": 1,
"diff_url": "https://mydiff.fr"
"diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
},
"comment": {
"id": 1,
@@ -367,7 +395,12 @@ mod tests {
"action": "created",
"pull_request": {
"id": 1,
"diff_url": "https://mydiff.fr"
"diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
},
"comment": {
"id": 1,
+32 -2
View File
@@ -1,5 +1,10 @@
use std::sync::Arc;
use crate::{bot::Bot, gitea::WebhookType, state::AppState};
use dotenvy::dotenv;
use tokio::signal::unix::{SignalKind, signal};
use tokio_util::sync::CancellationToken;
use tracing::{info, warn};
use tracing_subscriber::{EnvFilter, fmt, layer::SubscriberExt, util::SubscriberInitExt};
@@ -54,11 +59,36 @@ async fn run() -> anyhow::Result<()> {
"Starting Herald"
);
let shutdown = CancellationToken::new();
let bot = Bot::new(config.clone())?;
let (tx, rx) = tokio::sync::mpsc::channel::<WebhookType>(config.bot_max_concurrent * 2);
let app_state = AppState { bot_tx: tx, config };
let app_state = AppState {
bot_tx: tx,
bot: bot.clone(),
config,
};
tokio::try_join!(bot.start(rx), api::start(app_state))?;
let signal = async {
let mut sigterm = signal(SignalKind::terminate())?;
let mut sigint = signal(SignalKind::interrupt())?;
tokio::select! {
_ = sigterm.recv() => info!("Received SIGTERM"),
_ = sigint.recv() => info!("Received SIGINT"),
}
info!("Shutting down...");
shutdown.cancel();
anyhow::Ok(())
};
tokio::try_join!(
bot.start(rx, shutdown.clone()),
api::start(app_state, shutdown.clone()),
signal
)?;
info!("Shutdown complete");
Ok(())
}
+1 -1
View File
@@ -42,7 +42,7 @@ impl OpenRouterClient {
Ok(ChatResult {
message: response.choices[0]
.content()
.map(|msg| String::from(msg))
.map(String::from)
.ok_or(anyhow::anyhow!("No content"))?,
cost: response.usage.and_then(|u| u.cost),
})
+2 -1
View File
@@ -1,7 +1,8 @@
use crate::{env::EnvConfig, gitea::WebhookType};
use crate::{bot::Bot, env::EnvConfig, gitea::WebhookType};
/// State shared with the HTTP handlers.
#[derive(Clone)]
pub struct AppState {
/// Sending half of the channel that carries accepted webhooks to the bot.
pub bot_tx: tokio::sync::mpsc::Sender<WebhookType>,
/// Bot handle, used by the webhook handler to check and mark handled event ids.
pub bot: Bot,
/// Application configuration.
pub config: EnvConfig,
}