Compare commits

28 Commits

Author SHA1 Message Date
qpismont 7f24d7657c Merge pull request 'Fix missing env var error' (#5) from 1.0.1 into main
ci/woodpecker/tag/release Pipeline was successful
Reviewed-on: #5
2026-06-16 22:05:24 +02:00
qpismont a613fdb99e Another trace clear 2026-06-16 18:53:33 +00:00
qpismont 975581093a Add missing sentry error backtrace
Clear traces spam
2026-06-16 18:49:35 +00:00
qpismont 00d46ce968 Fix AI review
Webhook action check before user check
2026-06-12 22:01:49 +00:00
qpismont 3f6c5b5559 Fix missing env var error
ci/woodpecker/push/tests Pipeline was successful
2026-06-12 21:27:12 +00:00
qpismont d4666fb36e Merge pull request 'prepare first release with graceful shutdown + containerfile + push to' (#4) from 1.0 into main
ci/woodpecker/push/tests Pipeline was successful
ci/woodpecker/tag/release Pipeline was successful
Reviewed-on: #4
2026-06-12 22:38:25 +02:00
qpismont cf59455d4a Fix release job
ci/woodpecker/push/tests Pipeline was successful
2026-06-12 20:15:20 +00:00
qpismont c7387a3b28 Fix tests job
ci/woodpecker/push/tests Pipeline was successful
2026-06-12 19:57:28 +00:00
qpismont 433021d607 Add webhook already handled check
ci/woodpecker/push/tests Pipeline failed
ci/woodpecker/pr/tests Pipeline failed
Fix all tests
Add woodpecker ci (tests + release)
2026-06-12 19:56:32 +00:00
qpismont 3c32cd20b6 Readme :) 2026-06-10 20:01:21 +00:00
qpismont a30d7c5d90 prepare first release with graceful shutdown + containerfile + push to
hub script
2026-06-10 19:23:17 +00:00
qpismont 9175f9b3a2 Merge pull request 'Starting impl Sentry and tracing' (#3) from tracing into main
Reviewed-on: #3
2026-06-10 20:27:26 +02:00
qpismont 71ebfdd276 tasks.join_next => join_all() 2026-06-10 18:26:55 +00:00
qpismont 3d751ae6c6 drain completed tasks and log webhook queue stats 2026-06-10 18:19:18 +00:00
qpismont 6599c20c30 verify_signature before adding body to sentry event 2026-06-10 18:08:35 +00:00
qpismont a2d898c07d Fix sentry http request info
Add async bot running with semaphore
2026-06-10 17:50:24 +00:00
qpismont efb35d5a8a continue tracing impl 2026-06-09 21:17:03 +00:00
qpismont 39c2afa0a7 Starting impl Sentry and tracing 2026-06-09 20:58:38 +00:00
qpismont 3984a7d3ba Merge pull request 'started gitea api impl' (#2) from gitea_api into main
Reviewed-on: #2
2026-06-07 10:23:31 +02:00
qpismont aa0dbdcc7a Extract review logic into bot_actions module
Move `exec_review`, `download_git_diff`, and review formatting
to a new `bot_actions::review` module. Update the review flow to
post inline review comments via the Gitea API and simplify the
comment markdown to a summary. Add diff formatting that
preprocesses added lines with line numbers for the LLM prompt.
2026-06-06 17:27:35 +00:00
qpismont ced1fca563 Add gitea download git diff limit 2026-06-05 19:34:29 +00:00
qpismont 6aa653e846 Add http status check for gitea api 2026-06-05 18:52:59 +00:00
qpismont 3501e4ae9d Use reqwest client with timeout in gitea.rs and bot.rs 2026-06-05 18:48:02 +00:00
qpismont 01e13f0081 Add default authorization header for gitea api (remove query string)
Add review cost
2026-06-05 18:39:38 +00:00
qpismont cd5c5b9478 Use reqwest 0.12 with rustls-tls and add timeouts
Also improve review prompt with line calculation instructions, switch
feedback to French, and enable reasoning for OpenRouter.
2026-06-03 20:51:21 +00:00
qpismont de81232201 Integrate OpenRouter for AI-powered code review
Add openrouter-rs dependency, review prompt, and markdown formatting.
Update comment API to accept dynamic body. Adjust devcontainer for
podman compatibility.
2026-06-03 19:38:00 +00:00
qpismont 4966d08d18 first comment ! :D 2026-06-02 20:30:02 +00:00
qpismont 10ebee389e started gitea api impl 2026-06-02 19:52:50 +00:00
24 changed files with 2583 additions and 338 deletions
+1
View File
@@ -11,6 +11,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
curl \ curl \
git \ git \
build-essential \ build-essential \
libssl-dev \
cmake \ cmake \
pkg-config \ pkg-config \
clang \ clang \
+1 -11
View File
@@ -12,18 +12,8 @@
"containerEnv": { "containerEnv": {
"SHELL": "/bin/bash" "SHELL": "/bin/bash"
}, },
"customizations": {
"vscode": {
"extensions": ["rust-lang.rust-analyzer", "tamasfe.even-better-toml", "fill-labs.dependi"],
"settings": {
"[rust]": {
"editor.defaultFormatter": "rust-lang.rust-analyzer",
"editor.formatOnSave": true
}
}
}
},
"workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/herald,type=bind", "workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/herald,type=bind",
"workspaceFolder": "/workspaces/herald", "workspaceFolder": "/workspaces/herald",
"runArgs": ["--userns=keep-id", "--security-opt", "label=disable"],
"appPort": [3000] "appPort": [3000]
} }
+4
View File
@@ -0,0 +1,4 @@
target/
.env
.devcontainer/
docs/
+19
View File
@@ -0,0 +1,19 @@
HTTP_PORT=3000
BOT_NAME=Herald
WEBHOOK_SIG_HEADER_SECRET=
OPEN_ROUTER_API_KEY=
OPEN_ROUTER_MODEL=deepseek/deepseek-v4-flash
OPEN_ROUTER_TIMEOUT=120
BOT_MAX_CONCURRENT=5
GITEA_URL=https://gitea.example.com
GITEA_TOKEN=
GITEA_TIMEOUT=30
# Optional
SENTRY_DSN=
RUST_LOG=info
RUST_BACKTRACE=1
+19
View File
@@ -0,0 +1,19 @@
when:
event:
- tag
steps:
- name: release-docker
image: quay.io/buildah/stable
privileged: true
volumes:
- /data/woodpecker-builds:/data
commands:
- echo $DOCKER_PASSWORD | buildah login docker.io -u $DOCKER_USERNAME --password-stdin
- chmod +x scripts/build.sh
- bash scripts/build.sh
environment:
DOCKER_USERNAME:
from_secret: docker_username
DOCKER_PASSWORD:
from_secret: docker_password
+15
View File
@@ -0,0 +1,15 @@
when:
event:
- push
steps:
- name: clippy
image: rust:1.96
commands:
- rustup component add clippy
- cargo clippy
- name: test
image: rust:1.96
commands:
- cargo test
+11
View File
@@ -0,0 +1,11 @@
{
"lsp": {
"rust-analyzer": {
"initialization_options": {
"check": {
"command": "clippy"
}
}
}
}
}
Generated
+1486 -273
View File
File diff suppressed because it is too large Load Diff
+17 -4
View File
@@ -1,19 +1,32 @@
[package] [package]
name = "herald" name = "herald"
version = "0.1.0" version = "1.0.1"
edition = "2024" edition = "2024"
[profile.release]
debug = 1
[dependencies] [dependencies]
reqwest = { version = "0.13", features = ["json"] } reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
tokio = { version = "1.52", features = ["full"] } tokio = { version = "1.52", features = ["full"] }
tokio-stream = "0.1"
tokio-util = "0.7"
futures-util = "0.3"
serde_json = "1.0" serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }
sentry = { version = "0.48", features = ["tower-axum-matched-path"] }
sentry-anyhow = { version = "0.48", features = ["backtrace"] }
openrouter-rs = "0.10"
dotenvy = "0.15" dotenvy = "0.15"
tower = "0.5"
tower-http = {version = "0.6", features = ["trace"] }
tracing = "0.1"
tracing-subscriber = { version = "0.3", features=["env-filter"] }
axum = "0.8" axum = "0.8"
anyhow = "1.0" anyhow = { version = "1.0", features = ["backtrace"] }
thiserror = "2.0" thiserror = "2.0"
hmac = "0.13" hmac = "0.13"
sha2 = "0.11" sha2 = "0.11"
hex = "0.4" hex = "0.4"
subtle = "2.6" subtle = "2.6"
bytes = "1.11" bytes = "1.11"
+12
View File
@@ -0,0 +1,12 @@
FROM rust:1.96 as builder
WORKDIR /app
COPY . .
RUN cargo build --release
FROM debian:trixie-slim
WORKDIR /app
COPY --from=builder /app/target/release/herald .
CMD [ "./herald" ]
+53
View File
@@ -0,0 +1,53 @@
# Herald
Herald is a Gitea bot that performs automated AI-powered code reviews on pull requests using [OpenRouter](https://openrouter.ai/).
## Features
- Listens for Gitea webhook events and triggers code reviews on pull request comments
- Streams reviews back to Gitea as PR comments
- Concurrent review processing with configurable parallelism
- Graceful shutdown — in-progress reviews finish before the process exits
- Error tracking via Sentry
- Tiny memory footprint (~4MB) thanks to Rust
## Installation
**Requirements:** Rust toolchain ([rustup.rs](https://rustup.rs))
```sh
cargo build --release
./target/release/herald
```
Herald reads its configuration from environment variables (a `.env` file is supported):
| Variable | Description |
|---|---|
| `HTTP_PORT` | Port to listen on |
| `BOT_NAME` | The bot's Gitea username (used to detect mentions) |
| `WEBHOOK_SIG_HEADER_SECRET` | Gitea webhook secret for signature verification |
| `OPEN_ROUTER_API_KEY` | OpenRouter API key |
| `OPEN_ROUTER_MODEL` | Model to use (e.g. `deepseek/deepseek-v4-flash`) |
| `OPEN_ROUTER_TIMEOUT` | OpenRouter request timeout in seconds |
| `BOT_MAX_CONCURRENT` | Maximum number of concurrent reviews |
| `GITEA_URL` | Base URL of your Gitea instance |
| `GITEA_TOKEN` | Gitea API token |
| `GITEA_TIMEOUT` | Gitea API request timeout in seconds |
| `SENTRY_DSN` | *(optional)* Sentry DSN for error tracking |
| `RUST_LOG` | *(optional)* Log level, defaults to `info` |
## Development
The easiest way to get started is with the provided [Dev Container](https://containers.dev/) (VS Code or Zed with the dev container extension).
Open the project and reopen it in the container — the Rust toolchain is pre-installed.
**Without Dev Container**, you just need a Rust toolchain:
```sh
rustup toolchain install stable
cargo run
```
Copy `.env.example` to `.env` and fill in your values before running.
View File
+22
View File
@@ -0,0 +1,22 @@
#!/usr/bin/env bash
set -euo pipefail
IMAGE="tintounn/herald"
if [ -z "${CI_COMMIT_TAG:-}" ]; then
echo "Error: CI_COMMIT_TAG is not set" >&2
exit 1
fi
TAG="${CI_COMMIT_TAG}"
echo "Building ${IMAGE}:${TAG}..."
buildah build \
--file Containerfile \
--tag "docker.io/${IMAGE}:${TAG}" \
.
echo "Pushing ${IMAGE}:${TAG}..."
buildah push "${IMAGE}:${TAG}"
echo "Done: ${IMAGE}:${TAG}"
+52 -5
View File
@@ -1,38 +1,74 @@
use axum::body::{Bytes, to_bytes}; use axum::body::{Bytes, to_bytes};
use axum::extract::{FromRef, FromRequest}; use axum::extract::{FromRef, FromRequest, State};
use axum::response::{IntoResponse, Response}; use axum::http::Request;
use axum::response::IntoResponse;
use axum::routing::{get, post}; use axum::routing::{get, post};
use axum::{Json, Router}; use axum::{Json, Router};
use hmac::{Hmac, KeyInit, Mac}; use hmac::{Hmac, KeyInit, Mac};
use reqwest::StatusCode;
use sentry::integrations::tower::{NewSentryLayer, SentryHttpLayer};
use serde_json::Value; use serde_json::Value;
use sha2::Sha256; use sha2::Sha256;
use subtle::ConstantTimeEq; use subtle::ConstantTimeEq;
use tower::ServiceBuilder;
use tower_http::trace::TraceLayer;
use tracing::{info, instrument};
use tokio_util::sync::CancellationToken;
use crate::consts::{GITEA_EVENT_TYPE_HEADER_NAME, GITEA_SIG_HEADER_NAME, MAX_WEBHOOK_BODY_SIZE}; use crate::consts::{GITEA_EVENT_TYPE_HEADER_NAME, GITEA_SIG_HEADER_NAME, MAX_WEBHOOK_BODY_SIZE};
use crate::errors::AppError; use crate::errors::AppError;
use crate::gitea::WebhookType; use crate::gitea::WebhookType;
use crate::state::AppState; use crate::state::AppState;
pub async fn start(app_state: AppState) -> anyhow::Result<()> { pub async fn start(app_state: AppState, shutdown: CancellationToken) -> anyhow::Result<()> {
let http_port = app_state.config.http_port; let http_port = app_state.config.http_port;
let app = Router::new() let app = Router::new()
.route("/", get(root)) .route("/", get(root))
.route("/webhook", post(webhook)) .route("/webhook", post(webhook))
.layer(
ServiceBuilder::new()
.layer(NewSentryLayer::<Request<_>>::new_from_top())
.layer(SentryHttpLayer::new())
.layer(TraceLayer::new_for_http()),
)
.with_state(app_state); .with_state(app_state);
let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{}", http_port)).await?; let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{}", http_port)).await?;
info!("Listening API on port {}", http_port);
axum::serve(listener, app) axum::serve(listener, app)
.with_graceful_shutdown(async move { shutdown.cancelled().await })
.await .await
.map_err(anyhow::Error::from) .map_err(anyhow::Error::from)
.inspect(|_| info!("API shutting down complete"))?;
Ok(())
} }
async fn root() -> &'static str { async fn root() -> &'static str {
"Hi, i'm Herald :)" "Hi, i'm Herald :)"
} }
async fn webhook(WebhookExtract(wb): WebhookExtract) -> Result<Response, AppError> { #[instrument(skip(app_state), fields(webhook_type), err)]
Ok("lol".into_response()) async fn webhook(
State(app_state): State<AppState>,
WebhookExtract(wb): WebhookExtract,
) -> Result<impl IntoResponse, AppError> {
tracing::Span::current().record("webhook_type", tracing::field::debug(&wb));
let event_id = wb.event_id();
if app_state.bot.check_and_mark(event_id).await {
return Err(AppError::AlreadyProcessedErr);
}
if app_state.bot_tx.try_send(wb).is_err() {
app_state.bot.unmark(event_id).await;
return Err(AppError::ChannelFullErr);
}
Ok((StatusCode::CREATED, "Task started"))
} }
pub struct WebhookExtract(pub WebhookType); pub struct WebhookExtract(pub WebhookType);
@@ -44,6 +80,7 @@ where
{ {
type Rejection = AppError; type Rejection = AppError;
#[instrument(skip(req, state), err)]
async fn from_request(req: axum::extract::Request, state: &S) -> Result<Self, Self::Rejection> { async fn from_request(req: axum::extract::Request, state: &S) -> Result<Self, Self::Rejection> {
let app_state = AppState::from_ref(state); let app_state = AppState::from_ref(state);
let headers = req.headers(); let headers = req.headers();
@@ -58,6 +95,16 @@ where
&body_bytes, &body_bytes,
)?; )?;
let body_str = String::from_utf8_lossy(&body_bytes).into_owned();
sentry::configure_scope(|scope| {
scope.add_event_processor(move |mut event| {
let mut request = event.request.take().unwrap_or_default();
request.data = Some(body_str.clone());
event.request = Some(request);
Some(event)
});
});
let webhook = parse_webhook(&type_header, &app_state.config.bot_name, &body_bytes)?; let webhook = parse_webhook(&type_header, &app_state.config.bot_name, &body_bytes)?;
Ok(WebhookExtract(webhook)) Ok(WebhookExtract(webhook))
} }
+202 -4
View File
@@ -1,13 +1,211 @@
use crate::{env::EnvConfig, gitea::WebhookType}; use crate::{
env::EnvConfig,
gitea::{GiteaAPI, WebhookType},
open_router::OpenRouterClient,
};
use serde::Deserialize;
use std::{collections::HashSet, sync::Arc, time::Duration};
use tokio::sync::Mutex;
use tokio_util::sync::CancellationToken;
use tracing::{error, info, instrument};
#[derive(Deserialize, Debug)]
pub struct ReviewResult {
pub reviews: Vec<ReviewItem>,
pub comment: String,
pub cost: Option<f64>,
}
#[derive(Deserialize, Debug)]
pub struct ReviewItem {
pub filename: String,
pub line: Option<u64>,
pub code: String,
pub message: String,
}
#[derive(Clone)]
pub struct Bot { pub struct Bot {
config: EnvConfig, config: EnvConfig,
gitea_api: GiteaAPI,
open_router_client: OpenRouterClient,
http_client: reqwest::Client,
max_concurrent: usize,
actions_handled: Arc<Mutex<HashSet<u64>>>,
} }
impl Bot { impl Bot {
pub fn new(config: EnvConfig) -> Self { pub fn new(config: EnvConfig) -> anyhow::Result<Self> {
Self { config } let gitea_timeout = config.gitea_timeout;
let open_router_timeout = config.open_router_timeout;
Ok(Self {
gitea_api: GiteaAPI::new(&config.gitea_url, &config.gitea_token, gitea_timeout)?,
open_router_client: OpenRouterClient::new(
&config.open_router_api_key,
&config.open_router_model,
open_router_timeout,
)?,
max_concurrent: config.bot_max_concurrent,
config,
actions_handled: Arc::new(Mutex::new(HashSet::new())),
http_client: reqwest::Client::builder()
.timeout(Duration::from_secs(gitea_timeout))
.build()?,
})
} }
pub async fn exec(&self, webhook: WebhookType) {} pub async fn start(
&self,
mut rx: tokio::sync::mpsc::Receiver<WebhookType>,
shutdown: CancellationToken,
) -> anyhow::Result<()> {
info!("Bot started");
let sem = Arc::new(tokio::sync::Semaphore::new(self.max_concurrent));
let mut tasks = tokio::task::JoinSet::new();
loop {
let wb = tokio::select! {
biased;
_ = shutdown.cancelled() => break,
msg = rx.recv() => match msg {
Some(wb) => wb,
None => break,
},
};
while let Some(res) = tasks.try_join_next() {
if let Err(e) = res {
error!("Task panicked: {e}");
}
}
info!(queued = rx.len(), active = tasks.len(), "Webhook received");
let permit = sem.clone().acquire_owned().await?;
let self_clone = self.clone();
tasks.spawn(async move {
self_clone.exec(wb).await;
drop(permit);
});
}
tasks.join_all().await;
info!("Bot shutting down complete");
Ok(())
}
#[instrument(skip(self, webhook), fields(repo, pr))]
pub async fn exec(&self, webhook: WebhookType) {
match &webhook {
WebhookType::Review(p) => {
tracing::Span::current().record("repo", &p.repository.full_name);
tracing::Span::current().record("pr", p.pull_request.number);
}
};
let exec_result = match webhook {
WebhookType::Review(review_payload) => crate::bot_actions::review::exec_review(
&self.gitea_api,
&self.open_router_client,
&self.http_client,
&self.config.open_router_model,
review_payload,
),
}
.await;
match exec_result {
Ok(_) => info!("Task completed"),
Err(err) => {
error!(%err, "Task error");
sentry_anyhow::capture_anyhow(&err);
}
}
}
pub async fn check_and_mark(&self, event_id: u64) -> bool {
let mut action_handled_lock = self.actions_handled.lock().await;
!action_handled_lock.insert(event_id)
}
pub async fn unmark(&self, event_id: u64) {
let mut action_handled_lock = self.actions_handled.lock().await;
action_handled_lock.remove(&event_id);
}
}
#[cfg(test)]
mod tests {
use super::*;
fn make_actions_handled() -> Arc<Mutex<HashSet<u64>>> {
Arc::new(Mutex::new(HashSet::new()))
}
async fn check_and_mark(actions_handled: &Arc<Mutex<HashSet<u64>>>, event_id: u64) -> bool {
let mut lock = actions_handled.lock().await;
!lock.insert(event_id)
}
async fn unmark(actions_handled: &Arc<Mutex<HashSet<u64>>>, event_id: u64) {
let mut lock = actions_handled.lock().await;
lock.remove(&event_id);
}
#[tokio::test]
async fn test_check_and_mark_first_call_returns_false() {
let actions_handled = make_actions_handled();
assert!(!check_and_mark(&actions_handled, 1).await);
}
#[tokio::test]
async fn test_check_and_mark_second_call_returns_true() {
let actions_handled = make_actions_handled();
check_and_mark(&actions_handled, 1).await;
assert!(check_and_mark(&actions_handled, 1).await);
}
#[tokio::test]
async fn test_check_and_mark_different_ids_are_independent() {
let actions_handled = make_actions_handled();
check_and_mark(&actions_handled, 1).await;
assert!(!check_and_mark(&actions_handled, 2).await);
}
#[tokio::test]
async fn test_unmark_allows_reprocessing() {
let actions_handled = make_actions_handled();
check_and_mark(&actions_handled, 1).await;
unmark(&actions_handled, 1).await;
assert!(!check_and_mark(&actions_handled, 1).await);
}
#[tokio::test]
async fn test_unmark_nonexistent_id_is_noop() {
let actions_handled = make_actions_handled();
unmark(&actions_handled, 99).await;
assert!(!check_and_mark(&actions_handled, 99).await);
}
#[tokio::test]
async fn test_check_and_mark_concurrent_same_id() {
let actions_handled = make_actions_handled();
let actions_handled2 = Arc::clone(&actions_handled);
let t1 = tokio::spawn(async move { check_and_mark(&actions_handled, 42).await });
let t2 = tokio::spawn(async move { check_and_mark(&actions_handled2, 42).await });
let (r1, r2) = tokio::join!(t1, t2);
let results = [r1.unwrap(), r2.unwrap()];
// exactement un seul des deux doit retourner false (non traité)
assert_eq!(results.iter().filter(|&&r| !r).count(), 1);
// l'autre doit retourner true (déjà traité)
assert_eq!(results.iter().filter(|&&r| r).count(), 1);
}
} }
+1
View File
@@ -0,0 +1 @@
pub mod review;
+214
View File
@@ -0,0 +1,214 @@
use futures_util::stream::TryStreamExt;
use tokio::io::AsyncReadExt;
use tokio_util::io::StreamReader;
use tracing::instrument;
use crate::{
bot::ReviewResult,
consts::{BOT_PROCESS_MSG, MAX_DIFF_SIZE, REVIEW_PROMPT},
gitea::{GiteaAPI, ReviewPayload},
open_router::OpenRouterClient,
};
#[instrument(skip(gitea_api, open_router_client, http_client, review_payload))]
pub async fn exec_review(
gitea_api: &GiteaAPI,
open_router_client: &OpenRouterClient,
http_client: &reqwest::Client,
model: &str,
review_payload: ReviewPayload,
) -> anyhow::Result<()> {
tracing::info!(
repo = %review_payload.repository.full_name,
pr = review_payload.pull_request.number,
action = %review_payload.action,
"Starting review"
);
let new_comment = gitea_api
.comment(
&BOT_PROCESS_MSG.replace("{model}", model),
&review_payload.repository.full_name,
review_payload.pull_request.number,
)
.await?;
let bot_result: Result<ReviewResult, anyhow::Error> = async {
let git_diff =
download_git_diff(http_client, &review_payload.pull_request.diff_url).await?;
let diff_for_llm = format_diff_for_review(&git_diff);
let bot_request = REVIEW_PROMPT
.replace("{subject}", &review_payload.pull_request.title)
.replace("{comment}", &review_payload.comment.body)
.replace("{diff}", &diff_for_llm);
let chat_result = open_router_client.chat(&bot_request).await?;
let mut review_result = serde_json::from_str::<ReviewResult>(&chat_result.message)?;
review_result.cost = chat_result.cost;
let final_review_markdown = review_result_to_markdown(&review_result);
gitea_api
.post_pull_request_review(
&review_result,
&final_review_markdown,
&review_payload.repository.full_name,
review_payload.pull_request.number,
)
.await?;
Ok(review_result)
}
.await;
match bot_result {
Ok(_) => {
gitea_api
.delete_comment(&review_payload.repository.full_name, new_comment.id)
.await
}
Err(e) => {
gitea_api
.edit_comment(
&format!("Error while reviewing: {}", e),
&review_payload.repository.full_name,
new_comment.id,
)
.await
}
}
}
fn review_result_to_markdown(review_result: &ReviewResult) -> String {
if review_result.reviews.is_empty() {
return String::from("No issues found. ✅");
}
let mut md = String::from("## Review Feedback\n\n");
md.push_str(&format!(
"### {} issues found.\n\n",
review_result.reviews.len()
));
if !review_result.comment.is_empty() {
md.push_str("\n---\n\n");
md.push_str("### Summary\n\n");
md.push_str(&review_result.comment);
md.push('\n');
}
if let Some(cost) = review_result.cost {
md.push_str("\n---\n\n");
md.push_str(&format!("### Cost: ${}", cost));
md.push('\n');
}
md
}
async fn download_git_diff(http_client: &reqwest::Client, url: &str) -> anyhow::Result<String> {
let response = http_client.get(url).send().await?;
let stream = response.bytes_stream().map_err(std::io::Error::other);
let mut buf = Vec::with_capacity(MAX_DIFF_SIZE);
StreamReader::new(stream)
.take((MAX_DIFF_SIZE + 1) as u64)
.read_to_end(&mut buf)
.await?;
if buf.len() > MAX_DIFF_SIZE {
anyhow::bail!("Git diff exceeds the maximum allowed size of 1 Mo");
}
Ok(String::from_utf8_lossy(&buf).into_owned())
}
fn format_diff_for_review(git_diff: &str) -> String {
let mut output = String::new();
let mut current_file: Option<&str> = None;
let mut new_line: u64 = 0;
for line in git_diff.lines() {
if let Some(rest) = line.strip_prefix("diff --git a/") {
if let Some(end) = rest.find(' ') {
current_file = Some(&rest[..end]);
}
new_line = 0;
continue;
}
if line.starts_with("---") || line.starts_with("+++") {
continue;
}
if line.starts_with("@@") && line.contains('+') {
if let Some(start) = parse_hunk_new_start(line) {
new_line = start;
}
continue;
}
let Some(filename) = current_file else {
continue;
};
if line.starts_with(' ') {
new_line += 1;
continue;
}
if let Some(code) = line.strip_prefix('+') {
use std::fmt::Write;
let _ = writeln!(output, "{filename}:{new_line}:{code}");
new_line += 1;
}
}
output
}
fn parse_hunk_new_start(hunk_header: &str) -> Option<u64> {
let plus_part = hunk_header.split('+').nth(1)?;
let num_str = plus_part.split(|c: char| !c.is_ascii_digit()).next()?;
num_str.parse::<u64>().ok()
}
#[cfg(test)]
#[test]
fn test_format_diff_for_review() {
let diff = concat!(
"diff --git a/src/foo.rs b/src/foo.rs\n",
"--- a/src/foo.rs\n",
"+++ b/src/foo.rs\n",
"@@ -1,3 +1,6 @@\n",
" fn main() {\n",
"+ let x = 1;\n",
" println!(\"hello\");\n",
"+ let y = 2;\n",
"+ let z = 3;\n",
" }\n",
"diff --git a/src/bar.rs b/src/bar.rs\n",
"--- a/src/bar.rs\n",
"+++ b/src/bar.rs\n",
"@@ -10,4 +10,6 @@\n",
" old context\n",
"+ let a = 10;\n",
" more context\n",
"+ let b = 20;\n",
);
let result = format_diff_for_review(diff);
let expected = concat!(
"src/foo.rs:2: let x = 1;\n",
"src/foo.rs:4: let y = 2;\n",
"src/foo.rs:5: let z = 3;\n",
"src/bar.rs:11: let a = 10;\n",
"src/bar.rs:13: let b = 20;\n",
);
assert_eq!(result, expected);
}
+41
View File
@@ -1,3 +1,44 @@
pub const GITEA_SIG_HEADER_NAME: &str = "x-gitea-signature"; pub const GITEA_SIG_HEADER_NAME: &str = "x-gitea-signature";
pub const GITEA_EVENT_TYPE_HEADER_NAME: &str = "x-gitea-event-type"; pub const GITEA_EVENT_TYPE_HEADER_NAME: &str = "x-gitea-event-type";
pub const MAX_WEBHOOK_BODY_SIZE: usize = 1024 * 1024; // 1 MiB pub const MAX_WEBHOOK_BODY_SIZE: usize = 1024 * 1024; // 1 MiB
pub const MAX_DIFF_SIZE: usize = 1024 * 1024; // 1 MiB
pub const BOT_PROCESS_MSG: &str = "
Review in progress with the model \"{model}\"...
";
pub const REVIEW_PROMPT: &str = "
You are a senior software engineer reviewing code changes.
Check good practices and code quality.
This is the pull request subject: \"{subject}\"
This is the user comment: \"{comment}\"
The code changes (only added lines, with line numbers):
{diff}
Please review the code changes and provide feedback.
IMPORTANT: the `line` field must be the line number shown before each line.
The provided code has the format: `filename:line:code`
Return your feedback, in french, with only this json format, reviews must contain each review
All fields are mandatory.
(filename field must contain the full path with extension) and comment must contain a final summary:
{
\"reviews\": [
{
\"filename\": \"\",
\"line\": ,
\"code\": \"\",
\"message\": \"\"
}
],
\"comment\": \"\"
}
";
+53 -7
View File
@@ -1,36 +1,82 @@
use anyhow::anyhow; use anyhow::anyhow;
use dotenvy::dotenv;
#[derive(Clone)] #[derive(Clone)]
pub struct EnvConfig { pub struct EnvConfig {
pub http_port: u16, pub http_port: u16,
pub webhook_secret: String, pub webhook_secret: String,
pub open_router_api_key: String, pub open_router_api_key: String,
pub open_router_model: String,
pub open_router_timeout: u64,
pub bot_name: String, pub bot_name: String,
pub bot_max_concurrent: usize,
pub gitea_url: String,
pub gitea_token: String,
pub gitea_timeout: u64,
} }
pub fn load_config() -> anyhow::Result<EnvConfig> { pub fn load_config() -> anyhow::Result<EnvConfig> {
dotenv().ok();
let http_port = try_get_env("HTTP_PORT")?.parse()?; let http_port = try_get_env("HTTP_PORT")?.parse()?;
let bot_name = try_get_env("BOT_NAME")?; let bot_name = try_get_env("BOT_NAME")?;
let webhook_secret = try_get_env("WEBHOOK_SIG_HEADER_SECRET")?; let webhook_secret = try_get_env("WEBHOOK_SIG_HEADER_SECRET")?;
let open_router_api_key = try_get_env("OPEN_ROUTER_API_KEY")?; let open_router_api_key = try_get_env("OPEN_ROUTER_API_KEY")?;
let open_router_model = try_get_env("OPEN_ROUTER_MODEL")?;
let open_router_timeout = try_get_env("OPEN_ROUTER_TIMEOUT")?.parse()?;
let bot_max_concurrent = try_get_env("BOT_MAX_CONCURRENT")?.parse()?;
let gitea_url = try_get_env("GITEA_URL")?;
let gitea_token = try_get_env("GITEA_TOKEN")?;
let gitea_timeout = try_get_env("GITEA_TIMEOUT")?.parse()?;
Ok(EnvConfig { Ok(EnvConfig {
http_port, http_port,
webhook_secret, webhook_secret,
bot_name, bot_name,
open_router_api_key, open_router_api_key,
open_router_model,
open_router_timeout,
bot_max_concurrent,
gitea_url,
gitea_token,
gitea_timeout,
}) })
} }
fn try_get_env(key: &str) -> anyhow::Result<String> { pub fn try_get_env(key: &str) -> anyhow::Result<String> {
let env = std::env::var(key)?; let env_value = std::env::var(key).map_err(|e| anyhow::anyhow!("{}: {}", key, e))?;
if env.trim().is_empty() { if env_value.trim().is_empty() {
return Err(anyhow!(format!("env var {} is empty", key))); return Err(anyhow!(format!("env var {} is empty", key)));
} }
Ok(env) Ok(env_value)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_try_get_env_returns_value() {
unsafe { std::env::set_var("TEST_ENV_PRESENT", "hello") };
assert_eq!(try_get_env("TEST_ENV_PRESENT").unwrap(), "hello");
}
#[test]
fn test_try_get_env_missing_var_returns_error() {
unsafe { std::env::remove_var("TEST_ENV_MISSING") };
assert!(try_get_env("TEST_ENV_MISSING").is_err());
}
#[test]
fn test_try_get_env_empty_value_returns_error() {
unsafe { std::env::set_var("TEST_ENV_EMPTY", "") };
let err = try_get_env("TEST_ENV_EMPTY").unwrap_err();
assert!(err.to_string().contains("TEST_ENV_EMPTY"));
}
#[test]
fn test_try_get_env_whitespace_only_returns_error() {
unsafe { std::env::set_var("TEST_ENV_WHITESPACE", " ") };
let err = try_get_env("TEST_ENV_WHITESPACE").unwrap_err();
assert!(err.to_string().contains("TEST_ENV_WHITESPACE"));
}
} }
+22 -4
View File
@@ -1,3 +1,4 @@
use anyhow::anyhow;
use axum::response::IntoResponse; use axum::response::IntoResponse;
use reqwest::StatusCode; use reqwest::StatusCode;
@@ -21,6 +22,12 @@ pub enum AppError {
#[error("WebHook have bad action")] #[error("WebHook have bad action")]
InvalidActionErr, InvalidActionErr,
#[error("Channel full")]
ChannelFullErr,
#[error("Already processed")]
AlreadyProcessedErr,
#[error(transparent)] #[error(transparent)]
BadJsonStructErr(#[from] serde_json::Error), BadJsonStructErr(#[from] serde_json::Error),
@@ -54,10 +61,21 @@ impl IntoResponse for AppError {
StatusCode::UNAUTHORIZED, StatusCode::UNAUTHORIZED,
"WebHook sig header is invalid".to_string(), "WebHook sig header is invalid".to_string(),
), ),
AppError::Other(_) => ( AppError::AlreadyProcessedErr => (StatusCode::OK, "Already processed".to_string()),
StatusCode::INTERNAL_SERVER_ERROR, AppError::ChannelFullErr => {
"Internal server error".to_string(), sentry_anyhow::capture_anyhow(&anyhow!("Max concurrent tasks reached"));
), (
StatusCode::SERVICE_UNAVAILABLE,
"Max concurrent tasks reached".to_string(),
)
}
AppError::Other(err) => {
sentry_anyhow::capture_anyhow(&err);
(
StatusCode::INTERNAL_SERVER_ERROR,
"Internal server error".to_string(),
)
}
} }
.into_response() .into_response()
} }
+207 -16
View File
@@ -1,17 +1,176 @@
use serde::Deserialize; use std::time::Duration;
use serde_json::Value;
use crate::errors::AppError; use serde::Deserialize;
use serde_json::{Value, json};
use tracing::instrument;
use crate::{bot::ReviewResult, errors::AppError};
#[derive(Clone)]
pub struct GiteaAPI {
base_url: String,
client: reqwest::Client,
}
impl GiteaAPI {
pub fn new(base_url: &str, token: &str, timeout: u64) -> anyhow::Result<Self> {
let mut default_headers = reqwest::header::HeaderMap::new();
default_headers.insert(
reqwest::header::HeaderName::from_static("authorization"),
reqwest::header::HeaderValue::from_str(&format!("Bearer {}", token))?,
);
Ok(Self {
base_url: String::from(base_url),
client: reqwest::Client::builder()
.timeout(Duration::from_secs(timeout))
.default_headers(default_headers)
.build()?,
})
}
#[instrument(skip(self))]
pub async fn comment(
&self,
body: &str,
full_name: &str,
index: u64,
) -> anyhow::Result<Comment> {
let url = format!(
"{}/api/v1/repos/{}/issues/{}/comments",
self.base_url, full_name, index
);
let res = self
.client
.post(url)
.json(&json!({
"body": body
}))
.send()
.await?;
if !res.status().is_success() {
return Err(anyhow::anyhow!("Failed to comment: {}", res.status()));
}
res.json::<Comment>().await.map_err(anyhow::Error::from)
}
#[instrument(skip(self))]
pub async fn edit_comment(
&self,
body: &str,
full_name: &str,
comment_id: u64,
) -> anyhow::Result<()> {
let url = format!(
"{}/api/v1/repos/{}/issues/comments/{}",
self.base_url, full_name, comment_id
);
let res = self
.client
.patch(url)
.json(&json!({
"body": body
}))
.send()
.await?;
if !res.status().is_success() {
return Err(anyhow::anyhow!("Failed to comment: {}", res.status()));
}
Ok(())
}
#[instrument(skip(self))]
pub async fn delete_comment(&self, full_name: &str, comment_id: u64) -> anyhow::Result<()> {
let url = format!(
"{}/api/v1/repos/{}/issues/comments/{}",
self.base_url, full_name, comment_id
);
let res = self.client.delete(url).send().await?;
if !res.status().is_success() {
return Err(anyhow::anyhow!(
"Failed to delete comment: {}",
res.status()
));
}
Ok(())
}
#[instrument(skip(self, review_result))]
pub async fn post_pull_request_review(
&self,
review_result: &ReviewResult,
final_comment: &str,
full_name: &str,
index: u64,
) -> anyhow::Result<()> {
let url = format!(
"{}/api/v1/repos/{}/pulls/{}/reviews",
self.base_url, full_name, index
);
let comments = &review_result
.reviews
.iter()
.filter(|r| r.line.is_some())
.map(|r| {
let path = r.filename.clone();
let line = r.line.unwrap_or(0);
let body = r.message.clone();
json!({
"path": path,
"new_position": line,
"body": body
})
})
.collect::<Vec<_>>();
let res = self
.client
.post(url)
.json(&json!({
"event": "COMMENT",
"body": final_comment,
"comments": comments
}))
.send()
.await?;
if !res.status().is_success() {
return Err(anyhow::anyhow!("Failed to post review: {}", res.status()));
}
Ok(())
}
}
#[derive(Debug)] #[derive(Debug)]
pub enum WebhookType { pub enum WebhookType {
Review(ReviewPayload), Review(ReviewPayload),
} }
impl WebhookType {
pub fn event_id(&self) -> u64 {
match self {
WebhookType::Review(payload) => payload.comment.id,
}
}
}
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
pub struct ReviewPayload { pub struct ReviewPayload {
pub action: String, pub action: String,
pub pull_request: PullRequest, pub pull_request: PullRequest,
pub repository: Repository,
pub comment: Comment, pub comment: Comment,
} }
@@ -19,6 +178,8 @@ pub struct ReviewPayload {
pub struct PullRequest { pub struct PullRequest {
pub id: u64, pub id: u64,
pub diff_url: String, pub diff_url: String,
pub number: u64,
pub title: String,
} }
#[derive(Deserialize, Debug)] #[derive(Deserialize, Debug)]
@@ -33,6 +194,11 @@ pub struct User {
pub id: u64, pub id: u64,
} }
#[derive(Deserialize, Debug)]
pub struct Repository {
pub full_name: String,
}
impl WebhookType { impl WebhookType {
pub fn from_event(event: &str, bot_name: &str, json: Value) -> Result<Self, AppError> { pub fn from_event(event: &str, bot_name: &str, json: Value) -> Result<Self, AppError> {
let wb = match event { let wb = match event {
@@ -40,14 +206,6 @@ impl WebhookType {
_ => Err(AppError::UnknownEventErr), _ => Err(AppError::UnknownEventErr),
}?; }?;
let pr_body = match &wb {
WebhookType::Review(review_payload) => &review_payload.comment.body,
};
if !pr_body.starts_with(&format!("@{}", bot_name)) {
return Err(AppError::UnauthorizedUserErr);
}
let action = match &wb { let action = match &wb {
WebhookType::Review(review_payload) => &review_payload.action, WebhookType::Review(review_payload) => &review_payload.action,
}; };
@@ -56,6 +214,14 @@ impl WebhookType {
return Err(AppError::InvalidActionErr); return Err(AppError::InvalidActionErr);
} }
let pr_body = match &wb {
WebhookType::Review(review_payload) => &review_payload.comment.body,
};
if !pr_body.starts_with(&format!("@{}", bot_name)) {
return Err(AppError::UnauthorizedUserErr);
}
Ok(wb) Ok(wb)
} }
} }
@@ -71,7 +237,12 @@ mod tests {
"action": "created", "action": "created",
"pull_request": { "pull_request": {
"id": 42, "id": 42,
"diff_url": "https://mydiff.fr" "diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
}, },
"comment": { "comment": {
"id": 7, "id": 7,
@@ -130,7 +301,12 @@ mod tests {
"action": "edited", "action": "edited",
"pull_request": { "pull_request": {
"id": 1, "id": 1,
"diff_url": "https://mydiff.fr" "diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
}, },
"comment": { "comment": {
"id": 1, "id": 1,
@@ -156,7 +332,12 @@ mod tests {
"action": "created", "action": "created",
"pull_request": { "pull_request": {
"id": 99, "id": 99,
"diff_url": "https://mydiff.fr" "diff_url": "https://mydiff.fr",
"number": 5,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
}, },
"comment": { "comment": {
"id": 12, "id": 12,
@@ -188,7 +369,12 @@ mod tests {
"action": "created", "action": "created",
"pull_request": { "pull_request": {
"id": 1, "id": 1,
"diff_url": "https://mydiff.fr" "diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
}, },
"comment": { "comment": {
"id": 1, "id": 1,
@@ -209,7 +395,12 @@ mod tests {
"action": "created", "action": "created",
"pull_request": { "pull_request": {
"id": 1, "id": 1,
"diff_url": "https://mydiff.fr" "diff_url": "https://mydiff.fr",
"number": 1,
"title": "My PR"
},
"repository": {
"full_name": "owner/repo"
}, },
"comment": { "comment": {
"id": 1, "id": 1,
+78 -9
View File
@@ -1,25 +1,94 @@
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::Mutex; use crate::{bot::Bot, gitea::WebhookType, state::AppState};
use crate::{bot::Bot, state::AppState}; use dotenvy::dotenv;
use tokio::signal::unix::{SignalKind, signal};
use tokio_util::sync::CancellationToken;
use tracing::{info, warn};
use tracing_subscriber::{EnvFilter, fmt, layer::SubscriberExt, util::SubscriberInitExt};
mod api; mod api;
mod bot; mod bot;
mod bot_actions;
mod consts; mod consts;
mod env; mod env;
mod errors; mod errors;
mod gitea; mod gitea;
mod open_router;
mod state; mod state;
#[tokio::main] fn main() -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> { dotenv().ok();
let config = env::load_config()?;
let app_state = AppState { tracing_subscriber::registry()
bot: Arc::new(Mutex::new(Bot::new(config.clone()))), .with(fmt::layer())
config: config, .with(
EnvFilter::try_from_default_env() // lit RUST_LOG depuis l'env
.unwrap_or_else(|_| EnvFilter::new("info")),
)
.init();
let _sentry_guard = if let Ok(sentry_dsn) = env::try_get_env("SENTRY_DSN") {
info!("Initialize sentry");
Some(sentry::init((
sentry_dsn,
sentry::ClientOptions {
release: sentry::release_name!(),
send_default_pii: true,
..Default::default()
},
)))
} else {
warn!("SENTRY_DSN not set, sentry will not be initialized");
None
}; };
api::start(app_state).await tokio::runtime::Runtime::new()?.block_on(run())
}
async fn run() -> anyhow::Result<()> {
let config = env::load_config()?;
info!(
port = config.http_port,
model = %config.open_router_model,
gitea_url = %config.gitea_url,
bot_name = %config.bot_name,
"Starting Herald"
);
let shutdown = CancellationToken::new();
let bot = Bot::new(config.clone())?;
let (tx, rx) = tokio::sync::mpsc::channel::<WebhookType>(config.bot_max_concurrent * 2);
let app_state = AppState {
bot_tx: tx,
bot: bot.clone(),
config,
};
let signal = async {
let mut sigterm = signal(SignalKind::terminate())?;
let mut sigint = signal(SignalKind::interrupt())?;
tokio::select! {
_ = sigterm.recv() => info!("Received SIGTERM"),
_ = sigint.recv() => info!("Received SIGINT"),
}
info!("Shutting down...");
shutdown.cancel();
anyhow::Ok(())
};
tokio::try_join!(
bot.start(rx, shutdown.clone()),
api::start(app_state, shutdown.clone()),
signal
)?;
info!("Shutdown complete");
Ok(())
} }
+50
View File
@@ -0,0 +1,50 @@
use std::time::Duration;
use openrouter_rs::{Message, api::chat::ChatCompletionRequest};
use tracing::instrument;
pub struct ChatResult {
pub message: String,
pub cost: Option<f64>,
}
#[derive(Clone)]
pub struct OpenRouterClient {
client: openrouter_rs::OpenRouterClient,
model: String,
}
impl OpenRouterClient {
pub fn new(token: &str, model: &str, timeout: u64) -> anyhow::Result<Self> {
Ok(Self {
client: openrouter_rs::OpenRouterClient::builder()
.api_key(token)
.http_client(
reqwest::Client::builder()
.timeout(Duration::from_secs(timeout))
.build()?,
)
.build()?,
model: String::from(model),
})
}
#[instrument(skip(self), err)]
pub async fn chat(&self, msg: &str) -> anyhow::Result<ChatResult> {
let request = ChatCompletionRequest::builder()
.model(&self.model)
.enable_reasoning()
.messages(vec![Message::new(openrouter_rs::types::Role::User, msg)])
.build()?;
let response = self.client.chat().create(&request).await?;
Ok(ChatResult {
message: response.choices[0]
.content()
.map(String::from)
.ok_or(anyhow::anyhow!("No content"))?,
cost: response.usage.and_then(|u| u.cost),
})
}
}
+3 -5
View File
@@ -1,10 +1,8 @@
use std::sync::Arc; use crate::{bot::Bot, env::EnvConfig, gitea::WebhookType};
use tokio::sync::Mutex;
use crate::{bot::Bot, env::EnvConfig};
#[derive(Clone)] #[derive(Clone)]
pub struct AppState { pub struct AppState {
pub bot: Arc<Mutex<Bot>>, pub bot_tx: tokio::sync::mpsc::Sender<WebhookType>,
pub bot: Bot,
pub config: EnvConfig, pub config: EnvConfig,
} }